diff options
author | Campbell Barton <ideasman42@gmail.com> | 2017-04-27 14:41:57 +0300 |
---|---|---|
committer | Campbell Barton <ideasman42@gmail.com> | 2017-04-27 14:42:08 +0300 |
commit | 98b6c6f2c0227b9a6d9db8bd47ee6cad27f35b4b (patch) | |
tree | 0e13eee0ea3845dcafa0440323635f3ae38a7a5e | |
parent | 21d31f8f58ff1454b663faac0d1260f7c5168a65 (diff) | |
parent | bdf8ad6c4e521884a544f6dbfa244c27720cd4bb (diff) |
Merge branch 'master' into blender2.8
-rw-r--r-- | CMakeLists.txt | 14 | ||||
-rwxr-xr-x | build_files/cmake/cmake_netbeans_project.py | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_globals.h | 16 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_image_opencl.h | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel.cpp | 52 | ||||
-rw-r--r-- | intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h | 82 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_image.h | 13 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_voxel.h | 2 | ||||
-rw-r--r-- | intern/cycles/render/image.cpp | 326 | ||||
-rw-r--r-- | intern/cycles/render/image.h | 15 | ||||
-rw-r--r-- | intern/cycles/render/nodes.cpp | 8 | ||||
-rw-r--r-- | intern/cycles/render/scene.h | 18 | ||||
-rw-r--r-- | intern/cycles/util/util_texture.h | 78 | ||||
-rw-r--r-- | intern/cycles/util/util_types.h | 19 | ||||
-rwxr-xr-x | release/bin/blender-thumbnailer.py | 2 | ||||
-rw-r--r-- | source/blender/editors/space_view3d/drawobject.c | 9 | ||||
-rw-r--r-- | source/blender/editors/space_view3d/drawsimdebug.c | 2 | ||||
-rw-r--r-- | source/blender/editors/space_view3d/view3d_intern.h | 2 | ||||
-rw-r--r-- | source/creator/creator_args.c | 1 |
19 files changed, 356 insertions, 312 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b701cabedd..3f2b3dad520 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,7 +192,7 @@ endif() #----------------------------------------------------------------------------- # Options -# First platform spesific non-cached vars +# First platform specific non-cached vars if(UNIX AND NOT APPLE) set(WITH_X11 ON) endif() @@ -916,16 +916,16 @@ endif() if(MSVC) # for some reason this fails on msvc add_definitions(-D__LITTLE_ENDIAN__) - -# OSX-Note: as we do crosscompiling with specific set architecture, -# endianess-detection and autosetting is counterproductive + +# OSX-Note: as we do cross-compiling with specific set architecture, +# endianess-detection and auto-setting is counterproductive # so we just set endianess according CMAKE_OSX_ARCHITECTURES elseif(CMAKE_OSX_ARCHITECTURES MATCHES i386 OR CMAKE_OSX_ARCHITECTURES MATCHES x86_64) add_definitions(-D__LITTLE_ENDIAN__) -elseif(CMAKE_OSX_ARCHITECTURES MATCHES ppc OR CMAKE_OSX_ARCHITECTURES MATCHES ppc64) +elseif(CMAKE_OSX_ARCHITECTURES MATCHES ppc OR CMAKE_OSX_ARCHITECTURES MATCHES ppc64) add_definitions(-D__BIG_ENDIAN__) - + else() include(TestBigEndian) test_big_endian(_SYSTEM_BIG_ENDIAN) @@ -1526,7 +1526,7 @@ endif() set(CMAKE_C_FLAGS "${C_WARNINGS} ${CMAKE_C_FLAGS} ${PLATFORM_CFLAGS}") set(CMAKE_CXX_FLAGS "${CXX_WARNINGS} ${CMAKE_CXX_FLAGS} ${PLATFORM_CFLAGS}") -# defined above, platform spesific but shared names +# defined above, platform specific but shared names mark_as_advanced( CYCLES_OSL OSL_LIB_EXEC diff --git a/build_files/cmake/cmake_netbeans_project.py b/build_files/cmake/cmake_netbeans_project.py index 5b074b6975a..97eb6b245f5 100755 --- a/build_files/cmake/cmake_netbeans_project.py +++ b/build_files/cmake/cmake_netbeans_project.py @@ -84,7 +84,7 @@ def create_nb_project_main(): make_exe = cmake_cache_var("CMAKE_MAKE_PROGRAM") make_exe_basename = os.path.basename(make_exe) - # --------------- NB spesific + # --------------- NB specific defines = [("%s=%s" % cdef) if cdef[1] else cdef[0] for cdef in defines] defines += [cdef.replace("#define", "").strip() for cdef in cmake_compiler_defines()] diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h index c9c97ea977e..b1f3283d5fc 100644 --- a/intern/cycles/kernel/kernel_globals.h +++ b/intern/cycles/kernel/kernel_globals.h @@ -19,6 +19,10 @@ #ifndef __KERNEL_GLOBALS_H__ #define __KERNEL_GLOBALS_H__ +#ifdef __KERNEL_CPU__ +#include "util/util_vector.h" +#endif + CCL_NAMESPACE_BEGIN /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in @@ -38,12 +42,12 @@ struct Intersection; struct VolumeStep; typedef struct KernelGlobals { - texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_CPU]; - texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_CPU]; - texture_image_half4 texture_half4_images[TEX_NUM_HALF4_CPU]; - texture_image_float texture_float_images[TEX_NUM_FLOAT_CPU]; - texture_image_uchar texture_byte_images[TEX_NUM_BYTE_CPU]; - texture_image_half texture_half_images[TEX_NUM_HALF_CPU]; + vector<texture_image_float4> texture_float4_images; + vector<texture_image_uchar4> texture_byte4_images; + vector<texture_image_half4> texture_half4_images; + vector<texture_image_float> texture_float_images; + vector<texture_image_uchar> texture_byte_images; + vector<texture_image_half> texture_half_images; # define KERNEL_TEX(type, ttype, name) ttype name; # define KERNEL_IMAGE_TEX(type, ttype, name) diff --git a/intern/cycles/kernel/kernel_image_opencl.h b/intern/cycles/kernel/kernel_image_opencl.h index 0352c58037d..15579f55a41 100644 --- a/intern/cycles/kernel/kernel_image_opencl.h +++ b/intern/cycles/kernel/kernel_image_opencl.h @@ -20,18 +20,19 @@ ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset) { + const texture_type = kernel_tex_type(id); /* Float4 */ - if(id < TEX_START_BYTE4_OPENCL) { + if(texture_type == IMAGE_DATA_TYPE_FLOAT4) { return kernel_tex_fetch(__tex_image_float4_packed, offset); } /* Byte4 */ - else if(id < TEX_START_FLOAT_OPENCL) { + else if(texture_type == IMAGE_DATA_TYPE_BYTE4) { uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset); float f = 1.0f/255.0f; return make_float4(r.x*f, r.y*f, r.z*f, r.w*f); } /* Float */ - else if(id < TEX_START_BYTE_OPENCL) { + else if(texture_type == IMAGE_DATA_TYPE_FLOAT) { float f = kernel_tex_fetch(__tex_image_float_packed, offset); return make_float4(f, f, f, 1.0f); } diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp index 16992c681e6..db2de6836d3 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel.cpp +++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp @@ -95,9 +95,12 @@ void kernel_tex_copy(KernelGlobals *kg, else if(strstr(name, "__tex_image_float4")) { texture_image_float4 *tex = NULL; int id = atoi(name + strlen("__tex_image_float4_")); - int array_index = id; + int array_index = kernel_tex_index(id); - if(array_index >= 0 && array_index < TEX_NUM_FLOAT4_CPU) { + if(array_index >= 0) { + if(array_index >= kg->texture_float4_images.size()) { + kg->texture_float4_images.resize(array_index+1); + } tex = &kg->texture_float4_images[array_index]; } @@ -111,9 +114,12 @@ void kernel_tex_copy(KernelGlobals *kg, else if(strstr(name, "__tex_image_float")) { texture_image_float *tex = NULL; int id = atoi(name + strlen("__tex_image_float_")); - int array_index = id - TEX_START_FLOAT_CPU; - - if(array_index >= 0 && array_index < TEX_NUM_FLOAT_CPU) { + int array_index = kernel_tex_index(id); + + if(array_index >= 0) { + if(array_index >= kg->texture_float_images.size()) { + kg->texture_float_images.resize(array_index+1); + } tex = &kg->texture_float_images[array_index]; } @@ -127,9 +133,12 @@ void kernel_tex_copy(KernelGlobals *kg, else if(strstr(name, "__tex_image_byte4")) { texture_image_uchar4 *tex = NULL; int id = atoi(name + strlen("__tex_image_byte4_")); - int array_index = id - TEX_START_BYTE4_CPU; - - if(array_index >= 0 && array_index < TEX_NUM_BYTE4_CPU) { + int array_index = kernel_tex_index(id); + + if(array_index >= 0) { + if(array_index >= kg->texture_byte4_images.size()) { + kg->texture_byte4_images.resize(array_index+1); + } tex = &kg->texture_byte4_images[array_index]; } @@ -143,9 +152,12 @@ void kernel_tex_copy(KernelGlobals *kg, else if(strstr(name, "__tex_image_byte")) { texture_image_uchar *tex = NULL; int id = atoi(name + strlen("__tex_image_byte_")); - int array_index = id - TEX_START_BYTE_CPU; - - if(array_index >= 0 && array_index < TEX_NUM_BYTE_CPU) { + int array_index = kernel_tex_index(id); + + if(array_index >= 0) { + if(array_index >= kg->texture_byte_images.size()) { + kg->texture_byte_images.resize(array_index+1); + } tex = &kg->texture_byte_images[array_index]; } @@ -159,9 +171,12 @@ void kernel_tex_copy(KernelGlobals *kg, else if(strstr(name, "__tex_image_half4")) { texture_image_half4 *tex = NULL; int id = atoi(name + strlen("__tex_image_half4_")); - int array_index = id - TEX_START_HALF4_CPU; - - if(array_index >= 0 && array_index < TEX_NUM_HALF4_CPU) { + int array_index = kernel_tex_index(id); + + if(array_index >= 0) { + if(array_index >= kg->texture_half4_images.size()) { + kg->texture_half4_images.resize(array_index+1); + } tex = &kg->texture_half4_images[array_index]; } @@ -175,9 +190,12 @@ void kernel_tex_copy(KernelGlobals *kg, else if(strstr(name, "__tex_image_half")) { texture_image_half *tex = NULL; int id = atoi(name + strlen("__tex_image_half_")); - int array_index = id - TEX_START_HALF_CPU; - - if(array_index >= 0 && array_index < TEX_NUM_HALF_CPU) { + int array_index = kernel_tex_index(id); + + if(array_index >= 0) { + if(array_index >= kg->texture_half_images.size()) { + kg->texture_half_images.resize(array_index+1); + } tex = &kg->texture_half_images[array_index]; } diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h index af68907a5c2..f6bb4c25012 100644 --- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h +++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h @@ -23,51 +23,59 @@ CCL_NAMESPACE_BEGIN ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y) { - if(tex >= TEX_START_HALF_CPU) - return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y); - else if(tex >= TEX_START_BYTE_CPU) - return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y); - else if(tex >= TEX_START_FLOAT_CPU) - return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y); - else if(tex >= TEX_START_BYTE4_CPU) - return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y); - else - return kg->texture_float4_images[tex].interp(x, y); + switch(kernel_tex_type(tex)) { + case IMAGE_DATA_TYPE_HALF: + return kg->texture_half_images[kernel_tex_index(tex)].interp(x, y); + case IMAGE_DATA_TYPE_BYTE: + return kg->texture_byte_images[kernel_tex_index(tex)].interp(x, y); + case IMAGE_DATA_TYPE_FLOAT: + return kg->texture_float_images[kernel_tex_index(tex)].interp(x, y); + case IMAGE_DATA_TYPE_HALF4: + return kg->texture_half4_images[kernel_tex_index(tex)].interp(x, y); + case IMAGE_DATA_TYPE_BYTE4: + return kg->texture_byte4_images[kernel_tex_index(tex)].interp(x, y); + case IMAGE_DATA_TYPE_FLOAT4: + default: + return kg->texture_float4_images[kernel_tex_index(tex)].interp(x, y); + } } ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z) { - if(tex >= TEX_START_HALF_CPU) - return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z); - else if(tex >= TEX_START_BYTE_CPU) - return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z); - else if(tex >= TEX_START_FLOAT_CPU) - return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z); - else if(tex >= TEX_START_BYTE4_CPU) - return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z); - else - return kg->texture_float4_images[tex].interp_3d(x, y, z); - + switch(kernel_tex_type(tex)) { + case IMAGE_DATA_TYPE_HALF: + return kg->texture_half_images[kernel_tex_index(tex)].interp_3d(x, y, z); + case IMAGE_DATA_TYPE_BYTE: + return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d(x, y, z); + case IMAGE_DATA_TYPE_FLOAT: + return kg->texture_float_images[kernel_tex_index(tex)].interp_3d(x, y, z); + case IMAGE_DATA_TYPE_HALF4: + return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d(x, y, z); + case IMAGE_DATA_TYPE_BYTE4: + return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d(x, y, z); + case IMAGE_DATA_TYPE_FLOAT4: + default: + return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d(x, y, z); + } } ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation) { - if(tex >= TEX_START_HALF_CPU) - return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation); - else if(tex >= TEX_START_BYTE_CPU) - return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation); - else if(tex >= TEX_START_FLOAT_CPU) - return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation); - else if(tex >= TEX_START_HALF4_CPU) - return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation); - else if(tex >= TEX_START_BYTE4_CPU) - return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation); - else - return kg->texture_float4_images[tex].interp_3d_ex(x, y, z, interpolation); + switch(kernel_tex_type(tex)) { + case IMAGE_DATA_TYPE_HALF: + return kg->texture_half_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + case IMAGE_DATA_TYPE_BYTE: + return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + case IMAGE_DATA_TYPE_FLOAT: + return kg->texture_float_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + case IMAGE_DATA_TYPE_HALF4: + return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + case IMAGE_DATA_TYPE_BYTE4: + return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + case IMAGE_DATA_TYPE_FLOAT4: + default: + return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation); + } } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h index 76acc9253a1..4b5e4ebac00 100644 --- a/intern/cycles/kernel/svm/svm_image.h +++ b/intern/cycles/kernel/svm/svm_image.h @@ -151,8 +151,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, # else CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); /* float4, byte4 and half4 */ - if(id < TEX_START_FLOAT_CUDA_KEPLER) + const int texture_type = kernel_tex_type(id); + if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4) { r = kernel_tex_image_interp_float4(tex, x, y); + } /* float, byte and half */ else { float f = kernel_tex_image_interp_float(tex, x, y); @@ -166,8 +168,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, if(use_alpha && alpha != 1.0f && alpha != 0.0f) { r_ssef = r_ssef / ssef(alpha); - if(id >= TEX_NUM_FLOAT4_IMAGES) + const int texture_type = kernel_tex_type(id); + if(texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) { r_ssef = min(r_ssef, ssef(1.0f)); + } r.w = alpha; } @@ -181,8 +185,9 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, r.x *= invw; r.y *= invw; r.z *= invw; - - if(id >= TEX_NUM_FLOAT4_IMAGES) { + + const int texture_type = kernel_tex_type(id); + if(texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) { r.x = min(r.x, 1.0f); r.y = min(r.y, 1.0f); r.z = min(r.z, 1.0f); diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h index 9e826c8c23f..1d97e8344bd 100644 --- a/intern/cycles/kernel/svm/svm_voxel.h +++ b/intern/cycles/kernel/svm/svm_voxel.h @@ -46,7 +46,7 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg, # if defined(__KERNEL_CUDA__) # if __CUDA_ARCH__ >= 300 CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id); - if(id < TEX_START_HALF4_CUDA_KEPLER) + if(kernel_tex_type(id) == IMAGE_DATA_TYPE_FLOAT4 || kernel_tex_type(id) == IMAGE_DATA_TYPE_BYTE4 || kernel_tex_type(id) == IMAGE_DATA_TYPE_HALF4) r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z); else { float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z); diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp index a8c4f446bea..697b9f75658 100644 --- a/intern/cycles/render/image.cpp +++ b/intern/cycles/render/image.cpp @@ -49,54 +49,24 @@ ImageManager::ImageManager(const DeviceInfo& info) } /* Set image limits */ -#define SET_TEX_IMAGES_LIMITS(ARCH) \ - { \ - tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \ - tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \ - tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \ - } - - if(device_type == DEVICE_CPU) { - SET_TEX_IMAGES_LIMITS(CPU); - } - else if(device_type == DEVICE_CUDA) { - if(info.has_bindless_textures) { - SET_TEX_IMAGES_LIMITS(CUDA_KEPLER); - } - else { - SET_TEX_IMAGES_LIMITS(CUDA); + max_num_images = TEX_NUM_MAX; + has_half_images = true; + cuda_fermi_limits = false; + + if(device_type == DEVICE_CUDA) { + if(!info.has_bindless_textures) { + /* CUDA Fermi hardware (SM 2.x) has a hard limit on the number of textures */ + cuda_fermi_limits = true; + has_half_images = false; } } else if(device_type == DEVICE_OPENCL) { - SET_TEX_IMAGES_LIMITS(OPENCL); + has_half_images = false; } - else { - /* Should not happen. */ - tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0; - tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0; - tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0; - tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0; - tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0; - tex_num_images[IMAGE_DATA_TYPE_HALF] = 0; - tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0; - tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0; - tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0; - tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0; - tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0; - tex_start_images[IMAGE_DATA_TYPE_HALF] = 0; - assert(0); + + for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { + tex_num_images[type] = 0; } - -#undef SET_TEX_IMAGES_LIMITS } ImageManager::~ImageManager() @@ -133,7 +103,7 @@ bool ImageManager::set_animation_frame_update(int frame) return false; } -ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filename, +ImageDataType ImageManager::get_image_metadata(const string& filename, void *builtin_data, bool& is_linear) { @@ -226,26 +196,42 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen } } -/* We use a consecutive slot counting scheme on the devices, in order - * float4, byte4, half4, float, byte, half. +/* The lower three bits of a device texture slot number indicate its type. * These functions convert the slot ids from ImageManager "images" ones - * to device ones and vice versa. */ + * to device ones and vice versa. + * + * There are special cases for CUDA Fermi, since there we have only 90 image texture + * slots available and shold keep the flattended numbers in the 0-89 range. + */ int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type) { - return slot + tex_start_images[type]; + if(cuda_fermi_limits) { + if(type == IMAGE_DATA_TYPE_BYTE4) { + return slot + TEX_START_BYTE4_CUDA; + } + else { + return slot; + } + } + + return (slot << IMAGE_DATA_TYPE_SHIFT) | (type); } int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *type) { - for(int i = IMAGE_DATA_NUM_TYPES - 1; i >= 0; i--) { - if(flat_slot >= tex_start_images[i]) { - *type = (ImageDataType)i; - return flat_slot - tex_start_images[i]; + if(cuda_fermi_limits) { + if(flat_slot >= 4) { + *type = IMAGE_DATA_TYPE_BYTE4; + return flat_slot - TEX_START_BYTE4_CUDA; + } + else { + *type = IMAGE_DATA_TYPE_FLOAT4; + return flat_slot; } } - /* Should not happen. */ - return flat_slot; + *type = (ImageDataType)(flat_slot & IMAGE_DATA_TYPE_MASK); + return flat_slot >> IMAGE_DATA_TYPE_SHIFT; } string ImageManager::name_from_type(int type) @@ -299,14 +285,22 @@ int ImageManager::add_image(const string& filename, is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4); /* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */ - if((type == IMAGE_DATA_TYPE_FLOAT || - type == IMAGE_DATA_TYPE_HALF4 || - type == IMAGE_DATA_TYPE_HALF) && - tex_num_images[type] == 0) { - type = IMAGE_DATA_TYPE_FLOAT4; + if(!has_half_images) { + if(type == IMAGE_DATA_TYPE_HALF4) { + type = IMAGE_DATA_TYPE_FLOAT4; + } + else if(type == IMAGE_DATA_TYPE_HALF) { + type = IMAGE_DATA_TYPE_FLOAT; + } } - if(type == IMAGE_DATA_TYPE_BYTE && tex_num_images[type] == 0) { - type = IMAGE_DATA_TYPE_BYTE4; + + if(cuda_fermi_limits) { + if(type == IMAGE_DATA_TYPE_FLOAT) { + type = IMAGE_DATA_TYPE_FLOAT4; + } + else if(type == IMAGE_DATA_TYPE_BYTE) { + type = IMAGE_DATA_TYPE_BYTE4; + } } /* Fnd existing image. */ @@ -338,14 +332,30 @@ int ImageManager::add_image(const string& filename, break; } - if(slot == images[type].size()) { - /* Max images limit reached. */ - if(images[type].size() == tex_num_images[type]) { + /* Count if we're over the limit */ + if(cuda_fermi_limits) { + if(tex_num_images[IMAGE_DATA_TYPE_BYTE4] == TEX_NUM_BYTE4_CUDA + || tex_num_images[IMAGE_DATA_TYPE_FLOAT4] == TEX_NUM_FLOAT4_CUDA) + { printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n", - name_from_type(type).c_str(), tex_num_images[type], filename.c_str()); + name_from_type(type).c_str(), tex_num_images[type], filename.c_str()); return -1; } - + } + else { + /* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */ + int tex_count = 0; + for (int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { + tex_count += tex_num_images[type]; + } + if(tex_count > max_num_images) { + printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n", + max_num_images, filename.c_str()); + return -1; + } + } + + if(slot == images[type].size()) { images[type].resize(images[type].size() + 1); } @@ -362,6 +372,8 @@ int ImageManager::add_image(const string& filename, img->use_alpha = use_alpha; images[type][slot] = img; + + ++tex_num_images[type]; need_update = true; @@ -666,16 +678,12 @@ void ImageManager::device_load_image(Device *device, /* Slot assignment */ int flat_slot = type_index_to_flattened_slot(slot, type); - string name; - if(flat_slot >= 100) - name = string_printf("__tex_image_%s_%d", name_from_type(type).c_str(), flat_slot); - else if(flat_slot >= 10) - name = string_printf("__tex_image_%s_0%d", name_from_type(type).c_str(), flat_slot); - else - name = string_printf("__tex_image_%s_00%d", name_from_type(type).c_str(), flat_slot); + string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot); if(type == IMAGE_DATA_TYPE_FLOAT4) { - device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; + if(dscene->tex_float4_image[slot] == NULL) + dscene->tex_float4_image[slot] = new device_vector<float4>(); + device_vector<float4>& tex_img = *dscene->tex_float4_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -705,7 +713,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_FLOAT) { - device_vector<float>& tex_img = dscene->tex_float_image[slot]; + if(dscene->tex_float_image[slot] == NULL) + dscene->tex_float_image[slot] = new device_vector<float>(); + device_vector<float>& tex_img = *dscene->tex_float_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -732,7 +742,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_BYTE4) { - device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; + if(dscene->tex_byte4_image[slot] == NULL) + dscene->tex_byte4_image[slot] = new device_vector<uchar4>(); + device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -762,7 +774,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_BYTE){ - device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; + if(dscene->tex_byte_image[slot] == NULL) + dscene->tex_byte_image[slot] = new device_vector<uchar>(); + device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -788,7 +802,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_HALF4){ - device_vector<half4>& tex_img = dscene->tex_half4_image[slot]; + if(dscene->tex_half4_image[slot] == NULL) + dscene->tex_half4_image[slot] = new device_vector<half4>(); + device_vector<half4>& tex_img = *dscene->tex_half4_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -817,7 +833,9 @@ void ImageManager::device_load_image(Device *device, } } else if(type == IMAGE_DATA_TYPE_HALF){ - device_vector<half>& tex_img = dscene->tex_half_image[slot]; + if(dscene->tex_half_image[slot] == NULL) + dscene->tex_half_image[slot] = new device_vector<half>(); + device_vector<half>& tex_img = *dscene->tex_half_image[slot]; if(tex_img.device_pointer) { thread_scoped_lock device_lock(device_mutex); @@ -857,69 +875,50 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD ((OSL::TextureSystem*)osl_texture_system)->invalidate(filename); #endif } - else if(type == IMAGE_DATA_TYPE_FLOAT4) { - device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); - } - - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_FLOAT) { - device_vector<float>& tex_img = dscene->tex_float_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); - } - - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_BYTE4) { - device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); - } - - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_BYTE){ - device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); + else { + device_memory *tex_img = NULL; + switch(type) { + case IMAGE_DATA_TYPE_FLOAT4: + tex_img = dscene->tex_float4_image[slot]; + dscene->tex_float4_image[slot] = NULL; + break; + case IMAGE_DATA_TYPE_BYTE4: + tex_img = dscene->tex_byte4_image[slot]; + dscene->tex_byte4_image[slot]= NULL; + break; + case IMAGE_DATA_TYPE_HALF4: + tex_img = dscene->tex_half4_image[slot]; + dscene->tex_half4_image[slot]= NULL; + break; + case IMAGE_DATA_TYPE_FLOAT: + tex_img = dscene->tex_float_image[slot]; + dscene->tex_float_image[slot] = NULL; + break; + case IMAGE_DATA_TYPE_BYTE: + tex_img = dscene->tex_byte_image[slot]; + dscene->tex_byte_image[slot]= NULL; + break; + case IMAGE_DATA_TYPE_HALF: + tex_img = dscene->tex_half_image[slot]; + dscene->tex_half_image[slot]= NULL; + break; + default: + assert(0); + tex_img = NULL; } + if(tex_img) { + if(tex_img->device_pointer) { + thread_scoped_lock device_lock(device_mutex); + device->tex_free(*tex_img); + } - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_HALF4){ - device_vector<half4>& tex_img = dscene->tex_half4_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); + delete tex_img; } - - tex_img.clear(); - } - else if(type == IMAGE_DATA_TYPE_HALF){ - device_vector<half>& tex_img = dscene->tex_half_image[slot]; - - if(tex_img.device_pointer) { - thread_scoped_lock device_lock(device_mutex); - device->tex_free(tex_img); - } - - tex_img.clear(); } delete images[type][slot]; images[type][slot] = NULL; + --tex_num_images[type]; } } @@ -934,6 +933,32 @@ void ImageManager::device_update(Device *device, TaskPool pool; for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) { + switch(type) { + case IMAGE_DATA_TYPE_FLOAT4: + if(dscene->tex_float4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT4]) + dscene->tex_float4_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT4]); + break; + case IMAGE_DATA_TYPE_BYTE4: + if(dscene->tex_byte4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE4]) + dscene->tex_byte4_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE4]); + break; + case IMAGE_DATA_TYPE_HALF4: + if(dscene->tex_half4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF4]) + dscene->tex_half4_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF4]); + break; + case IMAGE_DATA_TYPE_BYTE: + if(dscene->tex_byte_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE]) + dscene->tex_byte_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE]); + break; + case IMAGE_DATA_TYPE_FLOAT: + if(dscene->tex_float_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT]) + dscene->tex_float_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT]); + break; + case IMAGE_DATA_TYPE_HALF: + if(dscene->tex_half_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF]) + dscene->tex_half_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF]); + break; + } for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; @@ -1029,7 +1054,7 @@ void ImageManager::device_pack_images(Device *device, if(!images[type][slot]) continue; - device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; + device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot]; size += tex_img.size(); } @@ -1039,7 +1064,7 @@ void ImageManager::device_pack_images(Device *device, if(!images[type][slot]) continue; - device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; + device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot]; uint8_t options = pack_image_options(type, slot); @@ -1059,7 +1084,7 @@ void ImageManager::device_pack_images(Device *device, if(!images[type][slot]) continue; - device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; + device_vector<float4>& tex_img = *dscene->tex_float4_image[slot]; size += tex_img.size(); } @@ -1069,7 +1094,7 @@ void ImageManager::device_pack_images(Device *device, if(!images[type][slot]) continue; - device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; + device_vector<float4>& tex_img = *dscene->tex_float4_image[slot]; /* todo: support 3D textures, only CPU for now */ @@ -1091,7 +1116,7 @@ void ImageManager::device_pack_images(Device *device, if(!images[type][slot]) continue; - device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; + device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot]; size += tex_img.size(); } @@ -1101,7 +1126,7 @@ void ImageManager::device_pack_images(Device *device, if(!images[type][slot]) continue; - device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; + device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot]; uint8_t options = pack_image_options(type, slot); @@ -1121,7 +1146,7 @@ void ImageManager::device_pack_images(Device *device, if(!images[type][slot]) continue; - device_vector<float>& tex_img = dscene->tex_float_image[slot]; + device_vector<float>& tex_img = *dscene->tex_float_image[slot]; size += tex_img.size(); } @@ -1131,7 +1156,7 @@ void ImageManager::device_pack_images(Device *device, if(!images[type][slot]) continue; - device_vector<float>& tex_img = dscene->tex_float_image[slot]; + device_vector<float>& tex_img = *dscene->tex_float_image[slot]; /* todo: support 3D textures, only CPU for now */ @@ -1201,16 +1226,23 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene) images[type].clear(); } - device->tex_free(dscene->tex_image_byte4_packed); + dscene->tex_float4_image.clear(); + dscene->tex_byte4_image.clear(); + dscene->tex_half4_image.clear(); + dscene->tex_float_image.clear(); + dscene->tex_byte_image.clear(); + dscene->tex_half_image.clear(); + device->tex_free(dscene->tex_image_float4_packed); - device->tex_free(dscene->tex_image_byte_packed); + device->tex_free(dscene->tex_image_byte4_packed); device->tex_free(dscene->tex_image_float_packed); + device->tex_free(dscene->tex_image_byte_packed); device->tex_free(dscene->tex_image_packed_info); - dscene->tex_image_byte4_packed.clear(); dscene->tex_image_float4_packed.clear(); - dscene->tex_image_byte_packed.clear(); + dscene->tex_image_byte4_packed.clear(); dscene->tex_image_float_packed.clear(); + dscene->tex_image_byte_packed.clear(); dscene->tex_image_packed_info.clear(); } diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h index 996b5a5b65f..76c2cc46f12 100644 --- a/intern/cycles/render/image.h +++ b/intern/cycles/render/image.h @@ -37,17 +37,6 @@ public: explicit ImageManager(const DeviceInfo& info); ~ImageManager(); - enum ImageDataType { - IMAGE_DATA_TYPE_FLOAT4 = 0, - IMAGE_DATA_TYPE_BYTE4 = 1, - IMAGE_DATA_TYPE_HALF4 = 2, - IMAGE_DATA_TYPE_FLOAT = 3, - IMAGE_DATA_TYPE_BYTE = 4, - IMAGE_DATA_TYPE_HALF = 5, - - IMAGE_DATA_NUM_TYPES - }; - int add_image(const string& filename, void *builtin_data, bool animated, @@ -124,7 +113,9 @@ public: private: int tex_num_images[IMAGE_DATA_NUM_TYPES]; - int tex_start_images[IMAGE_DATA_NUM_TYPES]; + int max_num_images; + bool has_half_images; + bool cuda_fermi_limits; thread_mutex device_mutex; int animation_frame; diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index f9679d52235..9b565c3ede1 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -364,9 +364,9 @@ void ImageTextureNode::compile(OSLCompiler& compiler) image_manager = compiler.image_manager; if(is_float == -1) { if(builtin_data == NULL) { - ImageManager::ImageDataType type; + ImageDataType type; type = image_manager->get_image_metadata(filename.string(), NULL, is_linear); - if(type == ImageManager::IMAGE_DATA_TYPE_FLOAT || type == ImageManager::IMAGE_DATA_TYPE_FLOAT4) + if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) is_float = 1; } else { @@ -553,9 +553,9 @@ void EnvironmentTextureNode::compile(OSLCompiler& compiler) image_manager = compiler.image_manager; if(is_float == -1) { if(builtin_data == NULL) { - ImageManager::ImageDataType type; + ImageDataType type; type = image_manager->get_image_metadata(filename.string(), NULL, is_linear); - if(type == ImageManager::IMAGE_DATA_TYPE_FLOAT || type == ImageManager::IMAGE_DATA_TYPE_FLOAT4) + if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4) is_float = 1; } else { diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 2b5267642a2..b02f9f35393 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -114,18 +114,18 @@ public: device_vector<uint> sobol_directions; /* cpu images */ - device_vector<uchar4> tex_byte4_image[TEX_NUM_BYTE4_CPU]; - device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_CPU]; - device_vector<float> tex_float_image[TEX_NUM_FLOAT_CPU]; - device_vector<uchar> tex_byte_image[TEX_NUM_BYTE_CPU]; - device_vector<half4> tex_half4_image[TEX_NUM_HALF4_CPU]; - device_vector<half> tex_half_image[TEX_NUM_HALF_CPU]; - + std::vector<device_vector<float4>* > tex_float4_image; + std::vector<device_vector<uchar4>* > tex_byte4_image; + std::vector<device_vector<half4>* > tex_half4_image; + std::vector<device_vector<float>* > tex_float_image; + std::vector<device_vector<uchar>* > tex_byte_image; + std::vector<device_vector<half>* > tex_half_image; + /* opencl images */ - device_vector<uchar4> tex_image_byte4_packed; device_vector<float4> tex_image_float4_packed; - device_vector<uchar> tex_image_byte_packed; + device_vector<uchar4> tex_image_byte4_packed; device_vector<float> tex_image_float_packed; + device_vector<uchar> tex_image_byte_packed; device_vector<uint4> tex_image_packed_info; KernelData data; diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index aff928ea2ee..df255f43059 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -21,62 +21,22 @@ CCL_NAMESPACE_BEGIN /* Texture limits on devices. */ -/* CPU */ -#define TEX_NUM_FLOAT4_CPU 1024 -#define TEX_NUM_BYTE4_CPU 1024 -#define TEX_NUM_HALF4_CPU 1024 -#define TEX_NUM_FLOAT_CPU 1024 -#define TEX_NUM_BYTE_CPU 1024 -#define TEX_NUM_HALF_CPU 1024 -#define TEX_START_FLOAT4_CPU 0 -#define TEX_START_BYTE4_CPU TEX_NUM_FLOAT4_CPU -#define TEX_START_HALF4_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU) -#define TEX_START_FLOAT_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU) -#define TEX_START_BYTE_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU) -#define TEX_START_HALF_CPU (TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU) - /* CUDA (Geforce 4xx and 5xx) */ -#define TEX_NUM_FLOAT4_CUDA 5 -#define TEX_NUM_BYTE4_CUDA 85 -#define TEX_NUM_HALF4_CUDA 0 -#define TEX_NUM_FLOAT_CUDA 0 -#define TEX_NUM_BYTE_CUDA 0 -#define TEX_NUM_HALF_CUDA 0 -#define TEX_START_FLOAT4_CUDA 0 -#define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA -#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA) -#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA) -#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA) -#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA) - -/* CUDA (Kepler, Geforce 6xx and above) */ -#define TEX_NUM_FLOAT4_CUDA_KEPLER 1024 -#define TEX_NUM_BYTE4_CUDA_KEPLER 1024 -#define TEX_NUM_HALF4_CUDA_KEPLER 1024 -#define TEX_NUM_FLOAT_CUDA_KEPLER 1024 -#define TEX_NUM_BYTE_CUDA_KEPLER 1024 -#define TEX_NUM_HALF_CUDA_KEPLER 1024 -#define TEX_START_FLOAT4_CUDA_KEPLER 0 -#define TEX_START_BYTE4_CUDA_KEPLER TEX_NUM_FLOAT4_CUDA_KEPLER -#define TEX_START_HALF4_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER) -#define TEX_START_FLOAT_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER) -#define TEX_START_BYTE_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER) -#define TEX_START_HALF_CUDA_KEPLER (TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER) - -/* OpenCL */ -#define TEX_NUM_FLOAT4_OPENCL 1024 -#define TEX_NUM_BYTE4_OPENCL 1024 -#define TEX_NUM_HALF4_OPENCL 0 -#define TEX_NUM_FLOAT_OPENCL 1024 -#define TEX_NUM_BYTE_OPENCL 1024 -#define TEX_NUM_HALF_OPENCL 0 -#define TEX_START_FLOAT4_OPENCL 0 -#define TEX_START_BYTE4_OPENCL TEX_NUM_FLOAT4_OPENCL -#define TEX_START_HALF4_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL) -#define TEX_START_FLOAT_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL) -#define TEX_START_BYTE_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL) -#define TEX_START_HALF_OPENCL (TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL) - +#define TEX_NUM_FLOAT4_CUDA 5 +#define TEX_NUM_BYTE4_CUDA 84 +#define TEX_NUM_HALF4_CUDA 0 +#define TEX_NUM_FLOAT_CUDA 0 +#define TEX_NUM_BYTE_CUDA 0 +#define TEX_NUM_HALF_CUDA 0 +#define TEX_START_FLOAT4_CUDA 0 +#define TEX_START_BYTE4_CUDA TEX_NUM_FLOAT4_CUDA +#define TEX_START_HALF4_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA) +#define TEX_START_FLOAT_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA) +#define TEX_START_BYTE_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA) +#define TEX_START_HALF_CUDA (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA) + +/* Any architecture other than old CUDA cards */ +#define TEX_NUM_MAX (INT_MAX >> 4) /* Color to use when textures are not found. */ #define TEX_IMAGE_MISSING_R 1 @@ -84,6 +44,14 @@ CCL_NAMESPACE_BEGIN #define TEX_IMAGE_MISSING_B 1 #define TEX_IMAGE_MISSING_A 1 +#if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300) +# define kernel_tex_type(tex) (tex < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4) +# define kernel_tex_index(tex) (tex) +#else +# define kernel_tex_type(tex) (tex & IMAGE_DATA_TYPE_MASK) +# define kernel_tex_index(tex) (tex >> IMAGE_DATA_TYPE_SHIFT) +#endif + CCL_NAMESPACE_END #endif /* __UTIL_TEXTURE_H__ */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 137cc73b70b..296343ecfd3 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -154,6 +154,25 @@ enum InterpolationType { INTERPOLATION_NUM_TYPES, }; +/* Texture types + * Since we store the type in the lower bits of a flat index, + * the shift and bit mask constant below need to be kept in sync. + */ + +enum ImageDataType { + IMAGE_DATA_TYPE_FLOAT4 = 0, + IMAGE_DATA_TYPE_BYTE4 = 1, + IMAGE_DATA_TYPE_HALF4 = 2, + IMAGE_DATA_TYPE_FLOAT = 3, + IMAGE_DATA_TYPE_BYTE = 4, + IMAGE_DATA_TYPE_HALF = 5, + + IMAGE_DATA_NUM_TYPES +}; + +#define IMAGE_DATA_TYPE_SHIFT 3 +#define IMAGE_DATA_TYPE_MASK 0x7 + /* Extension types for textures. * * Defines how the image is extrapolated past its original bounds. diff --git a/release/bin/blender-thumbnailer.py b/release/bin/blender-thumbnailer.py index 5d2dd958a92..e050a681ca0 100755 --- a/release/bin/blender-thumbnailer.py +++ b/release/bin/blender-thumbnailer.py @@ -37,7 +37,7 @@ import struct def open_wrapper_get(): - """ wrap OS spesific read functionality here, fallback to 'open()' + """ wrap OS specific read functionality here, fallback to 'open()' """ class GFileWrapper: diff --git a/source/blender/editors/space_view3d/drawobject.c b/source/blender/editors/space_view3d/drawobject.c index bafd74e4c54..7fe4a1d3570 100644 --- a/source/blender/editors/space_view3d/drawobject.c +++ b/source/blender/editors/space_view3d/drawobject.c @@ -892,7 +892,7 @@ void view3d_cached_text_draw_add(const float co[3], memcpy(vos->str, str, alloc_len); } -void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write, float mat[4][4]) +void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write) { RegionView3D *rv3d = ar->regiondata; ViewCachedString *vos; @@ -902,9 +902,6 @@ void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write, flo /* project first and test */ for (vos = g_v3d_strings[g_v3d_string_level]; vos; vos = vos->next) { - if (mat && !(vos->flag & V3D_CACHE_TEXT_WORLDSPACE)) - mul_m4_v3(mat, vos->vec); - if (ED_view3d_project_short_ex(ar, (vos->flag & V3D_CACHE_TEXT_GLOBALSPACE) ? rv3d->persmat : rv3d->persmatob, (vos->flag & V3D_CACHE_TEXT_LOCALCLIP) != 0, @@ -8913,7 +8910,7 @@ afterdraw: draw_new_particle_system(scene, v3d, rv3d, base, psys, dt, dflag); } invert_m4_m4(ob->imat, ob->obmat); - view3d_cached_text_draw_end(v3d, ar, 0, NULL); + view3d_cached_text_draw_end(v3d, ar, 0); gpuMultMatrix(ob->obmat); @@ -9107,7 +9104,7 @@ afterdraw: /* return warning, this is cached text draw */ invert_m4_m4(ob->imat, ob->obmat); - view3d_cached_text_draw_end(v3d, ar, 1, NULL); + view3d_cached_text_draw_end(v3d, ar, 1); /* return warning, clear temp flag */ v3d->flag2 &= ~V3D_SHOW_SOLID_MATCAP; diff --git a/source/blender/editors/space_view3d/drawsimdebug.c b/source/blender/editors/space_view3d/drawsimdebug.c index e06336c621b..24ac1c5b4db 100644 --- a/source/blender/editors/space_view3d/drawsimdebug.c +++ b/source/blender/editors/space_view3d/drawsimdebug.c @@ -203,7 +203,7 @@ void draw_sim_debug_data(Scene *UNUSED(scene), View3D *v3d, ARegion *ar) view3d_cached_text_draw_begin(); draw_sim_debug_elements(_sim_debug_data, imat); - view3d_cached_text_draw_end(v3d, ar, false, NULL); + view3d_cached_text_draw_end(v3d, ar, false); gpuPopMatrix(); } diff --git a/source/blender/editors/space_view3d/view3d_intern.h b/source/blender/editors/space_view3d/view3d_intern.h index 51293f41fa5..dd9570eb656 100644 --- a/source/blender/editors/space_view3d/view3d_intern.h +++ b/source/blender/editors/space_view3d/view3d_intern.h @@ -168,7 +168,7 @@ void view3d_cached_text_draw_begin(void); void view3d_cached_text_draw_add(const float co[3], const char *str, const size_t str_len, short xoffs, short flag, const unsigned char col[4]); -void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write, float mat[4][4]); +void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write); bool check_object_draw_texture(struct Scene *scene, struct View3D *v3d, const char drawtype); diff --git a/source/creator/creator_args.c b/source/creator/creator_args.c index 3850846b0b9..d7406588952 100644 --- a/source/creator/creator_args.c +++ b/source/creator/creator_args.c @@ -1160,6 +1160,7 @@ static const char arg_handle_threads_set_doc[] = "<threads>\n" "\tUse amount of <threads> for rendering and other operations\n" "\t[1-" STRINGIFY(BLENDER_MAX_THREADS) "], 0 for systems processor count." +"(This must be the first argument)" ; static int arg_handle_threads_set(int argc, const char **argv, void *UNUSED(data)) { |