Merge branch 'master' into blender2.8

author: Campbell Barton <ideasman42@gmail.com> 2017-04-27 14:41:57 +0300
committer: Campbell Barton <ideasman42@gmail.com> 2017-04-27 14:42:08 +0300
commit: 98b6c6f2c0227b9a6d9db8bd47ee6cad27f35b4b (patch)
tree: 0e13eee0ea3845dcafa0440323635f3ae38a7a5e
parent: 21d31f8f58ff1454b663faac0d1260f7c5168a65 (diff)
parent: bdf8ad6c4e521884a544f6dbfa244c27720cd4bb (diff)
19 files changed, 356 insertions, 312 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0b701cabedd..3f2b3dad520 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -192,7 +192,7 @@ endif()
 #-----------------------------------------------------------------------------
 # Options
 
-# First platform spesific non-cached vars
+# First platform specific non-cached vars
 if(UNIX AND NOT APPLE)
 	set(WITH_X11 ON)
 endif()
@@ -916,16 +916,16 @@ endif()
 if(MSVC)
 	# for some reason this fails on msvc
 	add_definitions(-D__LITTLE_ENDIAN__)
-	
-# OSX-Note: as we do crosscompiling with specific set architecture,
-# endianess-detection and autosetting is counterproductive
+
+# OSX-Note: as we do cross-compiling with specific set architecture,
+# endianess-detection and auto-setting is counterproductive
 # so we just set endianess according CMAKE_OSX_ARCHITECTURES
 
 elseif(CMAKE_OSX_ARCHITECTURES MATCHES i386 OR CMAKE_OSX_ARCHITECTURES MATCHES x86_64)
 	add_definitions(-D__LITTLE_ENDIAN__)
-elseif(CMAKE_OSX_ARCHITECTURES MATCHES ppc OR CMAKE_OSX_ARCHITECTURES MATCHES ppc64)		
+elseif(CMAKE_OSX_ARCHITECTURES MATCHES ppc OR CMAKE_OSX_ARCHITECTURES MATCHES ppc64)
 	add_definitions(-D__BIG_ENDIAN__)
-	
+
 else()
 	include(TestBigEndian)
 	test_big_endian(_SYSTEM_BIG_ENDIAN)
@@ -1526,7 +1526,7 @@ endif()
 set(CMAKE_C_FLAGS "${C_WARNINGS} ${CMAKE_C_FLAGS} ${PLATFORM_CFLAGS}")
 set(CMAKE_CXX_FLAGS "${CXX_WARNINGS} ${CMAKE_CXX_FLAGS} ${PLATFORM_CFLAGS}")
 
-# defined above, platform spesific but shared names
+# defined above, platform specific but shared names
 mark_as_advanced(
 	CYCLES_OSL
 	OSL_LIB_EXEC
diff --git a/build_files/cmake/cmake_netbeans_project.py b/build_files/cmake/cmake_netbeans_project.py
index 5b074b6975a..97eb6b245f5 100755
--- a/build_files/cmake/cmake_netbeans_project.py
+++ b/build_files/cmake/cmake_netbeans_project.py
@@ -84,7 +84,7 @@ def create_nb_project_main():
         make_exe = cmake_cache_var("CMAKE_MAKE_PROGRAM")
         make_exe_basename = os.path.basename(make_exe)
 
-        # --------------- NB spesific
+        # --------------- NB specific
         defines = [("%s=%s" % cdef) if cdef[1] else cdef[0] for cdef in defines]
         defines += [cdef.replace("#define", "").strip() for cdef in cmake_compiler_defines()]
 
diff --git a/intern/cycles/kernel/kernel_globals.h b/intern/cycles/kernel/kernel_globals.h
index c9c97ea977e..b1f3283d5fc 100644
--- a/intern/cycles/kernel/kernel_globals.h
+++ b/intern/cycles/kernel/kernel_globals.h
@@ -19,6 +19,10 @@
 #ifndef __KERNEL_GLOBALS_H__
 #define __KERNEL_GLOBALS_H__
 
+#ifdef __KERNEL_CPU__
+#include "util/util_vector.h"
+#endif
+
 CCL_NAMESPACE_BEGIN
 
 /* On the CPU, we pass along the struct KernelGlobals to nearly everywhere in
@@ -38,12 +42,12 @@ struct Intersection;
 struct VolumeStep;
 
 typedef struct KernelGlobals {
-	texture_image_uchar4 texture_byte4_images[TEX_NUM_BYTE4_CPU];
-	texture_image_float4 texture_float4_images[TEX_NUM_FLOAT4_CPU];
-	texture_image_half4 texture_half4_images[TEX_NUM_HALF4_CPU];
-	texture_image_float texture_float_images[TEX_NUM_FLOAT_CPU];
-	texture_image_uchar texture_byte_images[TEX_NUM_BYTE_CPU];
-	texture_image_half texture_half_images[TEX_NUM_HALF_CPU];
+	vector<texture_image_float4> texture_float4_images;
+	vector<texture_image_uchar4> texture_byte4_images;
+	vector<texture_image_half4> texture_half4_images;
+	vector<texture_image_float> texture_float_images;
+	vector<texture_image_uchar> texture_byte_images;
+	vector<texture_image_half> texture_half_images;
 
 #  define KERNEL_TEX(type, ttype, name) ttype name;
 #  define KERNEL_IMAGE_TEX(type, ttype, name)
diff --git a/intern/cycles/kernel/kernel_image_opencl.h b/intern/cycles/kernel/kernel_image_opencl.h
index 0352c58037d..15579f55a41 100644
--- a/intern/cycles/kernel/kernel_image_opencl.h
+++ b/intern/cycles/kernel/kernel_image_opencl.h
@@ -20,18 +20,19 @@
 
 ccl_device_inline float4 svm_image_texture_read(KernelGlobals *kg, int id, int offset)
 {
+	const int texture_type = kernel_tex_type(id);
 	/* Float4 */
-	if(id < TEX_START_BYTE4_OPENCL) {
+	if(texture_type == IMAGE_DATA_TYPE_FLOAT4) {
 		return kernel_tex_fetch(__tex_image_float4_packed, offset);
 	}
 	/* Byte4 */
-	else if(id < TEX_START_FLOAT_OPENCL) {
+	else if(texture_type == IMAGE_DATA_TYPE_BYTE4) {
 		uchar4 r = kernel_tex_fetch(__tex_image_byte4_packed, offset);
 		float f = 1.0f/255.0f;
 		return make_float4(r.x*f, r.y*f, r.z*f, r.w*f);
 	}
 	/* Float */
-	else if(id < TEX_START_BYTE_OPENCL) {
+	else if(texture_type == IMAGE_DATA_TYPE_FLOAT) {
 		float f = kernel_tex_fetch(__tex_image_float_packed, offset);
 		return make_float4(f, f, f, 1.0f);
 	}
diff --git a/intern/cycles/kernel/kernels/cpu/kernel.cpp b/intern/cycles/kernel/kernels/cpu/kernel.cpp
index 16992c681e6..db2de6836d3 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel.cpp
+++ b/intern/cycles/kernel/kernels/cpu/kernel.cpp
@@ -95,9 +95,12 @@ void kernel_tex_copy(KernelGlobals *kg,
 	else if(strstr(name, "__tex_image_float4")) {
 		texture_image_float4 *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_float4_"));
-		int array_index = id;
+		int array_index = kernel_tex_index(id);
 
-		if(array_index >= 0 && array_index < TEX_NUM_FLOAT4_CPU) {
+		if(array_index >= 0) {
+			if(array_index >= kg->texture_float4_images.size()) {
+				kg->texture_float4_images.resize(array_index+1);
+			}
 			tex = &kg->texture_float4_images[array_index];
 		}
 
@@ -111,9 +114,12 @@ void kernel_tex_copy(KernelGlobals *kg,
 	else if(strstr(name, "__tex_image_float")) {
 		texture_image_float *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_float_"));
-		int array_index = id - TEX_START_FLOAT_CPU;
-
-		if(array_index >= 0 && array_index < TEX_NUM_FLOAT_CPU) {
+		int array_index = kernel_tex_index(id);
+		
+		if(array_index >= 0) {
+			if(array_index >= kg->texture_float_images.size()) {
+				kg->texture_float_images.resize(array_index+1);
+			}
 			tex = &kg->texture_float_images[array_index];
 		}
 
@@ -127,9 +133,12 @@ void kernel_tex_copy(KernelGlobals *kg,
 	else if(strstr(name, "__tex_image_byte4")) {
 		texture_image_uchar4 *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_byte4_"));
-		int array_index = id - TEX_START_BYTE4_CPU;
-
-		if(array_index >= 0 && array_index < TEX_NUM_BYTE4_CPU) {
+		int array_index = kernel_tex_index(id);
+		
+		if(array_index >= 0) {
+			if(array_index >= kg->texture_byte4_images.size()) {
+				kg->texture_byte4_images.resize(array_index+1);
+			}
 			tex = &kg->texture_byte4_images[array_index];
 		}
 
@@ -143,9 +152,12 @@ void kernel_tex_copy(KernelGlobals *kg,
 	else if(strstr(name, "__tex_image_byte")) {
 		texture_image_uchar *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_byte_"));
-		int array_index = id - TEX_START_BYTE_CPU;
-
-		if(array_index >= 0 && array_index < TEX_NUM_BYTE_CPU) {
+		int array_index = kernel_tex_index(id);
+		
+		if(array_index >= 0) {
+			if(array_index >= kg->texture_byte_images.size()) {
+				kg->texture_byte_images.resize(array_index+1);
+			}
 			tex = &kg->texture_byte_images[array_index];
 		}
 
@@ -159,9 +171,12 @@ void kernel_tex_copy(KernelGlobals *kg,
 	else if(strstr(name, "__tex_image_half4")) {
 		texture_image_half4 *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_half4_"));
-		int array_index = id - TEX_START_HALF4_CPU;
-
-		if(array_index >= 0 && array_index < TEX_NUM_HALF4_CPU) {
+		int array_index = kernel_tex_index(id);
+		
+		if(array_index >= 0) {
+			if(array_index >= kg->texture_half4_images.size()) {
+				kg->texture_half4_images.resize(array_index+1);
+			}
 			tex = &kg->texture_half4_images[array_index];
 		}
 
@@ -175,9 +190,12 @@ void kernel_tex_copy(KernelGlobals *kg,
 	else if(strstr(name, "__tex_image_half")) {
 		texture_image_half *tex = NULL;
 		int id = atoi(name + strlen("__tex_image_half_"));
-		int array_index = id - TEX_START_HALF_CPU;
-
-		if(array_index >= 0 && array_index < TEX_NUM_HALF_CPU) {
+		int array_index = kernel_tex_index(id);
+		
+		if(array_index >= 0) {
+			if(array_index >= kg->texture_half_images.size()) {
+				kg->texture_half_images.resize(array_index+1);
+			}
 			tex = &kg->texture_half_images[array_index];
 		}
 
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index af68907a5c2..f6bb4c25012 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -23,51 +23,59 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device float4 kernel_tex_image_interp_impl(KernelGlobals *kg, int tex, float x, float y)
 {
-	if(tex >= TEX_START_HALF_CPU)
-		return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp(x, y);
-	else if(tex >= TEX_START_BYTE_CPU)
-		return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp(x, y);
-	else if(tex >= TEX_START_FLOAT_CPU)
-		return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp(x, y);
-	else if(tex >= TEX_START_HALF4_CPU)
-		return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp(x, y);
-	else if(tex >= TEX_START_BYTE4_CPU)
-		return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp(x, y);
-	else
-		return kg->texture_float4_images[tex].interp(x, y);
+	switch(kernel_tex_type(tex)) {
+		case IMAGE_DATA_TYPE_HALF:
+			return kg->texture_half_images[kernel_tex_index(tex)].interp(x, y);
+		case IMAGE_DATA_TYPE_BYTE:
+			return kg->texture_byte_images[kernel_tex_index(tex)].interp(x, y);
+		case IMAGE_DATA_TYPE_FLOAT:
+			return kg->texture_float_images[kernel_tex_index(tex)].interp(x, y);
+		case IMAGE_DATA_TYPE_HALF4:
+			return kg->texture_half4_images[kernel_tex_index(tex)].interp(x, y);
+		case IMAGE_DATA_TYPE_BYTE4:
+			return kg->texture_byte4_images[kernel_tex_index(tex)].interp(x, y);
+		case IMAGE_DATA_TYPE_FLOAT4:
+		default:
+			return kg->texture_float4_images[kernel_tex_index(tex)].interp(x, y);
+	}
 }
 
 ccl_device float4 kernel_tex_image_interp_3d_impl(KernelGlobals *kg, int tex, float x, float y, float z)
 {
-	if(tex >= TEX_START_HALF_CPU)
-		return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d(x, y, z);
-	else if(tex >= TEX_START_BYTE_CPU)
-		return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d(x, y, z);
-	else if(tex >= TEX_START_FLOAT_CPU)
-		return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d(x, y, z);
-	else if(tex >= TEX_START_HALF4_CPU)
-		return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d(x, y, z);
-	else if(tex >= TEX_START_BYTE4_CPU)
-		return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d(x, y, z);
-	else
-		return kg->texture_float4_images[tex].interp_3d(x, y, z);
-
+	switch(kernel_tex_type(tex)) {
+		case IMAGE_DATA_TYPE_HALF:
+			return kg->texture_half_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+		case IMAGE_DATA_TYPE_BYTE:
+			return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+		case IMAGE_DATA_TYPE_FLOAT:
+			return kg->texture_float_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+		case IMAGE_DATA_TYPE_HALF4:
+			return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+		case IMAGE_DATA_TYPE_BYTE4:
+			return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+		case IMAGE_DATA_TYPE_FLOAT4:
+		default:
+			return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d(x, y, z);
+	}
 }
 
 ccl_device float4 kernel_tex_image_interp_3d_ex_impl(KernelGlobals *kg, int tex, float x, float y, float z, int interpolation)
 {
-	if(tex >= TEX_START_HALF_CPU)
-		return kg->texture_half_images[tex - TEX_START_HALF_CPU].interp_3d_ex(x, y, z, interpolation);
-	else if(tex >= TEX_START_BYTE_CPU)
-		return kg->texture_byte_images[tex - TEX_START_BYTE_CPU].interp_3d_ex(x, y, z, interpolation);
-	else if(tex >= TEX_START_FLOAT_CPU)
-		return kg->texture_float_images[tex - TEX_START_FLOAT_CPU].interp_3d_ex(x, y, z, interpolation);
-	else if(tex >= TEX_START_HALF4_CPU)
-		return kg->texture_half4_images[tex - TEX_START_HALF4_CPU].interp_3d_ex(x, y, z, interpolation);
-	else if(tex >= TEX_START_BYTE4_CPU)
-		return kg->texture_byte4_images[tex - TEX_START_BYTE4_CPU].interp_3d_ex(x, y, z, interpolation);
-	else
-		return kg->texture_float4_images[tex].interp_3d_ex(x, y, z, interpolation);
+	switch(kernel_tex_type(tex)) {
+		case IMAGE_DATA_TYPE_HALF:
+			return kg->texture_half_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+		case IMAGE_DATA_TYPE_BYTE:
+			return kg->texture_byte_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+		case IMAGE_DATA_TYPE_FLOAT:
+			return kg->texture_float_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+		case IMAGE_DATA_TYPE_HALF4:
+			return kg->texture_half4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+		case IMAGE_DATA_TYPE_BYTE4:
+			return kg->texture_byte4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+		case IMAGE_DATA_TYPE_FLOAT4:
+		default:
+			return kg->texture_float4_images[kernel_tex_index(tex)].interp_3d_ex(x, y, z, interpolation);
+	}
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 76acc9253a1..4b5e4ebac00 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -151,8 +151,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 #  else
 	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
 	/* float4, byte4 and half4 */
-	if(id < TEX_START_FLOAT_CUDA_KEPLER)
+	const int texture_type = kernel_tex_type(id);
+	if(texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_HALF4) {
 		r = kernel_tex_image_interp_float4(tex, x, y);
+	}
 	/* float, byte and half */
 	else {
 		float f = kernel_tex_image_interp_float(tex, x, y);
@@ -166,8 +168,10 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 
 	if(use_alpha && alpha != 1.0f && alpha != 0.0f) {
 		r_ssef = r_ssef / ssef(alpha);
-		if(id >= TEX_NUM_FLOAT4_IMAGES)
+		const int texture_type = kernel_tex_type(id);
+		if(texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
 			r_ssef = min(r_ssef, ssef(1.0f));
+		}
 		r.w = alpha;
 	}
 
@@ -181,8 +185,9 @@ ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y,
 		r.x *= invw;
 		r.y *= invw;
 		r.z *= invw;
-
-		if(id >= TEX_NUM_FLOAT4_IMAGES) {
+		
+		const int texture_type = kernel_tex_type(id);
+		if(texture_type == IMAGE_DATA_TYPE_BYTE4 || texture_type == IMAGE_DATA_TYPE_BYTE) {
 			r.x = min(r.x, 1.0f);
 			r.y = min(r.y, 1.0f);
 			r.z = min(r.z, 1.0f);
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 9e826c8c23f..1d97e8344bd 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -46,7 +46,7 @@ ccl_device void svm_node_tex_voxel(KernelGlobals *kg,
 #  if defined(__KERNEL_CUDA__)
 #    if __CUDA_ARCH__ >= 300
 	CUtexObject tex = kernel_tex_fetch(__bindless_mapping, id);
-	if(id < TEX_START_HALF4_CUDA_KEPLER)
+	if(kernel_tex_type(id) == IMAGE_DATA_TYPE_FLOAT4 || kernel_tex_type(id) == IMAGE_DATA_TYPE_BYTE4 || kernel_tex_type(id) == IMAGE_DATA_TYPE_HALF4)
 		r = kernel_tex_image_interp_3d_float4(tex, co.x, co.y, co.z);
 	else {
 		float f = kernel_tex_image_interp_3d_float(tex, co.x, co.y, co.z);
diff --git a/intern/cycles/render/image.cpp b/intern/cycles/render/image.cpp
index a8c4f446bea..697b9f75658 100644
--- a/intern/cycles/render/image.cpp
+++ b/intern/cycles/render/image.cpp
@@ -49,54 +49,24 @@ ImageManager::ImageManager(const DeviceInfo& info)
 	}
 
 	/* Set image limits */
-#define SET_TEX_IMAGES_LIMITS(ARCH) \
-	{ \
-		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_NUM_FLOAT4_ ## ARCH; \
-		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = TEX_NUM_BYTE4_ ## ARCH; \
-		tex_num_images[IMAGE_DATA_TYPE_HALF4] = TEX_NUM_HALF4_ ## ARCH; \
-		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = TEX_NUM_FLOAT_ ## ARCH; \
-		tex_num_images[IMAGE_DATA_TYPE_BYTE] = TEX_NUM_BYTE_ ## ARCH; \
-		tex_num_images[IMAGE_DATA_TYPE_HALF] = TEX_NUM_HALF_ ## ARCH; \
-		tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = TEX_START_FLOAT4_ ## ARCH; \
-		tex_start_images[IMAGE_DATA_TYPE_BYTE4] = TEX_START_BYTE4_ ## ARCH; \
-		tex_start_images[IMAGE_DATA_TYPE_HALF4] = TEX_START_HALF4_ ## ARCH; \
-		tex_start_images[IMAGE_DATA_TYPE_FLOAT] = TEX_START_FLOAT_ ## ARCH; \
-		tex_start_images[IMAGE_DATA_TYPE_BYTE] = TEX_START_BYTE_ ## ARCH; \
-		tex_start_images[IMAGE_DATA_TYPE_HALF] = TEX_START_HALF_ ## ARCH; \
-	}
-
-	if(device_type == DEVICE_CPU) {
-		SET_TEX_IMAGES_LIMITS(CPU);
-	}
-	else if(device_type == DEVICE_CUDA) {
-		if(info.has_bindless_textures) {
-			SET_TEX_IMAGES_LIMITS(CUDA_KEPLER);
-		}
-		else {
-			SET_TEX_IMAGES_LIMITS(CUDA);
+	max_num_images = TEX_NUM_MAX;
+	has_half_images = true;
+	cuda_fermi_limits = false;
+	
+	if(device_type == DEVICE_CUDA) {
+		if(!info.has_bindless_textures) {
+			/* CUDA Fermi hardware (SM 2.x) has a hard limit on the number of textures */
+			cuda_fermi_limits = true;
+			has_half_images = false;
 		}
 	}
 	else if(device_type == DEVICE_OPENCL) {
-		SET_TEX_IMAGES_LIMITS(OPENCL);
+		has_half_images = false;
 	}
-	else {
-		/* Should not happen. */
-		tex_num_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
-		tex_num_images[IMAGE_DATA_TYPE_BYTE4] = 0;
-		tex_num_images[IMAGE_DATA_TYPE_HALF4] = 0;
-		tex_num_images[IMAGE_DATA_TYPE_FLOAT] = 0;
-		tex_num_images[IMAGE_DATA_TYPE_BYTE] = 0;
-		tex_num_images[IMAGE_DATA_TYPE_HALF] = 0;
-		tex_start_images[IMAGE_DATA_TYPE_FLOAT4] = 0;
-		tex_start_images[IMAGE_DATA_TYPE_BYTE4] = 0;
-		tex_start_images[IMAGE_DATA_TYPE_HALF4] = 0;
-		tex_start_images[IMAGE_DATA_TYPE_FLOAT] = 0;
-		tex_start_images[IMAGE_DATA_TYPE_BYTE] = 0;
-		tex_start_images[IMAGE_DATA_TYPE_HALF] = 0;
-		assert(0);
+	
+	for(size_t type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
+		tex_num_images[type] = 0;
 	}
-
-#undef SET_TEX_IMAGES_LIMITS
 }
 
 ImageManager::~ImageManager()
@@ -133,7 +103,7 @@ bool ImageManager::set_animation_frame_update(int frame)
 	return false;
 }
 
-ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filename,
+ImageDataType ImageManager::get_image_metadata(const string& filename,
                                                              void *builtin_data,
                                                              bool& is_linear)
 {
@@ -226,26 +196,42 @@ ImageManager::ImageDataType ImageManager::get_image_metadata(const string& filen
 	}
 }
 
-/* We use a consecutive slot counting scheme on the devices, in order
- * float4, byte4, half4, float, byte, half.
+/* The lower three bits of a device texture slot number indicate its type.
  * These functions convert the slot ids from ImageManager "images" ones
- * to device ones and vice versa. */
+ * to device ones and vice versa.
+ *
+ * There are special cases for CUDA Fermi, since there we have only 90 image texture
+ * slots available and should keep the flattened numbers in the 0-89 range.
+ */
 int ImageManager::type_index_to_flattened_slot(int slot, ImageDataType type)
 {
-	return slot + tex_start_images[type];
+	if(cuda_fermi_limits) {
+		if(type == IMAGE_DATA_TYPE_BYTE4) {
+			return slot + TEX_START_BYTE4_CUDA;
+		}
+		else {
+			return slot;
+		}
+	}
+
+	return (slot << IMAGE_DATA_TYPE_SHIFT) | (type);
 }
 
 int ImageManager::flattened_slot_to_type_index(int flat_slot, ImageDataType *type)
 {
-	for(int i = IMAGE_DATA_NUM_TYPES - 1; i >= 0; i--) {
-		if(flat_slot >= tex_start_images[i]) {
-			*type = (ImageDataType)i;
-			return flat_slot - tex_start_images[i];
+	if(cuda_fermi_limits) {
+		if(flat_slot >= TEX_START_BYTE4_CUDA) { /* must mirror the offset used when flattening */
+			*type = IMAGE_DATA_TYPE_BYTE4;
+			return flat_slot - TEX_START_BYTE4_CUDA;
+		}
+		else {
+			*type = IMAGE_DATA_TYPE_FLOAT4;
+			return flat_slot;
 		}
 	}
 
-	/* Should not happen. */
-	return flat_slot;
+	*type = (ImageDataType)(flat_slot & IMAGE_DATA_TYPE_MASK);
+	return flat_slot >> IMAGE_DATA_TYPE_SHIFT;
 }
 
 string ImageManager::name_from_type(int type)
@@ -299,14 +285,22 @@ int ImageManager::add_image(const string& filename,
 	is_float = (type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4);
 
 	/* No single channel and half textures on CUDA (Fermi) and no half on OpenCL, use available slots */
-	if((type == IMAGE_DATA_TYPE_FLOAT ||
-	    type == IMAGE_DATA_TYPE_HALF4 ||
-	    type == IMAGE_DATA_TYPE_HALF) &&
-	    tex_num_images[type] == 0) {
-		type = IMAGE_DATA_TYPE_FLOAT4;
+	if(!has_half_images) {
+		if(type == IMAGE_DATA_TYPE_HALF4) {
+			type = IMAGE_DATA_TYPE_FLOAT4;
+		}
+		else if(type == IMAGE_DATA_TYPE_HALF) {
+			type = IMAGE_DATA_TYPE_FLOAT;
+		}
 	}
-	if(type == IMAGE_DATA_TYPE_BYTE && tex_num_images[type] == 0) {
-		type = IMAGE_DATA_TYPE_BYTE4;
+
+	if(cuda_fermi_limits) {
+		if(type == IMAGE_DATA_TYPE_FLOAT) {
+			type = IMAGE_DATA_TYPE_FLOAT4;
+		}
+		else if(type == IMAGE_DATA_TYPE_BYTE) {
+			type = IMAGE_DATA_TYPE_BYTE4;
+		}
 	}
 
 	/* Fnd existing image. */
@@ -338,14 +332,30 @@ int ImageManager::add_image(const string& filename,
 			break;
 	}
 
-	if(slot == images[type].size()) {
-		/* Max images limit reached. */
-		if(images[type].size() == tex_num_images[type]) {
+	/* Refuse the image if its own type (Fermi) or the total count is already at the limit. */
+	if(cuda_fermi_limits) {
+		if((type == IMAGE_DATA_TYPE_BYTE4 && tex_num_images[type] >= TEX_NUM_BYTE4_CUDA)
+			|| (type == IMAGE_DATA_TYPE_FLOAT4 && tex_num_images[type] >= TEX_NUM_FLOAT4_CUDA))
+		{
 			printf("ImageManager::add_image: Reached %s image limit (%d), skipping '%s'\n",
-			       name_from_type(type).c_str(), tex_num_images[type], filename.c_str());
+				name_from_type(type).c_str(), tex_num_images[type], filename.c_str());
 			return -1;
 		}
-
+	}
+	else {
+		/* Very unlikely, since max_num_images is insanely big. But better safe than sorry. */
+		int tex_count = 0;
+		for(int i = 0; i < IMAGE_DATA_NUM_TYPES; i++) {
+			tex_count += tex_num_images[i];
+		}
+		if(tex_count >= max_num_images) {
+			printf("ImageManager::add_image: Reached image limit (%d), skipping '%s'\n",
+				max_num_images, filename.c_str());
+			return -1;
+		}
+	}
+	
+	if(slot == images[type].size()) {
 		images[type].resize(images[type].size() + 1);
 	}
 
@@ -362,6 +372,8 @@ int ImageManager::add_image(const string& filename,
 	img->use_alpha = use_alpha;
 
 	images[type][slot] = img;
+	
+	++tex_num_images[type];
 
 	need_update = true;
 
@@ -666,16 +678,12 @@ void ImageManager::device_load_image(Device *device,
 	/* Slot assignment */
 	int flat_slot = type_index_to_flattened_slot(slot, type);
 
-	string name;
-	if(flat_slot >= 100)
-		name = string_printf("__tex_image_%s_%d", name_from_type(type).c_str(), flat_slot);
-	else if(flat_slot >= 10)
-		name = string_printf("__tex_image_%s_0%d", name_from_type(type).c_str(), flat_slot);
-	else
-		name = string_printf("__tex_image_%s_00%d", name_from_type(type).c_str(), flat_slot);
+	string name = string_printf("__tex_image_%s_%03d", name_from_type(type).c_str(), flat_slot);
 
 	if(type == IMAGE_DATA_TYPE_FLOAT4) {
-		device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
+		if(dscene->tex_float4_image[slot] == NULL)
+			dscene->tex_float4_image[slot] = new device_vector<float4>();
+		device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
 
 		if(tex_img.device_pointer) {
 			thread_scoped_lock device_lock(device_mutex);
@@ -705,7 +713,9 @@ void ImageManager::device_load_image(Device *device,
 		}
 	}
 	else if(type == IMAGE_DATA_TYPE_FLOAT) {
-		device_vector<float>& tex_img = dscene->tex_float_image[slot];
+		if(dscene->tex_float_image[slot] == NULL)
+			dscene->tex_float_image[slot] = new device_vector<float>();
+		device_vector<float>& tex_img = *dscene->tex_float_image[slot];
 
 		if(tex_img.device_pointer) {
 			thread_scoped_lock device_lock(device_mutex);
@@ -732,7 +742,9 @@ void ImageManager::device_load_image(Device *device,
 		}
 	}
 	else if(type == IMAGE_DATA_TYPE_BYTE4) {
-		device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
+		if(dscene->tex_byte4_image[slot] == NULL)
+			dscene->tex_byte4_image[slot] = new device_vector<uchar4>();
+		device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
 
 		if(tex_img.device_pointer) {
 			thread_scoped_lock device_lock(device_mutex);
@@ -762,7 +774,9 @@ void ImageManager::device_load_image(Device *device,
 		}
 	}
 	else if(type == IMAGE_DATA_TYPE_BYTE){
-		device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+		if(dscene->tex_byte_image[slot] == NULL)
+			dscene->tex_byte_image[slot] = new device_vector<uchar>();
+		device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
 
 		if(tex_img.device_pointer) {
 			thread_scoped_lock device_lock(device_mutex);
@@ -788,7 +802,9 @@ void ImageManager::device_load_image(Device *device,
 		}
 	}
 	else if(type == IMAGE_DATA_TYPE_HALF4){
-		device_vector<half4>& tex_img = dscene->tex_half4_image[slot];
+		if(dscene->tex_half4_image[slot] == NULL)
+			dscene->tex_half4_image[slot] = new device_vector<half4>();
+		device_vector<half4>& tex_img = *dscene->tex_half4_image[slot];
 
 		if(tex_img.device_pointer) {
 			thread_scoped_lock device_lock(device_mutex);
@@ -817,7 +833,9 @@ void ImageManager::device_load_image(Device *device,
 		}
 	}
 	else if(type == IMAGE_DATA_TYPE_HALF){
-		device_vector<half>& tex_img = dscene->tex_half_image[slot];
+		if(dscene->tex_half_image[slot] == NULL)
+			dscene->tex_half_image[slot] = new device_vector<half>();
+		device_vector<half>& tex_img = *dscene->tex_half_image[slot];
 
 		if(tex_img.device_pointer) {
 			thread_scoped_lock device_lock(device_mutex);
@@ -857,69 +875,50 @@ void ImageManager::device_free_image(Device *device, DeviceScene *dscene, ImageD
 			((OSL::TextureSystem*)osl_texture_system)->invalidate(filename);
 #endif
 		}
-		else if(type == IMAGE_DATA_TYPE_FLOAT4) {
-			device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
-
-			if(tex_img.device_pointer) {
-				thread_scoped_lock device_lock(device_mutex);
-				device->tex_free(tex_img);
-			}
-
-			tex_img.clear();
-		}
-		else if(type == IMAGE_DATA_TYPE_FLOAT) {
-			device_vector<float>& tex_img = dscene->tex_float_image[slot];
-
-			if(tex_img.device_pointer) {
-				thread_scoped_lock device_lock(device_mutex);
-				device->tex_free(tex_img);
-			}
-
-			tex_img.clear();
-		}
-		else if(type == IMAGE_DATA_TYPE_BYTE4) {
-			device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
-
-			if(tex_img.device_pointer) {
-				thread_scoped_lock device_lock(device_mutex);
-				device->tex_free(tex_img);
-			}
-
-			tex_img.clear();
-		}
-		else if(type == IMAGE_DATA_TYPE_BYTE){
-			device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
-
-			if(tex_img.device_pointer) {
-				thread_scoped_lock device_lock(device_mutex);
-				device->tex_free(tex_img);
+		else {
+			device_memory *tex_img = NULL;
+			switch(type) {
+				case IMAGE_DATA_TYPE_FLOAT4:
+					tex_img = dscene->tex_float4_image[slot];
+					dscene->tex_float4_image[slot] = NULL;
+					break;
+				case IMAGE_DATA_TYPE_BYTE4:
+					tex_img = dscene->tex_byte4_image[slot];
+					dscene->tex_byte4_image[slot]= NULL;
+					break;
+				case IMAGE_DATA_TYPE_HALF4:
+					tex_img = dscene->tex_half4_image[slot];
+					dscene->tex_half4_image[slot]= NULL;
+					break;
+				case IMAGE_DATA_TYPE_FLOAT:
+					tex_img = dscene->tex_float_image[slot];
+					dscene->tex_float_image[slot] = NULL;
+					break;
+				case IMAGE_DATA_TYPE_BYTE:
+					tex_img = dscene->tex_byte_image[slot];
+					dscene->tex_byte_image[slot]= NULL;
+					break;
+				case IMAGE_DATA_TYPE_HALF:
+					tex_img = dscene->tex_half_image[slot];
+					dscene->tex_half_image[slot]= NULL;
+					break;
+				default:
+					assert(0);
+					tex_img = NULL;
 			}
+			if(tex_img) {
+				if(tex_img->device_pointer) {
+					thread_scoped_lock device_lock(device_mutex);
+					device->tex_free(*tex_img);
+				}
 
-			tex_img.clear();
-		}
-		else if(type == IMAGE_DATA_TYPE_HALF4){
-			device_vector<half4>& tex_img = dscene->tex_half4_image[slot];
-
-			if(tex_img.device_pointer) {
-				thread_scoped_lock device_lock(device_mutex);
-				device->tex_free(tex_img);
+				delete tex_img;
 			}
-
-			tex_img.clear();
-		}
-		else if(type == IMAGE_DATA_TYPE_HALF){
-			device_vector<half>& tex_img = dscene->tex_half_image[slot];
-
-			if(tex_img.device_pointer) {
-				thread_scoped_lock device_lock(device_mutex);
-				device->tex_free(tex_img);
-			}
-
-			tex_img.clear();
 		}
 
 		delete images[type][slot];
 		images[type][slot] = NULL;
+		--tex_num_images[type];
 	}
 }
 
@@ -934,6 +933,32 @@ void ImageManager::device_update(Device *device,
 	TaskPool pool;
 
 	for(int type = 0; type < IMAGE_DATA_NUM_TYPES; type++) {
+		switch(type) {
+			case IMAGE_DATA_TYPE_FLOAT4:
+				if(dscene->tex_float4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT4])
+					dscene->tex_float4_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT4]);
+				break;
+			case IMAGE_DATA_TYPE_BYTE4:
+				if(dscene->tex_byte4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE4])
+					dscene->tex_byte4_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE4]);
+				break;
+			case IMAGE_DATA_TYPE_HALF4:
+				if(dscene->tex_half4_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF4])
+					dscene->tex_half4_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF4]);
+				break;
+			case IMAGE_DATA_TYPE_BYTE:
+				if(dscene->tex_byte_image.size() <= tex_num_images[IMAGE_DATA_TYPE_BYTE])
+					dscene->tex_byte_image.resize(tex_num_images[IMAGE_DATA_TYPE_BYTE]);
+				break;
+			case IMAGE_DATA_TYPE_FLOAT:
+				if(dscene->tex_float_image.size() <= tex_num_images[IMAGE_DATA_TYPE_FLOAT])
+					dscene->tex_float_image.resize(tex_num_images[IMAGE_DATA_TYPE_FLOAT]);
+				break;
+			case IMAGE_DATA_TYPE_HALF:
+				if(dscene->tex_half_image.size() <= tex_num_images[IMAGE_DATA_TYPE_HALF])
+					dscene->tex_half_image.resize(tex_num_images[IMAGE_DATA_TYPE_HALF]);
+				break;
+		}
 		for(size_t slot = 0; slot < images[type].size(); slot++) {
 			if(!images[type][slot])
 				continue;
@@ -1029,7 +1054,7 @@ void ImageManager::device_pack_images(Device *device,
 		if(!images[type][slot])
 			continue;
 
-		device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
+		device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
 		size += tex_img.size();
 	}
 
@@ -1039,7 +1064,7 @@ void ImageManager::device_pack_images(Device *device,
 		if(!images[type][slot])
 			continue;
 
-		device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
+		device_vector<uchar4>& tex_img = *dscene->tex_byte4_image[slot];
 
 		uint8_t options = pack_image_options(type, slot);
 
@@ -1059,7 +1084,7 @@ void ImageManager::device_pack_images(Device *device,
 		if(!images[type][slot])
 			continue;
 
-		device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
+		device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
 		size += tex_img.size();
 	}
 
@@ -1069,7 +1094,7 @@ void ImageManager::device_pack_images(Device *device,
 		if(!images[type][slot])
 			continue;
 
-		device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
+		device_vector<float4>& tex_img = *dscene->tex_float4_image[slot];
 
 		/* todo: support 3D textures, only CPU for now */
 
@@ -1091,7 +1116,7 @@ void ImageManager::device_pack_images(Device *device,
 		if(!images[type][slot])
 			continue;
 
-		device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+		device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
 		size += tex_img.size();
 	}
 
@@ -1101,7 +1126,7 @@ void ImageManager::device_pack_images(Device *device,
 		if(!images[type][slot])
 			continue;
 
-		device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
+		device_vector<uchar>& tex_img = *dscene->tex_byte_image[slot];
 
 		uint8_t options = pack_image_options(type, slot);
 
@@ -1121,7 +1146,7 @@ void ImageManager::device_pack_images(Device *device,
 		if(!images[type][slot])
 			continue;
 
-		device_vector<float>& tex_img = dscene->tex_float_image[slot];
+		device_vector<float>& tex_img = *dscene->tex_float_image[slot];
 		size += tex_img.size();
 	}
 
@@ -1131,7 +1156,7 @@ void ImageManager::device_pack_images(Device *device,
 		if(!images[type][slot])
 			continue;
 
-		device_vector<float>& tex_img = dscene->tex_float_image[slot];
+		device_vector<float>& tex_img = *dscene->tex_float_image[slot];
 
 		/* todo: support 3D textures, only CPU for now */
 
@@ -1201,16 +1226,23 @@ void ImageManager::device_free(Device *device, DeviceScene *dscene)
 		images[type].clear();
 	}
 
-	device->tex_free(dscene->tex_image_byte4_packed);
+	dscene->tex_float4_image.clear();
+	dscene->tex_byte4_image.clear();
+	dscene->tex_half4_image.clear();
+	dscene->tex_float_image.clear();
+	dscene->tex_byte_image.clear();
+	dscene->tex_half_image.clear();
+
 	device->tex_free(dscene->tex_image_float4_packed);
-	device->tex_free(dscene->tex_image_byte_packed);
+	device->tex_free(dscene->tex_image_byte4_packed);
 	device->tex_free(dscene->tex_image_float_packed);
+	device->tex_free(dscene->tex_image_byte_packed);
 	device->tex_free(dscene->tex_image_packed_info);
 
-	dscene->tex_image_byte4_packed.clear();
 	dscene->tex_image_float4_packed.clear();
-	dscene->tex_image_byte_packed.clear();
+	dscene->tex_image_byte4_packed.clear();
 	dscene->tex_image_float_packed.clear();
+	dscene->tex_image_byte_packed.clear();
 	dscene->tex_image_packed_info.clear();
 }
 
diff --git a/intern/cycles/render/image.h b/intern/cycles/render/image.h
index 996b5a5b65f..76c2cc46f12 100644
--- a/intern/cycles/render/image.h
+++ b/intern/cycles/render/image.h
@@ -37,17 +37,6 @@ public:
 	explicit ImageManager(const DeviceInfo& info);
 	~ImageManager();
 
-	enum ImageDataType {
-		IMAGE_DATA_TYPE_FLOAT4 = 0,
-		IMAGE_DATA_TYPE_BYTE4 = 1,
-		IMAGE_DATA_TYPE_HALF4 = 2,
-		IMAGE_DATA_TYPE_FLOAT = 3,
-		IMAGE_DATA_TYPE_BYTE = 4,
-		IMAGE_DATA_TYPE_HALF = 5,
-
-		IMAGE_DATA_NUM_TYPES
-	};
-
 	int add_image(const string& filename,
 	              void *builtin_data,
 	              bool animated,
@@ -124,7 +113,9 @@ public:
 
 private:
 	int tex_num_images[IMAGE_DATA_NUM_TYPES];
-	int tex_start_images[IMAGE_DATA_NUM_TYPES];
+	int max_num_images;
+	bool has_half_images;
+	bool cuda_fermi_limits;
 
 	thread_mutex device_mutex;
 	int animation_frame;
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index f9679d52235..9b565c3ede1 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -364,9 +364,9 @@ void ImageTextureNode::compile(OSLCompiler& compiler)
 	image_manager = compiler.image_manager;
 	if(is_float == -1) {
 		if(builtin_data == NULL) {
-			ImageManager::ImageDataType type;
+			ImageDataType type;
 			type = image_manager->get_image_metadata(filename.string(), NULL, is_linear);
-			if(type == ImageManager::IMAGE_DATA_TYPE_FLOAT || type == ImageManager::IMAGE_DATA_TYPE_FLOAT4)
+			if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
 				is_float = 1;
 		}
 		else {
@@ -553,9 +553,9 @@ void EnvironmentTextureNode::compile(OSLCompiler& compiler)
 	image_manager = compiler.image_manager;
 	if(is_float == -1) {
 		if(builtin_data == NULL) {
-			ImageManager::ImageDataType type;
+			ImageDataType type;
 			type = image_manager->get_image_metadata(filename.string(), NULL, is_linear);
-			if(type == ImageManager::IMAGE_DATA_TYPE_FLOAT || type == ImageManager::IMAGE_DATA_TYPE_FLOAT4)
+			if(type == IMAGE_DATA_TYPE_FLOAT || type == IMAGE_DATA_TYPE_FLOAT4)
 				is_float = 1;
 		}
 		else {
diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h
index 2b5267642a2..b02f9f35393 100644
--- a/intern/cycles/render/scene.h
+++ b/intern/cycles/render/scene.h
@@ -114,18 +114,18 @@ public:
 	device_vector<uint> sobol_directions;
 
 	/* cpu images */
-	device_vector<uchar4> tex_byte4_image[TEX_NUM_BYTE4_CPU];
-	device_vector<float4> tex_float4_image[TEX_NUM_FLOAT4_CPU];
-	device_vector<float> tex_float_image[TEX_NUM_FLOAT_CPU];
-	device_vector<uchar> tex_byte_image[TEX_NUM_BYTE_CPU];
-	device_vector<half4> tex_half4_image[TEX_NUM_HALF4_CPU];
-	device_vector<half> tex_half_image[TEX_NUM_HALF_CPU];
-
+	std::vector<device_vector<float4>* > tex_float4_image;
+	std::vector<device_vector<uchar4>* > tex_byte4_image;
+	std::vector<device_vector<half4>* > tex_half4_image;
+	std::vector<device_vector<float>* > tex_float_image;
+	std::vector<device_vector<uchar>* > tex_byte_image;
+	std::vector<device_vector<half>* > tex_half_image;
+	
 	/* opencl images */
-	device_vector<uchar4> tex_image_byte4_packed;
 	device_vector<float4> tex_image_float4_packed;
-	device_vector<uchar> tex_image_byte_packed;
+	device_vector<uchar4> tex_image_byte4_packed;
 	device_vector<float> tex_image_float_packed;
+	device_vector<uchar> tex_image_byte_packed;
 	device_vector<uint4> tex_image_packed_info;
 
 	KernelData data;
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index aff928ea2ee..df255f43059 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -21,62 +21,22 @@ CCL_NAMESPACE_BEGIN
 
 /* Texture limits on devices. */
 
-/* CPU */
-#define TEX_NUM_FLOAT4_CPU		1024
-#define TEX_NUM_BYTE4_CPU		1024
-#define TEX_NUM_HALF4_CPU		1024
-#define TEX_NUM_FLOAT_CPU		1024
-#define TEX_NUM_BYTE_CPU		1024
-#define TEX_NUM_HALF_CPU		1024
-#define TEX_START_FLOAT4_CPU	0
-#define TEX_START_BYTE4_CPU		TEX_NUM_FLOAT4_CPU
-#define TEX_START_HALF4_CPU		(TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU)
-#define TEX_START_FLOAT_CPU		(TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU)
-#define TEX_START_BYTE_CPU		(TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU)
-#define TEX_START_HALF_CPU		(TEX_NUM_FLOAT4_CPU + TEX_NUM_BYTE4_CPU + TEX_NUM_HALF4_CPU + TEX_NUM_FLOAT_CPU + TEX_NUM_BYTE_CPU)
-
 /* CUDA (Geforce 4xx and 5xx) */
-#define TEX_NUM_FLOAT4_CUDA		5
-#define TEX_NUM_BYTE4_CUDA		85
-#define TEX_NUM_HALF4_CUDA		0
-#define TEX_NUM_FLOAT_CUDA		0
-#define TEX_NUM_BYTE_CUDA		0
-#define TEX_NUM_HALF_CUDA		0
-#define TEX_START_FLOAT4_CUDA	0
-#define TEX_START_BYTE4_CUDA	TEX_NUM_FLOAT4_CUDA
-#define TEX_START_HALF4_CUDA	(TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
-#define TEX_START_FLOAT_CUDA	(TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
-#define TEX_START_BYTE_CUDA		(TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
-#define TEX_START_HALF_CUDA		(TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
-
-/* CUDA (Kepler, Geforce 6xx and above) */
-#define TEX_NUM_FLOAT4_CUDA_KEPLER		1024
-#define TEX_NUM_BYTE4_CUDA_KEPLER		1024
-#define TEX_NUM_HALF4_CUDA_KEPLER		1024
-#define TEX_NUM_FLOAT_CUDA_KEPLER		1024
-#define TEX_NUM_BYTE_CUDA_KEPLER		1024
-#define TEX_NUM_HALF_CUDA_KEPLER		1024
-#define TEX_START_FLOAT4_CUDA_KEPLER	0
-#define TEX_START_BYTE4_CUDA_KEPLER		TEX_NUM_FLOAT4_CUDA_KEPLER
-#define TEX_START_HALF4_CUDA_KEPLER		(TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER)
-#define TEX_START_FLOAT_CUDA_KEPLER		(TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER)
-#define TEX_START_BYTE_CUDA_KEPLER		(TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER)
-#define TEX_START_HALF_CUDA_KEPLER		(TEX_NUM_FLOAT4_CUDA_KEPLER + TEX_NUM_BYTE4_CUDA_KEPLER + TEX_NUM_HALF4_CUDA_KEPLER + TEX_NUM_FLOAT_CUDA_KEPLER + TEX_NUM_BYTE_CUDA_KEPLER)
-
-/* OpenCL */
-#define TEX_NUM_FLOAT4_OPENCL	1024
-#define TEX_NUM_BYTE4_OPENCL	1024
-#define TEX_NUM_HALF4_OPENCL	0
-#define TEX_NUM_FLOAT_OPENCL	1024
-#define TEX_NUM_BYTE_OPENCL		1024
-#define TEX_NUM_HALF_OPENCL		0
-#define TEX_START_FLOAT4_OPENCL	0
-#define TEX_START_BYTE4_OPENCL	TEX_NUM_FLOAT4_OPENCL
-#define TEX_START_HALF4_OPENCL	(TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL)
-#define TEX_START_FLOAT_OPENCL	(TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL)
-#define TEX_START_BYTE_OPENCL	(TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL)
-#define TEX_START_HALF_OPENCL	(TEX_NUM_FLOAT4_OPENCL + TEX_NUM_BYTE4_OPENCL + TEX_NUM_HALF4_OPENCL + TEX_NUM_FLOAT_OPENCL + TEX_NUM_BYTE_OPENCL)
-
+#define TEX_NUM_FLOAT4_CUDA      5
+#define TEX_NUM_BYTE4_CUDA       84
+#define TEX_NUM_HALF4_CUDA       0
+#define TEX_NUM_FLOAT_CUDA       0
+#define TEX_NUM_BYTE_CUDA        0
+#define TEX_NUM_HALF_CUDA        0
+#define TEX_START_FLOAT4_CUDA    0
+#define TEX_START_BYTE4_CUDA     TEX_NUM_FLOAT4_CUDA
+#define TEX_START_HALF4_CUDA     (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA)
+#define TEX_START_FLOAT_CUDA     (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA)
+#define TEX_START_BYTE_CUDA      (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA)
+#define TEX_START_HALF_CUDA      (TEX_NUM_FLOAT4_CUDA + TEX_NUM_BYTE4_CUDA + TEX_NUM_HALF4_CUDA + TEX_NUM_FLOAT_CUDA + TEX_NUM_BYTE_CUDA)
+
+/* Any architecture other than old CUDA cards */
+#define TEX_NUM_MAX (INT_MAX >> 4)
 
 /* Color to use when textures are not found. */
 #define TEX_IMAGE_MISSING_R 1
@@ -84,6 +44,14 @@ CCL_NAMESPACE_BEGIN
 #define TEX_IMAGE_MISSING_B 1
 #define TEX_IMAGE_MISSING_A 1
 
+#if defined (__KERNEL_CUDA__) && (__CUDA_ARCH__ < 300)
+#  define kernel_tex_type(tex) ((tex) < TEX_START_BYTE4_CUDA ? IMAGE_DATA_TYPE_FLOAT4 : IMAGE_DATA_TYPE_BYTE4)
+#  define kernel_tex_index(tex) (tex)
+#else  /* type is stored in the low bits of the flat index, the index in the bits above */
+#  define kernel_tex_type(tex) ((tex) & IMAGE_DATA_TYPE_MASK)
+#  define kernel_tex_index(tex) ((tex) >> IMAGE_DATA_TYPE_SHIFT)
+#endif  /* macro arguments are parenthesized so expression arguments expand safely */
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_TEXTURE_H__ */
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 137cc73b70b..296343ecfd3 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -154,6 +154,25 @@ enum InterpolationType {
 	INTERPOLATION_NUM_TYPES,
 };
 
+/* Texture types
+ * Since we store the type in the lower bits of a flat index,
+ * the shift and bit mask constants below need to be kept in sync
+ * with the number of enum values (every type must fit in the mask). */
+
+enum ImageDataType {
+	IMAGE_DATA_TYPE_FLOAT4 = 0,
+	IMAGE_DATA_TYPE_BYTE4 = 1,
+	IMAGE_DATA_TYPE_HALF4 = 2,
+	IMAGE_DATA_TYPE_FLOAT = 3,
+	IMAGE_DATA_TYPE_BYTE = 4,
+	IMAGE_DATA_TYPE_HALF = 5,
+	
+	IMAGE_DATA_NUM_TYPES
+};
+
+#define IMAGE_DATA_TYPE_SHIFT 3  /* number of low bits of the flat index that hold the type */
+#define IMAGE_DATA_TYPE_MASK 0x7  /* bit mask covering the IMAGE_DATA_TYPE_SHIFT low bits */
+
 /* Extension types for textures.
  *
  * Defines how the image is extrapolated past its original bounds.
diff --git a/release/bin/blender-thumbnailer.py b/release/bin/blender-thumbnailer.py
index 5d2dd958a92..e050a681ca0 100755
--- a/release/bin/blender-thumbnailer.py
+++ b/release/bin/blender-thumbnailer.py
@@ -37,7 +37,7 @@ import struct
 
 
 def open_wrapper_get():
-    """ wrap OS spesific read functionality here, fallback to 'open()'
+    """ wrap OS specific read functionality here, fallback to 'open()'
     """
 
     class GFileWrapper:
diff --git a/source/blender/editors/space_view3d/drawobject.c b/source/blender/editors/space_view3d/drawobject.c
index bafd74e4c54..7fe4a1d3570 100644
--- a/source/blender/editors/space_view3d/drawobject.c
+++ b/source/blender/editors/space_view3d/drawobject.c
@@ -892,7 +892,7 @@ void view3d_cached_text_draw_add(const float co[3],
 	memcpy(vos->str, str, alloc_len);
 }
 
-void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write, float mat[4][4])
+void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write)
 {
 	RegionView3D *rv3d = ar->regiondata;
 	ViewCachedString *vos;
@@ -902,9 +902,6 @@ void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write, flo
 
 	/* project first and test */
 	for (vos = g_v3d_strings[g_v3d_string_level]; vos; vos = vos->next) {
-		if (mat && !(vos->flag & V3D_CACHE_TEXT_WORLDSPACE))
-			mul_m4_v3(mat, vos->vec);
-
 		if (ED_view3d_project_short_ex(ar,
 		                               (vos->flag & V3D_CACHE_TEXT_GLOBALSPACE) ? rv3d->persmat : rv3d->persmatob,
 		                               (vos->flag & V3D_CACHE_TEXT_LOCALCLIP) != 0,
@@ -8913,7 +8910,7 @@ afterdraw:
 			draw_new_particle_system(scene, v3d, rv3d, base, psys, dt, dflag);
 		}
 		invert_m4_m4(ob->imat, ob->obmat);
-		view3d_cached_text_draw_end(v3d, ar, 0, NULL);
+		view3d_cached_text_draw_end(v3d, ar, 0);
 
 		gpuMultMatrix(ob->obmat);
 		
@@ -9107,7 +9104,7 @@ afterdraw:
 	
 	/* return warning, this is cached text draw */
 	invert_m4_m4(ob->imat, ob->obmat);
-	view3d_cached_text_draw_end(v3d, ar, 1, NULL);
+	view3d_cached_text_draw_end(v3d, ar, 1);
 	/* return warning, clear temp flag */
 	v3d->flag2 &= ~V3D_SHOW_SOLID_MATCAP;
 	
diff --git a/source/blender/editors/space_view3d/drawsimdebug.c b/source/blender/editors/space_view3d/drawsimdebug.c
index e06336c621b..24ac1c5b4db 100644
--- a/source/blender/editors/space_view3d/drawsimdebug.c
+++ b/source/blender/editors/space_view3d/drawsimdebug.c
@@ -203,7 +203,7 @@ void draw_sim_debug_data(Scene *UNUSED(scene), View3D *v3d, ARegion *ar)
 	
 	view3d_cached_text_draw_begin();
 	draw_sim_debug_elements(_sim_debug_data, imat);
-	view3d_cached_text_draw_end(v3d, ar, false, NULL);
+	view3d_cached_text_draw_end(v3d, ar, false);
 	
 	gpuPopMatrix();
 }
diff --git a/source/blender/editors/space_view3d/view3d_intern.h b/source/blender/editors/space_view3d/view3d_intern.h
index 51293f41fa5..dd9570eb656 100644
--- a/source/blender/editors/space_view3d/view3d_intern.h
+++ b/source/blender/editors/space_view3d/view3d_intern.h
@@ -168,7 +168,7 @@ void view3d_cached_text_draw_begin(void);
 void view3d_cached_text_draw_add(const float co[3],
                                  const char *str, const size_t str_len,
                                  short xoffs, short flag, const unsigned char col[4]);
-void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write, float mat[4][4]);
+void view3d_cached_text_draw_end(View3D *v3d, ARegion *ar, bool depth_write);
 
 bool check_object_draw_texture(struct Scene *scene, struct View3D *v3d, const char drawtype);
 
diff --git a/source/creator/creator_args.c b/source/creator/creator_args.c
index 3850846b0b9..d7406588952 100644
--- a/source/creator/creator_args.c
+++ b/source/creator/creator_args.c
@@ -1160,6 +1160,7 @@ static const char arg_handle_threads_set_doc[] =
 "<threads>\n"
 "\tUse amount of <threads> for rendering and other operations\n"
 "\t[1-" STRINGIFY(BLENDER_MAX_THREADS) "], 0 for systems processor count."
+" (This must be the first argument)"
 ;
 static int arg_handle_threads_set(int argc, const char **argv, void *UNUSED(data))
 {
author	Campbell Barton <ideasman42@gmail.com>	2017-04-27 14:41:57 +0300
committer	Campbell Barton <ideasman42@gmail.com>	2017-04-27 14:42:08 +0300
commit	98b6c6f2c0227b9a6d9db8bd47ee6cad27f35b4b (patch)
tree	0e13eee0ea3845dcafa0440323635f3ae38a7a5e
parent	21d31f8f58ff1454b663faac0d1260f7c5168a65 (diff)
parent	bdf8ad6c4e521884a544f6dbfa244c27720cd4bb (diff)