54 files changed, 2108 insertions, 267 deletions
diff --git a/build_files/buildbot/slave_compile.py b/build_files/buildbot/slave_compile.py
index 5e06c7057ce..d763ddfb3e0 100644
--- a/build_files/buildbot/slave_compile.py
+++ b/build_files/buildbot/slave_compile.py
@@ -73,6 +73,7 @@ if 'cmake' in builder:
         if builder.endswith('x86_64_10_6_cmake'):
             cmake_extra_options.append('-DCMAKE_OSX_ARCHITECTURES:STRING=x86_64')
         cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-hack/bin/nvcc')
+        cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE=/usr/local/cuda8-hack/bin/nvcc')
 
     elif builder.startswith('win'):
         if builder.endswith('_vc2015'):
@@ -89,6 +90,8 @@ if 'cmake' in builder:
             elif builder.startswith('win32'):
                 bits = 32
                 cmake_options.extend(['-G', 'Visual Studio 12 2013'])
+        cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v7.5/bin/nvcc.exe')
+        cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE:FILEPATH=C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v8.0/bin/nvcc.exe')
 
     elif builder.startswith('linux'):
         tokens = builder.split("_")
@@ -108,10 +111,14 @@ if 'cmake' in builder:
             cuda_chroot_name = 'buildbot_' + deb_name + '_x86_64'
             targets = ['player', 'blender', 'cuda']
 
+        cmake_extra_options.append('-DCUDA_NVCC_EXECUTABLE=/usr/local/cuda-7.5/bin/nvcc')
+        cmake_extra_options.append('-DCUDA_NVCC8_EXECUTABLE=/usr/local/cuda-8.0/bin/nvcc')
+
     cmake_options.append("-C" + os.path.join(blender_dir, cmake_config_file))
 
     # Prepare CMake options needed to configure cuda binaries compilation.
     cuda_cmake_options.append("-DWITH_CYCLES_CUDA_BINARIES=%s" % ('ON' if build_cubins else 'OFF'))
+    cuda_cmake_options.append("-DCYCLES_CUDA_BINARIES_ARCH=sm_20;sm_21;sm_30;sm_35;sm_37;sm_50;sm_52;sm_60;sm_61")
     if build_cubins or 'cuda' in targets:
         if bits == 32:
             cuda_cmake_options.append("-DCUDA_64_BIT_DEVICE_CODE=OFF")
diff --git a/intern/audaspace/jack/AUD_JackLibrary.cpp b/intern/audaspace/jack/AUD_JackLibrary.cpp
index 63306ee0b15..9ed6862bbb9 100644
--- a/intern/audaspace/jack/AUD_JackLibrary.cpp
+++ b/intern/audaspace/jack/AUD_JackLibrary.cpp
@@ -44,7 +44,20 @@ static bool jack_supported = false;
 void AUD_jack_init(void)
 {
 #ifdef WITH_JACK_DYNLOAD
-	jack_handle = dlopen("libjack.so", RTLD_LAZY);
+	const char *names[] = {"libjack.so",
+	                       "libjack.so.0",
+	                       "libjack.so.1",
+	                       "libjack.so.2",
+	                       NULL};
+	int index = 0;
+	while (names[index] != NULL) {
+		jack_handle = dlopen(names[index], RTLD_LAZY);
+		if (jack_handle != NULL) {
+			// Found existing library.
+			break;
+		}
+		++index;
+	}
 
 	if (!jack_handle) {
 		return;
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 2d404918a38..6a511ea7316 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -249,121 +249,161 @@ public:
 		return DebugFlags().cuda.adaptive_compile;
 	}
 
+	/* Build the part of the nvcc command line which does not depend on the
+	 * shading model or on the kernel sources md5, only on the compiler and
+	 * on the compilation settings.
+	 */
+	string compile_kernel_get_common_cflags(
+	        const DeviceRequestedFeatures& requested_features)
+	{
+		const int toolkit_version = cuewCompilerVersion();
+		const int cpu_bits = system_cpu_bits();
+		const string include_dir = path_get("kernel");
+		string flags = string_printf(
+		        "-m%d --ptxas-options=\"-v\" "
+		        "--use_fast_math -DNVCC "
+		        "-D__KERNEL_CUDA_VERSION__=%d -I\"%s\"",
+		        cpu_bits,
+		        toolkit_version,
+		        include_dir.c_str());
+		/* Requested feature options are only added for adaptive compilation. */
+		if(use_adaptive_compilation()) {
+			flags += " " + requested_features.get_build_options();
+		}
+		/* Optional extra flags taken from the environment. */
+		const char *env_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
+		if(env_cflags != NULL) {
+			flags += string(" ") + string(env_cflags);
+		}
+#ifdef WITH_CYCLES_DEBUG
+		flags += " -D__KERNEL_DEBUG__";
+#endif
+		return flags;
+	}
+
+	/* Check nvcc is available and its version (major*10 + minor) is usable. */
+	bool compile_check_compiler() {
+		const char *nvcc = cuewCompilerPath();
+		if(nvcc == NULL) {
+			cuda_error_message("CUDA nvcc compiler not found. "
+			                   "Install CUDA toolkit in default location.");
+			return false;
+		}
+		const int cuda_version = cuewCompilerVersion();
+		VLOG(1) << "Found nvcc " << nvcc
+		        << ", CUDA version " << cuda_version << ".";
+		const int major = cuda_version / 10, minor = cuda_version % 10;
+		if(cuda_version == 0) {
+			cuda_error_message("CUDA nvcc compiler version could not be parsed.");
+			return false;
+		}
+		if(cuda_version < 60) {
+			printf("Unsupported CUDA version %d.%d detected, "
+			       "you need CUDA 7.5 or newer.\n",
+			       major, minor);
+			return false;
+		}
+		else if(cuda_version != 75 && cuda_version != 80) {
+			printf("CUDA version %d.%d detected, build may succeed but only "
+			       "CUDA 7.5 and 8.0 are officially supported.\n",
+			       major, minor);
+		}
+		return true;
+	}
+
 	string compile_kernel(const DeviceRequestedFeatures& requested_features)
 	{
 		/* Compute cubin name. */
 		int major, minor;
 		cuDeviceComputeCapability(&major, &minor, cuDevId);
-		string cubin;
-
-		/* Adaptive Compile.
-		 * If enabled, always use that */
-		bool use_adaptive_compile = use_adaptive_compilation();
 
 		/* Attempt to use kernel provided with Blender. */
-		if(!use_adaptive_compile) {
-			cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin", major, minor));
-			VLOG(1) << "Testing for pre-compiled kernel " << cubin;
+		if(!use_adaptive_compilation()) {
+			const string cubin = path_get(string_printf("lib/kernel_sm_%d%d.cubin",
+			                                            major, minor));
+			VLOG(1) << "Testing for pre-compiled kernel " << cubin << ".";
 			if(path_exists(cubin)) {
-				VLOG(1) << "Using precompiled kernel";
+				VLOG(1) << "Using precompiled kernel.";
 				return cubin;
 			}
 		}
 
+		const string common_cflags =
+		        compile_kernel_get_common_cflags(requested_features);
+
 		/* Try to use locally compiled kernel. */
-		string kernel_path = path_get("kernel");
-		string md5 = path_files_md5_hash(kernel_path);
-
-		string feature_build_options;
-		if(use_adaptive_compile) {
-			feature_build_options = requested_features.get_build_options();
-			string device_md5 = util_md5_string(feature_build_options);
-			cubin = string_printf("cycles_kernel_%s_sm%d%d_%s.cubin",
-		                          device_md5.c_str(),
-		                          major, minor,
-		                          md5.c_str());
-		}
-		else {
-			cubin = string_printf("cycles_kernel_sm%d%d_%s.cubin", major, minor, md5.c_str());
-		}
+		const string kernel_path = path_get("kernel");
+		const string kernel_md5 = path_files_md5_hash(kernel_path);
+
+		/* We include cflags into md5 so changing cuda toolkit or changing other
+		 * compiler command line arguments makes sure cubin gets re-built.
+		 */
+		const string cubin_md5 = util_md5_string(kernel_md5 + common_cflags);
 
-		cubin = path_user_get(path_join("cache", cubin));
-		VLOG(1) << "Testing for locally compiled kernel " << cubin;
-		/* If exists already, use it. */
+		const string cubin_file = string_printf("cycles_kernel_sm%d%d_%s.cubin",
+		                                        major, minor,
+		                                        cubin_md5.c_str());
+		const string cubin = path_user_get(path_join("cache", cubin_file));
+		VLOG(1) << "Testing for locally compiled kernel " << cubin << ".";
 		if(path_exists(cubin)) {
-			VLOG(1) << "Using locally compiled kernel";
+			VLOG(1) << "Using locally compiled kernel.";
 			return cubin;
 		}
 
 #ifdef _WIN32
 		if(have_precompiled_kernels()) {
-			if(major < 2)
-				cuda_error_message(string_printf("CUDA device requires compute capability 2.0 or up, found %d.%d. Your GPU is not supported.", major, minor));
-			else
-				cuda_error_message(string_printf("CUDA binary kernel for this graphics card compute capability (%d.%d) not found.", major, minor));
+			if(major < 2) {
+				cuda_error_message(string_printf(
+				        "CUDA device requires compute capability 2.0 or up, "
+				        "found %d.%d. Your GPU is not supported.",
+				        major, minor));
+			}
+			else {
+				cuda_error_message(string_printf(
+				        "CUDA binary kernel for this graphics card compute "
+				        "capability (%d.%d) not found.",
+				        major, minor));
+			}
 			return "";
 		}
 #endif
 
-		/* If not, find CUDA compiler. */
-		const char *nvcc = cuewCompilerPath();
-
-		if(nvcc == NULL) {
-			cuda_error_message("CUDA nvcc compiler not found. Install CUDA toolkit in default location.");
-			return "";
-		}
-
-		int cuda_version = cuewCompilerVersion();
-		VLOG(1) << "Found nvcc " << nvcc << ", CUDA version " << cuda_version;
-
-		if(cuda_version == 0) {
-			cuda_error_message("CUDA nvcc compiler version could not be parsed.");
-			return "";
-		}
-		if(cuda_version < 60) {
-			printf("Unsupported CUDA version %d.%d detected, you need CUDA 7.5.\n", cuda_version/10, cuda_version%10);
+		/* Compile. */
+		if(!compile_check_compiler()) {
 			return "";
 		}
-		else if(cuda_version != 75)
-			printf("CUDA version %d.%d detected, build may succeed but only CUDA 7.5 is officially supported.\n", cuda_version/10, cuda_version%10);
-
-		/* Compile. */
-		string kernel = path_join(kernel_path, path_join("kernels", path_join("cuda", "kernel.cu")));
-		string include = kernel_path;
-		const int machine = system_cpu_bits();
-
+		const char *nvcc = cuewCompilerPath();
+		const string kernel = path_join(kernel_path,
+		                          path_join("kernels",
+		                                    path_join("cuda", "kernel.cu")));
 		double starttime = time_dt();
 		printf("Compiling CUDA kernel ...\n");
 
 		path_create_directories(cubin);
 
-		string command = string_printf("\"%s\" -arch=sm_%d%d -m%d --cubin \"%s\" "
-			"-o \"%s\" --ptxas-options=\"-v\" --use_fast_math -I\"%s\" "
-			"-DNVCC -D__KERNEL_CUDA_VERSION__=%d",
-			nvcc, major, minor, machine, kernel.c_str(), cubin.c_str(), include.c_str(), cuda_version);
-
-		if(use_adaptive_compile)
-			command += " " + feature_build_options;
-
-		const char* extra_cflags = getenv("CYCLES_CUDA_EXTRA_CFLAGS");
-		if(extra_cflags) {
-			command += string(" ") + string(extra_cflags);
-		}
-
-#ifdef WITH_CYCLES_DEBUG
-		command += " -D__KERNEL_DEBUG__";
-#endif
+		string command = string_printf("\"%s\" "
+		                               "-arch=sm_%d%d "
+		                               "--cubin \"%s\" "
+		                               "-o \"%s\" "
+		                               "%s ",
+		                               nvcc,
+		                               major, minor,
+		                               kernel.c_str(),
+		                               cubin.c_str(),
+		                               common_cflags.c_str());
 
 		printf("%s\n", command.c_str());
 
 		if(system(command.c_str()) == -1) {
-			cuda_error_message("Failed to execute compilation command, see console for details.");
+			cuda_error_message("Failed to execute compilation command, "
+			                   "see console for details.");
 			return "";
 		}
 
 		/* Verify if compilation succeeded */
 		if(!path_exists(cubin)) {
-			cuda_error_message("CUDA kernel compilation failed, see console for details.");
+			cuda_error_message("CUDA kernel compilation failed, "
+			                   "see console for details.");
 			return "";
 		}
 
@@ -964,11 +1004,11 @@ public:
 		if(!background) {
 			PixelMem pmem = pixel_mem_map[mem];
 			CUdeviceptr buffer;
-			
+
 			size_t bytes;
 			cuda_assert(cuGraphicsMapResources(1, &pmem.cuPBOresource, 0));
 			cuda_assert(cuGraphicsResourceGetMappedPointer(&buffer, &bytes, pmem.cuPBOresource));
-			
+
 			return buffer;
 		}
 
@@ -1000,9 +1040,9 @@ public:
 				glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(GLhalf)*4, NULL, GL_DYNAMIC_DRAW);
 			else
 				glBufferData(GL_PIXEL_UNPACK_BUFFER, pmem.w*pmem.h*sizeof(uint8_t)*4, NULL, GL_DYNAMIC_DRAW);
-			
+
 			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-			
+
 			glGenTextures(1, &pmem.cuTexId);
 			glBindTexture(GL_TEXTURE_2D, pmem.cuTexId);
 			if(mem.data_type == TYPE_HALF)
@@ -1012,7 +1052,7 @@ public:
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
 			glBindTexture(GL_TEXTURE_2D, 0);
-			
+
 			CUresult result = cuGraphicsGLRegisterBuffer(&pmem.cuPBOresource, pmem.cuPBO, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
 
 			if(result == CUDA_SUCCESS) {
@@ -1114,9 +1154,9 @@ public:
 			else
 				glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, (void*)offset);
 			glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-			
+
 			glEnable(GL_TEXTURE_2D);
-			
+
 			if(transparent) {
 				glEnable(GL_BLEND);
 				glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
@@ -1181,7 +1221,7 @@ public:
 
 			if(transparent)
 				glDisable(GL_BLEND);
-			
+
 			glBindTexture(GL_TEXTURE_2D, 0);
 			glDisable(GL_TEXTURE_2D);
 
@@ -1197,12 +1237,12 @@ public:
 	{
 		if(task->type == DeviceTask::PATH_TRACE) {
 			RenderTile tile;
-			
+
 			bool branched = task->integrator_branched;
 
 			/* Upload Bindless Mapping */
 			load_bindless_mapping();
-			
+
 			/* keep rendering tiles until done */
 			while(task->acquire_tile(this, tile)) {
 				int start_sample = tile.start_sample;
@@ -1339,7 +1379,7 @@ void device_cuda_info(vector<DeviceInfo>& devices)
 		fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
 		return;
 	}
-	
+
 	vector<DeviceInfo> display_devices;
 
 	for(int num = 0; num < count; num++) {
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 28695a2ebf9..f4d154ca19e 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -248,12 +248,20 @@ if(WITH_CYCLES_CUDA_BINARIES)
 			set(cuda_debug_flags "")
 		endif()
 
-		set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
+		set(cuda_nvcc_command ${CUDA_NVCC_EXECUTABLE})
+		set(cuda_nvcc_version ${CUDA_VERSION})
+
+		if(DEFINED CUDA_NVCC8_EXECUTABLE  AND ((${arch} STREQUAL "sm_60") OR (${arch} STREQUAL "sm_61")))
+			set(cuda_nvcc_command ${CUDA_NVCC8_EXECUTABLE})
+			set(cuda_nvcc_version "80")
+		endif()
+
+		set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${cuda_nvcc_version}")
 		set(cuda_math_flags "--use_fast_math")
 
 		add_custom_command(
 			OUTPUT ${cuda_cubin}
-			COMMAND ${CUDA_NVCC_EXECUTABLE}
+			COMMAND ${cuda_nvcc_command}
 					-arch=${arch}
 					${CUDA_NVCC_FLAGS}
 					-m${CUDA_BITS}
@@ -270,7 +278,6 @@ if(WITH_CYCLES_CUDA_BINARIES)
 					-DCCL_NAMESPACE_BEGIN=
 					-DCCL_NAMESPACE_END=
 					-DNVCC
-
 			DEPENDS ${cuda_sources})
 
 		delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
@@ -278,6 +285,9 @@ if(WITH_CYCLES_CUDA_BINARIES)
 
 		unset(cuda_extra_flags)
 		unset(cuda_debug_flags)
+
+		unset(cuda_nvcc_command)
+		unset(cuda_nvcc_version)
 	endmacro()
 
 	foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
diff --git a/intern/cycles/kernel/bvh/bvh_shadow_all.h b/intern/cycles/kernel/bvh/bvh_shadow_all.h
index 1d6fa303d3e..e9eeff31ecc 100644
--- a/intern/cycles/kernel/bvh/bvh_shadow_all.h
+++ b/intern/cycles/kernel/bvh/bvh_shadow_all.h
@@ -37,11 +37,16 @@
  *
  */
 
-ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                            const Ray *ray,
-                                            Intersection *isect_array,
-                                            const uint max_hits,
-                                            uint *num_hits)
+#ifndef __KERNEL_GPU__
+ccl_device
+#else
+ccl_device_inline
+#endif
+bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                 const Ray *ray,
+                                 Intersection *isect_array,
+                                 const uint max_hits,
+                                 uint *num_hits)
 {
 	/* todo:
 	 * - likely and unlikely for if() statements
diff --git a/intern/cycles/kernel/bvh/bvh_subsurface.h b/intern/cycles/kernel/bvh/bvh_subsurface.h
index 18978efcfa3..d9623c94b2e 100644
--- a/intern/cycles/kernel/bvh/bvh_subsurface.h
+++ b/intern/cycles/kernel/bvh/bvh_subsurface.h
@@ -35,12 +35,17 @@
  *
  */
 
-ccl_device void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                            const Ray *ray,
-                                            SubsurfaceIntersection *ss_isect,
-                                            int subsurface_object,
-                                            uint *lcg_state,
-                                            int max_hits)
+#ifndef __KERNEL_GPU__
+ccl_device
+#else
+ccl_device_inline
+#endif
+void BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                 const Ray *ray,
+                                 SubsurfaceIntersection *ss_isect,
+                                 int subsurface_object,
+                                 uint *lcg_state,
+                                 int max_hits)
 {
 	/* todo:
 	 * - test if pushing distance on the stack helps (for non shadow rays)
diff --git a/intern/cycles/kernel/bvh/bvh_traversal.h b/intern/cycles/kernel/bvh/bvh_traversal.h
index 68a11b65ad7..b1a52968a26 100644
--- a/intern/cycles/kernel/bvh/bvh_traversal.h
+++ b/intern/cycles/kernel/bvh/bvh_traversal.h
@@ -40,16 +40,21 @@
  *
  */
 
-ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                            const Ray *ray,
-                                            Intersection *isect,
-                                            const uint visibility
+#ifndef __KERNEL_GPU__
+ccl_device
+#else
+ccl_device_inline
+#endif
+bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                 const Ray *ray,
+                                 Intersection *isect,
+                                 const uint visibility
 #if BVH_FEATURE(BVH_HAIR_MINIMUM_WIDTH)
-                                            , uint *lcg_state,
-                                            float difl,
-                                            float extmax
+                                 , uint *lcg_state,
+                                 float difl,
+                                 float extmax
 #endif
-                                            )
+                                 )
 {
 	/* todo:
 	 * - test if pushing distance on the stack helps (for non shadow rays)
diff --git a/intern/cycles/kernel/bvh/bvh_volume.h b/intern/cycles/kernel/bvh/bvh_volume.h
index 03499e94347..107373c17dc 100644
--- a/intern/cycles/kernel/bvh/bvh_volume.h
+++ b/intern/cycles/kernel/bvh/bvh_volume.h
@@ -36,10 +36,15 @@
  *
  */
 
-ccl_device bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                            const Ray *ray,
-                                            Intersection *isect,
-                                            const uint visibility)
+#ifndef __KERNEL_GPU__
+ccl_device
+#else
+ccl_device_inline
+#endif
+bool BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                 const Ray *ray,
+                                 Intersection *isect,
+                                 const uint visibility)
 {
 	/* todo:
 	 * - test if pushing distance on the stack helps (for non shadow rays)
diff --git a/intern/cycles/kernel/bvh/bvh_volume_all.h b/intern/cycles/kernel/bvh/bvh_volume_all.h
index 7eddc2891d0..1f6515c9862 100644
--- a/intern/cycles/kernel/bvh/bvh_volume_all.h
+++ b/intern/cycles/kernel/bvh/bvh_volume_all.h
@@ -36,11 +36,16 @@
  *
  */
 
-ccl_device uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
-                                            const Ray *ray,
-                                            Intersection *isect_array,
-                                            const uint max_hits,
-                                            const uint visibility)
+#ifndef __KERNEL_GPU__
+ccl_device
+#else
+ccl_device_inline
+#endif
+uint BVH_FUNCTION_FULL_NAME(BVH)(KernelGlobals *kg,
+                                 const Ray *ray,
+                                 Intersection *isect_array,
+                                 const uint max_hits,
+                                 const uint visibility)
 {
 	/* todo:
 	 * - test if pushing distance on the stack helps (for non shadow rays)
diff --git a/intern/cycles/kernel/closure/bsdf.h b/intern/cycles/kernel/closure/bsdf.h
index 72096f4d873..19cb8a04d48 100644
--- a/intern/cycles/kernel/closure/bsdf.h
+++ b/intern/cycles/kernel/closure/bsdf.h
@@ -40,7 +40,15 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, float randu, float randv, float3 *eval, float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device_inline int bsdf_sample(KernelGlobals *kg,
+                                  ShaderData *sd,
+                                  const ShaderClosure *sc,
+                                  float randu,
+                                  float randv,
+                                  float3 *eval,
+                                  float3 *omega_in,
+                                  differential3 *domega_in,
+                                  float *pdf)
 {
 	int label;
 
@@ -157,7 +165,16 @@ ccl_device int bsdf_sample(KernelGlobals *kg, ShaderData *sd, const ShaderClosur
 	return label;
 }
 
-ccl_device float3 bsdf_eval(KernelGlobals *kg, ShaderData *sd, const ShaderClosure *sc, const float3 omega_in, float *pdf)
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else
+ccl_device_inline
+#endif  /* __KERNEL_CUDA__ */
+float3 bsdf_eval(KernelGlobals *kg,
+                 ShaderData *sd,
+                 const ShaderClosure *sc,
+                 const float3 omega_in,
+                 float *pdf)
 {
 	float3 eval;
 
diff --git a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
index 8ed76bea525..9929246ae5c 100644
--- a/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
+++ b/intern/cycles/kernel/closure/bsdf_ashikhmin_shirley.h
@@ -62,7 +62,11 @@ ccl_device_inline float bsdf_ashikhmin_shirley_roughness_to_exponent(float rough
 	return 2.0f / (roughness*roughness) - 2.0f;
 }
 
-ccl_device float3 bsdf_ashikhmin_shirley_eval_reflect(const ShaderClosure *sc, const float3 I, const float3 omega_in, float *pdf)
+ccl_device_inline float3 bsdf_ashikhmin_shirley_eval_reflect(
+        const ShaderClosure *sc,
+        const float3 I,
+        const float3 omega_in,
+        float *pdf)
 {
 	const MicrofacetBsdf *bsdf = (const MicrofacetBsdf*)sc;
 	float3 N = bsdf->N;
diff --git a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
index afd4a8da62a..6ebe2f6a751 100644
--- a/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
+++ b/intern/cycles/kernel/closure/bsdf_microfacet_multi_impl.h
@@ -25,11 +25,18 @@
  * energy is used. In combination with MIS, that is enough to produce an unbiased result, although
  * the balance heuristic isn't necessarily optimal anymore.
  */
-ccl_device float3 MF_FUNCTION_FULL_NAME(mf_eval)(float3 wi, float3 wo, const bool wo_outside, const float3 color, const float alpha_x, const float alpha_y, ccl_addr_space uint* lcg_state
+ccl_device_inline float3 MF_FUNCTION_FULL_NAME(mf_eval)(
+        float3 wi,
+        float3 wo,
+        const bool wo_outside,
+        const float3 color,
+        const float alpha_x,
+        const float alpha_y,
+         ccl_addr_space uint *lcg_state
 #ifdef MF_MULTI_GLASS
-	, const float eta
+        , const float eta
 #elif defined(MF_MULTI_GLOSSY)
-	, float3 *n, float3 *k
+        , float3 *n, float3 *k
 #endif
 )
 {
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index e31d790dd84..b713ff56b2f 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -143,7 +143,7 @@ ccl_device float bssrdf_cubic_pdf(const ShaderClosure *sc, float r)
 }
 
 /* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
-ccl_device float bssrdf_cubic_quintic_root_find(float xi)
+ccl_device_inline float bssrdf_cubic_quintic_root_find(float xi)
 {
 	/* newton-raphson iteration, usually succeeds in 2-4 iterations, except
 	 * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
@@ -257,7 +257,7 @@ ccl_device float bssrdf_burley_pdf(const ShaderClosure *sc, float r)
  * Returns scaled radius, meaning the result is to be scaled up by d.
  * Since there's no closed form solution we do Newton-Raphson method to find it.
  */
-ccl_device float bssrdf_burley_root_find(float xi)
+ccl_device_inline float bssrdf_burley_root_find(float xi)
 {
 	const float tolerance = 1e-6f;
 	const int max_iteration_count = 10;
@@ -403,7 +403,7 @@ ccl_device void bssrdf_sample(const ShaderClosure *sc, float xi, float *r, float
 		bssrdf_burley_sample(sc, xi, r, h);
 }
 
-ccl_device float bssrdf_pdf(const ShaderClosure *sc, float r)
+ccl_device_inline float bssrdf_pdf(const ShaderClosure *sc, float r)
 {
 	if(sc->type == CLOSURE_BSSRDF_CUBIC_ID)
 		return bssrdf_cubic_pdf(sc, r);
diff --git a/intern/cycles/kernel/geom/geom_primitive.h b/intern/cycles/kernel/geom/geom_primitive.h
index 44734d1b70d..b16f0c9a99b 100644
--- a/intern/cycles/kernel/geom/geom_primitive.h
+++ b/intern/cycles/kernel/geom/geom_primitive.h
@@ -23,7 +23,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Generic primitive attribute reading functions */
 
-ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float *dx, float *dy)
+ccl_device_inline float primitive_attribute_float(KernelGlobals *kg,
+                                                  const ShaderData *sd,
+                                                  AttributeElement elem,
+                                                  int offset,
+                                                  float *dx, float *dy)
 {
 	if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
 		if(subd_triangle_patch(kg, sd) == ~0)
@@ -48,7 +52,12 @@ ccl_device float primitive_attribute_float(KernelGlobals *kg, const ShaderData *
 	}
 }
 
-ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData *sd, AttributeElement elem, int offset, float3 *dx, float3 *dy)
+ccl_device_inline float3 primitive_attribute_float3(KernelGlobals *kg,
+                                                    const ShaderData *sd,
+                                                    AttributeElement elem,
+                                                    int offset,
+                                                    float3 *dx,
+                                                    float3 *dy)
 {
 	if(ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE) {
 		if(subd_triangle_patch(kg, sd) == ~0)
@@ -75,7 +84,7 @@ ccl_device float3 primitive_attribute_float3(KernelGlobals *kg, const ShaderData
 
 /* Default UV coordinate */
 
-ccl_device float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
+ccl_device_inline float3 primitive_uv(KernelGlobals *kg, ShaderData *sd)
 {
 	AttributeElement elem_uv;
 	int offset_uv = find_attribute(kg, sd, ATTR_STD_UV, &elem_uv);
@@ -144,7 +153,7 @@ ccl_device float3 primitive_tangent(KernelGlobals *kg, ShaderData *sd)
 
 /* Motion vector for motion pass */
 
-ccl_device float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
+ccl_device_inline float4 primitive_motion_vector(KernelGlobals *kg, ShaderData *sd)
 {
 	/* center position */
 	float3 center;
diff --git a/intern/cycles/kernel/geom/geom_subd_triangle.h b/intern/cycles/kernel/geom/geom_subd_triangle.h
index e4597aba56e..bf9be182345 100644
--- a/intern/cycles/kernel/geom/geom_subd_triangle.h
+++ b/intern/cycles/kernel/geom/geom_subd_triangle.h
@@ -22,7 +22,7 @@ CCL_NAMESPACE_BEGIN
 
 ccl_device_inline uint subd_triangle_patch(KernelGlobals *kg, const ShaderData *sd)
 {
-	return kernel_tex_fetch(__tri_patch, ccl_fetch(sd, prim));
+	return (ccl_fetch(sd, prim) != PRIM_NONE) ? kernel_tex_fetch(__tri_patch, ccl_fetch(sd, prim)) : ~0;
 }
 
 /* UV coords of triangle within patch */
diff --git a/intern/cycles/kernel/geom/geom_volume.h b/intern/cycles/kernel/geom/geom_volume.h
index 2044aafc877..7c8182bc430 100644
--- a/intern/cycles/kernel/geom/geom_volume.h
+++ b/intern/cycles/kernel/geom/geom_volume.h
@@ -44,7 +44,9 @@ ccl_device float4 volume_image_texture_3d(int id, float x, float y, float z)
 }
 #endif  /* __KERNEL_GPU__ */
 
-ccl_device float3 volume_normalized_position(KernelGlobals *kg, const ShaderData *sd, float3 P)
+ccl_device_inline float3 volume_normalized_position(KernelGlobals *kg,
+                                                    const ShaderData *sd,
+                                                    float3 P)
 {
 	/* todo: optimize this so it's just a single matrix multiplication when
 	 * possible (not motion blur), or perhaps even just translation + scale */
diff --git a/intern/cycles/kernel/kernel_bake.h b/intern/cycles/kernel/kernel_bake.h
index 9ee0b09529e..bfbf73df54f 100644
--- a/intern/cycles/kernel/kernel_bake.h
+++ b/intern/cycles/kernel/kernel_bake.h
@@ -18,8 +18,12 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BAKING__
 
-ccl_device void compute_light_pass(KernelGlobals *kg, ShaderData *sd, PathRadiance *L, RNG rng,
-                                   int pass_filter, int sample)
+ccl_device_inline void compute_light_pass(KernelGlobals *kg,
+                                          ShaderData *sd,
+                                          PathRadiance *L,
+                                          RNG rng,
+                                          int pass_filter,
+                                          int sample)
 {
 	/* initialize master radiance accumulator */
 	kernel_assert(kernel_data.film.use_light_pass);
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index f6c103d59dd..88514de514c 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -211,7 +211,10 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg, float raster_x, fl
 
 /* Panorama Camera */
 
-ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float raster_y, float lens_u, float lens_v, ccl_addr_space Ray *ray)
+ccl_device_inline void camera_sample_panorama(KernelGlobals *kg,
+                                              float raster_x, float raster_y,
+                                              float lens_u, float lens_v,
+                                              ccl_addr_space Ray *ray)
 {
 	Transform rastertocamera = kernel_data.cam.rastertocamera;
 	float3 Pcamera = transform_perspective(&rastertocamera, make_float3(raster_x, raster_y, 0.0f));
@@ -303,8 +306,12 @@ ccl_device void camera_sample_panorama(KernelGlobals *kg, float raster_x, float
 
 /* Common */
 
-ccl_device void camera_sample(KernelGlobals *kg, int x, int y, float filter_u, float filter_v,
-	float lens_u, float lens_v, float time, ccl_addr_space Ray *ray)
+ccl_device_inline void camera_sample(KernelGlobals *kg,
+                                     int x, int y,
+                                     float filter_u, float filter_v,
+                                     float lens_u, float lens_v,
+                                     float time,
+                                     ccl_addr_space Ray *ray)
 {
 	/* pixel filter */
 	int filter_table_offset = kernel_data.film.filter_table_offset;
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 08f6f457805..a039b414006 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -36,7 +36,11 @@
 /* Qualifier wrappers for different names on different devices */
 
 #define ccl_device  __device__ __inline__
-#define ccl_device_inline  __device__ __inline__
+#if (__KERNEL_CUDA_VERSION__ == 80) && (__CUDA_ARCH__ < 500)
+#  define ccl_device_inline  __device__ __forceinline__
+#else
+#  define ccl_device_inline  __device__ __inline__
+#endif
 #define ccl_device_noinline  __device__ __noinline__
 #define ccl_global
 #define ccl_constant
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 93c4bd3f7d5..1e829eaa1fa 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -44,11 +44,11 @@ typedef struct LightSample {
  *
  * Note: light_p is modified when sample_coord is true.
  */
-ccl_device float area_light_sample(float3 P,
-                                   float3 *light_p,
-                                   float3 axisu, float3 axisv,
-                                   float randu, float randv,
-                                   bool sample_coord)
+ccl_device_inline float area_light_sample(float3 P,
+                                          float3 *light_p,
+                                          float3 axisu, float3 axisv,
+                                          float randu, float randv,
+                                          bool sample_coord)
 {
 	/* In our name system we're using P for the center,
 	 * which is o in the paper.
@@ -268,11 +268,11 @@ ccl_device_inline bool background_portal_data_fetch_and_check_side(KernelGlobals
 	return false;
 }
 
-ccl_device float background_portal_pdf(KernelGlobals *kg,
-                                       float3 P,
-                                       float3 direction,
-                                       int ignore_portal,
-                                       bool *is_possible)
+ccl_device_inline float background_portal_pdf(KernelGlobals *kg,
+                                              float3 P,
+                                              float3 direction,
+                                              int ignore_portal,
+                                              bool *is_possible)
 {
 	float portal_pdf = 0.0f;
 
@@ -367,7 +367,10 @@ ccl_device float3 background_portal_sample(KernelGlobals *kg,
 	return make_float3(0.0f, 0.0f, 0.0f);
 }
 
-ccl_device float3 background_light_sample(KernelGlobals *kg, float3 P, float randu, float randv, float *pdf)
+ccl_device_inline float3 background_light_sample(KernelGlobals *kg,
+                                                 float3 P,
+                                                 float randu, float randv,
+                                                 float *pdf)
 {
 	/* Probability of sampling portals instead of the map. */
 	float portal_sampling_pdf = kernel_data.integrator.portal_pdf;
@@ -507,8 +510,11 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3
 	return t*t/cos_pi;
 }
 
-ccl_device void lamp_light_sample(KernelGlobals *kg, int lamp,
-	float randu, float randv, float3 P, LightSample *ls)
+ccl_device_inline void lamp_light_sample(KernelGlobals *kg,
+                                         int lamp,
+                                         float randu, float randv,
+                                         float3 P,
+                                         LightSample *ls)
 {
 	float4 data0 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 0);
 	float4 data1 = kernel_tex_fetch(__light_data, lamp*LIGHT_SIZE + 1);
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index d5b31037723..1f08f3459e6 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -436,7 +436,7 @@ ccl_device_noinline void kernel_path_ao(KernelGlobals *kg,
 
 #ifdef __SUBSURFACE__
 
-ccl_device bool kernel_path_subsurface_scatter(
+ccl_device_inline bool kernel_path_subsurface_scatter(
         KernelGlobals *kg,
         ShaderData *sd,
         ShaderData *emission_sd,
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 56516967d8f..64f1468eacf 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -18,13 +18,13 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __BRANCHED_PATH__
 
-ccl_device void kernel_branched_path_ao(KernelGlobals *kg,
-                                        ShaderData *sd,
-                                        ShaderData *emission_sd,
-                                        PathRadiance *L,
-                                        PathState *state,
-                                        RNG *rng,
-                                        float3 throughput)
+ccl_device_inline void kernel_branched_path_ao(KernelGlobals *kg,
+                                               ShaderData *sd,
+                                               ShaderData *emission_sd,
+                                               PathRadiance *L,
+                                               PathState *state,
+                                               RNG *rng,
+                                               float3 throughput)
 {
 	int num_samples = kernel_data.integrator.ao_samples;
 	float num_samples_inv = 1.0f/num_samples;
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index 74b1ae0ca32..250b8e92a45 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -222,8 +222,13 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg, ccl_
 #endif
 
 /* path tracing: bounce off or through surface to with new direction stored in ray */
-ccl_device_inline bool kernel_path_surface_bounce(KernelGlobals *kg, ccl_addr_space RNG *rng,
-	ShaderData *sd, ccl_addr_space float3 *throughput, ccl_addr_space PathState *state, PathRadiance *L, ccl_addr_space Ray *ray)
+ccl_device bool kernel_path_surface_bounce(KernelGlobals *kg,
+                                           ccl_addr_space RNG *rng,
+                                           ShaderData *sd,
+                                           ccl_addr_space float3 *throughput,
+                                           ccl_addr_space PathState *state,
+                                           PathRadiance *L,
+                                           ccl_addr_space Ray *ray)
 {
 	/* no BSDF? we can stop here */
 	if(ccl_fetch(sd, flag) & SD_BSDF) {
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index e45522a4641..5fd4f2fad4c 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -18,8 +18,14 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __VOLUME_SCATTER__
 
-ccl_device void kernel_path_volume_connect_light(KernelGlobals *kg, RNG *rng,
-	ShaderData *sd, ShaderData *emission_sd, float3 throughput, PathState *state, PathRadiance *L)
+ccl_device_inline void kernel_path_volume_connect_light(
+        KernelGlobals *kg,
+        RNG *rng,
+        ShaderData *sd,
+        ShaderData *emission_sd,
+        float3 throughput,
+        PathState *state,
+        PathRadiance *L)
 {
 #ifdef __EMISSION__
 	if(!kernel_data.integrator.use_direct_light)
diff --git a/intern/cycles/kernel/kernel_projection.h b/intern/cycles/kernel/kernel_projection.h
index 8be6742699a..3437d83ed7d 100644
--- a/intern/cycles/kernel/kernel_projection.h
+++ b/intern/cycles/kernel/kernel_projection.h
@@ -130,7 +130,10 @@ ccl_device float2 direction_to_fisheye_equisolid(float3 dir, float lens, float w
 	return make_float2(u, v);
 }
 
-ccl_device float3 fisheye_equisolid_to_direction(float u, float v, float lens, float fov, float width, float height)
+ccl_device_inline float3 fisheye_equisolid_to_direction(float u, float v,
+                                                        float lens,
+                                                        float fov,
+                                                        float width, float height)
 {
 	u = (u - 0.5f) * width;
 	v = (v - 0.5f) * height;
@@ -189,7 +192,7 @@ ccl_device float2 direction_to_mirrorball(float3 dir)
 	return make_float2(u, v);
 }
 
-ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v)
+ccl_device_inline float3 panorama_to_direction(KernelGlobals *kg, float u, float v)
 {
 	switch(kernel_data.cam.panorama_type) {
 		case PANORAMA_EQUIRECTANGULAR:
@@ -205,7 +208,7 @@ ccl_device float3 panorama_to_direction(KernelGlobals *kg, float u, float v)
 	}
 }
 
-ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir)
+ccl_device_inline float2 direction_to_panorama(KernelGlobals *kg, float3 dir)
 {
 	switch(kernel_data.cam.panorama_type) {
 		case PANORAMA_EQUIRECTANGULAR:
@@ -221,9 +224,9 @@ ccl_device float2 direction_to_panorama(KernelGlobals *kg, float3 dir)
 	}
 }
 
-ccl_device float3 spherical_stereo_position(KernelGlobals *kg,
-                                            float3 dir,
-                                            float3 pos)
+ccl_device_inline float3 spherical_stereo_position(KernelGlobals *kg,
+                                                   float3 dir,
+                                                   float3 pos)
 {
 	float interocular_offset = kernel_data.cam.interocular_offset;
 
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index b7641c37d93..98d321c9c16 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -149,8 +149,16 @@ ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg,
 /* ShaderData setup from BSSRDF scatter */
 
 #ifdef __SUBSURFACE__
-ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderData *sd,
-	const Intersection *isect, const Ray *ray)
+#  ifndef __KERNEL_CUDA__
+ccl_device
+#  else  /* CUDA: keep the original inline qualifier. */
+ccl_device_inline
+#  endif  /* __KERNEL_CUDA__ */
+void shader_setup_from_subsurface(
+        KernelGlobals *kg,
+        ShaderData *sd,
+        const Intersection *isect,
+        const Ray *ray)
 {
 	bool backfacing = sd->flag & SD_BACKFACING;
 
@@ -226,14 +234,14 @@ ccl_device_inline void shader_setup_from_subsurface(KernelGlobals *kg, ShaderDat
 
 /* ShaderData setup from position sampled on mesh */
 
-ccl_device void shader_setup_from_sample(KernelGlobals *kg,
-                                         ShaderData *sd,
-                                         const float3 P,
-                                         const float3 Ng,
-                                         const float3 I,
-                                         int shader, int object, int prim,
-                                         float u, float v, float t,
-                                         float time)
+ccl_device_inline void shader_setup_from_sample(KernelGlobals *kg,
+                                                ShaderData *sd,
+                                                const float3 P,
+                                                const float3 Ng,
+                                                const float3 I,
+                                                int shader, int object, int prim,
+                                                float u, float v, float t,
+                                                float time)
 {
 	/* vectors */
 	ccl_fetch(sd, P) = P;
@@ -445,7 +453,7 @@ ccl_device_inline void shader_setup_from_volume(KernelGlobals *kg, ShaderData *s
 /* Merging */
 
 #if defined(__BRANCHED_PATH__) || defined(__VOLUME__)
-ccl_device void shader_merge_closures(ShaderData *sd)
+ccl_device_inline void shader_merge_closures(ShaderData *sd)
 {
 	/* merge identical closures, better when we sample a single closure at a time */
 	for(int i = 0; i < sd->num_closure; i++) {
@@ -530,12 +538,18 @@ ccl_device_inline void _shader_bsdf_multi_eval_branched(KernelGlobals *kg,
 }
 #endif
 
-ccl_device void shader_bsdf_eval(KernelGlobals *kg,
-                                 ShaderData *sd,
-                                 const float3 omega_in,
-                                 BsdfEval *eval,
-                                 float light_pdf,
-                                 bool use_mis)
+
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else  /* CUDA: request inlining of this function. */
+ccl_device_inline
+#endif  /* __KERNEL_CUDA__ */
+void shader_bsdf_eval(KernelGlobals *kg,
+                      ShaderData *sd,
+                      const float3 omega_in,
+                      BsdfEval *eval,
+                      float light_pdf,
+                      bool use_mis)
 {
 	bsdf_eval_init(eval, NBUILTIN_CLOSURES, make_float3(0.0f, 0.0f, 0.0f), kernel_data.film.use_light_pass);
 
@@ -554,9 +568,13 @@ ccl_device void shader_bsdf_eval(KernelGlobals *kg,
 	}
 }
 
-ccl_device int shader_bsdf_sample(KernelGlobals *kg, ShaderData *sd,
-	float randu, float randv, BsdfEval *bsdf_eval,
-	float3 *omega_in, differential3 *domega_in, float *pdf)
+ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         float randu, float randv,
+                                         BsdfEval *bsdf_eval,
+                                         float3 *omega_in,
+                                         differential3 *domega_in,
+                                         float *pdf)
 {
 	int sampled = 0;
 
@@ -991,8 +1009,12 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg, const ShaderData *
 
 /* Volume Evaluation */
 
-ccl_device void shader_eval_volume(KernelGlobals *kg, ShaderData *sd,
-	PathState *state, VolumeStack *stack, int path_flag, ShaderContext ctx)
+ccl_device_inline void shader_eval_volume(KernelGlobals *kg,
+                                          ShaderData *sd,
+                                          PathState *state,
+                                          VolumeStack *stack,
+                                          int path_flag,
+                                          ShaderContext ctx)
 {
 	/* reset closures once at the start, we will be accumulating the closures
 	 * for all volumes in the stack into a single array of closures */
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 888df7f06fd..f404666177a 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -85,7 +85,11 @@ ccl_device ShaderClosure *subsurface_scatter_pick_closure(KernelGlobals *kg, Sha
 	return NULL;
 }
 
-ccl_device float3 subsurface_scatter_eval(ShaderData *sd, ShaderClosure *sc, float disk_r, float r, bool all)
+ccl_device_inline float3 subsurface_scatter_eval(ShaderData *sd,
+                                                 ShaderClosure *sc,
+                                                 float disk_r,
+                                                 float r,
+                                                 bool all)
 {
 #ifdef BSSRDF_MULTI_EVAL
 	/* this is the veach one-sample model with balance heuristic, some pdf
@@ -231,7 +235,7 @@ ccl_device void subsurface_color_bump_blur(KernelGlobals *kg,
 /* Subsurface scattering step, from a point on the surface to other
  * nearby points on the same object.
  */
-ccl_device int subsurface_scatter_multi_intersect(
+ccl_device_inline int subsurface_scatter_multi_intersect(
         KernelGlobals *kg,
         SubsurfaceIntersection* ss_isect,
         ShaderData *sd,
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index 01c87e6d89d..9dafed9afd1 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -36,7 +36,11 @@ typedef struct VolumeShaderCoefficients {
 } VolumeShaderCoefficients;
 
 /* evaluate shader to get extinction coefficient at P */
-ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, float3 *extinction)
+ccl_device_inline bool volume_shader_extinction_sample(KernelGlobals *kg,
+                                                       ShaderData *sd,
+                                                       PathState *state,
+                                                       float3 P,
+                                                       float3 *extinction)
 {
 	sd->P = P;
 	shader_eval_volume(kg, sd, state, state->volume_stack, PATH_RAY_SHADOW, SHADER_CONTEXT_SHADOW);
@@ -58,7 +62,11 @@ ccl_device bool volume_shader_extinction_sample(KernelGlobals *kg, ShaderData *s
 }
 
 /* evaluate shader to get absorption, scattering and emission at P */
-ccl_device bool volume_shader_sample(KernelGlobals *kg, ShaderData *sd, PathState *state, float3 P, VolumeShaderCoefficients *coeff)
+ccl_device_inline bool volume_shader_sample(KernelGlobals *kg,
+                                            ShaderData *sd,
+                                            PathState *state,
+                                            float3 P,
+                                            VolumeShaderCoefficients *coeff)
 {
 	sd->P = P;
 	shader_eval_volume(kg, sd, state, state->volume_stack, state->flag, SHADER_CONTEXT_VOLUME);
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index 6c557684099..bd6013e9205 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -87,7 +87,12 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
 	}
 }
 
-ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else  /* CUDA: keep this node out of line. */
+ccl_device_noinline
+#endif  /* __KERNEL_CUDA__ */
+void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
 	NodeAttributeType type, mesh_type;
 	AttributeElement elem;
@@ -123,7 +128,15 @@ ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *
 	}
 }
 
-ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+#ifndef __KERNEL_CUDA__
+ccl_device
+#else  /* CUDA: keep this node out of line. */
+ccl_device_noinline
+#endif  /* __KERNEL_CUDA__ */
+void svm_node_attr_bump_dy(KernelGlobals *kg,
+                           ShaderData *sd,
+                           float *stack,
+                           uint4 node)
 {
 	NodeAttributeType type, mesh_type;
 	AttributeElement elem;
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index bb06254c3a9..7d512f7ff4d 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -18,7 +18,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Geometry Node */
 
-ccl_device void svm_node_geometry(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device_inline void svm_node_geometry(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         float *stack,
+                                         uint type,
+                                         uint out_offset)
 {
 	float3 data;
 
@@ -94,7 +98,11 @@ ccl_device void svm_node_object_info(KernelGlobals *kg, ShaderData *sd, float *s
 
 /* Particle Info */
 
-ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device void svm_node_particle_info(KernelGlobals *kg,
+                                       ShaderData *sd,
+                                       float *stack,
+                                       uint type,
+                                       uint out_offset)
 {
 	switch(type) {
 		case NODE_INFO_PAR_INDEX: {
@@ -146,7 +154,11 @@ ccl_device void svm_node_particle_info(KernelGlobals *kg, ShaderData *sd, float
 
 /* Hair Info */
 
-ccl_device void svm_node_hair_info(KernelGlobals *kg, ShaderData *sd, float *stack, uint type, uint out_offset)
+ccl_device void svm_node_hair_info(KernelGlobals *kg,
+                                   ShaderData *sd,
+                                   float *stack,
+                                   uint type,
+                                   uint out_offset)
 {
 	float data;
 	float3 data3;
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index f959d90f309..368740f64c7 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -21,12 +21,12 @@ CCL_NAMESPACE_BEGIN
 
 /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
 
-ccl_device float4 rgb_ramp_lookup(KernelGlobals *kg,
-                                  int offset,
-                                  float f,
-                                  bool interpolate,
-                                  bool extrapolate,
-                                  int table_size)
+ccl_device_inline float4 rgb_ramp_lookup(KernelGlobals *kg,
+                                         int offset,
+                                         float f,
+                                         bool interpolate,
+                                         bool extrapolate,
+                                         int table_size)
 {
 	if((f < 0.0f || f > 1.0f) && extrapolate) {
 		float4 t0, dy;
diff --git a/intern/cycles/kernel/svm/svm_ramp_util.h b/intern/cycles/kernel/svm/svm_ramp_util.h
index 495d98cf250..9f2ce1276f9 100644
--- a/intern/cycles/kernel/svm/svm_ramp_util.h
+++ b/intern/cycles/kernel/svm/svm_ramp_util.h
@@ -21,11 +21,11 @@ CCL_NAMESPACE_BEGIN
 
 /* NOTE: svm_ramp.h, svm_ramp_util.h and node_ramp_util.h must stay consistent */
 
-ccl_device float3 rgb_ramp_lookup(const float3 *ramp,
-                                  float f,
-                                  bool interpolate,
-                                  bool extrapolate,
-                                  int table_size)
+ccl_device_inline float3 rgb_ramp_lookup(const float3 *ramp,
+                                         float f,
+                                         bool interpolate,
+                                         bool extrapolate,
+                                         int table_size)
 {
 	if ((f < 0.0f || f > 1.0f) && extrapolate) {
 		float3 t0, dy;
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 276b6f26f5e..b39d6a3e009 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -99,12 +99,12 @@ ccl_device void svm_node_tex_coord(KernelGlobals *kg,
 	stack_store_float3(stack, out_offset, data);
 }
 
-ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  int path_flag,
-                                                  float *stack,
-                                                  uint4 node,
-                                                  int *offset)
+ccl_device void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
+                                           ShaderData *sd,
+                                           int path_flag,
+                                           float *stack,
+                                           uint4 node,
+                                           int *offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
 	float3 data;
@@ -184,12 +184,12 @@ ccl_device_inline void svm_node_tex_coord_bump_dx(KernelGlobals *kg,
 #endif
 }
 
-ccl_device_inline void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
-                                                  ShaderData *sd,
-                                                  int path_flag,
-                                                  float *stack,
-                                                  uint4 node,
-                                                  int *offset)
+ccl_device void svm_node_tex_coord_bump_dy(KernelGlobals *kg,
+                                           ShaderData *sd,
+                                           int path_flag,
+                                           float *stack,
+                                           uint4 node,
+                                           int *offset)
 {
 #ifdef __RAY_DIFFERENTIALS__
 	float3 data;
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 30ccd523add..6eed9bc1a99 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -34,11 +34,11 @@ CCL_NAMESPACE_BEGIN
 
 /* Wireframe Node */
 
-ccl_device float wireframe(KernelGlobals *kg,
-                           ShaderData *sd,
-                           float size,
-                           int pixel_size,
-                           float3 *P)
+ccl_device_inline float wireframe(KernelGlobals *kg,
+                                  ShaderData *sd,
+                                  float size,
+                                  int pixel_size,
+                                  float3 *P)
 {
 #ifdef __HAIR__
 	if(ccl_fetch(sd, prim) != PRIM_NONE && ccl_fetch(sd, type) & PRIMITIVE_ALL_TRIANGLE)
diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp
index 073bafce98d..200a4c497cd 100644
--- a/intern/cycles/render/constant_fold.cpp
+++ b/intern/cycles/render/constant_fold.cpp
@@ -40,7 +40,8 @@ bool ConstantFolder::all_inputs_constant() const
 
 void ConstantFolder::make_constant(float value) const
 {
-	VLOG(1) << "Replacing " << node->name << " with constant " << value << ".";
+	VLOG(1) << "Folding " << node->name << "::" << output->name() << " to constant (" << value << ").";
+
 	foreach(ShaderInput *sock, output->links) {
 		sock->set(value);
 	}
@@ -50,6 +51,8 @@ void ConstantFolder::make_constant(float value) const
 
 void ConstantFolder::make_constant(float3 value) const
 {
+	VLOG(1) << "Folding " << node->name << "::" << output->name() << " to constant " << value << ".";
+
 	foreach(ShaderInput *sock, output->links) {
 		sock->set(value);
 	}
@@ -90,6 +93,8 @@ void ConstantFolder::bypass(ShaderOutput *new_output) const
 {
 	assert(new_output);
 
+	VLOG(1) << "Folding " << node->name << "::" << output->name() << " to socket " << new_output->parent->name << "::" << new_output->name() << ".";
+
 	/* Remove all outgoing links from socket and connect them to new_output instead.
 	 * The graph->relink method affects node inputs, so it's not safe to use in constant
 	 * folding if the node has multiple outputs and will thus be folded multiple times. */
@@ -105,6 +110,9 @@ void ConstantFolder::bypass(ShaderOutput *new_output) const
 void ConstantFolder::discard() const
 {
 	assert(output->type() == SocketType::CLOSURE);
+
+	VLOG(1) << "Discarding closure " << node->name << ".";
+
 	graph->disconnect(output);
 }
 
diff --git a/intern/cycles/render/graph.cpp b/intern/cycles/render/graph.cpp
index 3eeb7ffc2bc..6e795ef896a 100644
--- a/intern/cycles/render/graph.cpp
+++ b/intern/cycles/render/graph.cpp
@@ -24,6 +24,7 @@
 #include "util_debug.h"
 #include "util_foreach.h"
 #include "util_queue.h"
+#include "util_logging.h"
 
 CCL_NAMESPACE_BEGIN
 
@@ -543,6 +544,7 @@ void ShaderGraph::deduplicate_nodes()
 	ShaderNodeSet scheduled, done;
 	map<ustring, ShaderNodeSet> candidates;
 	queue<ShaderNode*> traverse_queue;
+	int num_deduplicated = 0;
 
 	/* Schedule nodes which doesn't have any dependencies. */
 	foreach(ShaderNode *node, nodes) {
@@ -557,8 +559,10 @@ void ShaderGraph::deduplicate_nodes()
 		traverse_queue.pop();
 		done.insert(node);
 		/* Schedule the nodes which were depending on the current node. */
+		bool has_output_links = false;
 		foreach(ShaderOutput *output, node->outputs) {
 			foreach(ShaderInput *input, output->links) {
+				has_output_links = true;
 				if(scheduled.find(input->parent) != scheduled.end()) {
 					/* Node might not be optimized yet but scheduled already
 					 * by other dependencies. No need to re-schedule it.
@@ -572,6 +576,10 @@ void ShaderGraph::deduplicate_nodes()
 				}
 			}
 		}
+		/* Only need to care about nodes that are actually used */
+		if(!has_output_links) {
+			continue;
+		}
 		/* Try to merge this node with another one. */
 		ShaderNode *merge_with = NULL;
 		foreach(ShaderNode *other_node, candidates[node->type->name]) {
@@ -585,11 +593,16 @@ void ShaderGraph::deduplicate_nodes()
 			for(int i = 0; i < node->outputs.size(); ++i) {
 				relink(node, node->outputs[i], merge_with->outputs[i]);
 			}
+			num_deduplicated++;
 		}
 		else {
 			candidates[node->type->name].insert(node);
 		}
 	}
+
+	if(num_deduplicated > 0) {
+		VLOG(1) << "Deduplicated " << num_deduplicated << " nodes.";
+	}
 }
 
 void ShaderGraph::break_cycles(ShaderNode *node, vector<bool>& visited, vector<bool>& on_stack)
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index ae6042cef34..4cd77f8c6e1 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -238,14 +238,19 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
 
 	/* count */
 	size_t num_lights = 0;
+	size_t num_portals = 0;
 	size_t num_background_lights = 0;
 	size_t num_triangles = 0;
 
 	bool background_mis = false;
 
 	foreach(Light *light, scene->lights) {
-		if(light->is_enabled)
+		if(light->is_enabled) {
 			num_lights++;
+		}
+		if(light->is_portal) {
+			num_portals++;
+		}
 	}
 
 	foreach(Object *object, scene->objects) {
@@ -435,9 +440,9 @@ void LightManager::device_update_distribution(Device *device, DeviceScene *dscen
 		device->tex_alloc("__light_distribution", dscene->light_distribution);
 
 		/* Portals */
-		if(num_background_lights > 0 && light_index != num_lights) {
+		if(num_portals > 0) {
 			kintegrator->portal_offset = light_index;
-			kintegrator->num_portals = num_lights - light_index;
+			kintegrator->num_portals = num_portals;
 			kintegrator->portal_pdf = background_mis? 0.5f: 1.0f;
 		}
 		else {
@@ -601,10 +606,10 @@ void LightManager::device_update_points(Device *device,
                                         Scene *scene)
 {
 	int num_scene_lights = scene->lights.size();
-	int num_lights = 0;
 
+	int num_lights = 0;
 	foreach(Light *light, scene->lights) {
-		if(light->is_enabled) {
+		if(light->is_enabled || light->is_portal) {
 			num_lights++;
 		}
 	}
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index c377e899936..7caa801671d 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -1689,6 +1689,19 @@ void ConvertNode::constant_fold(const ConstantFolder& folder)
 			}
 		}
 	}
+	else {
+		ShaderInput *in = inputs[0];
+		ShaderNode *prev = in->link->parent;
+
+		/* no-op conversion of A to B to A */
+		if(prev->type == node_types[to][from]) {
+			ShaderInput *prev_in = prev->inputs[0];
+
+			if(SocketType::is_float3(from) && (to == SocketType::FLOAT || SocketType::is_float3(to)) && prev_in->link) {
+				folder.bypass(prev_in->link);
+			}
+		}
+	}
 }
 
 void ConvertNode::compile(SVMCompiler& compiler)
diff --git a/intern/cycles/test/CMakeLists.txt b/intern/cycles/test/CMakeLists.txt
index a6bcf980df2..80fe893826a 100644
--- a/intern/cycles/test/CMakeLists.txt
+++ b/intern/cycles/test/CMakeLists.txt
@@ -26,12 +26,12 @@ set(ALL_CYCLES_LIBRARIES
 	cycles_device
 	cycles_bvh
 	cycles_graph
-	cycles_kernel_osl
 	cycles_util
 	${OPENIMAGEIO_LIBRARIES}
 )
 if(WITH_CYCLES_OSL)
 	list(APPEND ALL_CYCLES_LIBRARIES
+		cycles_kernel_osl
 		${OSL_LIBRARIES}
 		${LLVM_LIBRARIES}
 	)
diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp
index 4566894d490..633e517ce9f 100644
--- a/intern/cycles/test/render_graph_finalize_test.cpp
+++ b/intern/cycles/test/render_graph_finalize_test.cpp
@@ -40,6 +40,7 @@ public:
 	  : name_(name)
 	{
 		node_ = new T();
+		node_->name = name;
 	}
 
 	const string& name() const {
@@ -59,6 +60,13 @@ public:
 		return *this;
 	}
 
+	template<typename T2, typename V>
+	ShaderNodeBuilder& set(V T2::*pfield, V value)
+	{
+		static_cast<T*>(node_)->*pfield = value;
+		return *this;
+	}
+
 protected:
 	string name_;
 	ShaderNode *node_;
@@ -69,6 +77,7 @@ public:
 	explicit ShaderGraphBuilder(ShaderGraph *graph)
 	  : graph_(graph)
 	{
+		node_map_["Output"] = graph->output();
 	}
 
 	ShaderNode *find_node(const string& name)
@@ -110,6 +119,35 @@ public:
 		return *this;
 	}
 
+	/* Common input/output boilerplate. */
+	ShaderGraphBuilder& add_attribute(const string &name)
+	{
+		return (*this)
+			.add_node(ShaderNodeBuilder<AttributeNode>(name)
+			          .set(&AttributeNode::attribute, ustring(name)));
+	}
+
+	ShaderGraphBuilder& output_closure(const string& from)
+	{
+		return (*this).add_connection(from, "Output::Surface");
+	}
+
+	ShaderGraphBuilder& output_color(const string& from)
+	{
+		return (*this)
+			.add_node(ShaderNodeBuilder<EmissionNode>("EmissionNode"))
+			.add_connection(from, "EmissionNode::Color")
+			.output_closure("EmissionNode::Emission");
+	}
+
+	ShaderGraphBuilder& output_value(const string& from)
+	{
+		return (*this)
+			.add_node(ShaderNodeBuilder<EmissionNode>("EmissionNode"))
+			.add_connection(from, "EmissionNode::Strength")
+			.output_closure("EmissionNode::Emission");
+	}
+
 protected:
 	ShaderGraph *graph_;
 	map<string, ShaderNode *> node_map_;
@@ -127,21 +165,1366 @@ protected:
 	ShaderGraph graph; \
 	ShaderGraphBuilder builder(&graph); \
 
+#define EXPECT_ANY_MESSAGE(log) \
+	EXPECT_CALL(log, Log(_, _, _)).Times(AnyNumber()); \
+
+#define CORRECT_INFO_MESSAGE(log, message) \
+	EXPECT_CALL(log, Log(google::INFO, _, HasSubstr(message)));
+
+#define INVALID_INFO_MESSAGE(log, message) \
+	EXPECT_CALL(log, Log(google::INFO, _, HasSubstr(message))).Times(0);
+
+/*
+ * Test deduplication of nodes that have inputs, some of them folded.
+ */
+TEST(render_graph, deduplicate_deep)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Value1::Value to constant (0.8).");
+	CORRECT_INFO_MESSAGE(log, "Folding Value2::Value to constant (0.8).");
+	CORRECT_INFO_MESSAGE(log, "Deduplicated 2 nodes.");
+
+	builder
+		.add_node(ShaderNodeBuilder<GeometryNode>("Geometry1"))
+		.add_node(ShaderNodeBuilder<GeometryNode>("Geometry2"))
+		.add_node(ShaderNodeBuilder<ValueNode>("Value1")
+		          .set(&ValueNode::value, 0.8f))
+		.add_node(ShaderNodeBuilder<ValueNode>("Value2")
+		          .set(&ValueNode::value, 0.8f))
+		.add_node(ShaderNodeBuilder<NoiseTextureNode>("Noise1"))
+		.add_node(ShaderNodeBuilder<NoiseTextureNode>("Noise2"))
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_BLEND)
+		          .set("Fac", 0.5f))
+		.add_connection("Geometry1::Parametric", "Noise1::Vector")
+		.add_connection("Value1::Value", "Noise1::Scale")
+		.add_connection("Noise1::Color", "Mix::Color1")
+		.add_connection("Geometry2::Parametric", "Noise2::Vector")
+		.add_connection("Value2::Value", "Noise2::Scale")
+		.add_connection("Noise2::Color", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+
+	EXPECT_EQ(graph.nodes.size(), 5);
+}
+
+/*
+ * Test RGB to BW node.
+ */
 TEST(render_graph, constant_fold_rgb_to_bw)
 {
 	DEFINE_COMMON_VARIABLES(builder, log);
 
-	EXPECT_CALL(log, Log(_, _, _)).Times(AnyNumber());
-	EXPECT_CALL(log, Log(google::INFO, _,
-	                     HasSubstr("Replacing rgb_to_bw with constant 0.8.")));
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding RGBToBWNodeNode::Val to constant (0.8).");
+	CORRECT_INFO_MESSAGE(log, "Folding convert_float_to_color::value_color to constant (0.8, 0.8, 0.8).");
 
 	builder
-		.add_node(ShaderNodeBuilder<OutputNode>("OutputNode"))
-		.add_node(ShaderNodeBuilder<EmissionNode>("EmissionNode"))
 		.add_node(ShaderNodeBuilder<RGBToBWNode>("RGBToBWNodeNode")
 		          .set("Color", make_float3(0.8f, 0.8f, 0.8f)))
-		.add_connection("RGBToBWNodeNode::Val", "EmissionNode::Color")
-		.add_connection("EmissionNode::Emission", "OutputNode::Surface");
+		.output_color("RGBToBWNodeNode::Val");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - folding of Emission nodes that don't emit anything.
+ */
+TEST(render_graph, constant_fold_emission1)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Discarding closure Emission.");
+
+	builder
+		.add_node(ShaderNodeBuilder<EmissionNode>("Emission")
+		          .set("Color", make_float3(0.0f, 0.0f, 0.0f)))
+		.output_closure("Emission::Emission");
+
+	graph.finalize(&scene);
+}
+
+TEST(render_graph, constant_fold_emission2)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Discarding closure Emission.");
+
+	builder
+		.add_node(ShaderNodeBuilder<EmissionNode>("Emission")
+		          .set("Strength", 0.0f))
+		.output_closure("Emission::Emission");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - folding of Background nodes that don't emit anything.
+ */
+TEST(render_graph, constant_fold_background1)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Discarding closure Background.");
+
+	builder
+		.add_node(ShaderNodeBuilder<BackgroundNode>("Background")
+		          .set("Color", make_float3(0.0f, 0.0f, 0.0f)))
+		.output_closure("Background::Background");
+
+	graph.finalize(&scene);
+}
+
+TEST(render_graph, constant_fold_background2)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Discarding closure Background.");
+
+	builder
+		.add_node(ShaderNodeBuilder<BackgroundNode>("Background")
+		          .set("Strength", 0.0f))
+		.output_closure("Background::Background");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Add Closure with only one input.
+ */
+TEST(render_graph, constant_fold_shader_add)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding AddClosure1::Closure to socket Diffuse::BSDF.");
+	CORRECT_INFO_MESSAGE(log, "Folding AddClosure2::Closure to socket Diffuse::BSDF.");
+	INVALID_INFO_MESSAGE(log, "Folding AddClosure3");
+
+	builder
+		.add_node(ShaderNodeBuilder<DiffuseBsdfNode>("Diffuse"))
+		.add_node(ShaderNodeBuilder<AddClosureNode>("AddClosure1"))
+		.add_node(ShaderNodeBuilder<AddClosureNode>("AddClosure2"))
+		.add_node(ShaderNodeBuilder<AddClosureNode>("AddClosure3"))
+		.add_connection("Diffuse::BSDF", "AddClosure1::Closure1")
+		.add_connection("Diffuse::BSDF", "AddClosure2::Closure2")
+		.add_connection("AddClosure1::Closure", "AddClosure3::Closure1")
+		.add_connection("AddClosure2::Closure", "AddClosure3::Closure2")
+		.output_closure("AddClosure3::Closure");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Mix Closure with 0 or 1 fac.
+ *  - Folding of Mix Closure with both inputs folded to the same node.
+ */
+TEST(render_graph, constant_fold_shader_mix)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding MixClosure1::Closure to socket Diffuse::BSDF.");
+	CORRECT_INFO_MESSAGE(log, "Folding MixClosure2::Closure to socket Diffuse::BSDF.");
+	CORRECT_INFO_MESSAGE(log, "Folding MixClosure3::Closure to socket Diffuse::BSDF.");
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<DiffuseBsdfNode>("Diffuse"))
+		/* choose left */
+		.add_node(ShaderNodeBuilder<MixClosureNode>("MixClosure1")
+		          .set("Fac", 0.0f))
+		.add_connection("Diffuse::BSDF", "MixClosure1::Closure1")
+		/* choose right */
+		.add_node(ShaderNodeBuilder<MixClosureNode>("MixClosure2")
+		          .set("Fac", 1.0f))
+		.add_connection("Diffuse::BSDF", "MixClosure2::Closure2")
+		/* both inputs folded the same */
+		.add_node(ShaderNodeBuilder<MixClosureNode>("MixClosure3"))
+		.add_connection("Attribute::Fac", "MixClosure3::Fac")
+		.add_connection("MixClosure1::Closure", "MixClosure3::Closure1")
+		.add_connection("MixClosure2::Closure", "MixClosure3::Closure2")
+		.output_closure("MixClosure3::Closure");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Invert with all constant inputs.
+ */
+TEST(render_graph, constant_fold_invert)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Invert::Color to constant (0.68, 0.5, 0.32).");
+
+	builder
+		.add_node(ShaderNodeBuilder<InvertNode>("Invert")
+		          .set("Fac", 0.8f)
+		          .set("Color", make_float3(0.2f, 0.5f, 0.8f)))
+		.output_color("Invert::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Invert with zero Fac.
+ */
+TEST(render_graph, constant_fold_invert_fac_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Invert::Color to socket Attribute::Color.");
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<InvertNode>("Invert")
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute::Color", "Invert::Color")
+		.output_color("Invert::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of MixRGB Add with all constant inputs (clamp false).
+ */
+TEST(render_graph, constant_fold_mix_add)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding MixAdd::Color to constant (0.62, 1.14, 1.42).");
+
+	builder
+		.add_node(ShaderNodeBuilder<MixNode>("MixAdd")
+		          .set(&MixNode::type, NODE_MIX_ADD)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Fac", 0.8f)
+		          .set("Color1", make_float3(0.3, 0.5, 0.7))
+		          .set("Color2", make_float3(0.4, 0.8, 0.9)))
+		.output_color("MixAdd::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of MixRGB Add with all constant inputs (clamp true).
+ */
+TEST(render_graph, constant_fold_mix_add_clamp)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding MixAdd::Color to constant (0.62, 1, 1).");
+
+	builder
+		.add_node(ShaderNodeBuilder<MixNode>("MixAdd")
+		          .set(&MixNode::type, NODE_MIX_ADD)
+		          .set(&MixNode::use_clamp, true)
+		          .set("Fac", 0.8f)
+		          .set("Color1", make_float3(0.3, 0.5, 0.7))
+		          .set("Color2", make_float3(0.4, 0.8, 0.9)))
+		.output_color("MixAdd::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - No folding on fac 0 for dodge.
+ */
+TEST(render_graph, constant_fold_part_mix_dodge_no_fac_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	INVALID_INFO_MESSAGE(log, "Folding ");
+
+	builder
+		.add_attribute("Attribute1")
+		.add_attribute("Attribute2")
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_DODGE)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute1::Color", "Mix::Color1")
+		.add_connection("Attribute2::Color", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - No folding on fac 0 for light.
+ */
+TEST(render_graph, constant_fold_part_mix_light_no_fac_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	INVALID_INFO_MESSAGE(log, "Folding ");
+
+	builder
+		.add_attribute("Attribute1")
+		.add_attribute("Attribute2")
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_LIGHT)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute1::Color", "Mix::Color1")
+		.add_connection("Attribute2::Color", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - No folding on fac 0 for burn.
+ */
+TEST(render_graph, constant_fold_part_mix_burn_no_fac_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	INVALID_INFO_MESSAGE(log, "Folding ");
+
+	builder
+		.add_attribute("Attribute1")
+		.add_attribute("Attribute2")
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_BURN)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute1::Color", "Mix::Color1")
+		.add_connection("Attribute2::Color", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - No folding on fac 0 for clamped blend.
+ */
+TEST(render_graph, constant_fold_part_mix_blend_clamped_no_fac_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	INVALID_INFO_MESSAGE(log, "Folding ");
+
+	builder
+		.add_attribute("Attribute1")
+		.add_attribute("Attribute2")
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_BLEND)
+		          .set(&MixNode::use_clamp, true)
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute1::Color", "Mix::Color1")
+		.add_connection("Attribute2::Color", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Mix with 0 or 1 Fac.
+ *  - Folding of Mix with both inputs folded to the same node.
+ */
+TEST(render_graph, constant_fold_part_mix_blend)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding MixBlend1::Color to socket Attribute1::Color.");
+	CORRECT_INFO_MESSAGE(log, "Folding MixBlend2::Color to socket Attribute1::Color.");
+	CORRECT_INFO_MESSAGE(log, "Folding MixBlend3::Color to socket Attribute1::Color.");
+
+	builder
+		.add_attribute("Attribute1")
+		.add_attribute("Attribute2")
+		/* choose left */
+		.add_node(ShaderNodeBuilder<MixNode>("MixBlend1")
+		          .set(&MixNode::type, NODE_MIX_BLEND)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute1::Color", "MixBlend1::Color1")
+		.add_connection("Attribute2::Color", "MixBlend1::Color2")
+		/* choose right */
+		.add_node(ShaderNodeBuilder<MixNode>("MixBlend2")
+		          .set(&MixNode::type, NODE_MIX_BLEND)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Fac", 1.0f))
+		.add_connection("Attribute1::Color", "MixBlend2::Color2")
+		.add_connection("Attribute2::Color", "MixBlend2::Color1")
+		/* both inputs folded to Attribute1 */
+		.add_node(ShaderNodeBuilder<MixNode>("MixBlend3")
+		          .set(&MixNode::type, NODE_MIX_BLEND)
+		          .set(&MixNode::use_clamp, false))
+		.add_connection("Attribute1::Fac", "MixBlend3::Fac")
+		.add_connection("MixBlend1::Color", "MixBlend3::Color1")
+		.add_connection("MixBlend2::Color", "MixBlend3::Color2")
+		.output_color("MixBlend3::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - NOT folding of MixRGB Sub with the same inputs and fac NOT 1.
+ */
+TEST(render_graph, constant_fold_part_mix_sub_same_fac_bad)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	INVALID_INFO_MESSAGE(log, "Folding Mix::");
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_SUB)
+		          .set(&MixNode::use_clamp, true)
+		          .set("Fac", 0.5f))
+		.add_connection("Attribute::Color", "Mix::Color1")
+		.add_connection("Attribute::Color", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of MixRGB Sub with the same inputs and fac 1.
+ */
+TEST(render_graph, constant_fold_part_mix_sub_same_fac_1)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Mix::Color to constant (0, 0, 0).");
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_SUB)
+		          .set(&MixNode::use_clamp, true)
+		          .set("Fac", 1.0f))
+		.add_connection("Attribute::Color", "Mix::Color1")
+		.add_connection("Attribute::Color", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Graph for testing partial folds of MixRGB with one constant argument.
+ * Includes 4 tests: constant on each side with fac either unknown or 1.
+ */
+static void build_mix_partial_test_graph(ShaderGraphBuilder &builder, NodeMix type, float3 constval)
+{
+	builder
+		.add_attribute("Attribute")
+		/* constant on the left */
+		.add_node(ShaderNodeBuilder<MixNode>("Mix_Cx_Fx")
+		          .set(&MixNode::type, type)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Color1", constval))
+		.add_node(ShaderNodeBuilder<MixNode>("Mix_Cx_F1")
+		          .set(&MixNode::type, type)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Color1", constval)
+		          .set("Fac", 1.0f))
+		.add_connection("Attribute::Fac", "Mix_Cx_Fx::Fac")
+		.add_connection("Attribute::Color", "Mix_Cx_Fx::Color2")
+		.add_connection("Attribute::Color", "Mix_Cx_F1::Color2")
+		/* constant on the right */
+		.add_node(ShaderNodeBuilder<MixNode>("Mix_xC_Fx")
+		          .set(&MixNode::type, type)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Color2", constval))
+		.add_node(ShaderNodeBuilder<MixNode>("Mix_xC_F1")
+		          .set(&MixNode::type, type)
+		          .set(&MixNode::use_clamp, false)
+		          .set("Color2", constval)
+		          .set("Fac", 1.0f))
+		.add_connection("Attribute::Fac", "Mix_xC_Fx::Fac")
+		.add_connection("Attribute::Color", "Mix_xC_Fx::Color1")
+		.add_connection("Attribute::Color", "Mix_xC_F1::Color1")
+		/* results of actual tests simply added up to connect to output */
+		.add_node(ShaderNodeBuilder<MixNode>("Out12")
+		          .set(&MixNode::type, NODE_MIX_ADD)
+		          .set(&MixNode::use_clamp, true)
+		          .set("Fac", 1.0f))
+		.add_node(ShaderNodeBuilder<MixNode>("Out34")
+		          .set(&MixNode::type, NODE_MIX_ADD)
+		          .set(&MixNode::use_clamp, true)
+		          .set("Fac", 1.0f))
+		.add_node(ShaderNodeBuilder<MixNode>("Out1234")
+		          .set(&MixNode::type, NODE_MIX_ADD)
+		          .set(&MixNode::use_clamp, true)
+		          .set("Fac", 1.0f))
+		.add_connection("Mix_Cx_Fx::Color", "Out12::Color1")
+		.add_connection("Mix_Cx_F1::Color", "Out12::Color2")
+		.add_connection("Mix_xC_Fx::Color", "Out34::Color1")
+		.add_connection("Mix_xC_F1::Color", "Out34::Color2")
+		.add_connection("Out12::Color", "Out1234::Color1")
+		.add_connection("Out34::Color", "Out1234::Color2")
+		.output_color("Out1234::Color");
+}
+
+/*
+ * Tests: partial folding for RGB Add with known 0.
+ */
+TEST(render_graph, constant_fold_part_mix_add_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* 0 + X (fac 1) == X */
+	INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color to socket Attribute::Color.");
+	/* X + 0 (fac ?) == X */
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color to socket Attribute::Color.");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to socket Attribute::Color.");
+	INVALID_INFO_MESSAGE(log, "Folding Out");
+
+	build_mix_partial_test_graph(builder, NODE_MIX_ADD, make_float3(0, 0, 0));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for RGB Sub with known 0.
+ */
+TEST(render_graph, constant_fold_part_mix_sub_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color");
+	INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color");
+	/* X - 0 (fac ?) == X */
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color to socket Attribute::Color.");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to socket Attribute::Color.");
+	INVALID_INFO_MESSAGE(log, "Folding Out");
+
+	build_mix_partial_test_graph(builder, NODE_MIX_SUB, make_float3(0, 0, 0));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for RGB Mul with known 1.
+ */
+TEST(render_graph, constant_fold_part_mix_mul_1)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* 1 * X (fac 1) == X */
+	INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color to socket Attribute::Color.");
+	/* X * 1 (fac ?) == X */
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color to socket Attribute::Color.");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to socket Attribute::Color.");
+	INVALID_INFO_MESSAGE(log, "Folding Out");
+
+	build_mix_partial_test_graph(builder, NODE_MIX_MUL, make_float3(1, 1, 1));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for RGB Div with known 1.
+ */
+TEST(render_graph, constant_fold_part_mix_div_1)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color");
+	INVALID_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color");
+	/* X / 1 (fac ?) == X */
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color to socket Attribute::Color.");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to socket Attribute::Color.");
+	INVALID_INFO_MESSAGE(log, "Folding Out");
+
+	build_mix_partial_test_graph(builder, NODE_MIX_DIV, make_float3(1, 1, 1));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for RGB Mul with known 0.
+ */
+TEST(render_graph, constant_fold_part_mix_mul_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* 0 * ? (fac ?) == 0 */
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color to constant (0, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color to constant (0, 0, 0).");
+	/* ? * 0 (fac 1) == 0 */
+	INVALID_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color to constant (0, 0, 0).");
+
+	CORRECT_INFO_MESSAGE(log, "Folding Out12::Color to constant (0, 0, 0).");
+	INVALID_INFO_MESSAGE(log, "Folding Out1234");
+
+	build_mix_partial_test_graph(builder, NODE_MIX_MUL, make_float3(0, 0, 0));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for RGB Div with known 0.
+ */
+TEST(render_graph, constant_fold_part_mix_div_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* 0 / ? (fac ?) == 0 */
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_Fx::Color to constant (0, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Folding Mix_Cx_F1::Color to constant (0, 0, 0).");
+	INVALID_INFO_MESSAGE(log, "Folding Mix_xC_Fx::Color");
+	INVALID_INFO_MESSAGE(log, "Folding Mix_xC_F1::Color");
+
+	CORRECT_INFO_MESSAGE(log, "Folding Out12::Color to constant (0, 0, 0).");
+	INVALID_INFO_MESSAGE(log, "Folding Out1234");
+
+	build_mix_partial_test_graph(builder, NODE_MIX_DIV, make_float3(0, 0, 0));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Separate/Combine RGB with all constant inputs.
+ */
+TEST(render_graph, constant_fold_separate_combine_rgb)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateRGB::R to constant (0.3).");
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateRGB::G to constant (0.5).");
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateRGB::B to constant (0.7).");
+	CORRECT_INFO_MESSAGE(log, "Folding CombineRGB::Image to constant (0.3, 0.5, 0.7).");
+
+	builder
+		.add_node(ShaderNodeBuilder<SeparateRGBNode>("SeparateRGB")
+		          .set("Image", make_float3(0.3f, 0.5f, 0.7f)))
+		.add_node(ShaderNodeBuilder<CombineRGBNode>("CombineRGB"))
+		.add_connection("SeparateRGB::R", "CombineRGB::R")
+		.add_connection("SeparateRGB::G", "CombineRGB::G")
+		.add_connection("SeparateRGB::B", "CombineRGB::B")
+		.output_color("CombineRGB::Image");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Separate/Combine XYZ with all constant inputs.
+ */
+TEST(render_graph, constant_fold_separate_combine_xyz)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateXYZ::X to constant (0.3).");
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateXYZ::Y to constant (0.5).");
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateXYZ::Z to constant (0.7).");
+	CORRECT_INFO_MESSAGE(log, "Folding CombineXYZ::Vector to constant (0.3, 0.5, 0.7).");
+	CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_color::value_color to constant (0.3, 0.5, 0.7).");
+
+	builder
+		.add_node(ShaderNodeBuilder<SeparateXYZNode>("SeparateXYZ")
+		          .set("Vector", make_float3(0.3f, 0.5f, 0.7f)))
+		.add_node(ShaderNodeBuilder<CombineXYZNode>("CombineXYZ"))
+		.add_connection("SeparateXYZ::X", "CombineXYZ::X")
+		.add_connection("SeparateXYZ::Y", "CombineXYZ::Y")
+		.add_connection("SeparateXYZ::Z", "CombineXYZ::Z")
+		.output_color("CombineXYZ::Vector");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Separate/Combine HSV with all constant inputs.
+ */
+TEST(render_graph, constant_fold_separate_combine_hsv)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateHSV::H to constant (0.583333).");
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateHSV::S to constant (0.571429).");
+	CORRECT_INFO_MESSAGE(log, "Folding SeparateHSV::V to constant (0.7).");
+	CORRECT_INFO_MESSAGE(log, "Folding CombineHSV::Color to constant (0.3, 0.5, 0.7).");
+
+	builder
+		.add_node(ShaderNodeBuilder<SeparateHSVNode>("SeparateHSV")
+		          .set("Color", make_float3(0.3f, 0.5f, 0.7f)))
+		.add_node(ShaderNodeBuilder<CombineHSVNode>("CombineHSV"))
+		.add_connection("SeparateHSV::H", "CombineHSV::H")
+		.add_connection("SeparateHSV::S", "CombineHSV::S")
+		.add_connection("SeparateHSV::V", "CombineHSV::V")
+		.output_color("CombineHSV::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Gamma with all constant inputs.
+ */
+TEST(render_graph, constant_fold_gamma)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Gamma::Color to constant (0.164317, 0.353553, 0.585662).");
+
+	builder
+		.add_node(ShaderNodeBuilder<GammaNode>("Gamma")
+		          .set("Color", make_float3(0.3f, 0.5f, 0.7f))
+		          .set("Gamma", 1.5f))
+		.output_color("Gamma::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: BrightnessContrast with all constant inputs.
+ */
+TEST(render_graph, constant_fold_bright_contrast)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding BrightContrast::Color to constant (0.16, 0.6, 1.04).");
+
+	builder
+		.add_node(ShaderNodeBuilder<BrightContrastNode>("BrightContrast")
+		          .set("Color", make_float3(0.3f, 0.5f, 0.7f))
+		          .set("Bright", 0.1f)
+		          .set("Contrast", 1.2f))
+		.output_color("BrightContrast::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: blackbody with all constant inputs.
+ */
+TEST(render_graph, constant_fold_blackbody)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Blackbody::Color to constant (3.94163, 0.226523, 0).");
+
+	builder
+		.add_node(ShaderNodeBuilder<BlackbodyNode>("Blackbody")
+		          .set("Temperature", 1200.0f))
+		.output_color("Blackbody::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Math with all constant inputs (clamp false).
+ */
+TEST(render_graph, constant_fold_math)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (1.6).");
+
+	builder
+		.add_node(ShaderNodeBuilder<MathNode>("Math")
+		          .set(&MathNode::type, NODE_MATH_ADD)
+		          .set(&MathNode::use_clamp, false)
+		          .set("Value1", 0.7f)
+		          .set("Value2", 0.9f))
+		.output_value("Math::Value");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Math with all constant inputs (clamp true).
+ */
+TEST(render_graph, constant_fold_math_clamp)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (1).");
+
+	builder
+		.add_node(ShaderNodeBuilder<MathNode>("Math")
+		          .set(&MathNode::type, NODE_MATH_ADD)
+		          .set(&MathNode::use_clamp, true)
+		          .set("Value1", 0.7f)
+		          .set("Value2", 0.9f))
+		.output_value("Math::Value");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Graph for testing partial folds of Math with one constant argument.
+ * Includes 2 tests: constant on each side.
+ */
+static void build_math_partial_test_graph(ShaderGraphBuilder &builder, NodeMath type, float constval)
+{
+	builder
+		.add_attribute("Attribute")
+		/* constant on the left */
+		.add_node(ShaderNodeBuilder<MathNode>("Math_Cx")
+		          .set(&MathNode::type, type)
+		          .set(&MathNode::use_clamp, false)
+		          .set("Value1", constval))
+		.add_connection("Attribute::Fac", "Math_Cx::Value2")
+		/* constant on the right */
+		.add_node(ShaderNodeBuilder<MathNode>("Math_xC")
+		          .set(&MathNode::type, type)
+		          .set(&MathNode::use_clamp, false)
+		          .set("Value2", constval))
+		.add_connection("Attribute::Fac", "Math_xC::Value1")
+		/* output sum */
+		.add_node(ShaderNodeBuilder<MathNode>("Out")
+		          .set(&MathNode::type, NODE_MATH_ADD)
+		          .set(&MathNode::use_clamp, true))
+		.add_connection("Math_Cx::Value", "Out::Value1")
+		.add_connection("Math_xC::Value", "Out::Value2")
+		.output_value("Out::Value");
+}
+
+/*
+ * Tests: partial folding for Math Add with known 0.
+ */
+TEST(render_graph, constant_fold_part_math_add_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* X + 0 == 0 + X == X */
+	CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac.");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
+	INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+	build_math_partial_test_graph(builder, NODE_MATH_ADD, 0.0f);
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Math Sub with known 0.
+ */
+TEST(render_graph, constant_fold_part_math_sub_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* X - 0 == X */
+	INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
+	INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+	build_math_partial_test_graph(builder, NODE_MATH_SUBTRACT, 0.0f);
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Math Mul with known 1.
+ */
+TEST(render_graph, constant_fold_part_math_mul_1)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* X * 1 == 1 * X == X */
+	CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac.");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
+	INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+	build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 1.0f);
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Math Div with known 1.
+ */
+TEST(render_graph, constant_fold_part_math_div_1)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* X / 1 == X */
+	INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
+	INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+	build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 1.0f);
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Math Mul with known 0.
+ */
+TEST(render_graph, constant_fold_part_math_mul_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* X * 0 == 0 * X == 0 */
+	CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0).");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (0).");
+	CORRECT_INFO_MESSAGE(log, "Folding Out::Value to constant (0)");
+	CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode.");
+
+	build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 0.0f);
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Math Div with known 0.
+ */
+TEST(render_graph, constant_fold_part_math_div_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* 0 / X == 0 */
+	CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0).");
+	INVALID_INFO_MESSAGE(log, "Folding Math_xC::");
+	INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+	build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 0.0f);
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Vector Math with all constant inputs.
+ */
+TEST(render_graph, constant_fold_vector_math)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Value to constant (1).");
+	CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Vector to constant (3, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_float::value_float to constant (1).");
+	CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (2).");
+	CORRECT_INFO_MESSAGE(log, "Folding convert_float_to_color::value_color to constant (2, 2, 2).");
+
+	builder
+		.add_node(ShaderNodeBuilder<VectorMathNode>("VectorMath")
+		          .set(&VectorMathNode::type, NODE_VECTOR_MATH_SUBTRACT)
+		          .set("Vector1", make_float3(1.3f, 0.5f, 0.7f))
+		          .set("Vector2", make_float3(-1.7f, 0.5f, 0.7f)))
+		.add_node(ShaderNodeBuilder<MathNode>("Math")
+		          .set(&MathNode::type, NODE_MATH_ADD))
+		.add_connection("VectorMath::Vector", "Math::Value1")
+		.add_connection("VectorMath::Value", "Math::Value2")
+		.output_color("Math::Value");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Graph for testing partial folds of Vector Math with one constant argument.
+ * Includes 2 tests: constant on each side.
+ */
+static void build_vecmath_partial_test_graph(ShaderGraphBuilder &builder, NodeVectorMath type, float3 constval)
+{
+	builder
+		.add_attribute("Attribute")
+		/* constant on the left */
+		.add_node(ShaderNodeBuilder<VectorMathNode>("Math_Cx")
+		          .set(&VectorMathNode::type, type)
+		          .set("Vector1", constval))
+		.add_connection("Attribute::Vector", "Math_Cx::Vector2")
+		/* constant on the right */
+		.add_node(ShaderNodeBuilder<VectorMathNode>("Math_xC")
+		          .set(&VectorMathNode::type, type)
+		          .set("Vector2", constval))
+		.add_connection("Attribute::Vector", "Math_xC::Vector1")
+		/* output sum */
+		.add_node(ShaderNodeBuilder<VectorMathNode>("Out")
+		          .set(&VectorMathNode::type, NODE_VECTOR_MATH_ADD))
+		.add_connection("Math_Cx::Vector", "Out::Vector1")
+		.add_connection("Math_xC::Vector", "Out::Vector2")
+		.output_color("Out::Vector");
+}
+
+/*
+ * Tests: partial folding for Vector Math Add with known 0.
+ */
+TEST(render_graph, constant_fold_part_vecmath_add_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* X + 0 == 0 + X == X */
+	CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to socket Attribute::Vector.");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to socket Attribute::Vector.");
+	INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+	build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_ADD, make_float3(0,0,0));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Vector Math Sub with known 0.
+ */
+TEST(render_graph, constant_fold_part_vecmath_sub_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* X - 0 == X */
+	INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to socket Attribute::Vector.");
+	INVALID_INFO_MESSAGE(log, "Folding Out::");
+
+	build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_SUBTRACT, make_float3(0,0,0));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Vector Math Dot Product with known 0.
+ */
+TEST(render_graph, constant_fold_part_vecmath_dot_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* dot(X, 0) == dot(0, X) == 0 */
+	CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to constant (0, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to constant (0, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Folding Out::Vector to constant (0, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode.");
+
+	build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_DOT_PRODUCT, make_float3(0,0,0));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: partial folding for Vector Math Cross Product with known 0.
+ */
+TEST(render_graph, constant_fold_part_vecmath_cross_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	/* cross(X, 0) == cross(0, X) == 0 */
+	CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to constant (0, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to constant (0, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Folding Out::Vector to constant (0, 0, 0).");
+	CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode.");
+
+	build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_CROSS_PRODUCT, make_float3(0,0,0));
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Bump with no height input folded to Normal input.
+ */
+TEST(render_graph, constant_fold_bump)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Bump::Normal to socket Geometry1::Normal.");
+
+	builder
+		.add_node(ShaderNodeBuilder<GeometryNode>("Geometry1"))
+		.add_node(ShaderNodeBuilder<BumpNode>("Bump"))
+		.add_connection("Geometry1::Normal", "Bump::Normal")
+		.output_color("Bump::Normal");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests: Bump with no inputs folded to Geometry::Normal.
+ */
+TEST(render_graph, constant_fold_bump_no_input)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Bump::Normal to socket geometry::Normal.");
+
+	builder
+		.add_node(ShaderNodeBuilder<BumpNode>("Bump"))
+		.output_color("Bump::Normal");
+
+	graph.finalize(&scene);
+}
+
+template<class T>
+void init_test_curve(array<T> &buffer, T start, T end, int steps)
+{
+	buffer.resize(steps);
+
+	for (int i = 0; i < steps; i++)
+		buffer[i] = lerp(start, end, float(i)/(steps-1));
+}
+
+/*
+ * Tests:
+ *  - Folding of RGB Curves with all constant inputs.
+ */
+TEST(render_graph, constant_fold_rgb_curves)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Curves::Color to constant (0.275, 0.5, 0.475).");
+
+	array<float3> curve;
+	init_test_curve(curve, make_float3(0.0f, 0.25f, 1.0f), make_float3(1.0f, 0.75f, 0.0f), 257);
+
+	builder
+		.add_node(ShaderNodeBuilder<RGBCurvesNode>("Curves")
+		          .set(&CurvesNode::curves, curve)
+		          .set(&CurvesNode::min_x, 0.1f)
+		          .set(&CurvesNode::max_x, 0.9f)
+		          .set("Fac", 0.5f)
+		          .set("Color", make_float3(0.3f, 0.5f, 0.7f)))
+		.output_color("Curves::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of RGB Curves with zero Fac.
+ */
+TEST(render_graph, constant_fold_rgb_curves_fac_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Curves::Color to socket Attribute::Color.");
+
+	array<float3> curve;
+	init_test_curve(curve, make_float3(0.0f, 0.25f, 1.0f), make_float3(1.0f, 0.75f, 0.0f), 257);
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<RGBCurvesNode>("Curves")
+		          .set(&CurvesNode::curves, curve)
+		          .set(&CurvesNode::min_x, 0.1f)
+		          .set(&CurvesNode::max_x, 0.9f)
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute::Color", "Curves::Color")
+		.output_color("Curves::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Vector Curves with all constant inputs.
+ */
+TEST(render_graph, constant_fold_vector_curves)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Curves::Vector to constant (0.275, 0.5, 0.475).");
+
+	array<float3> curve;
+	init_test_curve(curve, make_float3(0.0f, 0.25f, 1.0f), make_float3(1.0f, 0.75f, 0.0f), 257);
+
+	builder
+		.add_node(ShaderNodeBuilder<VectorCurvesNode>("Curves")
+		          .set(&CurvesNode::curves, curve)
+		          .set(&CurvesNode::min_x, 0.1f)
+		          .set(&CurvesNode::max_x, 0.9f)
+		          .set("Fac", 0.5f)
+		          .set("Vector", make_float3(0.3f, 0.5f, 0.7f)))
+		.output_color("Curves::Vector");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Vector Curves with zero Fac.
+ */
+TEST(render_graph, constant_fold_vector_curves_fac_0)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Curves::Vector to socket Attribute::Vector.");
+
+	array<float3> curve;
+	init_test_curve(curve, make_float3(0.0f, 0.25f, 1.0f), make_float3(1.0f, 0.75f, 0.0f), 257);
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<VectorCurvesNode>("Curves")
+		          .set(&CurvesNode::curves, curve)
+		          .set(&CurvesNode::min_x, 0.1f)
+		          .set(&CurvesNode::max_x, 0.9f)
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute::Vector", "Curves::Vector")
+		.output_color("Curves::Vector");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Color Ramp with all constant inputs.
+ */
+TEST(render_graph, constant_fold_rgb_ramp)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Ramp::Color to constant (0.14, 0.39, 0.64).");
+	CORRECT_INFO_MESSAGE(log, "Folding Ramp::Alpha to constant (0.89).");
+
+	array<float3> curve;
+	array<float> alpha;
+	init_test_curve(curve, make_float3(0.0f, 0.25f, 0.5f), make_float3(0.25f, 0.5f, 0.75f), 9);
+	init_test_curve(alpha, 0.75f, 1.0f, 9);
+
+	builder
+		.add_node(ShaderNodeBuilder<RGBRampNode>("Ramp")
+		          .set(&RGBRampNode::ramp, curve)
+		          .set(&RGBRampNode::ramp_alpha, alpha)
+		          .set(&RGBRampNode::interpolate, true)
+		          .set("Fac", 0.56f))
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_ADD))
+		.add_connection("Ramp::Color", "Mix::Color1")
+		.add_connection("Ramp::Alpha", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of Color Ramp with all constant inputs (interpolate false).
+ */
+TEST(render_graph, constant_fold_rgb_ramp_flat)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Ramp::Color to constant (0.125, 0.375, 0.625).");
+	CORRECT_INFO_MESSAGE(log, "Folding Ramp::Alpha to constant (0.875).");
+
+	array<float3> curve;
+	array<float> alpha;
+	init_test_curve(curve, make_float3(0.0f, 0.25f, 0.5f), make_float3(0.25f, 0.5f, 0.75f), 9);
+	init_test_curve(alpha, 0.75f, 1.0f, 9);
+
+	builder
+		.add_node(ShaderNodeBuilder<RGBRampNode>("Ramp")
+		          .set(&RGBRampNode::ramp, curve)
+		          .set(&RGBRampNode::ramp_alpha, alpha)
+		          .set(&RGBRampNode::interpolate, false)
+		          .set("Fac", 0.56f))
+		.add_node(ShaderNodeBuilder<MixNode>("Mix")
+		          .set(&MixNode::type, NODE_MIX_ADD))
+		.add_connection("Ramp::Color", "Mix::Color1")
+		.add_connection("Ramp::Alpha", "Mix::Color2")
+		.output_color("Mix::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of redundant conversion of float to color to float.
+ */
+TEST(render_graph, constant_fold_convert_float_color_float)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding Invert::Color to socket convert_float_to_color::value_color.");
+	CORRECT_INFO_MESSAGE(log, "Folding convert_color_to_float::value_float to socket Attribute::Fac.");
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<InvertNode>("Invert")
+		          .set("Fac", 0.0f))
+		.add_connection("Attribute::Fac", "Invert::Color")
+		.output_value("Invert::Color");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - Folding of redundant conversion of color to vector to color.
+ */
+TEST(render_graph, constant_fold_convert_color_vector_color)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding VecAdd::Vector to socket convert_color_to_vector::value_vector.");
+	CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_color::value_color to socket Attribute::Color.");
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<VectorMathNode>("VecAdd")
+		          .set(&VectorMathNode::type, NODE_VECTOR_MATH_ADD)
+		          .set("Vector2", make_float3(0,0,0)))
+		.add_connection("Attribute::Color", "VecAdd::Vector1")
+		.output_color("VecAdd::Vector");
+
+	graph.finalize(&scene);
+}
+
+/*
+ * Tests:
+ *  - NOT folding conversion of color to float to color.
+ */
+TEST(render_graph, constant_fold_convert_color_float_color)
+{
+	DEFINE_COMMON_VARIABLES(builder, log);
+
+	EXPECT_ANY_MESSAGE(log);
+	CORRECT_INFO_MESSAGE(log, "Folding MathAdd::Value to socket convert_color_to_float::value_float.");
+	INVALID_INFO_MESSAGE(log, "Folding convert_float_to_color::");
+
+	builder
+		.add_attribute("Attribute")
+		.add_node(ShaderNodeBuilder<MathNode>("MathAdd")
+		          .set(&MathNode::type, NODE_MATH_ADD)
+		          .set("Value2", 0.0f))
+		.add_connection("Attribute::Color", "MathAdd::Value1")
+		.output_color("MathAdd::Value");
 
 	graph.finalize(&scene);
 }
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 016f4a6a794..13aba0646d2 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -1477,10 +1477,10 @@ ccl_device bool ray_triangle_intersect(
 	return true;
 }
 
-ccl_device bool ray_triangle_intersect_uv(
-	float3 ray_P, float3 ray_D, float ray_t,
-	float3 v0, float3 v1, float3 v2,
-	float *isect_u, float *isect_v, float *isect_t)
+ccl_device_inline bool ray_triangle_intersect_uv(
+        float3 ray_P, float3 ray_D, float ray_t,
+        float3 v0, float3 v1, float3 v2,
+        float *isect_u, float *isect_v, float *isect_t)
 {
 	/* Calculate intersection */
 	float3 e1 = v1 - v0;
diff --git a/intern/ghost/intern/GHOST_SystemCocoa.mm b/intern/ghost/intern/GHOST_SystemCocoa.mm
index bce390732fe..4b8cb537ecf 100644
--- a/intern/ghost/intern/GHOST_SystemCocoa.mm
+++ b/intern/ghost/intern/GHOST_SystemCocoa.mm
@@ -289,6 +289,7 @@ extern "C" int GHOST_HACK_getFirstFile(char buf[FIRSTFILEBUFLG])
 	GHOST_SystemCocoa *systemCocoa;
 }
 - (void)setSystemCocoa:(GHOST_SystemCocoa *)sysCocoa;
+- (void)applicationDidFinishLaunching:(NSNotification *)aNotification;
 - (BOOL)application:(NSApplication *)theApplication openFile:(NSString *)filename;
 - (NSApplicationTerminateReply)applicationShouldTerminate:(NSApplication *)sender;
 - (void)applicationWillTerminate:(NSNotification *)aNotification;
@@ -302,6 +303,15 @@ extern "C" int GHOST_HACK_getFirstFile(char buf[FIRSTFILEBUFLG])
 	systemCocoa = sysCocoa;
 }
 
+- (void)applicationDidFinishLaunching:(NSNotification *)aNotification
+{
+	// raise application to front, convenient when starting from the terminal
+	// and important for launching the animation player. we call this after the
+	// application finishes launching, as doing it earlier can make us end up
+	// with a frontmost window but an inactive application
+	[NSApp activateIgnoringOtherApps:YES];
+}
+
 - (BOOL)application:(NSApplication *)theApplication openFile:(NSString *)filename
 {
 	return systemCocoa->handleOpenDocumentRequest(filename);
diff --git a/intern/ghost/intern/GHOST_WindowCocoa.mm b/intern/ghost/intern/GHOST_WindowCocoa.mm
index 00e00b6a1ea..e23cf9ccd37 100644
--- a/intern/ghost/intern/GHOST_WindowCocoa.mm
+++ b/intern/ghost/intern/GHOST_WindowCocoa.mm
@@ -622,8 +622,6 @@ GHOST_WindowCocoa::GHOST_WindowCocoa(
 		m_lionStyleFullScreen = true;
 	}
 	
-	[NSApp activateIgnoringOtherApps:YES]; // raise application to front, important for new blender instance animation play case
-	
 	[pool drain];
 }
 
diff --git a/intern/smoke/intern/FLUID_3D.cpp b/intern/smoke/intern/FLUID_3D.cpp
index 4faec894801..8a27818ff36 100644
--- a/intern/smoke/intern/FLUID_3D.cpp
+++ b/intern/smoke/intern/FLUID_3D.cpp
@@ -993,6 +993,9 @@ void FLUID_3D::project()
 
 	copyBorderAll(_pressure, 0, _zRes);
 
+	// fix fluid compression caused in isolated components by obstacle movement
+	fixObstacleCompression(_divergence);
+
 	// solve Poisson equation
 	solvePressurePre(_pressure, _divergence, _obstacles);
 
@@ -1291,6 +1294,142 @@ void FLUID_3D::setObstacleBoundaries(float *_pressure, int zBegin, int zEnd)
 	}	// z-loop
 }
 
+void FLUID_3D::floodFillComponent(int *buffer, size_t *queue, size_t limit, size_t pos, int from, int to)
+{
+	/* Queue-based flood fill: relabel 'from' cells connected to 'pos' (6 axis neighbors, index < limit) as 'to'. Rely on (from != 0 && from != to && edges == 0) to stop. */
+	int offsets[] = { -1, +1, -_xRes, +_xRes, -_slabSize, +_slabSize };
+	size_t qend = 0;
+
+	buffer[pos] = to;
+	queue[qend++] = pos;
+
+	for (size_t qidx = 0; qidx < qend; qidx++)
+	{
+		pos = queue[qidx];
+
+		for (int i = 0; i < 6; i++)
+		{
+			size_t next = pos + offsets[i];
+
+			if (next < limit && buffer[next] == from)
+			{
+				buffer[next] = to;
+				queue[qend++] = next;
+			}
+		}
+	}
+}
+
+void FLUID_3D::mergeComponents(int *buffer, size_t *queue, size_t cur, size_t other)
+{
+	/* Merge the labels of cells 'cur' and 'other': replace higher value with lower; the fill only spreads to indices below 'cur'. */
+	if (buffer[other] < buffer[cur])
+	{
+		floodFillComponent(buffer, queue, cur, cur, buffer[cur], buffer[other]);
+	}
+	else if (buffer[cur] < buffer[other])
+	{
+		floodFillComponent(buffer, queue, cur, other, buffer[other], buffer[cur]);
+	}
+}
+
+void FLUID_3D::fixObstacleCompression(float *divergence)
+{
+	int x, y, z;
+	size_t index;
+
+	/* Find compartments completely separated by obstacles.
+	 * Edge of the domain is automatically component 0. */
+	int *component = new int[_totalCells];
+	size_t *queue = new size_t[_totalCells];
+
+	memset(component, 0, sizeof(int) * _totalCells);
+
+	int next_id = 1;
+
+	for (z = 1, index = _slabSize + _xRes + 1; z < _zRes - 1; z++, index += 2 * _xRes)
+	{
+		for (y = 1; y < _yRes - 1; y++, index += 2)
+		{
+			for (x = 1; x < _xRes - 1; x++, index++)
+			{
+				if(!_obstacles[index])
+				{
+					/* Check for connection to the domain edge at the upper end of each axis; the lower ends are handled by the merges below. */
+					if ((x == _xRes-2 && !_obstacles[index + 1]) ||
+					    (y == _yRes-2 && !_obstacles[index + _xRes]) ||
+					    (z == _zRes-2 && !_obstacles[index + _slabSize]))
+					{
+						component[index] = 0;
+					}
+					else {
+						component[index] = next_id;
+					}
+
+					if (!_obstacles[index - 1])
+						mergeComponents(component, queue, index, index - 1);
+					if (!_obstacles[index - _xRes])
+						mergeComponents(component, queue, index, index - _xRes);
+					if (!_obstacles[index - _slabSize])
+						mergeComponents(component, queue, index, index - _slabSize);
+
+					if (component[index] == next_id)
+						next_id++;
+				}
+			}
+		}
+	}
+
+	delete[] queue;
+
+	/* Compute average divergence within each component. */
+	float *total_divergence = new float[next_id];
+	int *component_size = new int[next_id];
+
+	memset(total_divergence, 0, sizeof(float) * next_id);
+	memset(component_size, 0, sizeof(int) * next_id);
+
+	for (z = 1, index = _slabSize + _xRes + 1; z < _zRes - 1; z++, index += 2 * _xRes)
+	{
+		for (y = 1; y < _yRes - 1; y++, index += 2)
+		{
+			for (x = 1; x < _xRes - 1; x++, index++)
+			{
+				if(!_obstacles[index])
+				{
+					int ci = component[index];
+
+					component_size[ci]++;
+					total_divergence[ci] += divergence[index];
+				}
+			}
+		}
+	}
+
+	/* Adjust divergence to make the average zero in each component except the edge. */
+	total_divergence[0] = 0.0f;
+
+	for (z = 1, index = _slabSize + _xRes + 1; z < _zRes - 1; z++, index += 2 * _xRes)
+	{
+		for (y = 1; y < _yRes - 1; y++, index += 2)
+		{
+			for (x = 1; x < _xRes - 1; x++, index++)
+			{
+				if(!_obstacles[index])
+				{
+					int ci = component[index];
+
+					divergence[index] -= total_divergence[ci] / component_size[ci];
+				}
+			}
+		}
+	}
+
+	delete[] component;
+	delete[] component_size;
+	delete[] total_divergence;
+}
+
 //////////////////////////////////////////////////////////////////////
 // add buoyancy forces
 //////////////////////////////////////////////////////////////////////
@@ -1650,4 +1789,4 @@ void FLUID_3D::updateFlame(float *react, float *flame, int total_cells)
 		else
 			flame[index] = 0.0f;
 	}
-}
-\ No newline at end of file
+}
diff --git a/intern/smoke/intern/FLUID_3D.h b/intern/smoke/intern/FLUID_3D.h
index cd2147b2bee..fe20c10d71d 100644
--- a/intern/smoke/intern/FLUID_3D.h
+++ b/intern/smoke/intern/FLUID_3D.h
@@ -195,6 +195,8 @@ struct FLUID_3D
 		void setObstacleBoundaries(float *_pressure, int zBegin, int zEnd);
 		void setObstaclePressure(float *_pressure, int zBegin, int zEnd);
 
+		void fixObstacleCompression(float *divergence);
+
 	public:
 		// advection, accessed e.g. by WTURBULENCE class
 		//void advectMacCormack();
@@ -202,6 +204,9 @@ struct FLUID_3D
 		void advectMacCormackEnd1(int zBegin, int zEnd);
 		void advectMacCormackEnd2(int zBegin, int zEnd);
 
+		void floodFillComponent(int *components, size_t *queue, size_t limit, size_t start, int from, int to);
+		void mergeComponents(int *components, size_t *queue, size_t cur, size_t other);
+
 		/* burning */
 		float *_burning_rate; // RNA pointer
 		float *_flame_smoke; // RNA pointer
diff --git a/source/blender/blenkernel/intern/mesh_evaluate.c b/source/blender/blenkernel/intern/mesh_evaluate.c
index 1c86fbcfe8e..fa113ef5eef 100644
--- a/source/blender/blenkernel/intern/mesh_evaluate.c
+++ b/source/blender/blenkernel/intern/mesh_evaluate.c
@@ -434,7 +434,7 @@ MLoopNorSpace *BKE_lnor_space_create(MLoopNorSpaceArray *lnors_spacearr)
 }
 
 /* This threshold is a bit touchy (usual float precision issue), this value seems OK. */
-#define LNOR_SPACE_TRIGO_THRESHOLD (1.0f - 1e-6f)
+#define LNOR_SPACE_TRIGO_THRESHOLD (1.0f - 1e-4f)
 
 /* Should only be called once.
  * Beware, this modifies ref_vec and other_vec in place!
diff --git a/source/blender/blenkernel/intern/particle_system.c b/source/blender/blenkernel/intern/particle_system.c
index e7561ee699e..8e3e2f5d6d0 100644
--- a/source/blender/blenkernel/intern/particle_system.c
+++ b/source/blender/blenkernel/intern/particle_system.c
@@ -3311,7 +3311,7 @@ static float get_base_time_step(ParticleSettings *part)
 	return 1.0f / (float) (part->subframes + 1);
 }
 /* Update time step size to suit current conditions. */
-static float update_timestep(ParticleSystem *psys, ParticleSimulationData *sim, float t_frac)
+static void update_timestep(ParticleSystem *psys, ParticleSimulationData *sim)
 {
 	float dt_target;
 	if (sim->courant_num == 0.0f)
@@ -3331,7 +3331,10 @@ static float update_timestep(ParticleSystem *psys, ParticleSimulationData *sim,
 		psys->dt_frac = interpf(dt_target, psys->dt_frac, TIMESTEP_EXPANSION_FACTOR);
 	else
 		psys->dt_frac = dt_target;
+}
 
+static float sync_timestep(ParticleSystem *psys, float t_frac)
+{
 	/* Sync with frame end if it's close. */
 	if (t_frac == 1.0f)
 		return psys->dt_frac;
@@ -3991,7 +3994,9 @@ static void system_step(ParticleSimulationData *sim, float cfra, const bool use_
 				printf("%f,%f,%f,%f\n", cfra+dframe+t_frac - 1.f, t_frac, dt_frac, sim->courant_num);
 #endif
 				if (part->time_flag & PART_TIME_AUTOSF)
-					dt_frac = update_timestep(psys, sim, t_frac);
+					update_timestep(psys, sim);
+				/* Even without AUTOSF dt_frac may not add up to 1.0 due to float precision. */
+				dt_frac = sync_timestep(psys, t_frac);
 			}
 		}
 	}
diff --git a/source/blender/blenkernel/intern/smoke.c b/source/blender/blenkernel/intern/smoke.c
index c7e073a7fc1..43569f9ded2 100644
--- a/source/blender/blenkernel/intern/smoke.c
+++ b/source/blender/blenkernel/intern/smoke.c
@@ -707,6 +707,7 @@ typedef struct ObstaclesFromDMData {
 	bool has_velocity;
 	float *vert_vel;
 	float *velocityX, *velocityY, *velocityZ;
+	int *num_obstacles;
 } ObstaclesFromDMData;
 
 static void obstacles_from_derivedmesh_task_cb(void *userdata, const int z)
@@ -755,8 +756,10 @@ static void obstacles_from_derivedmesh_task_cb(void *userdata, const int z)
 				/* tag obstacle cells */
 				data->obstacle_map[index] = 1;
 
-				if (data->has_velocity)
+				if (data->has_velocity) {
 					data->obstacle_map[index] |= 8;
+					data->num_obstacles[index]++;
+				}
 			}
 		}
 	}
@@ -764,7 +767,7 @@ static void obstacles_from_derivedmesh_task_cb(void *userdata, const int z)
 
 static void obstacles_from_derivedmesh(
         Object *coll_ob, SmokeDomainSettings *sds, SmokeCollSettings *scs,
-        unsigned char *obstacle_map, float *velocityX, float *velocityY, float *velocityZ, float dt)
+        unsigned char *obstacle_map, float *velocityX, float *velocityY, float *velocityZ, int *num_obstacles, float dt)
 {
 	if (!scs->dm) return;
 	{
@@ -835,7 +838,8 @@ static void obstacles_from_derivedmesh(
 			    .sds = sds, .mvert = mvert, .mloop = mloop, .looptri = looptri,
 			    .tree = &treeData, .obstacle_map = obstacle_map,
 			    .has_velocity = has_velocity, .vert_vel = vert_vel,
-			    .velocityX = velocityX, .velocityY = velocityY, .velocityZ = velocityZ
+			    .velocityX = velocityX, .velocityY = velocityY, .velocityZ = velocityZ,
+			    .num_obstacles = num_obstacles
 			};
 			BLI_task_parallel_range(
 			            sds->res_min[2], sds->res_max[2], &data, obstacles_from_derivedmesh_task_cb, true);
@@ -871,6 +875,8 @@ static void update_obstacles(Scene *scene, Object *ob, SmokeDomainSettings *sds,
 	float *b = smoke_get_color_b(sds->fluid);
 	unsigned int z;
 
+	int *num_obstacles = MEM_callocN(sizeof(int) * sds->res[0] * sds->res[1] * sds->res[2], "smoke_num_obstacles");
+
 	smoke_get_ob_velocity(sds->fluid, &velx, &vely, &velz);
 
 	// TODO: delete old obstacle flags
@@ -900,7 +906,7 @@ static void update_obstacles(Scene *scene, Object *ob, SmokeDomainSettings *sds,
 		if ((smd2->type & MOD_SMOKE_TYPE_COLL) && smd2->coll)
 		{
 			SmokeCollSettings *scs = smd2->coll;
-			obstacles_from_derivedmesh(collob, sds, scs, obstacles, velx, vely, velz, dt);
+			obstacles_from_derivedmesh(collob, sds, scs, obstacles, velx, vely, velz, num_obstacles, dt);
 		}
 	}
 
@@ -926,7 +932,15 @@ static void update_obstacles(Scene *scene, Object *ob, SmokeDomainSettings *sds,
 				b[z] = 0;
 			}
 		}
+		/* average velocities from multiple obstacles in one cell */
+		if (num_obstacles[z]) {
+			velx[z] /= num_obstacles[z];
+			vely[z] /= num_obstacles[z];
+			velz[z] /= num_obstacles[z];
+		}
 	}
+
+	MEM_freeN(num_obstacles);
 }
 
 /**********************************************************
diff --git a/source/blender/blenlib/intern/BLI_heap.c b/source/blender/blenlib/intern/BLI_heap.c
index 0a8dafc2dc1..d7fd1caa8da 100644
--- a/source/blender/blenlib/intern/BLI_heap.c
+++ b/source/blender/blenlib/intern/BLI_heap.c
@@ -55,9 +55,10 @@ struct HeapNode_Chunk {
  * or we allocate past the reserved number.
  *
  * \note Optimize number for 64kb allocs.
+ * \note keep type in sync with tot_nodes in heap_node_alloc_chunk.
  */
 #define HEAP_CHUNK_DEFAULT_NUM \
-	((MEM_SIZE_OPTIMAL((1 << 16) - sizeof(struct HeapNode_Chunk))) / sizeof(HeapNode))
+	((unsigned int)((MEM_SIZE_OPTIMAL((1 << 16) - sizeof(struct HeapNode_Chunk))) / sizeof(HeapNode)))
 
 struct Heap {
 	unsigned int size;
diff --git a/source/blender/compositor/nodes/COM_FilterNode.cpp b/source/blender/compositor/nodes/COM_FilterNode.cpp
index 7493f24ba6b..e8b08ce2ce1 100644
--- a/source/blender/compositor/nodes/COM_FilterNode.cpp
+++ b/source/blender/compositor/nodes/COM_FilterNode.cpp
@@ -49,7 +49,7 @@ void FilterNode::convertToOperations(NodeConverter &converter, const CompositorC
 			operation->set3x3Filter(-1, -1, -1, -1, 9, -1, -1, -1, -1);
 			break;
 		case CMP_FILT_LAPLACE:
-			operation = new ConvolutionFilterOperation();
+			operation = new ConvolutionEdgeFilterOperation();
 			operation->set3x3Filter(-1 / 8.0f, -1 / 8.0f, -1 / 8.0f, -1 / 8.0f, 1.0f, -1 / 8.0f, -1 / 8.0f, -1 / 8.0f, -1 / 8.0f);
 			break;
 		case CMP_FILT_SOBEL:
diff --git a/source/blender/editors/screen/screen_edit.c b/source/blender/editors/screen/screen_edit.c
index 62aeca4b9d1..677a6472c72 100644
--- a/source/blender/editors/screen/screen_edit.c
+++ b/source/blender/editors/screen/screen_edit.c
@@ -1943,6 +1943,7 @@ ScrArea *ED_screen_state_toggle(bContext *C, wmWindow *win, ScrArea *sa, const s
 		sc = ED_screen_add(win, oldscreen->scene, newname);
 		sc->state = state;
 		sc->redraws_flag = oldscreen->redraws_flag;
+		sc->temp = oldscreen->temp;
 
 		/* timer */
 		sc->animtimer = oldscreen->animtimer;
diff --git a/source/blender/editors/space_view3d/drawmesh.c b/source/blender/editors/space_view3d/drawmesh.c
index f5289a0d245..74a50497164 100644
--- a/source/blender/editors/space_view3d/drawmesh.c
+++ b/source/blender/editors/space_view3d/drawmesh.c
@@ -435,12 +435,16 @@ static void draw_textured_begin(Scene *scene, View3D *v3d, RegionView3D *rv3d, O
 	else {
 		/* draw with lights in the scene otherwise */
 		solidtex = false;
-		if (v3d->flag2 & V3D_SHADELESS_TEX)
+		if (v3d->flag2 & V3D_SHADELESS_TEX) {
 			Gtexdraw.is_lit = 0;
-		else
-			Gtexdraw.is_lit = GPU_scene_object_lights(scene, ob, v3d->lay, rv3d->viewmat, !rv3d->is_persp);
+		}
+		else {
+			Gtexdraw.is_lit = GPU_scene_object_lights(
+			                      scene, ob, v3d->localvd ? v3d->localvd->lay : v3d->lay,
+			                      rv3d->viewmat, !rv3d->is_persp);
+		}
 	}
-	
+
 	rgba_float_to_uchar(obcol, ob->col);
 
 	if (solidtex || v3d->drawtype == OB_TEXTURE) is_tex = true;
diff --git a/source/blender/editors/transform/transform.c b/source/blender/editors/transform/transform.c
index 8c906160a00..ef6cff19181 100644
--- a/source/blender/editors/transform/transform.c
+++ b/source/blender/editors/transform/transform.c
@@ -3997,10 +3997,8 @@ static void applyRotation(TransInfo *t, const int UNUSED(mval[2]))
 
 	applySnapping(t, &final);
 
-	if (applyNumInput(&t->num, &final)) {
-		/* Clamp between -PI and PI */
-		final = angle_wrap_rad(final);
-	}
+	/* Used to clamp final result in [-PI, PI[ range, no idea why, inheritance from 2.4x era, see T48998. */
+	applyNumInput(&t->num, &final);
 
 	t->values[0] = final;
 
diff --git a/source/blender/makesrna/intern/rna_smoke.c b/source/blender/makesrna/intern/rna_smoke.c
index 9a31952b84b..dad5577dc12 100644
--- a/source/blender/makesrna/intern/rna_smoke.c
+++ b/source/blender/makesrna/intern/rna_smoke.c
@@ -76,6 +76,21 @@ static void rna_Smoke_resetCache(Main *UNUSED(bmain), Scene *UNUSED(scene), Poin
 	DAG_id_tag_update(ptr->id.data, OB_RECALC_DATA);
 }
 
+static void rna_Smoke_cachetype_set(struct PointerRNA *ptr, int value)
+{
+	SmokeDomainSettings *settings = (SmokeDomainSettings *)ptr->data;
+	Object *ob = (Object *)ptr->id.data;
+
+	if (value != settings->cache_file_format) {
+		/* Clear old caches. */
+		PTCacheID id;
+		BKE_ptcache_id_from_smoke(&id, ob, settings->smd);
+		BKE_ptcache_id_clear(&id, PTCACHE_CLEAR_ALL, 0);
+
+		settings->cache_file_format = value;
+	}
+}
+
 static void rna_Smoke_reset(Main *bmain, Scene *scene, PointerRNA *ptr)
 {
 	SmokeDomainSettings *settings = (SmokeDomainSettings *)ptr->data;
@@ -701,6 +716,7 @@ static void rna_def_smoke_domain_settings(BlenderRNA *brna)
 	prop = RNA_def_property(srna, "cache_file_format", PROP_ENUM, PROP_NONE);
 	RNA_def_property_enum_sdna(prop, NULL, "cache_file_format");
 	RNA_def_property_enum_items(prop, cache_file_type_items);
+	RNA_def_property_enum_funcs(prop, NULL, "rna_Smoke_cachetype_set", NULL);
 	RNA_def_property_ui_text(prop, "File Format", "Select the file format to be used for caching");
 	RNA_def_property_update(prop, NC_OBJECT | ND_MODIFIER, "rna_Smoke_resetCache");
 }