Merge branch 'master' into blender2.8

Conflicts: source/blender/blenkernel/intern/depsgraph.c, source/blender/blenloader/intern/versioning_270.c, source/blender/depsgraph/intern/builder/deg_builder_relations.cc, source/blender/makesrna/intern/rna_main_api.c, source/blender/makesrna/intern/rna_particle.c
author: Bastien Montagne <montagne29@wanadoo.fr> 2016-12-12 18:17:57 +0300
committer: Bastien Montagne <montagne29@wanadoo.fr> 2016-12-12 18:17:57 +0300
commit: 9be6d5ff18fd593e853647d9eec8b3fb074acd7c (patch)
tree: 485fb9203c310b5119a549e698db241012ed39ff /intern
parent: 54528079e3cfaf74eaa119615386564820b45276 (diff)
parent: 5f852a4324212221500d11b2c7594f5e0ca894c6 (diff)
38 files changed, 674 insertions, 297 deletions
diff --git a/intern/cycles/app/cycles_standalone.cpp b/intern/cycles/app/cycles_standalone.cpp
index b21e8630cdb..9816d614a7c 100644
--- a/intern/cycles/app/cycles_standalone.cpp
+++ b/intern/cycles/app/cycles_standalone.cpp
@@ -72,20 +72,17 @@ static void session_print(const string& str)
 
 static void session_print_status()
 {
-	int sample, tile;
-	double total_time, sample_time, render_time;
 	string status, substatus;
 
 	/* get status */
-	sample = options.session->progress.get_sample();
-	options.session->progress.get_tile(tile, total_time, sample_time, render_time);
+	float progress = options.session->progress.get_progress();
 	options.session->progress.get_status(status, substatus);
 
 	if(substatus != "")
 		status += ": " + substatus;
 
 	/* print status */
-	status = string_printf("Sample %d   %s", sample, status.c_str());
+	status = string_printf("Progress %05.2f   %s", (double) progress*100, status.c_str());
 	session_print(status);
 }
 
@@ -167,13 +164,12 @@ static void display_info(Progress& progress)
 	latency = (elapsed - last);
 	last = elapsed;
 
-	int sample, tile;
-	double total_time, sample_time, render_time;
+	double total_time, sample_time;
 	string status, substatus;
 
-	sample = progress.get_sample();
-	progress.get_tile(tile, total_time, sample_time, render_time);
+	progress.get_time(total_time, sample_time);
 	progress.get_status(status, substatus);
+	float progress_val = progress.get_progress();
 
 	if(substatus != "")
 		status += ": " + substatus;
@@ -184,10 +180,10 @@ static void display_info(Progress& progress)
 	        "%s"
 	        "        Time: %.2f"
 	        "        Latency: %.4f"
-	        "        Sample: %d"
+	        "        Progress: %05.2f"
 	        "        Average: %.4f"
 	        "        Interactive: %s",
-	        status.c_str(), total_time, latency, sample, sample_time, interactive.c_str());
+	        status.c_str(), total_time, latency, (double) progress_val*100, sample_time, interactive.c_str());
 
 	view_display_info(str.c_str());
 
diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 3346beea3b2..cbff5a537dc 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -288,7 +288,7 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
                 description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
                             "Zero disables the test and never ignores lights",
                 min=0.0, max=1.0,
-                default=0.05,
+                default=0.01,
                 )
 
         cls.caustics_reflective = BoolProperty(
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index e16cea0ebaf..71c1eefe65f 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -930,38 +930,13 @@ void BlenderSession::get_status(string& status, string& substatus)
 
 void BlenderSession::get_progress(float& progress, double& total_time, double& render_time)
 {
-	double tile_time;
-	int tile, sample, samples_per_tile;
-	int tile_total = session->tile_manager.state.num_tiles;
-	int samples = session->tile_manager.state.sample + 1;
-	int total_samples = session->tile_manager.get_num_effective_samples();
-
-	session->progress.get_tile(tile, total_time, render_time, tile_time);
-
-	sample = session->progress.get_sample();
-	samples_per_tile = session->tile_manager.get_num_effective_samples();
-
-	if(background && samples_per_tile && tile_total)
-		progress = ((float)sample / (float)(tile_total * samples_per_tile));
-	else if(!background && samples > 0 && total_samples != INT_MAX)
-		progress = ((float)samples) / total_samples;
-	else
-		progress = 0.0;
+	session->progress.get_time(total_time, render_time);
+	progress = session->progress.get_progress();
 }
 
 void BlenderSession::update_bake_progress()
 {
-	float progress;
-	int sample, samples_per_task, parts_total;
-
-	sample = session->progress.get_sample();
-	samples_per_task = scene->bake_manager->num_samples;
-	parts_total = scene->bake_manager->num_parts;
-
-	if(samples_per_task)
-		progress = ((float)sample / (float)(parts_total * samples_per_task));
-	else
-		progress = 0.0;
+	float progress = session->progress.get_progress();
 
 	if(progress != last_progress) {
 		b_engine.update_progress(progress);
diff --git a/intern/cycles/device/device.cpp b/intern/cycles/device/device.cpp
index ff9387b0a8a..31c99f49d6d 100644
--- a/intern/cycles/device/device.cpp
+++ b/intern/cycles/device/device.cpp
@@ -64,6 +64,8 @@ std::ostream& operator <<(std::ostream &os,
 	   << string_from_bool(requested_features.use_integrator_branched) << std::endl;
 	os << "Use Patch Evaluation: "
 	   << string_from_bool(requested_features.use_patch_evaluation) << std::endl;
+	os << "Use Transparent Shadows: "
+	   << string_from_bool(requested_features.use_transparent) << std::endl;
 	return os;
 }
 
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index b9bdffa2618..ccee25ae34e 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -117,6 +117,9 @@ public:
 
 	/* Use OpenSubdiv patch evaluation */
 	bool use_patch_evaluation;
+	
+	/* Use Transparent shadows */
+	bool use_transparent;
 
 	DeviceRequestedFeatures()
 	{
@@ -133,6 +136,7 @@ public:
 		use_volume = false;
 		use_integrator_branched = false;
 		use_patch_evaluation = false;
+		use_transparent = false;
 	}
 
 	bool modified(const DeviceRequestedFeatures& requested_features)
@@ -148,7 +152,8 @@ public:
 		         use_subsurface == requested_features.use_subsurface &&
 		         use_volume == requested_features.use_volume &&
 		         use_integrator_branched == requested_features.use_integrator_branched &&
-		         use_patch_evaluation == requested_features.use_patch_evaluation);
+		         use_patch_evaluation == requested_features.use_patch_evaluation &&
+		         use_transparent == requested_features.use_transparent);
 	}
 
 	/* Convert the requested features structure to a build options,
@@ -189,6 +194,9 @@ public:
 		if(!use_patch_evaluation) {
 			build_options += " -D__NO_PATCH_EVAL__";
 		}
+		if(!use_transparent) {
+			build_options += " -D__NO_TRANSPARENT__";
+		}
 		return build_options;
 	}
 };
@@ -220,6 +228,7 @@ public:
 	DeviceInfo info;
 	virtual const string& error_message() { return error_msg; }
 	bool have_error() { return !error_message().empty(); }
+	virtual bool show_samples() const { return false; }
 
 	/* statistics */
 	Stats &stats;
diff --git a/intern/cycles/device/device_cpu.cpp b/intern/cycles/device/device_cpu.cpp
index aed86d8d853..c8e001ec2fd 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -112,6 +112,11 @@ public:
 		task_pool.stop();
 	}
 
+	virtual bool show_samples() const
+	{
+		return (TaskScheduler::num_threads() == 1);
+	}
+
 	void mem_alloc(device_memory& mem, MemoryType /*type*/)
 	{
 		mem.device_pointer = mem.data_pointer;
@@ -275,7 +280,7 @@ public:
 
 				tile.sample = sample + 1;
 
-				task.update_progress(&tile);
+				task.update_progress(&tile, tile.w*tile.h);
 			}
 
 			task.release_tile(tile);
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index fbb97f78e70..233f94be1bf 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -115,6 +115,12 @@ public:
 		return path_exists(cubins_path);
 	}
 
+	virtual bool show_samples() const
+	{
+		/* The CUDADevice only processes one tile at a time, so showing samples is fine. */
+		return true;
+	}
+
 /*#ifdef NDEBUG
 #define cuda_abort()
 #else
@@ -1267,7 +1273,7 @@ public:
 
 					tile.sample = sample + 1;
 
-					task->update_progress(&tile);
+					task->update_progress(&tile, tile.w*tile.h);
 				}
 
 				task->release_tile(tile);
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 48fd159d508..31b800640d3 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -89,6 +89,14 @@ public:
 		return error_msg;
 	}
 
+	virtual bool show_samples() const
+	{
+		if(devices.size() > 1) {
+			return false;
+		}
+		return devices.front().device->show_samples();
+	}
+
 	bool load_kernels(const DeviceRequestedFeatures& requested_features)
 	{
 		foreach(SubDevice& sub, devices)
diff --git a/intern/cycles/device/device_network.cpp b/intern/cycles/device/device_network.cpp
index 3eb5ad2d2db..53eef6cf199 100644
--- a/intern/cycles/device/device_network.cpp
+++ b/intern/cycles/device/device_network.cpp
@@ -51,6 +51,11 @@ public:
 
 	thread_mutex rpc_lock;
 
+	virtual bool show_samples() const
+	{
+		return false;
+	}
+
 	NetworkDevice(DeviceInfo& info, Stats &stats, const char *address)
 	: Device(info, stats, true), socket(io_service)
 	{
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index 1f1128a28f8..48d18035c13 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -19,6 +19,8 @@
 
 #include "device_task.h"
 
+#include "buffers.h"
+
 #include "util_algorithm.h"
 #include "util_time.h"
 
@@ -99,14 +101,18 @@ void DeviceTask::split(list<DeviceTask>& tasks, int num, int max_size)
 	}
 }
 
-void DeviceTask::update_progress(RenderTile *rtile)
+void DeviceTask::update_progress(RenderTile *rtile, int pixel_samples)
 {
 	if((type != PATH_TRACE) &&
 	   (type != SHADER))
 		return;
 
-	if(update_progress_sample)
-		update_progress_sample();
+	if(update_progress_sample) {
+		if(pixel_samples == -1) {
+			pixel_samples = shader_w;
+		}
+		update_progress_sample(pixel_samples, rtile? rtile->sample : 0);
+	}
 
 	if(update_tile_sample) {
 		double current_time = time_dt();
diff --git a/intern/cycles/device/device_task.h b/intern/cycles/device/device_task.h
index 8423e83bdfd..8bd54c3d2b0 100644
--- a/intern/cycles/device/device_task.h
+++ b/intern/cycles/device/device_task.h
@@ -56,10 +56,10 @@ public:
 	int get_subtask_count(int num, int max_size = 0);
 	void split(list<DeviceTask>& tasks, int num, int max_size = 0);
 
-	void update_progress(RenderTile *rtile);
+	void update_progress(RenderTile *rtile, int pixel_samples = -1);
 
 	function<bool(Device *device, RenderTile&)> acquire_tile;
-	function<void(void)> update_progress_sample;
+	function<void(long, int)> update_progress_sample;
 	function<void(RenderTile&)> update_tile_sample;
 	function<void(RenderTile&)> release_tile;
 	function<bool(void)> get_cancel;
diff --git a/intern/cycles/device/opencl/opencl_mega.cpp b/intern/cycles/device/opencl/opencl_mega.cpp
index 369c086df57..6ea7619e022 100644
--- a/intern/cycles/device/opencl/opencl_mega.cpp
+++ b/intern/cycles/device/opencl/opencl_mega.cpp
@@ -39,6 +39,10 @@ public:
 	{
 	}
 
+	virtual bool show_samples() const {
+		return true;
+	}
+
 	virtual void load_kernels(const DeviceRequestedFeatures& /*requested_features*/,
 	                          vector<OpenCLProgram*> &programs)
 	{
@@ -120,7 +124,7 @@ public:
 
 					tile.sample = sample + 1;
 
-					task->update_progress(&tile);
+					task->update_progress(&tile, tile.w*tile.h);
 				}
 
 				/* Complete kernel execution before release tile */
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 239e73a40fd..3c3c2150128 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -247,6 +247,10 @@ public:
 		}
 	}
 
+	virtual bool show_samples() const {
+		return false;
+	}
+
 	/* Split kernel utility functions. */
 	size_t get_tex_size(const char *tex_name)
 	{
diff --git a/intern/cycles/kernel/closure/bsdf_hair.h b/intern/cycles/kernel/closure/bsdf_hair.h
index bede5f45e7e..daaa26dc6ad 100644
--- a/intern/cycles/kernel/closure/bsdf_hair.h
+++ b/intern/cycles/kernel/closure/bsdf_hair.h
@@ -267,7 +267,10 @@ ccl_device int bsdf_hair_transmission_sample(const ShaderClosure *sc, float3 Ng,
 
 	*eval = make_float3(*pdf, *pdf, *pdf);
 
-	kernel_assert(dot(locy, *omega_in) < 0.0f);
+	/* TODO(sergey): The dot product should always be negative, but some
+	 * precision issue seems to be involved here, so allow a small tolerance.
+	 */
+	kernel_assert(dot(locy, *omega_in) < 1e-4f);
 
 	return LABEL_TRANSMIT|LABEL_GLOSSY;
 }
diff --git a/intern/cycles/kernel/geom/geom_curve.h b/intern/cycles/kernel/geom/geom_curve.h
index 84aaaab7453..636dbcc71e0 100644
--- a/intern/cycles/kernel/geom/geom_curve.h
+++ b/intern/cycles/kernel/geom/geom_curve.h
@@ -255,6 +255,17 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 		int ka = max(k0 - 1, v00.x);
 		int kb = min(k1 + 1, v00.x + v00.y - 1);
 
+#ifdef __KERNEL_AVX2__
+		avxf P_curve_0_1, P_curve_2_3;
+		if(type & PRIMITIVE_CURVE) {
+			P_curve_0_1 = _mm256_loadu2_m128(&kg->__curve_keys.data[k0].x, &kg->__curve_keys.data[ka].x);
+			P_curve_2_3 = _mm256_loadu2_m128(&kg->__curve_keys.data[kb].x, &kg->__curve_keys.data[k1].x);
+		}
+		else {
+			int fobject = (object == OBJECT_NONE) ? kernel_tex_fetch(__prim_object, curveAddr) : object;
+			motion_cardinal_curve_keys_avx(kg, fobject, prim, time, ka, k0, k1, kb, &P_curve_0_1,&P_curve_2_3);
+		}
+#else  /* __KERNEL_AVX2__ */
 		ssef P_curve[4];
 
 		if(type & PRIMITIVE_CURVE) {
@@ -267,6 +278,7 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 			int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, curveAddr): object;
 			motion_cardinal_curve_keys(kg, fobject, prim, time, ka, k0, k1, kb, (float4*)&P_curve);
 		}
+#endif  /* __KERNEL_AVX2__ */
 
 		ssef rd_sgn = set_sign_bit<0, 1, 1, 1>(shuffle<0>(rd_ss));
 		ssef mul_zxxy = shuffle<2, 0, 0, 1>(vdir) * rd_sgn;
@@ -278,6 +290,33 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 		ssef htfm1 = shuffle<1, 0, 1, 3>(load1f_first(extract<0>(d_ss)), vdir0);
 		ssef htfm2 = shuffle<1, 3, 2, 3>(mul_shuf, vdir0);
 
+#ifdef __KERNEL_AVX2__
+		const avxf vPP = _mm256_broadcast_ps(&P.m128);
+		const avxf htfm00 = avxf(htfm0.m128, htfm0.m128);
+		const avxf htfm11 = avxf(htfm1.m128, htfm1.m128);
+		const avxf htfm22 = avxf(htfm2.m128, htfm2.m128);
+
+		const avxf p01 = madd(shuffle<0>(P_curve_0_1 - vPP),
+		                      htfm00,
+		                      madd(shuffle<1>(P_curve_0_1 - vPP),
+		                           htfm11,
+		                           shuffle<2>(P_curve_0_1 - vPP) * htfm22));
+		const avxf p23 = madd(shuffle<0>(P_curve_2_3 - vPP),
+		                      htfm00,
+		                      madd(shuffle<1>(P_curve_2_3 - vPP),
+		                           htfm11,
+		                           shuffle<2>(P_curve_2_3 - vPP)*htfm22));
+
+		const ssef p0 = _mm256_castps256_ps128(p01);
+		const ssef p1 = _mm256_extractf128_ps(p01, 1);
+		const ssef p2 = _mm256_castps256_ps128(p23);
+		const ssef p3 = _mm256_extractf128_ps(p23, 1);
+
+		const ssef P_curve_1 = _mm256_extractf128_ps(P_curve_0_1, 1);
+		r_st = ((float4 &)P_curve_1).w;
+		const ssef P_curve_2 = _mm256_castps256_ps128(P_curve_2_3);
+		r_en = ((float4 &)P_curve_2).w;
+#else  /* __KERNEL_AVX2__ */
 		ssef htfm[] = { htfm0, htfm1, htfm2 };
 		ssef vP = load4f(P);
 		ssef p0 = transform_point_T3(htfm, P_curve[0] - vP);
@@ -285,6 +324,10 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 		ssef p2 = transform_point_T3(htfm, P_curve[2] - vP);
 		ssef p3 = transform_point_T3(htfm, P_curve[3] - vP);
 
+		r_st = ((float4 &)P_curve[1]).w;
+		r_en = ((float4 &)P_curve[2]).w;
+#endif  /* __KERNEL_AVX2__ */
+
 		float fc = 0.71f;
 		ssef vfc = ssef(fc);
 		ssef vfcxp3 = vfc * p3;
@@ -294,8 +337,6 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 		vcurve_coef[2] = madd(ssef(fc * 2.0f), p0, madd(ssef(fc - 3.0f), p1, msub(ssef(3.0f - 2.0f * fc), p2, vfcxp3)));
 		vcurve_coef[3] = msub(ssef(fc - 2.0f), p2 - p1, msub(vfc, p0, vfcxp3));
 
-		r_st = ((float4 &)P_curve[1]).w;
-		r_en = ((float4 &)P_curve[2]).w;
 	}
 #else
 	float3 curve_coef[4];
@@ -383,8 +424,9 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 
 	/* begin loop */
 	while(!(tree >> (depth))) {
-		float i_st = tree * resol;
-		float i_en = i_st + (level * resol);
+		const float i_st = tree * resol;
+		const float i_en = i_st + (level * resol);
+
 #ifdef __KERNEL_SSE2__
 		ssef vi_st = ssef(i_st), vi_en = ssef(i_en);
 		ssef vp_st = madd(madd(madd(vcurve_coef[3], vi_st, vcurve_coef[2]), vi_st, vcurve_coef[1]), vi_st, vcurve_coef[0]);
@@ -458,13 +500,23 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 
 			if(flags & CURVE_KN_RIBBONS) {
 				float3 tg = (p_en - p_st);
+#ifdef __KERNEL_SSE__
+				const float3 tg_sq = tg * tg;
+				float w = tg_sq.x + tg_sq.y;
+#else
 				float w = tg.x * tg.x + tg.y * tg.y;
+#endif
 				if(w == 0) {
 					tree++;
 					level = tree & -tree;
 					continue;
 				}
+#ifdef __KERNEL_SSE__
+				const float3 p_sttg = p_st * tg;
+				w = -(p_sttg.x + p_sttg.y) / w;
+#else
 				w = -(p_st.x * tg.x + p_st.y * tg.y) / w;
+#endif
 				w = saturate(w);
 
 				/* compute u on the curve segment */
@@ -496,7 +548,13 @@ ccl_device_forceinline bool bvh_cardinal_curve_intersect(KernelGlobals *kg, Inte
 				if(difl != 0.0f) {
 					mw_extension = min(difl * fabsf(bmaxz), extmax);
 					r_ext = mw_extension + r_curr;
+#ifdef __KERNEL_SSE__
+					const float3 p_curr_sq = p_curr * p_curr;
+					const float3 dxxx = _mm_sqrt_ss(_mm_hadd_ps(p_curr_sq.m128, p_curr_sq.m128));
+					float d = dxxx.x;
+#else
 					float d = sqrtf(p_curr.x * p_curr.x + p_curr.y * p_curr.y);
+#endif
 					float d0 = d - r_curr;
 					float d1 = d + r_curr;
 					float inv_mw_extension = 1.0f/mw_extension;
diff --git a/intern/cycles/kernel/geom/geom_motion_curve.h b/intern/cycles/kernel/geom/geom_motion_curve.h
index 6de5aa7ea99..80b33fad68b 100644
--- a/intern/cycles/kernel/geom/geom_motion_curve.h
+++ b/intern/cycles/kernel/geom/geom_motion_curve.h
@@ -118,7 +118,12 @@ ccl_device_inline void motion_cardinal_curve_keys_for_step(KernelGlobals *kg, in
 }
 
 /* return 2 curve key locations */
-ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object, int prim, float time, int k0, int k1, int k2, int k3, float4 keys[4])
+ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg,
+                                                  int object,
+                                                  int prim,
+                                                  float time,
+                                                  int k0, int k1, int k2, int k3,
+                                                  float4 keys[4])
 {
 	/* get motion info */
 	int numsteps, numkeys;
@@ -147,6 +152,65 @@ ccl_device_inline void motion_cardinal_curve_keys(KernelGlobals *kg, int object,
 	keys[3] = (1.0f - t)*keys[3] + t*next_keys[3];
 }
 
+#ifdef __KERNEL_AVX2__
+/* Similar to the above, but returns the keys as a pair of AVX registers,
+ * each holding two float4 values.
+ */
+ccl_device_inline void motion_cardinal_curve_keys_avx(KernelGlobals *kg,
+                                                      int object,
+                                                      int prim,
+                                                      float time,
+                                                      int k0, int k1,
+                                                      int k2, int k3,
+                                                      avxf *out_keys_0_1,
+                                                      avxf *out_keys_2_3)
+{
+	/* Get motion info. */
+	int numsteps, numkeys;
+	object_motion_info(kg, object, &numsteps, NULL, &numkeys);
+
+	/* Figure out which steps we need to fetch and their interpolation factor. */
+	int maxstep = numsteps * 2;
+	int step = min((int)(time*maxstep), maxstep - 1);
+	float t = time*maxstep - step;
+
+	/* Find attribute. */
+	AttributeElement elem;
+	int offset = find_attribute_curve_motion(kg,
+	                                         object,
+	                                         ATTR_STD_MOTION_VERTEX_POSITION,
+	                                         &elem);
+	kernel_assert(offset != ATTR_STD_NOT_FOUND);
+
+	/* Fetch key coordinates. */
+	float4 next_keys[4];
+	float4 keys[4];
+	motion_cardinal_curve_keys_for_step(kg,
+	                                    offset,
+	                                    numkeys,
+	                                    numsteps,
+	                                    step,
+	                                    k0, k1, k2, k3,
+	                                    keys);
+	motion_cardinal_curve_keys_for_step(kg,
+	                                    offset,
+	                                    numkeys,
+	                                    numsteps,
+	                                    step + 1,
+	                                    k0, k1, k2, k3,
+	                                    next_keys);
+
+	const avxf keys_0_1 = avxf(keys[0].m128, keys[1].m128);
+	const avxf keys_2_3 = avxf(keys[2].m128, keys[3].m128);
+	const avxf next_keys_0_1 = avxf(next_keys[0].m128, next_keys[1].m128);
+	const avxf next_keys_2_3 = avxf(next_keys[2].m128, next_keys[3].m128);
+
+	/* Interpolate between steps. */
+	*out_keys_0_1 = (1.0f - t) * keys_0_1 + t*next_keys_0_1;
+	*out_keys_2_3 = (1.0f - t) * keys_2_3 + t*next_keys_2_3;
+}
+#endif
+
 #endif
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/geom/geom_motion_triangle.h b/intern/cycles/kernel/geom/geom_motion_triangle.h
index 3cbe59aaece..538c332c63a 100644
--- a/intern/cycles/kernel/geom/geom_motion_triangle.h
+++ b/intern/cycles/kernel/geom/geom_motion_triangle.h
@@ -323,11 +323,11 @@ ccl_device_noinline void motion_triangle_shader_setup(KernelGlobals *kg, ShaderD
  * time and do a ray intersection with the resulting triangle */
 
 ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection *isect,
-	float3 P, float3 dir, float time, uint visibility, int object, int triAddr)
+	float3 P, float3 dir, float time, uint visibility, int object, int prim_addr)
 {
 	/* primitive index for vertex location lookup */
-	int prim = kernel_tex_fetch(__prim_index, triAddr);
-	int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
+	int prim = kernel_tex_fetch(__prim_index, prim_addr);
+	int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
 
 	/* get vertex locations for intersection */
 	float3 verts[3];
@@ -340,13 +340,13 @@ ccl_device_inline bool motion_triangle_intersect(KernelGlobals *kg, Intersection
 #ifdef __VISIBILITY_FLAG__
 		/* visibility flag test. we do it here under the assumption
 		 * that most triangles are culled by node flags */
-		if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+		if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
 #endif
 		{
 			isect->t = t;
 			isect->u = u;
 			isect->v = v;
-			isect->prim = triAddr;
+			isect->prim = prim_addr;
 			isect->object = object;
 			isect->type = PRIMITIVE_MOTION_TRIANGLE;
 		
@@ -369,14 +369,14 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
         float3 dir,
         float time,
         int object,
-        int triAddr,
+        int prim_addr,
         float tmax,
         uint *lcg_state,
         int max_hits)
 {
 	/* primitive index for vertex location lookup */
-	int prim = kernel_tex_fetch(__prim_index, triAddr);
-	int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, triAddr): object;
+	int prim = kernel_tex_fetch(__prim_index, prim_addr);
+	int fobject = (object == OBJECT_NONE)? kernel_tex_fetch(__prim_object, prim_addr): object;
 
 	/* get vertex locations for intersection */
 	float3 verts[3];
@@ -413,7 +413,7 @@ ccl_device_inline void motion_triangle_intersect_subsurface(
 		isect->t = t;
 		isect->u = u;
 		isect->v = v;
-		isect->prim = triAddr;
+		isect->prim = prim_addr;
 		isect->object = object;
 		isect->type = PRIMITIVE_MOTION_TRIANGLE;
 
diff --git a/intern/cycles/kernel/geom/geom_triangle_intersect.h b/intern/cycles/kernel/geom/geom_triangle_intersect.h
index eb7340583c8..4db121d94f4 100644
--- a/intern/cycles/kernel/geom/geom_triangle_intersect.h
+++ b/intern/cycles/kernel/geom/geom_triangle_intersect.h
@@ -108,7 +108,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
                                           float3 P,
                                           uint visibility,
                                           int object,
-                                          int triAddr)
+                                          int prim_addr)
 {
 	const int kx = isect_precalc->kx;
 	const int ky = isect_precalc->ky;
@@ -118,7 +118,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	const float Sz = isect_precalc->Sz;
 
 	/* Calculate vertices relative to ray origin. */
-	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
+	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
 
 #if defined(__KERNEL_AVX2__) && defined(__KERNEL_SSE__)
 	const avxf avxf_P(P.m128, P.m128);
@@ -129,10 +129,10 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	const avxf AB = tri_ab - avxf_P;
 	const avxf BC = tri_bc - avxf_P;
 
-	const __m256i permuteMask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
+	const __m256i permute_mask = _mm256_set_epi32(0x3, kz, ky, kx, 0x3, kz, ky, kx);
 
-	const avxf AB_k = shuffle(AB, permuteMask);
-	const avxf BC_k = shuffle(BC, permuteMask);
+	const avxf AB_k = shuffle(AB, permute_mask);
+	const avxf BC_k = shuffle(BC, permute_mask);
 
 	/* Akz, Akz, Bkz, Bkz, Bkz, Bkz, Ckz, Ckz */
 	const avxf ABBC_kz = shuffle<2>(AB_k, BC_k);
@@ -155,14 +155,14 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 	/* By, Bx, Cy, Cx, By, Bx, Ay, Ax */
 	const avxf BCBA_yx = permute<3,2,7,6,3,2,1,0>(ABBC_xy);
 
-	const avxf negMask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
+	const avxf neg_mask(0,0,0,0,0x80000000, 0x80000000, 0x80000000, 0x80000000);
 
 	/* W           U                             V
 	 * (AxBy-AyBx) (BxCy-ByCx) XX XX (BxBy-ByBx) (CxAy-CyAx) XX XX
 	 */
-	const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, negMask /* Dont care */);
+	const avxf WUxxxxVxx_neg = _mm256_hsub_ps(ABBC_xy * BCBA_yx, neg_mask /* Dont care */);
 
-	const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ negMask;
+	const avxf WUVWnegWUVW = permute<0,1,5,0,0,1,5,0>(WUxxxxVxx_neg) ^ neg_mask;
 
 	/* Calculate scaled barycentric coordinates. */
 	float WUVW_array[4];
@@ -231,7 +231,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 #ifdef __VISIBILITY_FLAG__
 	/* visibility flag test. we do it here under the assumption
 	 * that most triangles are culled by node flags */
-	if(kernel_tex_fetch(__prim_visibility, triAddr) & visibility)
+	if(kernel_tex_fetch(__prim_visibility, prim_addr) & visibility)
 #endif
 	{
 #ifdef __KERNEL_CUDA__
@@ -241,7 +241,7 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
 #endif
 		/* Normalize U, V, W, and T. */
 		const float inv_det = 1.0f / det;
-		isect->prim = triAddr;
+		isect->prim = prim_addr;
 		isect->object = object;
 		isect->type = PRIMITIVE_TRIANGLE;
 		isect->u = U * inv_det;
@@ -264,7 +264,7 @@ ccl_device_inline void triangle_intersect_subsurface(
         SubsurfaceIntersection *ss_isect,
         float3 P,
         int object,
-        int triAddr,
+        int prim_addr,
         float tmax,
         uint *lcg_state,
         int max_hits)
@@ -277,7 +277,7 @@ ccl_device_inline void triangle_intersect_subsurface(
 	const float Sz = isect_precalc->Sz;
 
 	/* Calculate vertices relative to ray origin. */
-	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, triAddr);
+	const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, prim_addr);
 	const float4 tri_a = kernel_tex_fetch(__prim_tri_verts, tri_vindex+0),
 	             tri_b = kernel_tex_fetch(__prim_tri_verts, tri_vindex+1),
 	             tri_c = kernel_tex_fetch(__prim_tri_verts, tri_vindex+2);
@@ -415,7 +415,7 @@ ccl_device_inline void triangle_intersect_subsurface(
 
 	/* record intersection */
 	Intersection *isect = &ss_isect->hits[hit];
-	isect->prim = triAddr;
+	isect->prim = prim_addr;
 	isect->object = object;
 	isect->type = PRIMITIVE_TRIANGLE;
 	isect->u = U * inv_det;
diff --git a/intern/cycles/kernel/kernel_jitter.h b/intern/cycles/kernel/kernel_jitter.h
index aec7bc33acd..67546131746 100644
--- a/intern/cycles/kernel/kernel_jitter.h
+++ b/intern/cycles/kernel/kernel_jitter.h
@@ -149,6 +149,15 @@ ccl_device_inline uint cmj_hash(uint i, uint p)
 	return i;
 }
 
+ccl_device_inline uint cmj_hash_simple(uint i, uint p)
+{
+	i = (i ^ 61) ^ p;
+	i += i << 3;
+	i ^= i >> 4;
+	i *= 0x27d4eb2d;
+	return i;
+}
+
 ccl_device_inline float cmj_randfloat(uint i, uint p)
 {
 	return cmj_hash(i, p) * (1.0f / 4294967808.0f);
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index 6d89a89ed5b..6a36c68d69f 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -141,6 +141,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 #endif  /* __LAMP_MIS__ */
 
 #ifdef __VOLUME__
+		/* Sanitize volume stack. */
+		if(!hit) {
+			kernel_volume_clean_stack(kg, state->volume_stack);
+		}
 		/* volume attenuation, emission, scatter */
 		if(state->volume_stack[0].shader != SHADER_NONE) {
 			Ray volume_ray = *ray;
@@ -658,6 +662,10 @@ ccl_device_inline float4 kernel_path_integrate(KernelGlobals *kg,
 #endif  /* __LAMP_MIS__ */
 
 #ifdef __VOLUME__
+		/* Sanitize volume stack. */
+		if(!hit) {
+			kernel_volume_clean_stack(kg, state.volume_stack);
+		}
 		/* volume attenuation, emission, scatter */
 		if(state.volume_stack[0].shader != SHADER_NONE) {
 			Ray volume_ray = ray;
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index c84727ace99..10174e1c4ce 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -294,6 +294,10 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 #endif  /* __KERNEL_DEBUG__ */
 
 #ifdef __VOLUME__
+		/* Sanitize volume stack. */
+		if(!hit) {
+			kernel_volume_clean_stack(kg, state.volume_stack);
+		}
 		/* volume attenuation, emission, scatter */
 		if(state.volume_stack[0].shader != SHADER_NONE) {
 			Ray volume_ray = ray;
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 2b767da5041..e773753396f 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -120,13 +120,11 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, ccl_addr_space RNG *
 	/* Cranly-Patterson rotation using rng seed */
 	float shift;
 
-	/* using the same *rng value to offset seems to give correlation issues,
-	 * we could hash it with the dimension but this has a performance impact,
-	 * we need to find a solution for this */
-	if(dimension & 1)
-		shift = (*rng >> 16) * (1.0f/(float)0xFFFF);
-	else
-		shift = (*rng & 0xFFFF) * (1.0f/(float)0xFFFF);
+	/* Hash rng with dimension to solve correlation issues.
+	 * See T38710, T50116.
+	 */
+	RNG tmp_rng = cmj_hash_simple(dimension, *rng);
+	shift = tmp_rng * (1.0f/(float)0xFFFFFFFF);
 
 	return r + shift - floorf(r + shift);
 #endif
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index a6c31d4a518..fd961836ec9 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -192,6 +192,9 @@ CCL_NAMESPACE_BEGIN
 #ifdef __NO_PATCH_EVAL__
 #  undef __PATCH_EVAL__
 #endif
+#ifdef __NO_TRANSPARENT__
+#  undef __TRANSPARENT_SHADOWS__
+#endif
 
 /* Random Numbers */
 
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index dd7b0d9812d..c7cb29b5af2 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -582,17 +582,12 @@ ccl_device VolumeIntegrateResult kernel_volume_integrate_heterogeneous_distance(
 ccl_device_noinline VolumeIntegrateResult kernel_volume_integrate(KernelGlobals *kg,
 	PathState *state, ShaderData *sd, Ray *ray, PathRadiance *L, float3 *throughput, RNG *rng, bool heterogeneous)
 {
-	/* workaround to fix correlation bug in T38710, can find better solution
-	 * in random number generator later, for now this is done here to not impact
-	 * performance of rendering without volumes */
-	RNG tmp_rng = cmj_hash(*rng, state->rng_offset);
-
 	shader_setup_from_volume(kg, sd, ray);
 
 	if(heterogeneous)
-		return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, &tmp_rng);
+		return kernel_volume_integrate_heterogeneous_distance(kg, state, ray, sd, L, throughput, rng);
 	else
-		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, &tmp_rng, true);
+		return kernel_volume_integrate_homogeneous(kg, state, ray, sd, L, throughput, rng, true);
 }
 
 /* Decoupled Volume Sampling
@@ -1267,4 +1262,30 @@ ccl_device void kernel_volume_stack_update_for_subsurface(KernelGlobals *kg,
 }
 #endif
 
+/* Clean stack after the last bounce.
+ *
+ * All volumes are expected to be closed manifolds, so by the time a ray hits
+ * nothing (for example, a last bounce which escapes to the environment) the
+ * only volume left in the stack should be the world's one. Every other volume
+ * entry should have been exited already.
+ *
+ * This isn't always true because of ray intersection precision issues, which
+ * could leave an infinite non-world volume in the stack, causing render
+ * artifacts.
+ *
+ * Call this function once the ray has hit nothing to drop every volume apart
+ * from the world's one.
+ */
+ccl_device_inline void kernel_volume_clean_stack(KernelGlobals *kg,
+                                                 VolumeStack *volume_stack)
+{
+	/* Keep the world's volume in the first slot when the background has a
+	 * volume shader, otherwise keep nothing. The slot right after the kept
+	 * entries is set to SHADER_NONE (presumably the stack terminator).
+	 */
+	const bool has_world_volume = (kernel_data.background.volume_shader != SHADER_NONE);
+	const int num_kept = has_world_volume? 1: 0;
+	volume_stack[num_kept].shader = SHADER_NONE;
+}
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index 13310a61761..d9a297002c6 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -135,20 +135,16 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
 {
 	size_t num_pixels = bake_data->size();
 
-	progress.reset_sample();
-	this->num_parts = 0;
+	int num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1;
 
-	/* calculate the total parts for the progress bar */
+	/* calculate the total pixel samples for the progress bar */
+	total_pixel_samples = 0;
 	for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
 		size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
-
-		DeviceTask task(DeviceTask::SHADER);
-		task.shader_w = shader_size;
-
-		this->num_parts += device->get_split_task_count(task);
+		total_pixel_samples += shader_size * num_samples;
 	}
-
-	this->num_samples = is_aa_pass(shader_type)? scene->integrator->aa_samples : 1;
+	progress.reset_sample();
+	progress.set_total_pixel_samples(total_pixel_samples);
 
 	for(size_t shader_offset = 0; shader_offset < num_pixels; shader_offset += m_shader_limit) {
 		size_t shader_size = (size_t)fminf(num_pixels - shader_offset, m_shader_limit);
@@ -187,9 +183,9 @@ bool BakeManager::bake(Device *device, DeviceScene *dscene, Scene *scene, Progre
 		task.shader_x = 0;
 		task.offset = shader_offset;
 		task.shader_w = d_output.size();
-		task.num_samples = this->num_samples;
+		task.num_samples = num_samples;
 		task.get_cancel = function_bind(&Progress::get_cancel, &progress);
-		task.update_progress_sample = function_bind(&Progress::increment_sample_update, &progress);
+		task.update_progress_sample = function_bind(&Progress::add_samples_update, &progress, _1, _2);
 
 		device->task_add(task);
 		device->task_wait();
diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h
index 8377e387197..25f5eb3c897 100644
--- a/intern/cycles/render/bake.h
+++ b/intern/cycles/render/bake.h
@@ -73,8 +73,7 @@ public:
 
 	bool need_update;
 
-	int num_samples;
-	int num_parts;
+	uint64_t total_pixel_samples;  /* pixels * samples summed over the bake; can exceed INT_MAX */
 
 private:
 	BakeData *m_bake_data;
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 3fb2bb1cf92..c7f37a13fba 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -1442,14 +1442,14 @@ void PointDensityTextureNode::compile(SVMCompiler& compiler)
 		else {
 			if(use_density) {
 				compiler.add_node(NODE_VALUE_F,
-								  __float_as_int(0.0f),
-								  compiler.stack_assign(density_out));
+				                  __float_as_int(0.0f),
+				                  compiler.stack_assign(density_out));
 			}
 			if(use_color) {
 				compiler.add_node(NODE_VALUE_V, compiler.stack_assign(color_out));
 				compiler.add_node(NODE_VALUE_V, make_float3(TEX_IMAGE_MISSING_R,
-															TEX_IMAGE_MISSING_G,
-															TEX_IMAGE_MISSING_B));
+				                                            TEX_IMAGE_MISSING_G,
+				                                            TEX_IMAGE_MISSING_B));
 			}
 		}
 	}
@@ -2421,7 +2421,7 @@ void BackgroundNode::compile(SVMCompiler& compiler)
 	if(color_in->link || strength_in->link) {
 		compiler.add_node(NODE_EMISSION_WEIGHT,
 		                  compiler.stack_assign(color_in),
-						  compiler.stack_assign(strength_in));
+		                  compiler.stack_assign(strength_in));
 	}
 	else
 		compiler.add_node(NODE_CLOSURE_SET_WEIGHT, color*strength);
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 9d8c9fed7af..33721048722 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -67,10 +67,7 @@ Session::Session(const SessionParams& params_)
 	session_thread = NULL;
 	scene = NULL;
 
-	start_time = 0.0;
 	reset_time = 0.0;
-	preview_time = 0.0;
-	paused_time = 0.0;
 	last_update_time = 0.0;
 
 	delayed_reset.do_reset = false;
@@ -201,12 +198,10 @@ void Session::run_gpu()
 {
 	bool tiles_written = false;
 
-	start_time = time_dt();
 	reset_time = time_dt();
-	paused_time = 0.0;
 	last_update_time = time_dt();
 
-	progress.set_render_start_time(start_time + paused_time);
+	progress.set_render_start_time();
 
 	while(!progress.get_cancel()) {
 		/* advance to next tile */
@@ -233,13 +228,9 @@ void Session::run_gpu()
 				update_status_time(pause, no_tiles);
 
 				while(1) {
-					double pause_start = time_dt();
+					scoped_timer pause_timer;
 					pause_cond.wait(pause_lock);
-					paused_time += time_dt() - pause_start;
-
-					if(!params.background)
-						progress.set_start_time(start_time + paused_time);
-					progress.set_render_start_time(start_time + paused_time);
+					progress.add_skip_time(pause_timer, params.background);
 
 					update_status_time(pause, no_tiles);
 					progress.set_update();
@@ -255,7 +246,9 @@ void Session::run_gpu()
 
 		if(!no_tiles) {
 			/* update scene */
+			scoped_timer update_timer;
 			update_scene();
+			progress.add_skip_time(update_timer, params.background);
 
 			if(!device->error_message().empty())
 				progress.set_error(device->error_message());
@@ -523,13 +516,9 @@ void Session::run_cpu()
 				update_status_time(pause, no_tiles);
 
 				while(1) {
-					double pause_start = time_dt();
+					scoped_timer pause_timer;
 					pause_cond.wait(pause_lock);
-					paused_time += time_dt() - pause_start;
-
-					if(!params.background)
-						progress.set_start_time(start_time + paused_time);
-					progress.set_render_start_time(start_time + paused_time);
+					progress.add_skip_time(pause_timer, params.background);
 
 					update_status_time(pause, no_tiles);
 					progress.set_update();
@@ -550,7 +539,9 @@ void Session::run_cpu()
 			thread_scoped_lock buffers_lock(buffers_mutex);
 
 			/* update scene */
+			scoped_timer update_timer;
 			update_scene();
+			progress.add_skip_time(update_timer, params.background);
 
 			if(!device->error_message().empty())
 				progress.set_error(device->error_message());
@@ -645,6 +636,7 @@ DeviceRequestedFeatures Session::get_requested_device_features()
 	BakeManager *bake_manager = scene->bake_manager;
 	requested_features.use_baking = bake_manager->get_baking();
 	requested_features.use_integrator_branched = (scene->integrator->method == Integrator::BRANCHED_PATH);
+	requested_features.use_transparent &= scene->integrator->transparent_shadows;
 
 	return requested_features;
 }
@@ -718,14 +710,14 @@ void Session::reset_(BufferParams& buffer_params, int samples)
 	}
 
 	tile_manager.reset(buffer_params, samples);
+	progress.reset_sample();
 
-	start_time = time_dt();
-	preview_time = 0.0;
-	paused_time = 0.0;
+	bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX;
+	progress.set_total_pixel_samples(show_progress? tile_manager.state.total_pixel_samples : 0);
 
 	if(!params.background)
-		progress.set_start_time(start_time);
-	progress.set_render_start_time(start_time);
+		progress.set_start_time();
+	progress.set_render_start_time();
 }
 
 void Session::reset(BufferParams& buffer_params, int samples)
@@ -827,61 +819,40 @@ void Session::update_scene()
 
 void Session::update_status_time(bool show_pause, bool show_done)
 {
-	int sample = tile_manager.state.sample;
-	int resolution = tile_manager.state.resolution_divider;
-	int num_tiles = tile_manager.state.num_tiles;
+	int progressive_sample = tile_manager.state.sample;
+	int num_samples = tile_manager.get_num_effective_samples();
+
 	int tile = tile_manager.state.num_rendered_tiles;
+	int num_tiles = tile_manager.state.num_tiles;
 
 	/* update status */
 	string status, substatus;
 
 	if(!params.progressive) {
-		const int progress_sample = progress.get_sample(),
-		          num_samples = tile_manager.get_num_effective_samples();
-		const bool is_gpu = params.device.type == DEVICE_CUDA || params.device.type == DEVICE_OPENCL;
-		const bool is_multidevice = params.device.multi_devices.size() > 1;
 		const bool is_cpu = params.device.type == DEVICE_CPU;
-		const bool is_last_tile = (num_samples * num_tiles - progress_sample) < num_samples;
+		const bool is_last_tile = (progress.get_finished_tiles() + 1) == num_tiles;
 
 		substatus = string_printf("Path Tracing Tile %d/%d", tile, num_tiles);
 
-		if((is_gpu && !is_multidevice && !device->info.use_split_kernel) ||
-		   (is_cpu && (num_tiles == 1 || is_last_tile)))
+		if(device->show_samples() || (is_cpu && is_last_tile))
 		{
-			/* When using split-kernel (OpenCL) each thread in a tile will be working on a different
-			 * sample. Can't display sample number when device uses split-kernel
+			/* Some devices automatically support showing the sample number:
+			 * - CUDADevice
+			 * - OpenCLDevice when using the megakernel (the split kernel renders multiple samples at the same time, so the current sample isn't really defined)
+			 * - CPUDevice when using one thread
+			 * For these devices, the current sample is always shown.
+			 *
+			 * The other option is when the last tile is currently being rendered by the CPU.
 			 */
-
-			/* when rendering on GPU multithreading happens within single tile, as in
-			 * tiles are handling sequentially and in this case we could display
-			 * currently rendering sample number
-			 * this helps a lot from feedback point of view.
-			 * also display the info on CPU, when using 1 tile only
-			 */
-
-			int status_sample = progress_sample;
-			if(tile > 1) {
-				/* sample counter is global for all tiles, subtract samples
-				 * from already finished tiles to get sample counter for
-				 * current tile only
-				 */
-				if(is_cpu && is_last_tile && num_tiles > 1) {
-					status_sample = num_samples - (num_samples * num_tiles - progress_sample);
-				}
-				else {
-					status_sample -= (tile - 1) * num_samples;
-				}
-			}
-
-			substatus += string_printf(", Sample %d/%d", status_sample, num_samples);
+			substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples);
 		}
 	}
 	else if(tile_manager.num_samples == INT_MAX)
-		substatus = string_printf("Path Tracing Sample %d", sample+1);
+		substatus = string_printf("Path Tracing Sample %d", progressive_sample+1);
 	else
 		substatus = string_printf("Path Tracing Sample %d/%d",
-		                          sample+1,
-		                          tile_manager.get_num_effective_samples());
+		                          progressive_sample+1,
+		                          num_samples);
 	
 	if(show_pause) {
 		status = "Paused";
@@ -895,22 +866,6 @@ void Session::update_status_time(bool show_pause, bool show_done)
 	}
 
 	progress.set_status(status, substatus);
-
-	/* update timing */
-	if(preview_time == 0.0 && resolution == 1)
-		preview_time = time_dt();
-	
-	double tile_time = (tile == 0 || sample == 0)? 0.0: (time_dt() - preview_time - paused_time) / sample;
-
-	/* negative can happen when we pause a bit before rendering, can discard that */
-	if(preview_time < 0.0) preview_time = 0.0;
-
-	progress.set_tile(tile, tile_time);
-}
-
-void Session::update_progress_sample()
-{
-	progress.increment_sample();
 }
 
 void Session::path_trace()
@@ -922,7 +877,7 @@ void Session::path_trace()
 	task.release_tile = function_bind(&Session::release_tile, this, _1);
 	task.get_cancel = function_bind(&Progress::get_cancel, &this->progress);
 	task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1);
-	task.update_progress_sample = function_bind(&Session::update_progress_sample, this);
+	task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2);
 	task.need_finish_queue = params.progressive_refine;
 	task.integrator_branched = scene->integrator->method == Integrator::BRANCHED_PATH;
 	task.requested_tile_size = params.tile_size;
diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h
index 1db4692e171..c7ff1446171 100644
--- a/intern/cycles/render/session.h
+++ b/intern/cycles/render/session.h
@@ -145,6 +145,10 @@ public:
 
 	void device_free();
 
+	/* Returns the rendering progress or 0 if no progress can be determined
+	 * (for example, when rendering with unlimited samples). */
+	float get_progress();
+
 protected:
 	struct DelayedReset {
 		thread_mutex mutex;
@@ -173,8 +177,6 @@ protected:
 	void update_tile_sample(RenderTile& tile);
 	void release_tile(RenderTile& tile);
 
-	void update_progress_sample();
-
 	bool device_use_gl;
 
 	thread *session_thread;
@@ -194,10 +196,7 @@ protected:
 
 	bool kernels_loaded;
 
-	double start_time;
 	double reset_time;
-	double preview_time;
-	double paused_time;
 
 	/* progressive refine */
 	double last_update_time;
diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp
index 06b6dd969d8..335edcbe609 100644
--- a/intern/cycles/render/shader.cpp
+++ b/intern/cycles/render/shader.cpp
@@ -571,6 +571,9 @@ void ShaderManager::get_requested_graph_features(ShaderGraph *graph,
 		if(node->has_surface_bssrdf()) {
 			requested_features->use_subsurface = true;
 		}
+		if(node->has_surface_transparent()) {
+			requested_features->use_transparent = true;
+		}
 	}
 }
 
diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp
index 3a6dfea11a7..e59d0c843a3 100644
--- a/intern/cycles/render/tile.cpp
+++ b/intern/cycles/render/tile.cpp
@@ -108,36 +108,57 @@ TileManager::~TileManager()
 {
 }
 
-void TileManager::reset(BufferParams& params_, int num_samples_)
+static int get_divider(int w, int h, int start_resolution)
 {
-	params = params_;
-
 	int divider = 1;
-	int w = params.width, h = params.height;
-
 	if(start_resolution != INT_MAX) {
 		while(w*h > start_resolution*start_resolution) {
 			w = max(1, w/2);
 			h = max(1, h/2);
 
-			divider *= 2;
+			divider <<= 1;
 		}
 	}
+	return divider;
+}
 
-	num_samples = num_samples_;
+void TileManager::reset(BufferParams& params_, int num_samples_)
+{
+	params = params_;
+
+	set_samples(num_samples_);
 
 	state.buffer = BufferParams();
 	state.sample = range_start_sample - 1;
 	state.num_tiles = 0;
 	state.num_rendered_tiles = 0;
 	state.num_samples = 0;
-	state.resolution_divider = divider;
+	state.resolution_divider = get_divider(params.width, params.height, start_resolution);
 	state.tiles.clear();
 }
 
 void TileManager::set_samples(int num_samples_)
 {
 	num_samples = num_samples_;
+
+	/* No real progress indication is possible when using unlimited samples. */
+	if(num_samples == INT_MAX) {
+		state.total_pixel_samples = 0;
+	}
+	else {
+		uint64_t pixel_samples = 0;
+		/* While rendering in the viewport, the initial preview resolution is increased to the native resolution
+		 * before the actual rendering begins. Therefore, additional pixel samples will be rendered. */
+		int divider = get_divider(params.width, params.height, start_resolution) / 2;
+		while(divider > 1) {
+			int image_w = max(1, params.width/divider);
+			int image_h = max(1, params.height/divider);
+			pixel_samples += (uint64_t)image_w * image_h;
+			divider >>= 1;
+		}
+		/* Widen to 64 bit before multiplying: samples * width * height overflows int on large renders. */
+		state.total_pixel_samples = pixel_samples + (uint64_t)get_num_effective_samples() * params.width * params.height;
+	}
 }
 
 /* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render device.
diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h
index af1b1ed8b0f..5d92ebac355 100644
--- a/intern/cycles/render/tile.h
+++ b/intern/cycles/render/tile.h
@@ -64,6 +64,10 @@ public:
 		int resolution_divider;
 		int num_tiles;
 		int num_rendered_tiles;
+
+		/* Total samples over all pixels: Generally num_samples*num_pixels,
+		 * but can be higher due to the initial resolution division for previews. */
+		uint64_t total_pixel_samples;
 		/* This vector contains a list of tiles for every logical device in the session.
 		 * In each list, the tiles are sorted according to the tile order setting. */
 		vector<list<Tile> > tiles;
@@ -91,7 +95,7 @@ public:
 	/* Number to samples in the rendering range. */
 	int range_num_samples;
 
-	/* get number of actual samples to render. */
+	/* Get number of actual samples to render. */
 	int get_num_effective_samples();
 protected:
 
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 2db2c4dad1a..2451213963a 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -180,6 +180,14 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
 }
 #endif
 
+#ifndef _mm256_set_m128
+#  define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \
+    _mm256_insertf128_ps(_mm256_castps128_ps256(lo), (hi), 0x1)
+#endif
+
+#define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \
+    _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr))
+
 CCL_NAMESPACE_END
 
 #endif
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 3f4d3e06c0b..6cb68b53d16 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -596,8 +596,7 @@ ccl_device_inline float len_squared(const float4& a)
 
 ccl_device_inline float3 normalize(const float3& a)
 {
-	/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) && 0
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
 	__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
 	return _mm_div_ps(a.m128, norm);
 #else
@@ -798,8 +797,7 @@ ccl_device_inline float4 operator-(const float4& a)
 
 ccl_device_inline float4 operator*(const float4& a, const float4& b)
 {
-	/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && 0
+#ifdef __KERNEL_SSE__
 	return _mm_mul_ps(a.m128, b.m128);
 #else
 	return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
@@ -847,8 +845,7 @@ ccl_device_inline float4 operator/(const float4& a, const float4& b)
 
 ccl_device_inline float4 operator+(const float4& a, const float4& b)
 {
-	/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && 0
+#ifdef __KERNEL_SSE__
 	return _mm_add_ps(a.m128, b.m128);
 #else
 	return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 4ae1d61dd17..14215056840 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -34,12 +34,12 @@ class Progress {
 public:
 	Progress()
 	{
-		tile = 0;
-		sample = 0;
+		pixel_samples = 0;
+		total_pixel_samples = 0;
+		current_tile_sample = 0;
+		finished_tiles = 0;
 		start_time = time_dt();
-		total_time = 0.0;
-		render_time = 0.0;
-		tile_time = 0.0;
+		render_start_time = time_dt();
 		status = "Initializing";
 		substatus = "";
 		sync_status = "";
@@ -62,22 +62,22 @@ public:
 		thread_scoped_lock lock(progress.progress_mutex);
 
 		progress.get_status(status, substatus);
-		progress.get_tile(tile, total_time, render_time, tile_time);
 
-		sample = progress.get_sample();
+		pixel_samples = progress.pixel_samples;
+		total_pixel_samples = progress.total_pixel_samples;
+		current_tile_sample = progress.get_current_sample();
 
 		return *this;
 	}
 
 	void reset()
 	{
-		tile = 0;
-		sample = 0;
+		pixel_samples = 0;
+		total_pixel_samples = 0;
+		current_tile_sample = 0;
+		finished_tiles = 0;
 		start_time = time_dt();
 		render_start_time = time_dt();
-		total_time = 0.0;
-		render_time = 0.0;
-		tile_time = 0.0;
 		status = "Initializing";
 		substatus = "";
 		sync_status = "";
@@ -139,69 +139,93 @@ public:
 
 	/* tile and timing information */
 
-	void set_start_time(double start_time_)
+	void set_start_time()
 	{
 		thread_scoped_lock lock(progress_mutex);
 
-		start_time = start_time_;
+		start_time = time_dt();
 	}
 
-	void set_render_start_time(double render_start_time_)
+	void set_render_start_time()
 	{
 		thread_scoped_lock lock(progress_mutex);
 
-		render_start_time = render_start_time_;
+		render_start_time = time_dt();
 	}
 
-	void set_tile(int tile_, double tile_time_)
+	void add_skip_time(const scoped_timer &start_timer, bool only_render)
 	{
-		thread_scoped_lock lock(progress_mutex);
-
-		tile = tile_;
-		total_time = time_dt() - start_time;
-		render_time = time_dt() - render_start_time;
-		tile_time = tile_time_;
+		double skip_time = time_dt() - start_timer.get_start();
+		/* get_time() reads the start times under this mutex, so shift them under it too. */
+		thread_scoped_lock lock(progress_mutex);
+		render_start_time += skip_time;
+		if(!only_render)
+			start_time += skip_time;
 	}
 
-	void get_tile(int& tile_, double& total_time_, double& render_time_, double& tile_time_)
+	void get_time(double& total_time_, double& render_time_)
 	{
 		thread_scoped_lock lock(progress_mutex);
 
-		tile_ = tile;
-		total_time_ = (total_time > 0.0)? total_time: 0.0;
-		render_time_ = (render_time > 0.0)? render_time: 0.0;
-		tile_time_ = tile_time;
+		total_time_ = time_dt() - start_time;
+		render_time_ = time_dt() - render_start_time;
 	}
 
-	void get_time(double& total_time_, double& render_time_)
+	void reset_sample()
 	{
-		total_time_ = (total_time > 0.0)? total_time: 0.0;
-		render_time_ = (render_time > 0.0)? render_time: 0.0;
+		thread_scoped_lock lock(progress_mutex);
+
+		pixel_samples = 0;
+		current_tile_sample = 0;
+		finished_tiles = 0;
 	}
 
-	void reset_sample()
+	void set_total_pixel_samples(uint64_t total_pixel_samples_)
 	{
 		thread_scoped_lock lock(progress_mutex);
 
-		sample = 0;
+		total_pixel_samples = total_pixel_samples_;
 	}
 
-	void increment_sample()
+	float get_progress()
+	{
+		if(total_pixel_samples > 0) {
+			return ((float) pixel_samples) / total_pixel_samples;
+		}
+		return 0.0f;
+	}
+
+	void add_samples(uint64_t pixel_samples_, int tile_sample)
 	{
 		thread_scoped_lock lock(progress_mutex);
 
-		sample++;
+		pixel_samples += pixel_samples_;
+		current_tile_sample = tile_sample;
 	}
 
-	void increment_sample_update()
+	void add_samples_update(uint64_t pixel_samples_, int tile_sample)
 	{
-		increment_sample();
+		add_samples(pixel_samples_, tile_sample);
 		set_update();
 	}
 
-	int get_sample()
+	void add_finished_tile()
+	{
+		thread_scoped_lock lock(progress_mutex);
+
+		finished_tiles++;
+	}
+
+	int get_current_sample()
+	{
+		/* Note that the value here always belongs to the last tile that updated,
+		 * so it's only useful if there is only one active tile. */
+		return current_tile_sample;
+	}
+
+	int get_finished_tiles()
 	{
-		return sample;
+		return finished_tiles;
 	}
 
 	/* status messages */
@@ -212,8 +236,6 @@ public:
 			thread_scoped_lock lock(progress_mutex);
 			status = status_;
 			substatus = substatus_;
-			total_time = time_dt() - start_time;
-			render_time = time_dt() - render_start_time;
 		}
 
 		set_update();
@@ -224,8 +246,6 @@ public:
 		{
 			thread_scoped_lock lock(progress_mutex);
 			substatus = substatus_;
-			total_time = time_dt() - start_time;
-			render_time = time_dt() - render_start_time;
 		}
 
 		set_update();
@@ -237,8 +257,6 @@ public:
 			thread_scoped_lock lock(progress_mutex);
 			sync_status = status_;
 			sync_substatus = substatus_;
-			total_time = time_dt() - start_time;
-			render_time = time_dt() - render_start_time;
 		}
 
 		set_update();
@@ -250,8 +268,6 @@ public:
 		{
 			thread_scoped_lock lock(progress_mutex);
 			sync_substatus = substatus_;
-			total_time = time_dt() - start_time;
-			render_time = time_dt() - render_start_time;
 		}
 
 		set_update();
@@ -292,12 +308,19 @@ protected:
 	function<void(void)> update_cb;
 	function<void(void)> cancel_cb;
 
-	int tile;    /* counter for rendered tiles */
-	int sample;  /* counter of rendered samples, global for all tiles */
+	/* pixel_samples counts how many samples have been rendered over all pixels, not just per pixel.
+	 * This makes the progress estimate more accurate when tiles with different sizes are used.
+	 *
+	 * total_pixel_samples is the total amount of pixel samples that will be rendered. */
+	uint64_t pixel_samples, total_pixel_samples;
+	/* Stores the current sample count of the last tile that called the update function.
+	 * It's used to display the sample count if only one tile is active. */
+	int current_tile_sample;
+	/* Stores the number of tiles that have already finished.
+	 * Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */
+	int finished_tiles;
 
 	double start_time, render_start_time;
-	double total_time, render_time;
-	double tile_time;
 
 	string status;
 	string substatus;
diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h
index a5b074bffa0..65798244111 100644
--- a/intern/cycles/util/util_time.h
+++ b/intern/cycles/util/util_time.h
@@ -29,7 +29,7 @@ void time_sleep(double t);
 
 class scoped_timer {
 public:
-	explicit scoped_timer(double *value) : value_(value)
+	explicit scoped_timer(double *value = NULL) : value_(value)
 	{
 		time_start_ = time_dt();
 	}
@@ -40,6 +40,12 @@ public:
 			*value_ = time_dt() - time_start_;
 		}
 	}
+
+	double get_start() const
+	{
+		return time_start_;
+	}
+
 protected:
 	double *value_;
 	double time_start_;
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index ea5eb3b25b0..a0695f20488 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -74,7 +74,6 @@ ccl_device_inline float3 transform_perspective(const Transform *t, const float3
 ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
 {
-	/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) && 0
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
 	ssef x, y, z, w, aa;
 	aa = a.m128;
 
@@ -103,8 +102,7 @@ ccl_device_inline float3 transform_point(const Transform *t, const float3 a)
 
 ccl_device_inline float3 transform_direction(const Transform *t, const float3 a)
 {
-	/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) && 0
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
 	ssef x, y, z, w, aa;
 	aa = a.m128;
 	x = _mm_loadu_ps(&t->x.x);
diff --git a/intern/opencolorio/fallback_impl.cc b/intern/opencolorio/fallback_impl.cc
index d0a129360b0..87629422013 100644
--- a/intern/opencolorio/fallback_impl.cc
+++ b/intern/opencolorio/fallback_impl.cc
@@ -23,18 +23,26 @@
  * ***** END GPL LICENSE BLOCK *****
  */
 
-#include <string.h>
+#include <algorithm>
+#include <cstring>
 
 #include "MEM_guardedalloc.h"
 #include "BLI_math_color.h"
+#include "BLI_math_vector.h"
 
 #include "ocio_impl.h"
 
+using std::max;
+
 #define CONFIG_DEFAULT           ((OCIO_ConstConfigRcPtr*)1)
 
-#define PROCESSOR_LINEAR_TO_SRGB ((OCIO_ConstProcessorRcPtr*)1)
-#define PROCESSOR_SRGB_TO_LINEAR ((OCIO_ConstProcessorRcPtr*)2)
-#define PROCESSOR_UNKNOWN        ((OCIO_ConstProcessorRcPtr*)3)
+enum TransformType {
+	TRANSFORM_LINEAR_TO_SRGB,
+	TRANSFORM_SRGB_TO_LINEAR,
+	TRANSFORM_MATRIX,
+	TRANSFORM_EXPONENT,
+	TRANSFORM_UNKNOWN,
+};
 
 #define COLORSPACE_LINEAR        ((OCIO_ConstColorSpaceRcPtr*)1)
 #define COLORSPACE_SRGB          ((OCIO_ConstColorSpaceRcPtr*)2)
@@ -49,6 +57,176 @@ typedef struct OCIO_PackedImageDescription {
 	long yStrideBytes;
 } OCIO_PackedImageDescription;
 
+/* Color transform of the fallback implementation.
+ *
+ * `type` selects what applyRGB()/applyRGBA() do to a pixel, in place.
+ * A TRANSFORM_LINEAR_TO_SRGB transform may carry two nested transforms:
+ * `linear_transform` runs before the sRGB conversion, `display_transform`
+ * after it. Both are owned by this object and deleted with it. */
+struct FallbackTransform {
+	FallbackTransform()
+		: type(TRANSFORM_UNKNOWN),
+		  linear_transform(NULL),
+		  display_transform(NULL)
+	{
+		/* Default to a no-op exponent and matrix, so a transform which is
+		 * applied before its values were set never reads garbage. */
+		for (int i = 0; i < 4; i++) {
+			exponent[i] = 1.0f;
+			offset[i] = 0.0f;
+		}
+		for (int i = 0; i < 16; i++) {
+			/* Indices 0, 5, 10 and 15 form the diagonal. */
+			matrix[i] = (i % 5 == 0) ? 1.0f : 0.0f;
+		}
+	}
+
+	~FallbackTransform()
+	{
+		delete linear_transform;
+		delete display_transform;
+	}
+
+	/* Transform the three floats at `pixel` in place.
+	 * TRANSFORM_UNKNOWN leaves the pixel untouched. */
+	void applyRGB(float *pixel)
+	{
+		if (type == TRANSFORM_LINEAR_TO_SRGB) {
+			applyLinearRGB(pixel);
+			linearrgb_to_srgb_v3_v3(pixel, pixel);
+			applyDisplayRGB(pixel);
+		}
+		else if (type == TRANSFORM_SRGB_TO_LINEAR) {
+			srgb_to_linearrgb_v3_v3(pixel, pixel);
+		}
+		else if (type == TRANSFORM_EXPONENT) {
+			/* Negative values are clamped to zero before powf(). */
+			pixel[0] = powf(max(0.0f, pixel[0]), exponent[0]);
+			pixel[1] = powf(max(0.0f, pixel[1]), exponent[1]);
+			pixel[2] = powf(max(0.0f, pixel[2]), exponent[2]);
+		}
+		else if (type == TRANSFORM_MATRIX) {
+			/* Only the RGB rows and columns of the 4x4 matrix are used. */
+			float r = pixel[0];
+			float g = pixel[1];
+			float b = pixel[2];
+			pixel[0] = r*matrix[0] + g*matrix[1] + b*matrix[2];
+			pixel[1] = r*matrix[4] + g*matrix[5] + b*matrix[6];
+			pixel[2] = r*matrix[8] + g*matrix[9] + b*matrix[10];
+			pixel[0] += offset[0];
+			pixel[1] += offset[1];
+			pixel[2] += offset[2];
+		}
+	}
+
+	/* Transform the four floats at `pixel` in place; the fourth channel
+	 * goes through the exponent and matrix transforms as well. */
+	void applyRGBA(float *pixel)
+	{
+		if (type == TRANSFORM_LINEAR_TO_SRGB) {
+			applyLinearRGBA(pixel);
+			linearrgb_to_srgb_v4(pixel, pixel);
+			applyDisplayRGBA(pixel);
+		}
+		else if (type == TRANSFORM_SRGB_TO_LINEAR) {
+			srgb_to_linearrgb_v4(pixel, pixel);
+		}
+		else if (type == TRANSFORM_EXPONENT) {
+			pixel[0] = powf(max(0.0f, pixel[0]), exponent[0]);
+			pixel[1] = powf(max(0.0f, pixel[1]), exponent[1]);
+			pixel[2] = powf(max(0.0f, pixel[2]), exponent[2]);
+			pixel[3] = powf(max(0.0f, pixel[3]), exponent[3]);
+		}
+		else if (type == TRANSFORM_MATRIX) {
+			float r = pixel[0];
+			float g = pixel[1];
+			float b = pixel[2];
+			float a = pixel[3];
+			pixel[0] = r*matrix[0] + g*matrix[1] + b*matrix[2] + a*matrix[3];
+			pixel[1] = r*matrix[4] + g*matrix[5] + b*matrix[6] + a*matrix[7];
+			pixel[2] = r*matrix[8] + g*matrix[9] + b*matrix[10] + a*matrix[11];
+			pixel[3] = r*matrix[12] + g*matrix[13] + b*matrix[14] + a*matrix[15];
+			pixel[0] += offset[0];
+			pixel[1] += offset[1];
+			pixel[2] += offset[2];
+			pixel[3] += offset[3];
+		}
+	}
+
+	/* The helpers below forward to the nested transform when one is set
+	 * and do nothing otherwise. */
+	void applyLinearRGB(float *pixel)
+	{
+		if (linear_transform != NULL) {
+			linear_transform->applyRGB(pixel);
+		}
+	}
+
+	void applyLinearRGBA(float *pixel)
+	{
+		if (linear_transform != NULL) {
+			linear_transform->applyRGBA(pixel);
+		}
+	}
+
+	void applyDisplayRGB(float *pixel)
+	{
+		if (display_transform != NULL) {
+			display_transform->applyRGB(pixel);
+		}
+	}
+
+	void applyDisplayRGBA(float *pixel)
+	{
+		if (display_transform != NULL) {
+			display_transform->applyRGBA(pixel);
+		}
+	}
+
+	TransformType type;
+	FallbackTransform *linear_transform;
+	FallbackTransform *display_transform;
+	/* Exponent transform. */
+	float exponent[4];
+	/* Matrix transform. */
+	float matrix[16];
+	float offset[4];
+
+	MEM_CXX_CLASS_ALLOC_FUNCS("FallbackTransform");
+};
+
+/* Processor handed out through the OCIO_ConstProcessorRcPtr API.
+ * Owns `transform` and deletes it on destruction. */
+struct FallbackProcessor {
+	FallbackProcessor()
+		: transform(NULL)
+	{
+	}
+
+	~FallbackProcessor() {
+		delete transform;
+	}
+
+	/* A processor without a transform leaves the pixel untouched. */
+	void applyRGB(float *pixel)
+	{
+		if (transform != NULL) {
+			transform->applyRGB(pixel);
+		}
+	}
+
+	void applyRGBA(float *pixel)
+	{
+		if (transform != NULL) {
+			transform->applyRGBA(pixel);
+		}
+	}
+
+	FallbackTransform *transform;
+
+	MEM_CXX_CLASS_ALLOC_FUNCS("FallbackProcessor");
+};
+
 OCIO_ConstConfigRcPtr *FallbackImpl::getCurrentConfig(void)
 {
 	return CONFIG_DEFAULT;
@@ -233,19 +380,27 @@ OCIO_ConstProcessorRcPtr *FallbackImpl::configGetProcessorWithNames(
 {
 	OCIO_ConstColorSpaceRcPtr *cs_src = configGetColorSpace(config, srcName);
 	OCIO_ConstColorSpaceRcPtr *cs_dst = configGetColorSpace(config, dstName);
+	FallbackTransform *transform = new FallbackTransform();
 	if (cs_src == COLORSPACE_LINEAR && cs_dst == COLORSPACE_SRGB) {
-		return PROCESSOR_LINEAR_TO_SRGB;
+		transform->type = TRANSFORM_LINEAR_TO_SRGB;
 	}
 	else if (cs_src == COLORSPACE_SRGB && cs_dst == COLORSPACE_LINEAR) {
-		return PROCESSOR_SRGB_TO_LINEAR;
+		transform->type = TRANSFORM_SRGB_TO_LINEAR;
 	}
-	return 0;
+	else {
+		transform->type = TRANSFORM_UNKNOWN;
+	}
+	FallbackProcessor *processor = new FallbackProcessor();
+	processor->transform = transform;
+	return (OCIO_ConstProcessorRcPtr *)processor;
 }
 
 OCIO_ConstProcessorRcPtr *FallbackImpl::configGetProcessor(OCIO_ConstConfigRcPtr * /*config*/,
-                                                           OCIO_ConstTransformRcPtr *tfm)
+                                                           OCIO_ConstTransformRcPtr *transform)
 {
-	return (OCIO_ConstProcessorRcPtr*)tfm;
+	FallbackProcessor *processor = new FallbackProcessor();
+	processor->transform = (FallbackTransform *)transform;
+	return (OCIO_ConstProcessorRcPtr *)processor;
 }
 
 void FallbackImpl::processorApply(OCIO_ConstProcessorRcPtr *processor,
@@ -297,21 +452,13 @@ void FallbackImpl::processorApply_predivide(OCIO_ConstProcessorRcPtr *processor,
 void FallbackImpl::processorApplyRGB(OCIO_ConstProcessorRcPtr *processor,
                                      float *pixel)
 {
-	if (processor == PROCESSOR_LINEAR_TO_SRGB) {
-		linearrgb_to_srgb_v3_v3(pixel, pixel);
-	}
-	else if (processor == PROCESSOR_SRGB_TO_LINEAR) {
-		srgb_to_linearrgb_v3_v3(pixel, pixel);
-	}
+	((FallbackProcessor *)processor)->applyRGB(pixel);
 }
 
 void FallbackImpl::processorApplyRGBA(OCIO_ConstProcessorRcPtr *processor,
                                       float *pixel)
 {
-	if (processor == PROCESSOR_LINEAR_TO_SRGB)
-		linearrgb_to_srgb_v4(pixel, pixel);
-	else if (processor == PROCESSOR_SRGB_TO_LINEAR)
-		srgb_to_linearrgb_v4(pixel, pixel);
+	((FallbackProcessor *)processor)->applyRGBA(pixel);
 }
 
 void FallbackImpl::processorApplyRGBA_predivide(OCIO_ConstProcessorRcPtr *processor,
@@ -338,8 +485,9 @@ void FallbackImpl::processorApplyRGBA_predivide(OCIO_ConstProcessorRcPtr *proces
 	}
 }
 
-void FallbackImpl::processorRelease(OCIO_ConstProcessorRcPtr * /*p*/)
+void FallbackImpl::processorRelease(OCIO_ConstProcessorRcPtr *processor)
 {
+	delete (FallbackProcessor*)(processor);
 }
 
 const char *FallbackImpl::colorSpaceGetName(OCIO_ConstColorSpaceRcPtr *cs)
@@ -365,7 +513,9 @@ const char *FallbackImpl::colorSpaceGetFamily(OCIO_ConstColorSpaceRcPtr * /*cs*/
 
 OCIO_DisplayTransformRcPtr *FallbackImpl::createDisplayTransform(void)
 {
-	return (OCIO_DisplayTransformRcPtr*)PROCESSOR_LINEAR_TO_SRGB;
+	FallbackTransform *transform = new FallbackTransform();
+	transform->type = TRANSFORM_LINEAR_TO_SRGB;
+	return (OCIO_DisplayTransformRcPtr*)transform;
 }
 
 void FallbackImpl::displayTransformSetInputColorSpaceName(OCIO_DisplayTransformRcPtr * /*dt*/,
@@ -383,14 +533,18 @@ void FallbackImpl::displayTransformSetView(OCIO_DisplayTransformRcPtr * /*dt*/,
 {
 }
 
-void FallbackImpl::displayTransformSetDisplayCC(OCIO_DisplayTransformRcPtr * /*dt*/,
-                                                OCIO_ConstTransformRcPtr * /*et*/)
+void FallbackImpl::displayTransformSetDisplayCC(OCIO_DisplayTransformRcPtr *dt,
+                                                OCIO_ConstTransformRcPtr *et)
 {
+	FallbackTransform *transform = (FallbackTransform *)dt;
+	transform->display_transform = (FallbackTransform *)et;
 }
 
-void FallbackImpl::displayTransformSetLinearCC(OCIO_DisplayTransformRcPtr * /*dt*/,
-                                               OCIO_ConstTransformRcPtr * /*et*/)
+void FallbackImpl::displayTransformSetLinearCC(OCIO_DisplayTransformRcPtr *dt,
+                                               OCIO_ConstTransformRcPtr *et)
 {
+	FallbackTransform *transform = (FallbackTransform *)dt;
+	transform->linear_transform = (FallbackTransform *)et;
 }
 
 void FallbackImpl::displayTransformSetLooksOverride(OCIO_DisplayTransformRcPtr * /*dt*/,
@@ -432,12 +586,16 @@ void FallbackImpl::OCIO_PackedImageDescRelease(OCIO_PackedImageDesc* id)
 
 OCIO_ExponentTransformRcPtr *FallbackImpl::createExponentTransform(void)
 {
-	return (OCIO_ExponentTransformRcPtr*)PROCESSOR_UNKNOWN;
+	FallbackTransform *transform = new FallbackTransform();
+	transform->type = TRANSFORM_EXPONENT;
+	return (OCIO_ExponentTransformRcPtr *)transform;
 }
 
-void FallbackImpl::exponentTransformSetValue(OCIO_ExponentTransformRcPtr * /*et*/,
-                                             const float * /*exponent*/)
+void FallbackImpl::exponentTransformSetValue(OCIO_ExponentTransformRcPtr *et,
+                                             const float *exponent)
 {
+	FallbackTransform *transform = (FallbackTransform *)et;
+	copy_v4_v4(transform->exponent, exponent);
 }
 
 void FallbackImpl::exponentTransformRelease(OCIO_ExponentTransformRcPtr * /*et*/)
@@ -446,23 +604,44 @@ void FallbackImpl::exponentTransformRelease(OCIO_ExponentTransformRcPtr * /*et*/
 
 OCIO_MatrixTransformRcPtr *FallbackImpl::createMatrixTransform(void)
 {
-	return (OCIO_MatrixTransformRcPtr*)PROCESSOR_UNKNOWN;
+	FallbackTransform *transform = new FallbackTransform();
+	transform->type = TRANSFORM_MATRIX;
+	return (OCIO_MatrixTransformRcPtr *)transform;
 }
 
-void FallbackImpl::matrixTransformSetValue(OCIO_MatrixTransformRcPtr * /*mt*/,
-                                           const float * /*m44*/,
-                                           const float * /*offset4*/)
+void FallbackImpl::matrixTransformSetValue(OCIO_MatrixTransformRcPtr *mt,
+                                           const float *m44,
+                                           const float *offset4)
 {
+	FallbackTransform *transform = (FallbackTransform *)mt;
+	copy_m4_m4((float (*)[4])transform->matrix, (float (*)[4])m44);
+	copy_v4_v4(transform->offset, offset4);
 }
 
 void FallbackImpl::matrixTransformRelease(OCIO_MatrixTransformRcPtr * /*mt*/)
 {
 }
 
-void FallbackImpl::matrixTransformScale(float * /*m44*/,
-                                        float * /*offset44*/,
-                                        const float * /*scale4*/)
+void FallbackImpl::matrixTransformScale(float *m44,
+                                        float *offset4,
+                                        const float *scale4)
 {
+	if (scale4 == NULL) {
+		return;
+	}
+	if (m44 != NULL) {
+		memset(m44, 0, 16*sizeof(float));
+		m44[0] = scale4[0];
+		m44[5] = scale4[1];
+		m44[10] = scale4[2];
+		m44[15] = scale4[3];
+	}
+	if (offset4 != NULL) {
+		offset4[0] = 0.0f;
+		offset4[1] = 0.0f;
+		offset4[2] = 0.0f;
+		offset4[3] = 0.0f;
+	}
 }
 
 bool FallbackImpl::supportGLSLDraw(void)
author	Bastien Montagne <montagne29@wanadoo.fr>	2016-12-12 18:17:57 +0300
committer	Bastien Montagne <montagne29@wanadoo.fr>	2016-12-12 18:17:57 +0300
commit	9be6d5ff18fd593e853647d9eec8b3fb074acd7c (patch)
tree	485fb9203c310b5119a549e698db241012ed39ff /intern
parent	54528079e3cfaf74eaa119615386564820b45276 (diff)
parent	5f852a4324212221500d11b2c7594f5e0ca894c6 (diff)