63 files changed, 3338 insertions, 1571 deletions
diff --git a/build_files/cmake/config/blender_full.cmake b/build_files/cmake/config/blender_full.cmake
index bd8a2dbaf2c..87b8ed5f921 100644
--- a/build_files/cmake/config/blender_full.cmake
+++ b/build_files/cmake/config/blender_full.cmake
@@ -77,6 +77,6 @@ elseif(APPLE)
 	set(WITH_OPENSUBDIV          OFF CACHE BOOL "" FORCE)
 	set(WITH_CODEC_QUICKTIME     ON  CACHE BOOL "" FORCE)
 
-	include("${CMAKE_SOURCE_DIR}/build_files/cmake/platform/platform_apple_xcode.cmake")
+	include("${CMAKE_CURRENT_SOURCE_DIR}/../platform/platform_apple_xcode.cmake")
 	apple_check_quicktime()
 endif()
diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake
index 42e8c111714..b60c8c7380c 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -78,6 +78,6 @@ elseif(APPLE)
 	set(WITH_OPENSUBDIV          OFF CACHE BOOL "" FORCE)
 	set(WITH_CODEC_QUICKTIME     ON  CACHE BOOL "" FORCE)
 
-	include("${CMAKE_SOURCE_DIR}/build_files/cmake/platform/platform_apple_xcode.cmake")
+	include("${CMAKE_CURRENT_SOURCE_DIR}/../platform/platform_apple_xcode.cmake")
 	apple_check_quicktime()
 endif()
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index c3772dfa2d8..3750225571d 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -196,10 +196,42 @@ set(SRC_UTIL_HEADERS
 	../util/util_math.h
 	../util/util_math_fast.h
 	../util/util_math_intersect.h
+	../util/util_math_float2.h
+	../util/util_math_float3.h
+	../util/util_math_float4.h
+	../util/util_math_int2.h
+	../util/util_math_int3.h
+	../util/util_math_int4.h
 	../util/util_static_assert.h
 	../util/util_transform.h
 	../util/util_texture.h
 	../util/util_types.h
+	../util/util_types_float2.h
+	../util/util_types_float2_impl.h
+	../util/util_types_float3.h
+	../util/util_types_float3_impl.h
+	../util/util_types_float4.h
+	../util/util_types_float4_impl.h
+	../util/util_types_int2.h
+	../util/util_types_int2_impl.h
+	../util/util_types_int3.h
+	../util/util_types_int3_impl.h
+	../util/util_types_int4.h
+	../util/util_types_int4_impl.h
+	../util/util_types_uchar2.h
+	../util/util_types_uchar2_impl.h
+	../util/util_types_uchar3.h
+	../util/util_types_uchar3_impl.h
+	../util/util_types_uchar4.h
+	../util/util_types_uchar4_impl.h
+	../util/util_types_uint2.h
+	../util/util_types_uint2_impl.h
+	../util/util_types_uint3.h
+	../util/util_types_uint3_impl.h
+	../util/util_types_uint4.h
+	../util/util_types_uint4_impl.h
+	../util/util_types_vector3.h
+	../util/util_types_vector3_impl.h
 )
 
 set(SRC_SPLIT_HEADERS
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index 36fd6c95fe7..085eb42325d 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -337,11 +337,6 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 				float num_samples_inv = 1.0f/num_samples;
 
 				for(int j = 0; j < num_samples; j++) {
-					/* workaround to fix correlation bug in T38710, can find better solution
-					 * in random number generator later, for now this is done here to not impact
-					 * performance of rendering without volumes */
-					RNG tmp_rng = cmj_hash(*rng, state.rng_offset);
-
 					PathState ps = state;
 					Ray pray = ray;
 					float3 tp = throughput;
@@ -352,8 +347,8 @@ ccl_device float4 kernel_branched_path_integrate(KernelGlobals *kg, RNG *rng, in
 					/* scatter sample. if we use distance sampling and take just one
 					 * sample for direct and indirect light, we could share this
 					 * computation, but makes code a bit complex */
-					float rphase = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_PHASE);
-					float rscatter = path_state_rng_1D_for_decision(kg, &tmp_rng, &ps, PRNG_SCATTER_DISTANCE);
+					float rphase = path_state_rng_1D_for_decision(kg, rng, &ps, PRNG_PHASE);
+					float rscatter = path_state_rng_1D_for_decision(kg, rng, &ps, PRNG_SCATTER_DISTANCE);
 
 					VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
 						&ps, &pray, &sd, &tp, rphase, rscatter, &volume_segment, NULL, false);
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index d4f0caff5de..e8a912ccc0b 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -20,14 +20,15 @@ CCL_NAMESPACE_BEGIN
 
 #ifdef __SOBOL__
 
-/* skip initial numbers that are not as well distributed, especially the
+/* Skip initial numbers that are not as well distributed, especially the
  * first sequence is just 0 everywhere, which can be problematic for e.g.
- * path termination */
+ * path termination.
+ */
 #define SOBOL_SKIP 64
 
-/* High Dimensional Sobol */
+/* High Dimensional Sobol. */
 
-/* van der corput radical inverse */
+/* Van der Corput radical inverse. */
 ccl_device uint van_der_corput(uint bits)
 {
 	bits = (bits << 16) | (bits >> 16);
@@ -38,58 +39,63 @@ ccl_device uint van_der_corput(uint bits)
 	return bits;
 }
 
-/* sobol radical inverse */
+/* Sobol radical inverse. */
 ccl_device uint sobol(uint i)
 {
 	uint r = 0;
-
-	for(uint v = 1U << 31; i; i >>= 1, v ^= v >> 1)
-		if(i & 1)
+	for(uint v = 1U << 31; i; i >>= 1, v ^= v >> 1) {
+		if(i & 1) {
 			r ^= v;
-
+		}
+	}
 	return r;
 }
 
-/* inverse of sobol radical inverse */
+/* Inverse of sobol radical inverse. */
 ccl_device uint sobol_inverse(uint i)
 {
 	const uint msb = 1U << 31;
 	uint r = 0;
-
-	for(uint v = 1; i; i <<= 1, v ^= v << 1)
-		if(i & msb)
+	for(uint v = 1; i; i <<= 1, v ^= v << 1) {
+		if(i & msb) {
 			r ^= v;
-
+		}
+	}
 	return r;
 }
 
-/* multidimensional sobol with generator matrices
- * dimension 0 and 1 are equal to van_der_corput() and sobol() respectively */
+/* Multidimensional sobol with generator matrices
+ * dimension 0 and 1 are equal to van_der_corput() and sobol() respectively.
+ */
 ccl_device uint sobol_dimension(KernelGlobals *kg, int index, int dimension)
 {
 	uint result = 0;
 	uint i = index;
-
-	for(uint j = 0; i; i >>= 1, j++)
-		if(i & 1)
+	for(uint j = 0; i; i >>= 1, j++) {
+		if(i & 1) {
 			result ^= kernel_tex_fetch(__sobol_directions, 32*dimension + j);
-	
+		}
+	}
 	return result;
 }
 
-/* lookup index and x/y coordinate, assumes m is a power of two */
-ccl_device uint sobol_lookup(const uint m, const uint frame, const uint ex, const uint ey, uint *x, uint *y)
+/* Lookup index and x/y coordinate, assumes m is a power of two. */
+ccl_device uint sobol_lookup(const uint m,
+                             const uint frame,
+                             const uint ex,
+                             const uint ey,
+                             uint *x, uint *y)
 {
-	/* shift is constant per frame */
+	/* Shift is constant per frame. */
 	const uint shift = frame << (m << 1);
 	const uint sobol_shift = sobol(shift);
-	/* van der Corput is its own inverse */
+	/* Van der Corput is its own inverse. */
 	const uint lower = van_der_corput(ex << (32 - m));
-	/* need to compensate for ey difference and shift */
+	/* Need to compensate for ey difference and shift. */
 	const uint sobol_lower = sobol(lower);
-	const uint mask = ~-(1 << m) << (32 - m); /* only m upper bits */
+	const uint mask = ~-(1 << m) << (32 - m);  /* Only m upper bits. */
 	const uint delta = ((ey << (32 - m)) ^ sobol_lower ^ sobol_shift) & mask;
-	/* only use m upper bits for the index (m is a power of two) */
+	/* Only use m upper bits for the index (m is a power of two). */
 	const uint sobol_result = delta | (delta >> m);
 	const uint upper = sobol_inverse(sobol_result);
 	const uint index = shift | upper | lower;
@@ -98,11 +104,14 @@ ccl_device uint sobol_lookup(const uint m, const uint frame, const uint ex, cons
 	return index;
 }
 
-ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension)
+ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
+                                         RNG *rng,
+                                         int sample, int num_samples,
+                                         int dimension)
 {
 #ifdef __CMJ__
 	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
-		/* correlated multi-jittered */
+		/* Correlated multi-jitter. */
 		int p = *rng + dimension;
 		return cmj_sample_1D(sample, num_samples, p);
 	}
@@ -113,7 +122,7 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample
 	float r = (float)result * (1.0f/(float)0xFFFFFFFF);
 	return r;
 #else
-	/* compute sobol sequence value using direction vectors */
+	/* Compute sobol sequence value using direction vectors. */
 	uint result = sobol_dimension(kg, sample + SOBOL_SKIP, dimension);
 	float r = (float)result * (1.0f/(float)0xFFFFFFFF);
 
@@ -130,24 +139,33 @@ ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample
 #endif
 }
 
-ccl_device_forceinline void path_rng_2D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
+ccl_device_forceinline void path_rng_2D(KernelGlobals *kg,
+                                        RNG *rng,
+                                        int sample, int num_samples,
+                                        int dimension,
+                                        float *fx, float *fy)
 {
 #ifdef __CMJ__
 	if(kernel_data.integrator.sampling_pattern == SAMPLING_PATTERN_CMJ) {
-		/* correlated multi-jittered */
+		/* Correlated multi-jitter. */
 		int p = *rng + dimension;
 		cmj_sample_2D(sample, num_samples, p, fx, fy);
 	}
 	else
 #endif
 	{
-		/* sobol */
+		/* Sobol. */
 		*fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
 		*fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
 	}
 }
 
-ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
+ccl_device_inline void path_rng_init(KernelGlobals *kg,
+                                     ccl_global uint *rng_state,
+                                     int sample, int num_samples,
+                                     RNG *rng,
+                                     int x, int y,
+                                     float *fx, float *fy)
 {
 #ifdef __SOBOL_FULL_SCREEN__
 	uint px, py;
@@ -182,29 +200,43 @@ ccl_device_inline void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_sta
 #endif
 }
 
-ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG rng)
+ccl_device void path_rng_end(KernelGlobals *kg,
+                             ccl_global uint *rng_state,
+                             RNG rng)
 {
 	/* nothing to do */
 }
 
-#else
+#else  /* __SOBOL__ */
 
 /* Linear Congruential Generator */
 
-ccl_device_forceinline float path_rng_1D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension)
+ccl_device_forceinline float path_rng_1D(KernelGlobals *kg,
+                                         RNG *rng,
+                                         int sample, int num_samples,
+                                         int dimension)
 {
 	/* implicit mod 2^32 */
 	*rng = (1103515245*(*rng) + 12345);
 	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
 }
 
-ccl_device_inline void path_rng_2D(KernelGlobals *kg, RNG *rng, int sample, int num_samples, int dimension, float *fx, float *fy)
+ccl_device_inline void path_rng_2D(KernelGlobals *kg,
+                                   RNG *rng,
+                                   int sample, int num_samples,
+                                   int dimension,
+                                   float *fx, float *fy)
 {
 	*fx = path_rng_1D(kg, rng, sample, num_samples, dimension);
 	*fy = path_rng_1D(kg, rng, sample, num_samples, dimension + 1);
 }
 
-ccl_device void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int sample, int num_samples, RNG *rng, int x, int y, float *fx, float *fy)
+ccl_device void path_rng_init(KernelGlobals *kg,
+                              ccl_global uint *rng_state,
+                              int sample, int num_samples,
+                              RNG *rng,
+                              int x, int y,
+                              float *fx, float *fy)
 {
 	/* load state */
 	*rng = *rng_state;
@@ -220,13 +252,15 @@ ccl_device void path_rng_init(KernelGlobals *kg, ccl_global uint *rng_state, int
 	}
 }
 
-ccl_device void path_rng_end(KernelGlobals *kg, ccl_global uint *rng_state, RNG rng)
+ccl_device void path_rng_end(KernelGlobals *kg,
+                             ccl_global uint *rng_state,
+                             RNG rng)
 {
 	/* store state for next sample */
 	*rng_state = rng;
 }
 
-#endif
+#endif  /* __SOBOL__ */
 
 /* Linear Congruential Generator */
 
@@ -257,49 +291,108 @@ ccl_device uint lcg_init(uint seed)
  * dimension to avoid using the same sequence twice.
  *
  * For branches in the path we must be careful not to reuse the same number
- * in a sequence and offset accordingly. */
+ * in a sequence and offset accordingly.
+ */
 
-ccl_device_inline float path_state_rng_1D(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D(KernelGlobals *kg,
+                                          RNG *rng,
+                                          const ccl_addr_space PathState *state,
+                                          int dimension)
 {
-	return path_rng_1D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension);
+	return path_rng_1D(kg,
+	                   rng,
+	                   state->sample, state->num_samples,
+	                   state->rng_offset + dimension);
 }
 
-ccl_device_inline float path_state_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int dimension)
+ccl_device_inline float path_state_rng_1D_for_decision(
+        KernelGlobals *kg,
+        RNG *rng,
+        const ccl_addr_space PathState *state,
+        int dimension)
 {
-	/* the rng_offset is not increased for transparent bounces. if we do then
+	/* The rng_offset is not increased for transparent bounces. if we do then
 	 * fully transparent objects can become subtly visible by the different
 	 * sampling patterns used where the transparent object is.
 	 *
 	 * however for some random numbers that will determine if we next bounce
 	 * is transparent we do need to increase the offset to avoid always making
-	 * the same decision */
-	int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
-	return path_rng_1D(kg, rng, state->sample, state->num_samples, rng_offset + dimension);
+	 * the same decision. */
+	const int rng_offset = state->rng_offset + state->transparent_bounce * PRNG_BOUNCE_NUM;
+	return path_rng_1D(kg,
+	                   rng,
+	                   state->sample, state->num_samples,
+	                   rng_offset + dimension);
 }
 
-ccl_device_inline void path_state_rng_2D(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int dimension, float *fx, float *fy)
+ccl_device_inline void path_state_rng_2D(KernelGlobals *kg,
+                                         RNG *rng,
+                                         const ccl_addr_space PathState *state,
+                                         int dimension,
+                                         float *fx, float *fy)
 {
-	path_rng_2D(kg, rng, state->sample, state->num_samples, state->rng_offset + dimension, fx, fy);
+	path_rng_2D(kg,
+	            rng,
+	            state->sample, state->num_samples,
+	            state->rng_offset + dimension,
+	            fx, fy);
 }
 
-ccl_device_inline float path_branched_rng_1D(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D(
+        KernelGlobals *kg,
+        RNG *rng,
+        const ccl_addr_space PathState *state,
+        int branch,
+        int num_branches,
+        int dimension)
 {
-	return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension);
+	return path_rng_1D(kg,
+	                   rng,
+	                   state->sample * num_branches + branch,
+	                   state->num_samples * num_branches,
+	                   state->rng_offset + dimension);
 }
 
-ccl_device_inline float path_branched_rng_1D_for_decision(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int branch, int num_branches, int dimension)
+ccl_device_inline float path_branched_rng_1D_for_decision(
+        KernelGlobals *kg,
+        RNG *rng,
+        const ccl_addr_space PathState *state,
+        int branch,
+        int num_branches,
+        int dimension)
 {
-	int rng_offset = state->rng_offset + state->transparent_bounce*PRNG_BOUNCE_NUM;
-	return path_rng_1D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, rng_offset + dimension);
+	const int rng_offset = state->rng_offset + state->transparent_bounce * PRNG_BOUNCE_NUM;
+	return path_rng_1D(kg,
+	                   rng,
+	                   state->sample * num_branches + branch,
+	                   state->num_samples * num_branches,
+	                   rng_offset + dimension);
 }
 
-ccl_device_inline void path_branched_rng_2D(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int branch, int num_branches, int dimension, float *fx, float *fy)
+ccl_device_inline void path_branched_rng_2D(
+        KernelGlobals *kg,
+        RNG *rng,
+        const ccl_addr_space PathState *state,
+        int branch,
+        int num_branches,
+        int dimension,
+        float *fx, float *fy)
 {
-	path_rng_2D(kg, rng, state->sample*num_branches + branch, state->num_samples*num_branches, state->rng_offset + dimension, fx, fy);
+	path_rng_2D(kg,
+	            rng,
+	            state->sample * num_branches + branch,
+	            state->num_samples * num_branches,
+	            state->rng_offset + dimension,
+	            fx, fy);
 }
 
-/* Utitility functions to get light termination value, since it might not be needed in many cases. */
-ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state)
+/* Utitility functions to get light termination value,
+ * since it might not be needed in many cases.
+ */
+ccl_device_inline float path_state_rng_light_termination(
+        KernelGlobals *kg,
+        RNG *rng,
+        const ccl_addr_space PathState *state)
 {
 	if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
 		return path_state_rng_1D_for_decision(kg, rng, state, PRNG_LIGHT_TERMINATE);
@@ -307,15 +400,27 @@ ccl_device_inline float path_state_rng_light_termination(KernelGlobals *kg, RNG
 	return 0.0f;
 }
 
-ccl_device_inline float path_branched_rng_light_termination(KernelGlobals *kg, RNG *rng, const ccl_addr_space PathState *state, int branch, int num_branches)
+ccl_device_inline float path_branched_rng_light_termination(
+        KernelGlobals *kg,
+        RNG *rng,
+        const ccl_addr_space PathState *state,
+        int branch,
+        int num_branches)
 {
 	if(kernel_data.integrator.light_inv_rr_threshold > 0.0f) {
-		return path_branched_rng_1D_for_decision(kg, rng, state, branch, num_branches, PRNG_LIGHT_TERMINATE);
+		return path_branched_rng_1D_for_decision(kg,
+		                                         rng,
+		                                         state,
+		                                         branch,
+		                                         num_branches,
+		                                         PRNG_LIGHT_TERMINATE);
 	}
 	return 0.0f;
 }
 
-ccl_device_inline void path_state_branch(ccl_addr_space PathState *state, int branch, int num_branches)
+ccl_device_inline void path_state_branch(ccl_addr_space PathState *state,
+                                         int branch,
+                                         int num_branches)
 {
 	/* path is splitting into a branch, adjust so that each branch
 	 * still gets a unique sample from the same sequence */
@@ -324,14 +429,17 @@ ccl_device_inline void path_state_branch(ccl_addr_space PathState *state, int br
 	state->num_samples = state->num_samples*num_branches;
 }
 
-ccl_device_inline uint lcg_state_init(RNG *rng, int rng_offset, int sample, uint scramble)
+ccl_device_inline uint lcg_state_init(RNG *rng,
+                                      int rng_offset,
+                                      int sample,
+                                      uint scramble)
 {
 	return lcg_init(*rng + rng_offset + sample*scramble);
 }
 
 ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
 {
-	/* implicit mod 2^32 */
+	/* Implicit mod 2^32 */
 	*rng = (1103515245*(*rng) + 12345);
 	return (float)*rng * (1.0f/(float)0xFFFFFFFF);
 }
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index a015fef8284..388aba65460 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -53,6 +53,12 @@ set(SRC_HEADERS
 	util_math_cdf.h
 	util_math_fast.h
 	util_math_intersect.h
+	util_math_float2.h
+	util_math_float3.h
+	util_math_float4.h
+	util_math_int2.h
+	util_math_int3.h
+	util_math_int4.h
 	util_md5.h
 	util_opengl.h
 	util_optimization.h
@@ -80,6 +86,32 @@ set(SRC_HEADERS
 	util_time.h
 	util_transform.h
 	util_types.h
+	util_types_float2.h
+	util_types_float2_impl.h
+	util_types_float3.h
+	util_types_float3_impl.h
+	util_types_float4.h
+	util_types_float4_impl.h
+	util_types_int2.h
+	util_types_int2_impl.h
+	util_types_int3.h
+	util_types_int3_impl.h
+	util_types_int4.h
+	util_types_int4_impl.h
+	util_types_uchar2.h
+	util_types_uchar2_impl.h
+	util_types_uchar3.h
+	util_types_uchar3_impl.h
+	util_types_uchar4.h
+	util_types_uchar4_impl.h
+	util_types_uint2.h
+	util_types_uint2_impl.h
+	util_types_uint3.h
+	util_types_uint3_impl.h
+	util_types_uint4.h
+	util_types_uint4_impl.h
+	util_types_vector3.h
+	util_types_vector3_impl.h
 	util_vector.h
 	util_version.h
 	util_view.h
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 77781ed4574..52b4fa859b7 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -28,12 +28,10 @@
 
 
 #ifndef __KERNEL_OPENCL__
-
-#include <float.h>
-#include <math.h>
-#include <stdio.h>
-
-#endif
+#  include <float.h>
+#  include <math.h>
+#  include <stdio.h>
+#endif  /* __KERNEL_OPENCL__ */
 
 #include "util/util_types.h"
 
@@ -43,49 +41,44 @@ CCL_NAMESPACE_BEGIN
 
 /* Division */
 #ifndef M_PI_F
-#define M_PI_F    (3.1415926535897932f)  /* pi */
+#  define M_PI_F    (3.1415926535897932f)  /* pi */
 #endif
 #ifndef M_PI_2_F
-#define M_PI_2_F  (1.5707963267948966f)  /* pi/2 */
+#  define M_PI_2_F  (1.5707963267948966f)  /* pi/2 */
 #endif
 #ifndef M_PI_4_F
-#define M_PI_4_F  (0.7853981633974830f)  /* pi/4 */
+#  define M_PI_4_F  (0.7853981633974830f)  /* pi/4 */
 #endif
 #ifndef M_1_PI_F
-#define M_1_PI_F  (0.3183098861837067f)  /* 1/pi */
+#  define M_1_PI_F  (0.3183098861837067f)  /* 1/pi */
 #endif
 #ifndef M_2_PI_F
-#define M_2_PI_F  (0.6366197723675813f)  /* 2/pi */
+#  define M_2_PI_F  (0.6366197723675813f)  /* 2/pi */
 #endif
 
 /* Multiplication */
 #ifndef M_2PI_F
-#define M_2PI_F   (6.2831853071795864f)  /* 2*pi */
+#  define M_2PI_F   (6.2831853071795864f)  /* 2*pi */
 #endif
 #ifndef M_4PI_F
-#define M_4PI_F   (12.566370614359172f)  /* 4*pi */
+#  define M_4PI_F   (12.566370614359172f)  /* 4*pi */
 #endif
 
 /* Float sqrt variations */
-
 #ifndef M_SQRT2_F
-#define M_SQRT2_F (1.4142135623730950f)  /* sqrt(2) */
+#  define M_SQRT2_F (1.4142135623730950f)  /* sqrt(2) */
 #endif
-
 #ifndef M_LN2_F
-#define M_LN2_F   (0.6931471805599453f)  /* ln(2) */
+#  define M_LN2_F   (0.6931471805599453f)  /* ln(2) */
 #endif
-
 #ifndef M_LN10_F
-#define M_LN10_F  (2.3025850929940457f)  /* ln(10) */
+#  define M_LN10_F  (2.3025850929940457f)  /* ln(10) */
 #endif
 
 /* Scalar */
 
 #ifdef _WIN32
-
-#ifndef __KERNEL_OPENCL__
-
+#  ifndef __KERNEL_OPENCL__
 ccl_device_inline float fmaxf(float a, float b)
 {
 	return (a > b)? a: b;
@@ -95,13 +88,10 @@ ccl_device_inline float fminf(float a, float b)
 {
 	return (a < b)? a: b;
 }
-
-#endif
-
-#endif
+#  endif  /* !__KERNEL_OPENCL__ */
+#endif  /* _WIN32 */
 
 #ifndef __KERNEL_GPU__
-
 using std::isfinite;
 using std::isnan;
 
@@ -157,8 +147,7 @@ ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d)
 {
 	return max(max(a,b),max(c,d));
 }
-
-#endif
+#endif /* __KERNEL_GPU__ */
 
 ccl_device_inline float min4(float a, float b, float c, float d)
 {
@@ -170,13 +159,7 @@ ccl_device_inline float max4(float a, float b, float c, float d)
 	return max(max(a, b), max(c, d));
 }
 
-ccl_device_inline float max3(float3 a)
-{
-	return max(max(a.x, a.y), a.z);
-}
-
 #ifndef __KERNEL_OPENCL__
-
 ccl_device_inline int clamp(int a, int mn, int mx)
 {
 	return min(max(a, mn), mx);
@@ -191,17 +174,14 @@ ccl_device_inline float mix(float a, float b, float t)
 {
     return a + t*(b - a);
 }
-
-#endif
+#endif  /* __KERNEL_OPENCL__ */
 
 #ifndef __KERNEL_CUDA__
-
 ccl_device_inline float saturate(float a)
 {
 	return clamp(a, 0.0f, 1.0f);
 }
-
-#endif
+#endif  /* __KERNEL_CUDA__ */
 
 ccl_device_inline int float_to_int(float f)
 {
@@ -242,453 +222,6 @@ ccl_device_inline int mod(int x, int m)
 	return (x % m + m) % m;
 }
 
-/* Float2 Vector */
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline bool is_zero(const float2& a)
-{
-	return (a.x == 0.0f && a.y == 0.0f);
-}
-
-#endif
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float average(const float2& a)
-{
-	return (a.x + a.y)*(1.0f/2.0f);
-}
-
-#endif
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float2 operator-(const float2& a)
-{
-	return make_float2(-a.x, -a.y);
-}
-
-ccl_device_inline float2 operator*(const float2& a, const float2& b)
-{
-	return make_float2(a.x*b.x, a.y*b.y);
-}
-
-ccl_device_inline float2 operator*(const float2& a, float f)
-{
-	return make_float2(a.x*f, a.y*f);
-}
-
-ccl_device_inline float2 operator*(float f, const float2& a)
-{
-	return make_float2(a.x*f, a.y*f);
-}
-
-ccl_device_inline float2 operator/(float f, const float2& a)
-{
-	return make_float2(f/a.x, f/a.y);
-}
-
-ccl_device_inline float2 operator/(const float2& a, float f)
-{
-	float invf = 1.0f/f;
-	return make_float2(a.x*invf, a.y*invf);
-}
-
-ccl_device_inline float2 operator/(const float2& a, const float2& b)
-{
-	return make_float2(a.x/b.x, a.y/b.y);
-}
-
-ccl_device_inline float2 operator+(const float2& a, const float2& b)
-{
-	return make_float2(a.x+b.x, a.y+b.y);
-}
-
-ccl_device_inline float2 operator-(const float2& a, const float2& b)
-{
-	return make_float2(a.x-b.x, a.y-b.y);
-}
-
-ccl_device_inline float2 operator+=(float2& a, const float2& b)
-{
-	return a = a + b;
-}
-
-ccl_device_inline float2 operator*=(float2& a, const float2& b)
-{
-	return a = a * b;
-}
-
-ccl_device_inline float2 operator*=(float2& a, float f)
-{
-	return a = a * f;
-}
-
-ccl_device_inline float2 operator/=(float2& a, const float2& b)
-{
-	return a = a / b;
-}
-
-ccl_device_inline float2 operator/=(float2& a, float f)
-{
-	float invf = 1.0f/f;
-	return a = a * invf;
-}
-
-
-ccl_device_inline float dot(const float2& a, const float2& b)
-{
-	return a.x*b.x + a.y*b.y;
-}
-
-ccl_device_inline float cross(const float2& a, const float2& b)
-{
-	return (a.x*b.y - a.y*b.x);
-}
-
-#endif
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline bool operator==(const int2 a, const int2 b)
-{
-	return (a.x == b.x && a.y == b.y);
-}
-
-ccl_device_inline float len(const float2& a)
-{
-	return sqrtf(dot(a, a));
-}
-
-ccl_device_inline float2 normalize(const float2& a)
-{
-	return a/len(a);
-}
-
-ccl_device_inline float2 normalize_len(const float2& a, float *t)
-{
-	*t = len(a);
-	return a/(*t);
-}
-
-ccl_device_inline float2 safe_normalize(const float2& a)
-{
-	float t = len(a);
-	return (t != 0.0f)? a/t: a;
-}
-
-ccl_device_inline bool operator==(const float2& a, const float2& b)
-{
-	return (a.x == b.x && a.y == b.y);
-}
-
-ccl_device_inline bool operator!=(const float2& a, const float2& b)
-{
-	return !(a == b);
-}
-
-ccl_device_inline float2 min(const float2& a, const float2& b)
-{
-	return make_float2(min(a.x, b.x), min(a.y, b.y));
-}
-
-ccl_device_inline float2 max(const float2& a, const float2& b)
-{
-	return make_float2(max(a.x, b.x), max(a.y, b.y));
-}
-
-ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx)
-{
-	return min(max(a, mn), mx);
-}
-
-ccl_device_inline float2 fabs(const float2& a)
-{
-	return make_float2(fabsf(a.x), fabsf(a.y));
-}
-
-ccl_device_inline float2 as_float2(const float4& a)
-{
-	return make_float2(a.x, a.y);
-}
-
-#endif
-
-#ifndef __KERNEL_GPU__
-
-ccl_device_inline void print_float2(const char *label, const float2& a)
-{
-	printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y);
-}
-
-#endif
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float2 interp(const float2& a, const float2& b, float t)
-{
-	return a + t*(b - a);
-}
-
-#endif
-
-/* Float3 Vector */
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float3 operator-(const float3& a)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
-#else
-	return make_float3(-a.x, -a.y, -a.z);
-#endif
-}
-
-ccl_device_inline float3 operator*(const float3& a, const float3& b)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_mul_ps(a.m128,b.m128));
-#else
-	return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
-#endif
-}
-
-ccl_device_inline float3 operator*(const float3& a, const float f)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f)));
-#else
-	return make_float3(a.x*f, a.y*f, a.z*f);
-#endif
-}
-
-ccl_device_inline float3 operator*(const float f, const float3& a)
-{
-	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if defined(__KERNEL_SSE__) && 0
-	return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
-#else
-	return make_float3(a.x*f, a.y*f, a.z*f);
-#endif
-}
-
-ccl_device_inline float3 operator/(const float f, const float3& a)
-{
-	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if defined(__KERNEL_SSE__) && 0
-	__m128 rc = _mm_rcp_ps(a.m128);
-	return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
-#else
-	return make_float3(f / a.x, f / a.y, f / a.z);
-#endif
-}
-
-ccl_device_inline float3 operator/(const float3& a, const float f)
-{
-	float invf = 1.0f/f;
-	return a * invf;
-}
-
-ccl_device_inline float3 operator/(const float3& a, const float3& b)
-{
-	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if defined(__KERNEL_SSE__) && 0
-	__m128 rc = _mm_rcp_ps(b.m128);
-	return float3(_mm_mul_ps(a, rc));
-#else
-	return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
-#endif
-}
-
-ccl_device_inline float3 operator+(const float3& a, const float3& b)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_add_ps(a.m128, b.m128));
-#else
-	return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
-#endif
-}
-
-ccl_device_inline float3 operator-(const float3& a, const float3& b)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_sub_ps(a.m128, b.m128));
-#else
-	return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
-#endif
-}
-
-ccl_device_inline float3 operator+=(float3& a, const float3& b)
-{
-	return a = a + b;
-}
-
-ccl_device_inline float3 operator*=(float3& a, const float3& b)
-{
-	return a = a * b;
-}
-
-ccl_device_inline float3 operator*=(float3& a, float f)
-{
-	return a = a * f;
-}
-
-ccl_device_inline float3 operator/=(float3& a, const float3& b)
-{
-	return a = a / b;
-}
-
-ccl_device_inline float3 operator/=(float3& a, float f)
-{
-	float invf = 1.0f/f;
-	return a = a * invf;
-}
-
-ccl_device_inline float dot(const float3& a, const float3& b)
-{
-#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
-	return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
-#else	
-	return a.x*b.x + a.y*b.y + a.z*b.z;
-#endif
-}
-
-ccl_device_inline float dot_xy(const float3& a, const float3& b)
-{
-#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
-	return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b));
-#else
-	return a.x*b.x + a.y*b.y;
-#endif
-}
-
-ccl_device_inline float dot(const float4& a, const float4& b)
-{
-#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
-	return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
-#else	
-	return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w);
-#endif
-}
-
-ccl_device_inline float3 cross(const float3& a, const float3& b)
-{
-	float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
-	return r;
-}
-
-#endif
-
-ccl_device_inline float len(const float3 a)
-{
-#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
-	return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F)));
-#else
-	return sqrtf(dot(a, a));
-#endif
-}
-
-ccl_device_inline float len_squared(const float3 a)
-{
-	return dot(a, a);
-}
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float len_squared(const float4& a)
-{
-	return dot(a, a);
-}
-
-ccl_device_inline float3 normalize(const float3& a)
-{
-#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
-	__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
-	return float3(_mm_div_ps(a.m128, norm));
-#else
-	return a/len(a);
-#endif
-}
-
-#endif
-
-ccl_device_inline float3 saturate3(float3 a)
-{
-	return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
-}
-
-ccl_device_inline float3 normalize_len(const float3 a, float *t)
-{
-	*t = len(a);
-	float x = 1.0f / *t;
-	return a*x;
-}
-
-ccl_device_inline float3 safe_normalize(const float3 a)
-{
-	float t = len(a);
-	return (t != 0.0f)? a * (1.0f/t) : a;
-}
-
-ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
-{
-	*t = len(a);
-	return (*t != 0.0f)? a/(*t): a;
-}
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline bool operator==(const float3& a, const float3& b)
-{
-#ifdef __KERNEL_SSE__
-	return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7;
-#else
-	return (a.x == b.x && a.y == b.y && a.z == b.z);
-#endif
-}
-
-ccl_device_inline bool operator!=(const float3& a, const float3& b)
-{
-	return !(a == b);
-}
-
-ccl_device_inline float3 min(const float3& a, const float3& b)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_min_ps(a.m128, b.m128));
-#else
-	return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
-#endif
-}
-
-ccl_device_inline float3 max(const float3& a, const float3& b)
-{
-#ifdef __KERNEL_SSE__
-	return float3(_mm_max_ps(a.m128, b.m128));
-#else
-	return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
-#endif
-}
-
-ccl_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx)
-{
-	return min(max(a, mn), mx);
-}
-
-ccl_device_inline float3 fabs(const float3& a)
-{
-#ifdef __KERNEL_SSE__
-	__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
-	return float3(_mm_and_ps(a.m128, mask));
-#else
-	return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
-#endif
-}
-
-#endif
-
 ccl_device_inline float3 float2_to_float3(const float2 a)
 {
 	return make_float3(a.x, a.y, 0.0f);
@@ -704,509 +237,21 @@ ccl_device_inline float4 float3_to_float4(const float3 a)
 	return make_float4(a.x, a.y, a.z, 1.0f);
 }
 
-#ifndef __KERNEL_GPU__
-
-ccl_device_inline void print_float3(const char *label, const float3& a)
-{
-	printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z);
-}
-
-ccl_device_inline float3 rcp(const float3& a)
-{
-#ifdef __KERNEL_SSE__
-	const float4 r(_mm_rcp_ps(a.m128));
-	return float3(_mm_sub_ps(_mm_add_ps(r, r),
-	                         _mm_mul_ps(_mm_mul_ps(r, r), a)));
-#else
-	return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z);
-#endif
-}
-
-#endif
-
-ccl_device_inline float3 interp(float3 a, float3 b, float t)
-{
-	return a + t*(b - a);
-}
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float3 mix(const float3& a, const float3& b, float t)
-{
-	return a + t*(b - a);
-}
-
-#endif
-
-ccl_device_inline bool is_zero(const float3 a)
-{
-#ifdef __KERNEL_SSE__
-	return a == make_float3(0.0f);
-#else
-	return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f);
-#endif
-}
-
-ccl_device_inline float reduce_add(const float3 a)
-{
-	return (a.x + a.y + a.z);
-}
-
-ccl_device_inline float average(const float3 a)
-{
-	return reduce_add(a)*(1.0f/3.0f);
-}
-
-ccl_device_inline bool isequal_float3(const float3 a, const float3 b)
-{
-#ifdef __KERNEL_OPENCL__
-	return all(a == b);
-#else
-	return a == b;
-#endif
-}
-
-/* Float4 Vector */
-
-#ifdef __KERNEL_SSE__
-
-template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
-__forceinline const float4 shuffle(const float4& b)
-{
-	return float4(_mm_castsi128_ps(
-	        _mm_shuffle_epi32(_mm_castps_si128(b),
-	                          _MM_SHUFFLE(index_3, index_2, index_1, index_0))));
-}
-
-#if defined(__KERNEL_SSE3__)
-template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b)
-{
-	return float4(_mm_moveldup_ps(b));
-}
-
-template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b)
-{
-	return float4(_mm_movehdup_ps(b));
-}
-#endif
-
-template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b)
-{
-	return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))));
-}
-
-#endif
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline float4 operator-(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
-	return float4(_mm_xor_ps(a.m128, mask));
-#else
-	return make_float4(-a.x, -a.y, -a.z, -a.w);
-#endif
-}
-
-ccl_device_inline float4 operator*(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	return float4(_mm_mul_ps(a.m128, b.m128));
-#else
-	return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
-#endif
-}
-
-ccl_device_inline float4 operator*(const float4& a, float f)
-{
-#if defined(__KERNEL_SSE__)
-	return a * make_float4(f);
-#else
-	return make_float4(a.x*f, a.y*f, a.z*f, a.w*f);
-#endif
-}
-
-ccl_device_inline float4 operator*(float f, const float4& a)
-{
-	return a * f;
-}
-
-ccl_device_inline float4 rcp(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	float4 r(_mm_rcp_ps(a.m128));
-	return float4(_mm_sub_ps(_mm_add_ps(r, r),
-	                         _mm_mul_ps(_mm_mul_ps(r, r), a)));
-#else
-	return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w);
-#endif
-}
-
-ccl_device_inline float4 operator/(const float4& a, float f)
-{
-	return a * (1.0f/f);
-}
-
-ccl_device_inline float4 operator/(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	return a * rcp(b);
-#else
-	return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
-#endif
-
-}
-
-ccl_device_inline float4 operator+(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	return float4(_mm_add_ps(a.m128, b.m128));
-#else
-	return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
-#endif
-}
-
-ccl_device_inline float4 operator-(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	return float4(_mm_sub_ps(a.m128, b.m128));
-#else
-	return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
-#endif
-}
-
-ccl_device_inline float4 operator+=(float4& a, const float4& b)
-{
-	return a = a + b;
-}
-
-ccl_device_inline float4 operator*=(float4& a, const float4& b)
-{
-	return a = a * b;
-}
-
-ccl_device_inline float4 operator/=(float4& a, float f)
-{
-	return a = a / f;
-}
-
-ccl_device_inline int4 operator<(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	/* TODO(sergey): avoid cvt. */
-	return int4(_mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128)));
-#else
-	return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
-#endif
-}
-
-ccl_device_inline int4 operator>=(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	/* TODO(sergey): avoid cvt. */
-	return int4(_mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128)));
-#else
-	return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
-#endif
-}
-
-ccl_device_inline int4 operator<=(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	/* TODO(sergey): avoid cvt. */
-	return int4(_mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128)));
-#else
-	return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w);
-#endif
-}
-
-ccl_device_inline bool operator==(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15;
-#else
-	return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w);
-#endif
-}
-
-ccl_device_inline float4 cross(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b));
-#else
-	return make_float4(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x, 0.0f);
-#endif
-}
-
-ccl_device_inline bool is_zero(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	return a == make_float4(0.0f);
-#else
-	return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
-#endif
-}
-
-ccl_device_inline float reduce_add(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	float4 h(shuffle<1,0,3,2>(a) + a);
-	/* TODO(sergey): Investigate efficiency. */
-	return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h);
-#else
-	return ((a.x + a.y) + (a.z + a.w));
-#endif
-}
-
-ccl_device_inline float average(const float4& a)
-{
-	return reduce_add(a) * 0.25f;
-}
-
-ccl_device_inline float len(const float4& a)
-{
-	return sqrtf(dot(a, a));
-}
-
-ccl_device_inline float4 normalize(const float4& a)
-{
-	return a/len(a);
-}
-
-ccl_device_inline float4 safe_normalize(const float4& a)
-{
-	float t = len(a);
-	return (t != 0.0f)? a/t: a;
-}
-
-ccl_device_inline float4 min(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	return float4(_mm_min_ps(a.m128, b.m128));
-#else
-	return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
-#endif
-}
-
-ccl_device_inline float4 max(const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	return float4(_mm_max_ps(a.m128, b.m128));
-#else
-	return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
-#endif
-}
-
-#endif
-
-#ifndef __KERNEL_GPU__
-
-ccl_device_inline float4 select(const int4& mask, const float4& a, const float4& b)
-{
-#ifdef __KERNEL_SSE__
-	/* TODO(sergey): avoid cvt. */
-	return float4(_mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a),
-	                        _mm_andnot_ps(_mm_cvtepi32_ps(mask), b)));
-#else
-	return make_float4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? a.w: b.w);
-#endif
-}
-
-ccl_device_inline float4 reduce_min(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	float4 h = min(shuffle<1,0,3,2>(a), a);
-	return min(shuffle<2,3,0,1>(h), h);
-#else
-	return make_float4(min(min(a.x, a.y), min(a.z, a.w)));
-#endif
-}
-
-ccl_device_inline float4 reduce_max(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	float4 h = max(shuffle<1,0,3,2>(a), a);
-	return max(shuffle<2,3,0,1>(h), h);
-#else
-	return make_float4(max(max(a.x, a.y), max(a.z, a.w)));
-#endif
-}
-
-#if 0
-ccl_device_inline float4 reduce_add(const float4& a)
-{
-#ifdef __KERNEL_SSE__
-	float4 h = shuffle<1,0,3,2>(a) + a;
-	return shuffle<2,3,0,1>(h) + h;
-#else
-	return make_float4((a.x + a.y) + (a.z + a.w));
-#endif
-}
-#endif
-
-ccl_device_inline void print_float4(const char *label, const float4& a)
-{
-	printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w);
-}
-
-#endif
-
-/* Int2 */
-
-#ifndef __KERNEL_OPENCL__
-
-ccl_device_inline int2 operator+(const int2 &a, const int2 &b)
-{
-	return make_int2(a.x + b.x, a.y + b.y);
-}
-
-ccl_device_inline int2 operator+=(int2 &a, const int2 &b)
-{
-	return a = a + b;
-}
-
-ccl_device_inline int2 operator-(const int2 &a, const int2 &b)
-{
-	return make_int2(a.x - b.x, a.y - b.y);
-}
-
-ccl_device_inline int2 operator*(const int2 &a, const int2 &b)
-{
-	return make_int2(a.x * b.x, a.y * b.y);
-}
+CCL_NAMESPACE_END
 
-ccl_device_inline int2 operator/(const int2 &a, const int2 &b)
-{
-	return make_int2(a.x / b.x, a.y / b.y);
-}
+#include "util/util_math_int2.h"
+#include "util/util_math_int3.h"
+#include "util/util_math_int4.h"
 
-#endif
+#include "util/util_math_float2.h"
+#include "util/util_math_float3.h"
+#include "util/util_math_float4.h"
 
-/* Int3 */
+CCL_NAMESPACE_BEGIN
 
 #ifndef __KERNEL_OPENCL__
-
-ccl_device_inline int3 min(int3 a, int3 b)
-{
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
-	return int3(_mm_min_epi32(a.m128, b.m128));
-#else
-	return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
-#endif
-}
-
-ccl_device_inline int3 max(int3 a, int3 b)
-{
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
-	return int3(_mm_max_epi32(a.m128, b.m128));
-#else
-	return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
-#endif
-}
-
-ccl_device_inline int3 clamp(const int3& a, int mn, int mx)
-{
-#ifdef __KERNEL_SSE__
-	return min(max(a, make_int3(mn)), make_int3(mx));
-#else
-	return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx));
-#endif
-}
-
-ccl_device_inline int3 clamp(const int3& a, int3& mn, int mx)
-{
-#ifdef __KERNEL_SSE__
-	return min(max(a, mn), make_int3(mx));
-#else
-	return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx));
-#endif
-}
-
-#endif
-
-#ifndef __KERNEL_GPU__
-
-ccl_device_inline void print_int3(const char *label, const int3& a)
-{
-	printf("%s: %d %d %d\n", label, a.x, a.y, a.z);
-}
-
-#endif
-
-/* Int4 */
-
-#ifndef __KERNEL_GPU__
-
-ccl_device_inline int4 operator+(const int4& a, const int4& b)
-{
-#ifdef __KERNEL_SSE__
-	return int4(_mm_add_epi32(a.m128, b.m128));
-#else
-	return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
-#endif
-}
-
-ccl_device_inline int4 operator+=(int4& a, const int4& b)
-{
-	return a = a + b;
-}
-
-ccl_device_inline int4 operator>>(const int4& a, int i)
-{
-#ifdef __KERNEL_SSE__
-	return int4(_mm_srai_epi32(a.m128, i));
-#else
-	return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i);
-#endif
-}
-
-ccl_device_inline int4 min(int4 a, int4 b)
-{
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
-	return int4(_mm_min_epi32(a.m128, b.m128));
-#else
-	return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
-#endif
-}
-
-ccl_device_inline int4 max(int4 a, int4 b)
-{
-#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
-	return int4(_mm_max_epi32(a.m128, b.m128));
-#else
-	return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
-#endif
-}
-
-ccl_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx)
-{
-	return min(max(a, mn), mx);
-}
-
-ccl_device_inline int4 select(const int4& mask, const int4& a, const int4& b)
-{
-#ifdef __KERNEL_SSE__
-	const __m128 m = _mm_cvtepi32_ps(mask);
-	/* TODO(sergey): avoid cvt. */
-	return int4(_mm_castps_si128(_mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)),
-	                                       _mm_andnot_ps(m, _mm_castsi128_ps(b)))));
-#else
-	return make_int4((mask.x)? a.x: b.x, (mask.y)? a.y: b.y, (mask.z)? a.z: b.z, (mask.w)? a.w: b.w);
-#endif
-}
-
-ccl_device_inline void print_int4(const char *label, const int4& a)
-{
-	printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w);
-}
-
-#endif
-
 /* Int/Float conversion */
 
-#ifndef __KERNEL_OPENCL__
-
 ccl_device_inline int as_int(uint i)
 {
 	union { uint ui; int i; } u;
@@ -1256,7 +301,6 @@ ccl_device_inline float __uint_as_float(uint i)
 	return u.f;
 }
 
-
 /* Interpolation */
 
 template<class A, class B> A lerp(const A& a, const A& b, const B& t)
@@ -1266,12 +310,13 @@ template<class A, class B> A lerp(const A& a, const A& b, const B& t)
 
 /* Triangle */
 
-ccl_device_inline float triangle_area(const float3& v1, const float3& v2, const float3& v3)
+ccl_device_inline float triangle_area(const float3& v1,
+                                      const float3& v2,
+                                      const float3& v3)
 {
 	return len(cross(v3 - v2, v1 - v2))*0.5f;
 }
-
-#endif
+#endif  /* __KERNEL_OPENCL__ */
 
 /* Versions of functions which are safe for fast math. */
 ccl_device_inline bool isnan_safe(float f)
@@ -1382,16 +427,16 @@ ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle)
 	float3 r;
 
 	r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) +
-		(((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) +
-		(((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z);
+	      (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) +
+	      (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z);
 
 	r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) +
-		((costheta + (1 - costheta) * axis.y * axis.y) * p.y) +
-		(((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z);
+	      ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) +
+	     (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z);
 
 	r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) +
-		(((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) +
-		((costheta + (1 - costheta) * axis.z * axis.z) * p.z);
+	      (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) +
+	      ((costheta + (1 - costheta) * axis.z * axis.z) * p.z);
 
 	return r;
 }
diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h
new file mode 100644
index 00000000000..6f9d0855d50
--- /dev/null
+++ b/intern/cycles/util/util_math_float2.h
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_FLOAT2_H__
+#define __UTIL_MATH_FLOAT2_H__
+
+#ifndef __UTIL_MATH_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/*******************************************************************************
+ * Declaration.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline float2 operator-(const float2& a);
+ccl_device_inline float2 operator*(const float2& a, const float2& b);
+ccl_device_inline float2 operator*(const float2& a, float f);
+ccl_device_inline float2 operator*(float f, const float2& a);
+ccl_device_inline float2 operator/(float f, const float2& a);
+ccl_device_inline float2 operator/(const float2& a, float f);
+ccl_device_inline float2 operator/(const float2& a, const float2& b);
+ccl_device_inline float2 operator+(const float2& a, const float2& b);
+ccl_device_inline float2 operator-(const float2& a, const float2& b);
+ccl_device_inline float2 operator+=(float2& a, const float2& b);
+ccl_device_inline float2 operator*=(float2& a, const float2& b);
+ccl_device_inline float2 operator*=(float2& a, float f);
+ccl_device_inline float2 operator/=(float2& a, const float2& b);
+ccl_device_inline float2 operator/=(float2& a, float f);
+
+ccl_device_inline bool operator==(const float2& a, const float2& b);
+ccl_device_inline bool operator!=(const float2& a, const float2& b);
+
+ccl_device_inline bool is_zero(const float2& a);
+ccl_device_inline float average(const float2& a);
+ccl_device_inline float dot(const float2& a, const float2& b);
+ccl_device_inline float cross(const float2& a, const float2& b);
+ccl_device_inline float len(const float2& a);
+ccl_device_inline float2 normalize(const float2& a);
+ccl_device_inline float2 normalize_len(const float2& a, float *t);
+ccl_device_inline float2 safe_normalize(const float2& a);
+ccl_device_inline float2 min(const float2& a, const float2& b);
+ccl_device_inline float2 max(const float2& a, const float2& b);
+ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx);
+ccl_device_inline float2 fabs(const float2& a);
+ccl_device_inline float2 as_float2(const float4& a);
+ccl_device_inline float2 interp(const float2& a, const float2& b, float t);
+#endif  /* !__KERNEL_OPENCL__ */
+
+/*******************************************************************************
+ * Definition.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline float2 operator-(const float2& a)
+{
+	return make_float2(-a.x, -a.y);
+}
+
+ccl_device_inline float2 operator*(const float2& a, const float2& b)
+{
+	return make_float2(a.x*b.x, a.y*b.y);
+}
+
+ccl_device_inline float2 operator*(const float2& a, float f)
+{
+	return make_float2(a.x*f, a.y*f);
+}
+
+ccl_device_inline float2 operator*(float f, const float2& a)
+{
+	return make_float2(a.x*f, a.y*f);
+}
+
+ccl_device_inline float2 operator/(float f, const float2& a)
+{
+	return make_float2(f/a.x, f/a.y);
+}
+
+ccl_device_inline float2 operator/(const float2& a, float f)
+{
+	float invf = 1.0f/f;
+	return make_float2(a.x*invf, a.y*invf);
+}
+
+ccl_device_inline float2 operator/(const float2& a, const float2& b)
+{
+	return make_float2(a.x/b.x, a.y/b.y);
+}
+
+ccl_device_inline float2 operator+(const float2& a, const float2& b)
+{
+	return make_float2(a.x+b.x, a.y+b.y);
+}
+
+ccl_device_inline float2 operator-(const float2& a, const float2& b)
+{
+	return make_float2(a.x-b.x, a.y-b.y);
+}
+
+ccl_device_inline float2 operator+=(float2& a, const float2& b)
+{
+	return a = a + b;
+}
+
+ccl_device_inline float2 operator*=(float2& a, const float2& b)
+{
+	return a = a * b;
+}
+
+ccl_device_inline float2 operator*=(float2& a, float f)
+{
+	return a = a * f;
+}
+
+ccl_device_inline float2 operator/=(float2& a, const float2& b)
+{
+	return a = a / b;
+}
+
+ccl_device_inline float2 operator/=(float2& a, float f)
+{
+	float invf = 1.0f/f;
+	return a = a * invf;
+}
+
+ccl_device_inline bool operator==(const float2& a, const float2& b)
+{
+	return (a.x == b.x && a.y == b.y);
+}
+
+ccl_device_inline bool operator!=(const float2& a, const float2& b)
+{
+	return !(a == b);
+}
+
+ccl_device_inline bool is_zero(const float2& a)
+{
+	return (a.x == 0.0f && a.y == 0.0f);
+}
+
+ccl_device_inline float average(const float2& a)
+{
+	return (a.x + a.y)*(1.0f/2.0f);
+}
+
+ccl_device_inline float dot(const float2& a, const float2& b)
+{
+	return a.x*b.x + a.y*b.y;
+}
+
+ccl_device_inline float cross(const float2& a, const float2& b)
+{
+	return (a.x*b.y - a.y*b.x);
+}
+
+ccl_device_inline float len(const float2& a)
+{
+	return sqrtf(dot(a, a));
+}
+
+ccl_device_inline float2 normalize(const float2& a)
+{
+	return a/len(a);
+}
+
+ccl_device_inline float2 normalize_len(const float2& a, float *t)
+{
+	*t = len(a);
+	return a/(*t);
+}
+
+ccl_device_inline float2 safe_normalize(const float2& a)
+{
+	float t = len(a);
+	return (t != 0.0f)? a/t: a;
+}
+
+ccl_device_inline float2 min(const float2& a, const float2& b)
+{
+	return make_float2(min(a.x, b.x), min(a.y, b.y));
+}
+
+ccl_device_inline float2 max(const float2& a, const float2& b)
+{
+	return make_float2(max(a.x, b.x), max(a.y, b.y));
+}
+
+ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx)
+{
+	return min(max(a, mn), mx);
+}
+
+ccl_device_inline float2 fabs(const float2& a)
+{
+	return make_float2(fabsf(a.x), fabsf(a.y));
+}
+
+ccl_device_inline float2 as_float2(const float4& a)
+{
+	return make_float2(a.x, a.y);
+}
+
+ccl_device_inline float2 interp(const float2& a, const float2& b, float t)
+{
+	return a + t*(b - a);
+}
+#endif  /* !__KERNEL_OPENCL__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_FLOAT2_H__ */
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
new file mode 100644
index 00000000000..e0c6b551040
--- /dev/null
+++ b/intern/cycles/util/util_math_float3.h
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_FLOAT3_H__
+#define __UTIL_MATH_FLOAT3_H__
+
+#ifndef __UTIL_MATH_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/*******************************************************************************
+ * Declaration.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline float3 operator-(const float3& a);
+ccl_device_inline float3 operator*(const float3& a, const float3& b);
+ccl_device_inline float3 operator*(const float3& a, const float f);
+ccl_device_inline float3 operator*(const float f, const float3& a);
+ccl_device_inline float3 operator/(const float f, const float3& a);
+ccl_device_inline float3 operator/(const float3& a, const float f);
+ccl_device_inline float3 operator/(const float3& a, const float3& b);
+ccl_device_inline float3 operator+(const float3& a, const float3& b);
+ccl_device_inline float3 operator-(const float3& a, const float3& b);
+ccl_device_inline float3 operator+=(float3& a, const float3& b);
+ccl_device_inline float3 operator*=(float3& a, const float3& b);
+ccl_device_inline float3 operator*=(float3& a, float f);
+ccl_device_inline float3 operator/=(float3& a, const float3& b);
+ccl_device_inline float3 operator/=(float3& a, float f);
+
+ccl_device_inline bool operator==(const float3& a, const float3& b);
+ccl_device_inline bool operator!=(const float3& a, const float3& b);
+
+ccl_device_inline float dot(const float3& a, const float3& b);
+ccl_device_inline float dot_xy(const float3& a, const float3& b);
+ccl_device_inline float3 cross(const float3& a, const float3& b);
+ccl_device_inline float3 normalize(const float3& a);
+ccl_device_inline float3 min(const float3& a, const float3& b);
+ccl_device_inline float3 max(const float3& a, const float3& b);
+ccl_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx);
+ccl_device_inline float3 fabs(const float3& a);
+ccl_device_inline float3 mix(const float3& a, const float3& b, float t);
+ccl_device_inline float3 rcp(const float3& a);
+#endif  /* !__KERNEL_OPENCL__ */
+
+ccl_device_inline float max3(float3 a);
+ccl_device_inline float len(const float3 a);
+ccl_device_inline float len_squared(const float3 a);
+
+ccl_device_inline float3 saturate3(float3 a);
+ccl_device_inline float3 safe_normalize(const float3 a);
+ccl_device_inline float3 normalize_len(const float3 a, float *t);;
+ccl_device_inline float3 safe_normalize_len(const float3 a, float *t);
+ccl_device_inline float3 interp(float3 a, float3 b, float t);
+
+ccl_device_inline bool is_zero(const float3 a);
+ccl_device_inline float reduce_add(const float3 a);
+ccl_device_inline float average(const float3 a);
+ccl_device_inline bool isequal_float3(const float3 a, const float3 b);
+
+/*******************************************************************************
+ * Definition.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline float3 operator-(const float3& a)
+{
+#ifdef __KERNEL_SSE__
+	return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
+#else
+	return make_float3(-a.x, -a.y, -a.z);
+#endif
+}
+
+ccl_device_inline float3 operator*(const float3& a, const float3& b)
+{
+#ifdef __KERNEL_SSE__
+	return float3(_mm_mul_ps(a.m128,b.m128));
+#else
+	return make_float3(a.x*b.x, a.y*b.y, a.z*b.z);
+#endif
+}
+
+ccl_device_inline float3 operator*(const float3& a, const float f)
+{
+#ifdef __KERNEL_SSE__
+	return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f)));
+#else
+	return make_float3(a.x*f, a.y*f, a.z*f);
+#endif
+}
+
+ccl_device_inline float3 operator*(const float f, const float3& a)
+{
+	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+	return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
+#else
+	return make_float3(a.x*f, a.y*f, a.z*f);
+#endif
+}
+
+ccl_device_inline float3 operator/(const float f, const float3& a)
+{
+	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+	__m128 rc = _mm_rcp_ps(a.m128);
+	return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
+#else
+	return make_float3(f / a.x, f / a.y, f / a.z);
+#endif
+}
+
+ccl_device_inline float3 operator/(const float3& a, const float f)
+{
+	float invf = 1.0f/f;
+	return a * invf;
+}
+
+ccl_device_inline float3 operator/(const float3& a, const float3& b)
+{
+	/* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
+#if defined(__KERNEL_SSE__) && 0
+	__m128 rc = _mm_rcp_ps(b.m128);
+	return float3(_mm_mul_ps(a, rc));
+#else
+	return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
+#endif
+}
+
+ccl_device_inline float3 operator+(const float3& a, const float3& b)
+{
+#ifdef __KERNEL_SSE__
+	return float3(_mm_add_ps(a.m128, b.m128));
+#else
+	return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
+#endif
+}
+
+ccl_device_inline float3 operator-(const float3& a, const float3& b)
+{
+#ifdef __KERNEL_SSE__
+	return float3(_mm_sub_ps(a.m128, b.m128));
+#else
+	return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
+#endif
+}
+
+ccl_device_inline float3 operator+=(float3& a, const float3& b)
+{
+	return a = a + b;
+}
+
+ccl_device_inline float3 operator*=(float3& a, const float3& b)
+{
+	return a = a * b;
+}
+
+ccl_device_inline float3 operator*=(float3& a, float f)
+{
+	return a = a * f;
+}
+
+ccl_device_inline float3 operator/=(float3& a, const float3& b)
+{
+	return a = a / b;
+}
+
+ccl_device_inline float3 operator/=(float3& a, float f)
+{
+	float invf = 1.0f/f;
+	return a = a * invf;
+}
+
+ccl_device_inline bool operator==(const float3& a, const float3& b)
+{
+#ifdef __KERNEL_SSE__
+	return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7;
+#else
+	return (a.x == b.x && a.y == b.y && a.z == b.z);
+#endif
+}
+
+ccl_device_inline bool operator!=(const float3& a, const float3& b)
+{
+	return !(a == b);
+}
+
+ccl_device_inline float dot(const float3& a, const float3& b)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+	return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
+#else
+	return a.x*b.x + a.y*b.y + a.z*b.z;
+#endif
+}
+
+ccl_device_inline float dot_xy(const float3& a, const float3& b)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+	return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b));
+#else
+	return a.x*b.x + a.y*b.y;
+#endif
+}
+
+ccl_device_inline float3 cross(const float3& a, const float3& b)
+{
+	float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
+	return r;
+}
+
+ccl_device_inline float3 normalize(const float3& a)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+	__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
+	return float3(_mm_div_ps(a.m128, norm));
+#else
+	return a/len(a);
+#endif
+}
+
+ccl_device_inline float3 min(const float3& a, const float3& b)
+{
+#ifdef __KERNEL_SSE__
+	return float3(_mm_min_ps(a.m128, b.m128));
+#else
+	return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
+#endif
+}
+
+ccl_device_inline float3 max(const float3& a, const float3& b)
+{
+#ifdef __KERNEL_SSE__
+	return float3(_mm_max_ps(a.m128, b.m128));
+#else
+	return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
+#endif
+}
+
+ccl_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx)
+{
+	return min(max(a, mn), mx);
+}
+
+ccl_device_inline float3 fabs(const float3& a)
+{
+#ifdef __KERNEL_SSE__
+	__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
+	return float3(_mm_and_ps(a.m128, mask));
+#else
+	return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
+#endif
+}
+
+ccl_device_inline float3 mix(const float3& a, const float3& b, float t)
+{
+	return a + t*(b - a);
+}
+
+ccl_device_inline float3 rcp(const float3& a)
+{
+#ifdef __KERNEL_SSE__
+	const float4 r(_mm_rcp_ps(a.m128));
+	return float3(_mm_sub_ps(_mm_add_ps(r, r),
+	                         _mm_mul_ps(_mm_mul_ps(r, r), a)));
+#else
+	return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z);
+#endif
+}
+#endif  /* !__KERNEL_OPENCL__ */
+
+ccl_device_inline float max3(float3 a)
+{
+	return max(max(a.x, a.y), a.z);
+}
+
+ccl_device_inline float len(const float3 a)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+	return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F)));
+#else
+	return sqrtf(dot(a, a));
+#endif
+}
+
+ccl_device_inline float len_squared(const float3 a)
+{
+	return dot(a, a);
+}
+
+ccl_device_inline float3 saturate3(float3 a)
+{
+	return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
+}
+
+ccl_device_inline float3 normalize_len(const float3 a, float *t)
+{
+	*t = len(a);
+	float x = 1.0f / *t;
+	return a*x;
+}
+
+ccl_device_inline float3 safe_normalize(const float3 a)
+{
+	float t = len(a);
+	return (t != 0.0f)? a * (1.0f/t) : a;
+}
+
+ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
+{
+	*t = len(a);
+	return (*t != 0.0f)? a/(*t): a;
+}
+
+ccl_device_inline float3 interp(float3 a, float3 b, float t)
+{
+	return a + t*(b - a);
+}
+
+ccl_device_inline bool is_zero(const float3 a)
+{
+#ifdef __KERNEL_SSE__
+	return a == make_float3(0.0f);
+#else
+	return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f);
+#endif
+}
+
+ccl_device_inline float reduce_add(const float3 a)
+{
+	return (a.x + a.y + a.z);
+}
+
+ccl_device_inline float average(const float3 a)
+{
+	return reduce_add(a)*(1.0f/3.0f);
+}
+
+ccl_device_inline bool isequal_float3(const float3 a, const float3 b)
+{
+#ifdef __KERNEL_OPENCL__
+	return all(a == b);
+#else
+	return a == b;
+#endif
+}
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_FLOAT3_H__ */
diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h
new file mode 100644
index 00000000000..d89121b3a1d
--- /dev/null
+++ b/intern/cycles/util/util_math_float4.h
@@ -0,0 +1,393 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_FLOAT4_H__
+#define __UTIL_MATH_FLOAT4_H__
+
+#ifndef __UTIL_MATH_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/*******************************************************************************
+ * Declaration.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline float4 operator-(const float4& a);
+ccl_device_inline float4 operator*(const float4& a, const float4& b);
+ccl_device_inline float4 operator*(const float4& a, float f);
+ccl_device_inline float4 operator*(float f, const float4& a);
+ccl_device_inline float4 operator/(const float4& a, float f);
+ccl_device_inline float4 operator/(const float4& a, const float4& b);
+ccl_device_inline float4 operator+(const float4& a, const float4& b);
+ccl_device_inline float4 operator-(const float4& a, const float4& b);
+ccl_device_inline float4 operator+=(float4& a, const float4& b);
+ccl_device_inline float4 operator*=(float4& a, const float4& b);
+ccl_device_inline float4 operator/=(float4& a, float f);
+
+ccl_device_inline int4 operator<(const float4& a, const float4& b);
+ccl_device_inline int4 operator>=(const float4& a, const float4& b);
+ccl_device_inline int4 operator<=(const float4& a, const float4& b);
+ccl_device_inline bool operator==(const float4& a, const float4& b);
+
+ccl_device_inline float dot(const float4& a, const float4& b);
+ccl_device_inline float len_squared(const float4& a);
+ccl_device_inline float4 rcp(const float4& a);
+ccl_device_inline float4 cross(const float4& a, const float4& b);
+ccl_device_inline bool is_zero(const float4& a);
+ccl_device_inline float reduce_add(const float4& a);
+ccl_device_inline float average(const float4& a);
+ccl_device_inline float len(const float4& a);
+ccl_device_inline float4 normalize(const float4& a);
+ccl_device_inline float4 safe_normalize(const float4& a);
+ccl_device_inline float4 min(const float4& a, const float4& b);
+ccl_device_inline float4 max(const float4& a, const float4& b);
+#endif  /* !__KERNEL_OPENCL__*/
+
+#ifdef __KERNEL_SSE__
+template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
+__forceinline const float4 shuffle(const float4& b);
+
+template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b);
+
+#  ifdef __KERNEL_SSE3__
+template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b);
+template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b);
+#  endif
+#endif  /* __KERNEL_SSE__ */
+
+#ifndef __KERNEL_GPU__
+ccl_device_inline float4 select(const int4& mask,
+                                const float4& a,
+                                const float4& b);
+ccl_device_inline float4 reduce_min(const float4& a);
+ccl_device_inline float4 reduce_max(const float4& a);
+#  if 0
+ccl_device_inline float4 reduce_add(const float4& a);
+#  endif
+#endif  /* !__KERNEL_GPU__ */
+
+/*******************************************************************************
+ * Definition.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline float4 operator-(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
+	return float4(_mm_xor_ps(a.m128, mask));
+#else
+	return make_float4(-a.x, -a.y, -a.z, -a.w);
+#endif
+}
+
+ccl_device_inline float4 operator*(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	return float4(_mm_mul_ps(a.m128, b.m128));
+#else
+	return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
+#endif
+}
+
+ccl_device_inline float4 operator*(const float4& a, float f)
+{
+#if defined(__KERNEL_SSE__)
+	return a * make_float4(f);
+#else
+	return make_float4(a.x*f, a.y*f, a.z*f, a.w*f);
+#endif
+}
+
+ccl_device_inline float4 operator*(float f, const float4& a)
+{
+	return a * f;
+}
+
+ccl_device_inline float4 operator/(const float4& a, float f)
+{
+	return a * (1.0f/f);
+}
+
+ccl_device_inline float4 operator/(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	return a * rcp(b);
+#else
+	return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
+#endif
+
+}
+
+ccl_device_inline float4 operator+(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	return float4(_mm_add_ps(a.m128, b.m128));
+#else
+	return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+#endif
+}
+
+ccl_device_inline float4 operator-(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	return float4(_mm_sub_ps(a.m128, b.m128));
+#else
+	return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
+#endif
+}
+
+ccl_device_inline float4 operator+=(float4& a, const float4& b)
+{
+	return a = a + b;
+}
+
+ccl_device_inline float4 operator*=(float4& a, const float4& b)
+{
+	return a = a * b;
+}
+
+ccl_device_inline float4 operator/=(float4& a, float f)
+{
+	return a = a / f;
+}
+
+ccl_device_inline int4 operator<(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	/* TODO(sergey): avoid cvt. */
+	return int4(_mm_cvtps_epi32(_mm_cmplt_ps(a.m128, b.m128)));
+#else
+	return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
+#endif
+}
+
+ccl_device_inline int4 operator>=(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	/* TODO(sergey): avoid cvt. */
+	return int4(_mm_cvtps_epi32(_mm_cmpge_ps(a.m128, b.m128)));
+#else
+	return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
+#endif
+}
+
+ccl_device_inline int4 operator<=(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	/* TODO(sergey): avoid cvt. */
+	return int4(_mm_cvtps_epi32(_mm_cmple_ps(a.m128, b.m128)));
+#else
+	return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w);
+#endif
+}
+
+ccl_device_inline bool operator==(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15;
+#else
+	return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w);
+#endif
+}
+
+ccl_device_inline float dot(const float4& a, const float4& b)
+{
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+	return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
+#else
+	return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w);
+#endif
+}
+
+ccl_device_inline float len_squared(const float4& a)
+{
+	return dot(a, a);
+}
+
+ccl_device_inline float4 rcp(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	float4 r(_mm_rcp_ps(a.m128));
+	return float4(_mm_sub_ps(_mm_add_ps(r, r),
+	                         _mm_mul_ps(_mm_mul_ps(r, r), a)));
+#else
+	return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w);
+#endif
+}
+
+ccl_device_inline float4 cross(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) -
+	       (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b));
+#else
+	return make_float4(a.y*b.z - a.z*b.y,
+	                   a.z*b.x - a.x*b.z,
+	                   a.x*b.y - a.y*b.x,
+	                   0.0f);
+#endif
+}
+
+ccl_device_inline bool is_zero(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	return a == make_float4(0.0f);
+#else
+	return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
+#endif
+}
+
+ccl_device_inline float reduce_add(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	float4 h(shuffle<1,0,3,2>(a) + a);
+	/* TODO(sergey): Investigate efficiency. */
+	return _mm_cvtss_f32(shuffle<2,3,0,1>(h) + h);
+#else
+	return ((a.x + a.y) + (a.z + a.w));
+#endif
+}
+
+ccl_device_inline float average(const float4& a)
+{
+	return reduce_add(a) * 0.25f;
+}
+
+ccl_device_inline float len(const float4& a)
+{
+	return sqrtf(dot(a, a));
+}
+
+ccl_device_inline float4 normalize(const float4& a)
+{
+	return a/len(a);
+}
+
+ccl_device_inline float4 safe_normalize(const float4& a)
+{
+	float t = len(a);
+	return (t != 0.0f)? a/t: a;
+}
+
+ccl_device_inline float4 min(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	return float4(_mm_min_ps(a.m128, b.m128));
+#else
+	return make_float4(min(a.x, b.x),
+	                   min(a.y, b.y),
+	                   min(a.z, b.z),
+	                   min(a.w, b.w));
+#endif
+}
+
+ccl_device_inline float4 max(const float4& a, const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	return float4(_mm_max_ps(a.m128, b.m128));
+#else
+	return make_float4(max(a.x, b.x),
+	                   max(a.y, b.y),
+	                   max(a.z, b.z),
+	                   max(a.w, b.w));
+#endif
+}
+#endif  /* !__KERNEL_OPENCL__*/
+
+#ifdef __KERNEL_SSE__
+template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
+__forceinline const float4 shuffle(const float4& b)
+{
+	return float4(_mm_castsi128_ps(
+	        _mm_shuffle_epi32(_mm_castps_si128(b),
+	                          _MM_SHUFFLE(index_3, index_2, index_1, index_0))));
+}
+
+template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b)
+{
+	return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))));
+}
+
+#  ifdef __KERNEL_SSE3__
+template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b)
+{
+	return float4(_mm_moveldup_ps(b));
+}
+
+template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b)
+{
+	return float4(_mm_movehdup_ps(b));
+}
+#  endif  /* __KERNEL_SSE3__ */
+#endif  /* __KERNEL_SSE__ */
+
+#ifndef __KERNEL_GPU__
+ccl_device_inline float4 select(const int4& mask,
+                                const float4& a,
+                                const float4& b)
+{
+#ifdef __KERNEL_SSE__
+	/* TODO(sergey): avoid cvt. */
+	return float4(_mm_or_ps(_mm_and_ps(_mm_cvtepi32_ps(mask), a),
+	                        _mm_andnot_ps(_mm_cvtepi32_ps(mask), b)));
+#else
+	return make_float4((mask.x)? a.x: b.x,
+	                   (mask.y)? a.y: b.y,
+	                   (mask.z)? a.z: b.z,
+	                   (mask.w)? a.w: b.w);
+#endif
+}
+
+ccl_device_inline float4 reduce_min(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	float4 h = min(shuffle<1,0,3,2>(a), a);
+	return min(shuffle<2,3,0,1>(h), h);
+#else
+	return make_float4(min(min(a.x, a.y), min(a.z, a.w)));
+#endif
+}
+
+ccl_device_inline float4 reduce_max(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	float4 h = max(shuffle<1,0,3,2>(a), a);
+	return max(shuffle<2,3,0,1>(h), h);
+#else
+	return make_float4(max(max(a.x, a.y), max(a.z, a.w)));
+#endif
+}
+
+#if 0
+ccl_device_inline float4 reduce_add(const float4& a)
+{
+#ifdef __KERNEL_SSE__
+	float4 h = shuffle<1,0,3,2>(a) + a;
+	return shuffle<2,3,0,1>(h) + h;
+#else
+	return make_float4((a.x + a.y) + (a.z + a.w));
+#endif
+}
+#endif
+#endif  /* !__KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_FLOAT4_H__ */
diff --git a/intern/cycles/util/util_math_int2.h b/intern/cycles/util/util_math_int2.h
new file mode 100644
index 00000000000..828c49a131c
--- /dev/null
+++ b/intern/cycles/util/util_math_int2.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_INT2_H__
+#define __UTIL_MATH_INT2_H__
+
+#ifndef __UTIL_MATH_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/*******************************************************************************
+ * Declaration.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline bool operator==(const int2 a, const int2 b);
+ccl_device_inline int2 operator+(const int2 &a, const int2 &b);
+ccl_device_inline int2 operator+=(int2 &a, const int2 &b);
+ccl_device_inline int2 operator-(const int2 &a, const int2 &b);
+ccl_device_inline int2 operator*(const int2 &a, const int2 &b);
+ccl_device_inline int2 operator/(const int2 &a, const int2 &b);
+#endif  /* !__KERNEL_OPENCL__ */
+
+/*******************************************************************************
+ * Definition.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline bool operator==(const int2 a, const int2 b)
+{
+	return (a.x == b.x && a.y == b.y);
+}
+
+ccl_device_inline int2 operator+(const int2 &a, const int2 &b)
+{
+	return make_int2(a.x + b.x, a.y + b.y);
+}
+
+ccl_device_inline int2 operator+=(int2 &a, const int2 &b)
+{
+	return a = a + b;
+}
+
+ccl_device_inline int2 operator-(const int2 &a, const int2 &b)
+{
+	return make_int2(a.x - b.x, a.y - b.y);
+}
+
+ccl_device_inline int2 operator*(const int2 &a, const int2 &b)
+{
+	return make_int2(a.x * b.x, a.y * b.y);
+}
+
+ccl_device_inline int2 operator/(const int2 &a, const int2 &b)
+{
+	return make_int2(a.x / b.x, a.y / b.y);
+}
+#endif  /* !__KERNEL_OPENCL__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_INT2_H__ */
diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h
new file mode 100644
index 00000000000..fa7a02636de
--- /dev/null
+++ b/intern/cycles/util/util_math_int3.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_INT3_H__
+#define __UTIL_MATH_INT3_H__
+
+#ifndef __UTIL_MATH_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/*******************************************************************************
+ * Declaration.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline int3 min(int3 a, int3 b);
+ccl_device_inline int3 max(int3 a, int3 b);
+ccl_device_inline int3 clamp(const int3& a, int mn, int mx);
+ccl_device_inline int3 clamp(const int3& a, int3& mn, int mx);
+#endif  /* !__KERNEL_OPENCL__ */
+
+/*******************************************************************************
+ * Definition.
+ */
+
+#ifndef __KERNEL_OPENCL__
+ccl_device_inline int3 min(int3 a, int3 b)
+{
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+	return int3(_mm_min_epi32(a.m128, b.m128));
+#else
+	return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
+#endif
+}
+
+ccl_device_inline int3 max(int3 a, int3 b)
+{
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+	return int3(_mm_max_epi32(a.m128, b.m128));
+#else
+	return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
+#endif
+}
+
+ccl_device_inline int3 clamp(const int3& a, int mn, int mx)
+{
+#ifdef __KERNEL_SSE__
+	return min(max(a, make_int3(mn)), make_int3(mx));
+#else
+	return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx));
+#endif
+}
+
+ccl_device_inline int3 clamp(const int3& a, int3& mn, int mx)
+{
+#ifdef __KERNEL_SSE__
+	return min(max(a, mn), make_int3(mx));
+#else
+	return make_int3(clamp(a.x, mn.x, mx),
+	                 clamp(a.y, mn.y, mx),
+	                 clamp(a.z, mn.z, mx));
+#endif
+}
+#endif  /* !__KERNEL_OPENCL__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_INT3_H__ */
diff --git a/intern/cycles/util/util_math_int4.h b/intern/cycles/util/util_math_int4.h
new file mode 100644
index 00000000000..4b327c90c33
--- /dev/null
+++ b/intern/cycles/util/util_math_int4.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_MATH_INT4_H__
+#define __UTIL_MATH_INT4_H__
+
+#ifndef __UTIL_MATH_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/*******************************************************************************
+ * Declaration.
+ */
+
+#ifndef __KERNEL_GPU__
+ccl_device_inline int4 operator+(const int4& a, const int4& b);
+ccl_device_inline int4 operator+=(int4& a, const int4& b);
+ccl_device_inline int4 operator>>(const int4& a, int i);
+ccl_device_inline int4 min(int4 a, int4 b);
+ccl_device_inline int4 max(int4 a, int4 b);
+ccl_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx);
+ccl_device_inline int4 select(const int4& mask, const int4& a, const int4& b);
+#endif  /* __KERNEL_GPU__ */
+
+/*******************************************************************************
+ * Definition.
+ */
+
+#ifndef __KERNEL_GPU__
+ccl_device_inline int4 operator+(const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+	return int4(_mm_add_epi32(a.m128, b.m128));
+#else
+	return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
+#endif
+}
+
+ccl_device_inline int4 operator+=(int4& a, const int4& b)
+{
+	return a = a + b;
+}
+
+ccl_device_inline int4 operator>>(const int4& a, int i)
+{
+#ifdef __KERNEL_SSE__
+	return int4(_mm_srai_epi32(a.m128, i));
+#else
+	return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i);
+#endif
+}
+
+ccl_device_inline int4 min(int4 a, int4 b)
+{
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+	return int4(_mm_min_epi32(a.m128, b.m128));
+#else
+	return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
+#endif
+}
+
+ccl_device_inline int4 max(int4 a, int4 b)
+{
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+	return int4(_mm_max_epi32(a.m128, b.m128));
+#else
+	return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
+#endif
+}
+
+ccl_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx)
+{
+	return min(max(a, mn), mx);
+}
+
+ccl_device_inline int4 select(const int4& mask, const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+	const __m128 m = _mm_cvtepi32_ps(mask);
+	/* TODO(sergey): avoid cvt. */
+	return int4(_mm_castps_si128(
+	        _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)),
+	                  _mm_andnot_ps(m, _mm_castsi128_ps(b)))));
+#else
+	return make_int4((mask.x)? a.x: b.x,
+	                 (mask.y)? a.y: b.y,
+	                 (mask.z)? a.z: b.z,
+	                 (mask.w)? a.w: b.w);
+#endif
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_INT4_H__ */
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 5a8f11694a7..137cc73b70b 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -18,78 +18,68 @@
 #define __UTIL_TYPES_H__
 
 #ifndef __KERNEL_OPENCL__
-
-#include <stdlib.h>
-
+#  include <stdlib.h>
 #endif
 
 /* Bitness */
 
 #if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64)
-#define __KERNEL_64_BIT__
+#  define __KERNEL_64_BIT__
 #endif
 
 /* Qualifiers for kernel code shared by CPU and GPU */
 
 #ifndef __KERNEL_GPU__
-
-#define ccl_device static inline
-#define ccl_device_noinline static
-#define ccl_global
-#define ccl_constant
-#define ccl_local
-#define ccl_local_param
-#define ccl_private
-#define ccl_restrict __restrict
-#define __KERNEL_WITH_SSE_ALIGN__
-
-#if defined(_WIN32) && !defined(FREE_WINDOWS)
-#define ccl_device_inline static __forceinline
-#define ccl_device_forceinline static __forceinline
-#define ccl_align(...) __declspec(align(__VA_ARGS__))
-#ifdef __KERNEL_64_BIT__
-#define ccl_try_align(...) __declspec(align(__VA_ARGS__))
-#else
-#undef __KERNEL_WITH_SSE_ALIGN__
-#define ccl_try_align(...) /* not support for function arguments (error C2719) */
-#endif
-#define ccl_may_alias
-#define ccl_always_inline __forceinline
-#define ccl_never_inline __declspec(noinline)
-#define ccl_maybe_unused
-
-#else
-
-#define ccl_device_inline static inline __attribute__((always_inline))
-#define ccl_device_forceinline static inline __attribute__((always_inline))
-#define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
-#ifndef FREE_WINDOWS64
-#define __forceinline inline __attribute__((always_inline))
-#endif
-#define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
-#define ccl_may_alias __attribute__((__may_alias__))
-#define ccl_always_inline __attribute__((always_inline))
-#define ccl_never_inline __attribute__((noinline))
-#define ccl_maybe_unused __attribute__((used))
-
-#endif
-
-#endif
+#  define ccl_device static inline
+#  define ccl_device_noinline static
+#  define ccl_global
+#  define ccl_constant
+#  define ccl_local
+#  define ccl_local_param
+#  define ccl_private
+#  define ccl_restrict __restrict
+#  define __KERNEL_WITH_SSE_ALIGN__
+
+#  if defined(_WIN32) && !defined(FREE_WINDOWS)
+#    define ccl_device_inline static __forceinline
+#    define ccl_device_forceinline static __forceinline
+#    define ccl_align(...) __declspec(align(__VA_ARGS__))
+#    ifdef __KERNEL_64_BIT__
+#      define ccl_try_align(...) __declspec(align(__VA_ARGS__))
+#    else  /* __KERNEL_64_BIT__ */
+#      undef __KERNEL_WITH_SSE_ALIGN__
+/* No support for function arguments (error C2719). */
+#      define ccl_try_align(...)
+#    endif  /* __KERNEL_64_BIT__ */
+#    define ccl_may_alias
+#    define ccl_always_inline __forceinline
+#    define ccl_never_inline __declspec(noinline)
+#    define ccl_maybe_unused
+#  else  /* _WIN32 && !FREE_WINDOWS */
+#    define ccl_device_inline static inline __attribute__((always_inline))
+#    define ccl_device_forceinline static inline __attribute__((always_inline))
+#    define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
+#    ifndef FREE_WINDOWS64
+#      define __forceinline inline __attribute__((always_inline))
+#    endif
+#    define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
+#    define ccl_may_alias __attribute__((__may_alias__))
+#    define ccl_always_inline __attribute__((always_inline))
+#    define ccl_never_inline __attribute__((noinline))
+#    define ccl_maybe_unused __attribute__((used))
+#  endif  /* _WIN32 && !FREE_WINDOWS */
+#endif  /* __KERNEL_GPU__ */
 
 /* Standard Integer Types */
 
 #ifndef __KERNEL_GPU__
-
 /* int8_t, uint16_t, and friends */
-#ifndef _WIN32
-#include <stdint.h>
-#endif
-
+#  ifndef _WIN32
+#    include <stdint.h>
+#  endif
 /* SIMD Types */
-
-#include "util/util_optimization.h"
-
-#endif
+#  include "util/util_optimization.h"
+#endif  /* __KERNEL_GPU__ */
 
 CCL_NAMESPACE_BEGIN
 
@@ -102,24 +92,18 @@ CCL_NAMESPACE_BEGIN
 /* Shorter Unsigned Names */
 
 #ifndef __KERNEL_OPENCL__
-
 typedef unsigned char uchar;
 typedef unsigned int uint;
-
 #endif
 
 /* Fixed Bits Types */
 
 #ifdef __KERNEL_OPENCL__
-
 typedef ulong uint64_t;
-
 #endif
 
 #ifndef __KERNEL_GPU__
-
-#ifdef _WIN32
-
+#  ifdef _WIN32
 typedef signed char int8_t;
 typedef unsigned char uint8_t;
 
@@ -131,356 +115,17 @@ typedef unsigned int uint32_t;
 
 typedef long long int64_t;
 typedef unsigned long long uint64_t;
-
-#ifdef __KERNEL_64_BIT__
+#    ifdef __KERNEL_64_BIT__
 typedef int64_t ssize_t;
-#else
+#    else
 typedef int32_t ssize_t;
-#endif
-
-#endif
+#    endif
+#  endif  /* _WIN32 */
 
 /* Generic Memory Pointer */
 
 typedef uint64_t device_ptr;
-
-/* Vector Types */
-
-struct uchar2 {
-	uchar x, y;
-
-	__forceinline uchar operator[](int i) const { return *(&x + i); }
-	__forceinline uchar& operator[](int i) { return *(&x + i); }
-};
-
-struct uchar3 {
-	uchar x, y, z;
-
-	__forceinline uchar operator[](int i) const { return *(&x + i); }
-	__forceinline uchar& operator[](int i) { return *(&x + i); }
-};
-
-struct uchar4 {
-	uchar x, y, z, w;
-
-	__forceinline uchar operator[](int i) const { return *(&x + i); }
-	__forceinline uchar& operator[](int i) { return *(&x + i); }
-};
-
-struct int2 {
-	int x, y;
-
-	__forceinline int operator[](int i) const { return *(&x + i); }
-	__forceinline int& operator[](int i) { return *(&x + i); }
-};
-
-struct ccl_try_align(16) int3 {
-#ifdef __KERNEL_SSE__
-	union {
-		__m128i m128;
-		struct { int x, y, z, w; };
-	};
-
-	__forceinline int3() {}
-	__forceinline explicit int3(const __m128i& a) : m128(a) {}
-	__forceinline operator const __m128i&(void) const { return m128; }
-	__forceinline operator __m128i&(void) { return m128; }
-
-	int3(const int3& a) { m128 = a.m128; }
-	int3& operator =(const int3& a) { m128 = a.m128; return *this; }
-#else
-	int x, y, z, w;
-#endif
-
-	__forceinline int operator[](int i) const { return *(&x + i); }
-	__forceinline int& operator[](int i) { return *(&x + i); }
-};
-
-struct ccl_try_align(16) int4 {
-#ifdef __KERNEL_SSE__
-	union {
-		__m128i m128;
-		struct { int x, y, z, w; };
-	};
-
-	__forceinline int4() {}
-	__forceinline explicit int4(const __m128i& a) : m128(a) {}
-	__forceinline operator const __m128i&(void) const { return m128; }
-	__forceinline operator __m128i&(void) { return m128; }
-
-	int4(const int4& a) : m128(a.m128) {}
-	int4& operator=(const int4& a) { m128 = a.m128; return *this; }
-#else
-	int x, y, z, w;
-#endif
-
-	__forceinline int operator[](int i) const { return *(&x + i); }
-	__forceinline int& operator[](int i) { return *(&x + i); }
-};
-
-struct uint2 {
-	uint x, y;
-
-	__forceinline uint operator[](uint i) const { return *(&x + i); }
-	__forceinline uint& operator[](uint i) { return *(&x + i); }
-};
-
-struct uint3 {
-	uint x, y, z;
-
-	__forceinline uint operator[](uint i) const { return *(&x + i); }
-	__forceinline uint& operator[](uint i) { return *(&x + i); }
-};
-
-struct uint4 {
-	uint x, y, z, w;
-
-	__forceinline uint operator[](uint i) const { return *(&x + i); }
-	__forceinline uint& operator[](uint i) { return *(&x + i); }
-};
-
-struct float2 {
-	float x, y;
-
-	__forceinline float operator[](int i) const { return *(&x + i); }
-	__forceinline float& operator[](int i) { return *(&x + i); }
-};
-
-struct ccl_try_align(16) float3 {
-#ifdef __KERNEL_SSE__
-	union {
-		__m128 m128;
-		struct { float x, y, z, w; };
-	};
-
-	__forceinline float3() {}
-	__forceinline explicit float3(const __m128& a) : m128(a) {}
-	__forceinline operator const __m128&(void) const { return m128; }
-	__forceinline operator __m128&(void) { return m128; }
-
-	__forceinline float3(const float3& a) : m128(a.m128) {}
-	__forceinline float3& operator =(const float3& a) { m128 = a.m128; return *this; }
-#else
-	float x, y, z, w;
-#endif
-
-	__forceinline float operator[](int i) const { return *(&x + i); }
-	__forceinline float& operator[](int i) { return *(&x + i); }
-};
-
-struct ccl_try_align(16) float4 {
-#ifdef __KERNEL_SSE__
-	union {
-		__m128 m128;
-		struct { float x, y, z, w; };
-	};
-
-	__forceinline float4() {}
-	__forceinline explicit float4(const __m128& a) : m128(a) {}
-	__forceinline operator const __m128&(void) const { return m128; }
-	__forceinline operator __m128&(void) { return m128; }
-
-	__forceinline float4(const float4& a) : m128(a.m128) {}
-	__forceinline float4& operator =(const float4& a) { m128 = a.m128; return *this; }
-
-#else
-	float x, y, z, w;
-#endif
-
-	__forceinline float operator[](int i) const { return *(&x + i); }
-	__forceinline float& operator[](int i) { return *(&x + i); }
-};
-
-template<typename T>
-class vector3
-{
-public:
-	T x, y, z;
-
-	ccl_always_inline vector3() {}
-	ccl_always_inline vector3(const T& a)
-	  : x(a), y(a), z(a) {}
-	ccl_always_inline vector3(const T& x, const T& y, const T& z)
-	  : x(x), y(y), z(z) {}
-};
-
-#endif
-
-#ifndef __KERNEL_GPU__
-
-/* Vector Type Constructors
- * 
- * OpenCL does not support C++ class, so we use these instead. */
-
-ccl_device_inline uchar2 make_uchar2(uchar x, uchar y)
-{
-	uchar2 a = {x, y};
-	return a;
-}
-
-ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z)
-{
-	uchar3 a = {x, y, z};
-	return a;
-}
-
-ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
-{
-	uchar4 a = {x, y, z, w};
-	return a;
-}
-
-ccl_device_inline int2 make_int2(int x, int y)
-{
-	int2 a = {x, y};
-	return a;
-}
-
-ccl_device_inline int3 make_int3(int x, int y, int z)
-{
-#ifdef __KERNEL_SSE__
-	int3 a;
-	a.m128 = _mm_set_epi32(0, z, y, x);
-#else
-	int3 a = {x, y, z, 0};
-#endif
-
-	return a;
-}
-
-ccl_device_inline int4 make_int4(int x, int y, int z, int w)
-{
-#ifdef __KERNEL_SSE__
-	int4 a;
-	a.m128 = _mm_set_epi32(w, z, y, x);
-#else
-	int4 a = {x, y, z, w};
-#endif
-
-	return a;
-}
-
-ccl_device_inline uint2 make_uint2(uint x, uint y)
-{
-	uint2 a = {x, y};
-	return a;
-}
-
-ccl_device_inline uint3 make_uint3(uint x, uint y, uint z)
-{
-	uint3 a = {x, y, z};
-	return a;
-}
-
-ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w)
-{
-	uint4 a = {x, y, z, w};
-	return a;
-}
-
-ccl_device_inline float2 make_float2(float x, float y)
-{
-	float2 a = {x, y};
-	return a;
-}
-
-ccl_device_inline float3 make_float3(float x, float y, float z)
-{
-#ifdef __KERNEL_SSE__
-	float3 a;
-	a.m128 = _mm_set_ps(0.0f, z, y, x);
-#else
-	float3 a = {x, y, z, 0.0f};
-#endif
-
-	return a;
-}
-
-ccl_device_inline float4 make_float4(float x, float y, float z, float w)
-{
-#ifdef __KERNEL_SSE__
-	float4 a;
-	a.m128 = _mm_set_ps(w, z, y, x);
-#else
-	float4 a = {x, y, z, w};
-#endif
-
-	return a;
-}
-
-ccl_device_inline int3 make_int3(int i)
-{
-#ifdef __KERNEL_SSE__
-	int3 a;
-	a.m128 = _mm_set1_epi32(i);
-#else
-	int3 a = {i, i, i, i};
-#endif
-
-	return a;
-}
-
-ccl_device_inline int4 make_int4(int i)
-{
-#ifdef __KERNEL_SSE__
-	int4 a;
-	a.m128 = _mm_set1_epi32(i);
-#else
-	int4 a = {i, i, i, i};
-#endif
-
-	return a;
-}
-
-ccl_device_inline float3 make_float3(float f)
-{
-#ifdef __KERNEL_SSE__
-	float3 a;
-	a.m128 = _mm_set1_ps(f);
-#else
-	float3 a = {f, f, f, f};
-#endif
-
-	return a;
-}
-
-ccl_device_inline float4 make_float4(float f)
-{
-#ifdef __KERNEL_SSE__
-	float4 a;
-	a.m128 = _mm_set1_ps(f);
-#else
-	float4 a = {f, f, f, f};
-#endif
-
-	return a;
-}
-
-ccl_device_inline float4 make_float4(const int4& i)
-{
-#ifdef __KERNEL_SSE__
-	float4 a;
-	a.m128 = _mm_cvtepi32_ps(i.m128);
-#else
-	float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
-#endif
-
-	return a;
-}
-
-ccl_device_inline int4 make_int4(const float3& f)
-{
-#ifdef __KERNEL_SSE__
-	int4 a;
-	a.m128 = _mm_cvtps_epi32(f.m128);
-#else
-	int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
-#endif
-
-	return a;
-}
-
-#endif
+#endif  /* __KERNEL_GPU__ */
 
 ccl_device_inline size_t align_up(size_t offset, size_t alignment)
 {
@@ -554,7 +199,7 @@ template<typename T> static inline T decltype_helper(T x) { return x; }
  * ... the compiler optimizes away the temp var */
 #ifdef __GNUC__
 #define CHECK_TYPE(var, type)  {  \
-	TYPEOF(var) *__tmp;         \
+	TYPEOF(var) *__tmp;           \
 	__tmp = (type *)NULL;         \
 	(void)__tmp;                  \
 } (void)0
@@ -576,5 +221,50 @@ template<typename T> static inline T decltype_helper(T x) { return x; }
 
 CCL_NAMESPACE_END
 
+#ifndef __KERNEL_GPU__
+#  include <cassert>
+#  define util_assert(statement)  assert(statement)
+#else
+#  define util_assert(statement)
+#endif
+
+/* Vectorized types declaration. */
+#include "util/util_types_uchar2.h"
+#include "util/util_types_uchar3.h"
+#include "util/util_types_uchar4.h"
+
+#include "util/util_types_int2.h"
+#include "util/util_types_int3.h"
+#include "util/util_types_int4.h"
+
+#include "util/util_types_uint2.h"
+#include "util/util_types_uint3.h"
+#include "util/util_types_uint4.h"
+
+#include "util/util_types_float2.h"
+#include "util/util_types_float3.h"
+#include "util/util_types_float4.h"
+
+#include "util/util_types_vector3.h"
+
+/* Vectorized types implementation. */
+#include "util/util_types_uchar2_impl.h"
+#include "util/util_types_uchar3_impl.h"
+#include "util/util_types_uchar4_impl.h"
+
+#include "util/util_types_int2_impl.h"
+#include "util/util_types_int3_impl.h"
+#include "util/util_types_int4_impl.h"
+
+#include "util/util_types_uint2_impl.h"
+#include "util/util_types_uint3_impl.h"
+#include "util/util_types_uint4_impl.h"
+
+#include "util/util_types_float2_impl.h"
+#include "util/util_types_float3_impl.h"
+#include "util/util_types_float4_impl.h"
+
+#include "util/util_types_vector3_impl.h"
+
 #endif /* __UTIL_TYPES_H__ */
 
diff --git a/intern/cycles/util/util_types_float2.h b/intern/cycles/util/util_types_float2.h
new file mode 100644
index 00000000000..ec7a1f717a1
--- /dev/null
+++ b/intern/cycles/util/util_types_float2.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_FLOAT2_H__
+#define __UTIL_TYPES_FLOAT2_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct float2 {
+	float x, y;
+
+	__forceinline float operator[](int i) const;
+	__forceinline float& operator[](int i);
+};
+
+ccl_device_inline float2 make_float2(float x, float y);
+ccl_device_inline void print_float2(const char *label, const float2& a);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_FLOAT2_H__ */
diff --git a/intern/cycles/util/util_types_float2_impl.h b/intern/cycles/util/util_types_float2_impl.h
new file mode 100644
index 00000000000..782dda195eb
--- /dev/null
+++ b/intern/cycles/util/util_types_float2_impl.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_FLOAT2_IMPL_H__
+#define __UTIL_TYPES_FLOAT2_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+#ifndef __KERNEL_GPU__
+#  include <cstdio>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+__forceinline float float2::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 2);
+	return *(&x + i);
+}
+
+__forceinline float& float2::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 2);
+	return *(&x + i);
+}
+
+ccl_device_inline float2 make_float2(float x, float y)
+{
+	float2 a = {x, y};
+	return a;
+}
+
+ccl_device_inline void print_float2(const char *label, const float2& a)
+{
+	printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y);
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_FLOAT2_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_float3.h b/intern/cycles/util/util_types_float3.h
new file mode 100644
index 00000000000..28146ad04f7
--- /dev/null
+++ b/intern/cycles/util/util_types_float3.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_FLOAT3_H__
+#define __UTIL_TYPES_FLOAT3_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct ccl_try_align(16) float3 {
+#ifdef __KERNEL_SSE__
+	union {
+		__m128 m128;
+		struct { float x, y, z, w; };
+	};
+
+	__forceinline float3();
+	__forceinline float3(const float3& a);
+	__forceinline explicit float3(const __m128& a);
+
+	__forceinline operator const __m128&(void) const;
+	__forceinline operator __m128&(void);
+
+	__forceinline float3& operator =(const float3& a);
+#else  /* __KERNEL_SSE__ */
+	float x, y, z, w;
+#endif  /* __KERNEL_SSE__ */
+
+	__forceinline float operator[](int i) const;
+	__forceinline float& operator[](int i);
+};
+
+ccl_device_inline float3 make_float3(float f);
+ccl_device_inline float3 make_float3(float x, float y, float z);
+ccl_device_inline void print_float3(const char *label, const float3& a);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_FLOAT3_H__ */
diff --git a/intern/cycles/util/util_types_float3_impl.h b/intern/cycles/util/util_types_float3_impl.h
new file mode 100644
index 00000000000..45f61767d3f
--- /dev/null
+++ b/intern/cycles/util/util_types_float3_impl.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_FLOAT3_IMPL_H__
+#define __UTIL_TYPES_FLOAT3_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+#ifndef __KERNEL_GPU__
+#  include <cstdio>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+#ifdef __KERNEL_SSE__
+__forceinline float3::float3()
+{
+}
+
+__forceinline float3::float3(const float3& a)
+        : m128(a.m128)
+{
+}
+
+__forceinline float3::float3(const __m128& a)
+        : m128(a)
+{
+}
+
+__forceinline float3::operator const __m128&(void) const
+{
+	return m128;
+}
+
+__forceinline float3::operator __m128&(void)
+{
+	return m128;
+}
+
+__forceinline float3& float3::operator =(const float3& a)
+{
+	m128 = a.m128;
+	return *this;
+}
+#endif  /* __KERNEL_SSE__ */
+
+__forceinline float float3::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+__forceinline float& float3::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+ccl_device_inline float3 make_float3(float f)
+{
+#ifdef __KERNEL_SSE__
+	float3 a(_mm_set1_ps(f));
+#else
+	float3 a = {f, f, f, f};
+#endif
+	return a;
+}
+
+ccl_device_inline float3 make_float3(float x, float y, float z)
+{
+#ifdef __KERNEL_SSE__
+	float3 a(_mm_set_ps(0.0f, z, y, x));
+#else
+	float3 a = {x, y, z, 0.0f};
+#endif
+	return a;
+}
+
+ccl_device_inline void print_float3(const char *label, const float3& a)
+{
+	printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z);
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_FLOAT3_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_float4.h b/intern/cycles/util/util_types_float4.h
new file mode 100644
index 00000000000..a7d9abe1b95
--- /dev/null
+++ b/intern/cycles/util/util_types_float4.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_FLOAT4_H__
+#define __UTIL_TYPES_FLOAT4_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct int4;
+
+struct ccl_try_align(16) float4 {
+#ifdef __KERNEL_SSE__
+	union {
+		__m128 m128;
+		struct { float x, y, z, w; };
+	};
+
+	__forceinline float4();
+	__forceinline float4(const float4& a);
+	__forceinline explicit float4(const __m128& a);
+
+	__forceinline operator const __m128&(void) const;
+	__forceinline operator __m128&(void);
+
+	__forceinline float4& operator =(const float4& a);
+
+#else  /* __KERNEL_SSE__ */
+	float x, y, z, w;
+#endif  /* __KERNEL_SSE__ */
+
+	__forceinline float operator[](int i) const;
+	__forceinline float& operator[](int i);
+};
+
+ccl_device_inline float4 make_float4(float f);
+ccl_device_inline float4 make_float4(float x, float y, float z, float w);
+ccl_device_inline float4 make_float4(const int4& i);
+ccl_device_inline void print_float4(const char *label, const float4& a);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_FLOAT4_H__ */
diff --git a/intern/cycles/util/util_types_float4_impl.h b/intern/cycles/util/util_types_float4_impl.h
new file mode 100644
index 00000000000..ff3ec4d4ecf
--- /dev/null
+++ b/intern/cycles/util/util_types_float4_impl.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_FLOAT4_IMPL_H__
+#define __UTIL_TYPES_FLOAT4_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+#ifndef __KERNEL_GPU__
+#  include <cstdio>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+#ifdef __KERNEL_SSE__
+__forceinline float4::float4()
+{
+}
+
+__forceinline float4::float4(const float4& a)
+        : m128(a.m128)
+{
+}
+
+__forceinline float4::float4(const __m128& a)
+        : m128(a)
+{
+}
+
+__forceinline float4::operator const __m128&(void) const
+{
+	return m128;
+}
+
+__forceinline float4::operator __m128&(void)
+{
+	return m128;
+}
+
+__forceinline float4& float4::operator =(const float4& a)
+{
+	m128 = a.m128;
+	return *this;
+}
+#endif  /* __KERNEL_SSE__ */
+
+__forceinline float float4::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 4);
+	return *(&x + i);
+}
+
+__forceinline float& float4::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 4);
+	return *(&x + i);
+}
+
+ccl_device_inline float4 make_float4(float f)
+{
+#ifdef __KERNEL_SSE__
+	float4 a(_mm_set1_ps(f));
+#else
+	float4 a = {f, f, f, f};
+#endif
+	return a;
+}
+
+ccl_device_inline float4 make_float4(float x, float y, float z, float w)
+{
+#ifdef __KERNEL_SSE__
+	float4 a(_mm_set_ps(w, z, y, x));
+#else
+	float4 a = {x, y, z, w};
+#endif
+	return a;
+}
+
+ccl_device_inline float4 make_float4(const int4& i)
+{
+#ifdef __KERNEL_SSE__
+	float4 a(_mm_cvtepi32_ps(i.m128));
+#else
+	float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w};
+#endif
+	return a;
+}
+
+ccl_device_inline void print_float4(const char *label, const float4& a)
+{
+	printf("%s: %.8f %.8f %.8f %.8f\n",
+	       label, 
+	       (double)a.x, (double)a.y, (double)a.z, (double)a.w);
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_FLOAT4_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_int2.h b/intern/cycles/util/util_types_int2.h
new file mode 100644
index 00000000000..82e860f89eb
--- /dev/null
+++ b/intern/cycles/util/util_types_int2.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_INT2_H__
+#define __UTIL_TYPES_INT2_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct int2 {
+	int x, y;
+
+	__forceinline int operator[](int i) const;
+	__forceinline int& operator[](int i);
+};
+
+ccl_device_inline int2 make_int2(int x, int y);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_INT2_H__ */
diff --git a/intern/cycles/util/util_types_int2_impl.h b/intern/cycles/util/util_types_int2_impl.h
new file mode 100644
index 00000000000..c7d3942e723
--- /dev/null
+++ b/intern/cycles/util/util_types_int2_impl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_INT2_IMPL_H__
+#define __UTIL_TYPES_INT2_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+int int2::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 2);
+	return *(&x + i);
+}
+
+int& int2::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 2);
+	return *(&x + i);
+}
+
+ccl_device_inline int2 make_int2(int x, int y)
+{
+	int2 a = {x, y};
+	return a;
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_INT2_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_int3.h b/intern/cycles/util/util_types_int3.h
new file mode 100644
index 00000000000..9d43b201c02
--- /dev/null
+++ b/intern/cycles/util/util_types_int3.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_INT3_H__
+#define __UTIL_TYPES_INT3_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct ccl_try_align(16) int3 {
+#ifdef __KERNEL_SSE__
+	union {
+		__m128i m128;
+		struct { int x, y, z, w; };
+	};
+
+	__forceinline int3();
+	__forceinline int3(const int3& a);
+	__forceinline explicit int3(const __m128i& a);
+
+	__forceinline operator const __m128i&(void) const;
+	__forceinline operator __m128i&(void);
+
+	__forceinline int3& operator =(const int3& a);
+#else  /* __KERNEL_SSE__ */
+	int x, y, z, w;
+#endif  /* __KERNEL_SSE__ */
+
+	__forceinline int operator[](int i) const;
+	__forceinline int& operator[](int i);
+};
+
+ccl_device_inline int3 make_int3(int i);
+ccl_device_inline int3 make_int3(int x, int y, int z);
+ccl_device_inline void print_int3(const char *label, const int3& a);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_INT3_H__ */
diff --git a/intern/cycles/util/util_types_int3_impl.h b/intern/cycles/util/util_types_int3_impl.h
new file mode 100644
index 00000000000..ada50c4812c
--- /dev/null
+++ b/intern/cycles/util/util_types_int3_impl.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_INT3_IMPL_H__
+#define __UTIL_TYPES_INT3_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+#ifndef __KERNEL_GPU__
+#  include <cstdio>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+#ifdef __KERNEL_SSE__
+__forceinline int3::int3()
+{
+}
+
+__forceinline int3::int3(const __m128i& a)
+        : m128(a)
+{
+}
+
+__forceinline int3::int3(const int3& a)
+        : m128(a.m128)
+{
+}
+
+__forceinline int3::operator const __m128i&(void) const
+{
+	return m128;
+}
+
+__forceinline int3::operator __m128i&(void)
+{
+	return m128;
+}
+
+__forceinline int3& int3::operator =(const int3& a)
+{
+	m128 = a.m128;
+	return *this;
+}
+#endif  /* __KERNEL_SSE__ */
+
+__forceinline int int3::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+__forceinline int& int3::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+ccl_device_inline int3 make_int3(int i)
+{
+#ifdef __KERNEL_SSE__
+	int3 a(_mm_set1_epi32(i));
+#else
+	int3 a = {i, i, i, i};
+#endif
+	return a;
+}
+
+ccl_device_inline int3 make_int3(int x, int y, int z)
+{
+#ifdef __KERNEL_SSE__
+	int3 a(_mm_set_epi32(0, z, y, x));
+#else
+	int3 a = {x, y, z, 0};
+#endif
+
+	return a;
+}
+
+ccl_device_inline void print_int3(const char *label, const int3& a)
+{
+	printf("%s: %d %d %d\n", label, a.x, a.y, a.z);
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_INT3_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_int4.h b/intern/cycles/util/util_types_int4.h
new file mode 100644
index 00000000000..cdd0ecbdae5
--- /dev/null
+++ b/intern/cycles/util/util_types_int4.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_INT4_H__
+#define __UTIL_TYPES_INT4_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+
+struct float3;
+
+struct ccl_try_align(16) int4 {
+#ifdef __KERNEL_SSE__
+	union {
+		__m128i m128;
+		struct { int x, y, z, w; };
+	};
+
+	__forceinline int4();
+	__forceinline int4(const int4& a);
+	__forceinline explicit int4(const __m128i& a);
+
+	__forceinline operator const __m128i&(void) const;
+	__forceinline operator __m128i&(void);
+
+	__forceinline int4& operator=(const int4& a);
+#else  /* __KERNEL_SSE__ */
+	int x, y, z, w;
+#endif  /* __KERNEL_SSE__ */
+
+	__forceinline int operator[](int i) const;
+	__forceinline int& operator[](int i);
+};
+
+ccl_device_inline int4 make_int4(int i);
+ccl_device_inline int4 make_int4(int x, int y, int z, int w);
+ccl_device_inline int4 make_int4(const float3& f);
+ccl_device_inline void print_int4(const char *label, const int4& a);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_INT4_H__ */
diff --git a/intern/cycles/util/util_types_int4_impl.h b/intern/cycles/util/util_types_int4_impl.h
new file mode 100644
index 00000000000..07cdc88f2dc
--- /dev/null
+++ b/intern/cycles/util/util_types_int4_impl.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_INT4_IMPL_H__
+#define __UTIL_TYPES_INT4_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+#ifndef __KERNEL_GPU__
+#  include <cstdio>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+#ifdef __KERNEL_SSE__
+__forceinline int4::int4()
+{
+}
+
+__forceinline int4::int4(const int4& a)
+        : m128(a.m128)
+{
+}
+
+__forceinline int4::int4(const __m128i& a)
+        : m128(a)
+{
+}
+
+__forceinline int4::operator const __m128i&(void) const
+{
+	return m128;
+}
+
+__forceinline int4::operator __m128i&(void)
+{
+	return m128;
+}
+
+__forceinline int4& int4::operator=(const int4& a)
+{
+	m128 = a.m128;
+	return *this;
+}
+#endif  /* __KERNEL_SSE__ */
+
+__forceinline int int4::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 4);
+	return *(&x + i);
+}
+
+__forceinline int& int4::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 4);
+	return *(&x + i);
+}
+
+ccl_device_inline int4 make_int4(int i)
+{
+#ifdef __KERNEL_SSE__
+	int4 a(_mm_set1_epi32(i));
+#else
+	int4 a = {i, i, i, i};
+#endif
+	return a;
+}
+
+ccl_device_inline int4 make_int4(int x, int y, int z, int w)
+{
+#ifdef __KERNEL_SSE__
+	int4 a(_mm_set_epi32(w, z, y, x));
+#else
+	int4 a = {x, y, z, w};
+#endif
+	return a;
+}
+
+ccl_device_inline int4 make_int4(const float3& f)
+{
+#ifdef __KERNEL_SSE__
+	int4 a(_mm_cvtps_epi32(f.m128));
+#else
+	int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
+#endif
+	return a;
+}
+
+ccl_device_inline void print_int4(const char *label, const int4& a)
+{
+	printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w);
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_INT4_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uchar2.h b/intern/cycles/util/util_types_uchar2.h
new file mode 100644
index 00000000000..f618a2234ca
--- /dev/null
+++ b/intern/cycles/util/util_types_uchar2.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UCHAR2_H__
+#define __UTIL_TYPES_UCHAR2_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct uchar2 {
+	uchar x, y;
+
+	__forceinline uchar operator[](int i) const;
+	__forceinline uchar& operator[](int i);
+};
+
+ccl_device_inline uchar2 make_uchar2(uchar x, uchar y);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UCHAR2_H__ */
diff --git a/intern/cycles/util/util_types_uchar2_impl.h b/intern/cycles/util/util_types_uchar2_impl.h
new file mode 100644
index 00000000000..d5f196d0ce0
--- /dev/null
+++ b/intern/cycles/util/util_types_uchar2_impl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UCHAR2_IMPL_H__
+#define __UTIL_TYPES_UCHAR2_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+uchar uchar2::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 2);
+	return *(&x + i);
+}
+
+uchar& uchar2::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 2);
+	return *(&x + i);
+}
+
+ccl_device_inline uchar2 make_uchar2(uchar x, uchar y)
+{
+	uchar2 a = {x, y};
+	return a;
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UCHAR2_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uchar3.h b/intern/cycles/util/util_types_uchar3.h
new file mode 100644
index 00000000000..1e3644e6fd6
--- /dev/null
+++ b/intern/cycles/util/util_types_uchar3.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UCHAR3_H__
+#define __UTIL_TYPES_UCHAR3_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct uchar3 {
+	uchar x, y, z;
+
+	__forceinline uchar operator[](int i) const;
+	__forceinline uchar& operator[](int i);
+};
+
+ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UCHAR3_H__ */
diff --git a/intern/cycles/util/util_types_uchar3_impl.h b/intern/cycles/util/util_types_uchar3_impl.h
new file mode 100644
index 00000000000..611021efb7f
--- /dev/null
+++ b/intern/cycles/util/util_types_uchar3_impl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UCHAR3_IMPL_H__
+#define __UTIL_TYPES_UCHAR3_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+uchar uchar3::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+uchar& uchar3::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z)
+{
+	uchar3 a = {x, y, z};
+	return a;
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UCHAR3_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uchar4.h b/intern/cycles/util/util_types_uchar4.h
new file mode 100644
index 00000000000..3802cebbfb9
--- /dev/null
+++ b/intern/cycles/util/util_types_uchar4.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UCHAR4_H__
+#define __UTIL_TYPES_UCHAR4_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct uchar4 {
+	uchar x, y, z, w;
+
+	__forceinline uchar operator[](int i) const;
+	__forceinline uchar& operator[](int i);
+};
+
+ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UCHAR4_H__ */
diff --git a/intern/cycles/util/util_types_uchar4_impl.h b/intern/cycles/util/util_types_uchar4_impl.h
new file mode 100644
index 00000000000..03039f60c54
--- /dev/null
+++ b/intern/cycles/util/util_types_uchar4_impl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UCHAR4_IMPL_H__
+#define __UTIL_TYPES_UCHAR4_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+uchar uchar4::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 4);
+	return *(&x + i);
+}
+
+uchar& uchar4::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 4);
+	return *(&x + i);
+}
+
+ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w)
+{
+	uchar4 a = {x, y, z, w};
+	return a;
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UCHAR4_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uint2.h b/intern/cycles/util/util_types_uint2.h
new file mode 100644
index 00000000000..c4a31899614
--- /dev/null
+++ b/intern/cycles/util/util_types_uint2.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UINT2_H__
+#define __UTIL_TYPES_UINT2_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct uint2 {
+	uint x, y;
+
+	__forceinline uint operator[](uint i) const;
+	__forceinline uint& operator[](uint i);
+};
+
+ccl_device_inline uint2 make_uint2(uint x, uint y);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UINT2_H__ */
diff --git a/intern/cycles/util/util_types_uint2_impl.h b/intern/cycles/util/util_types_uint2_impl.h
new file mode 100644
index 00000000000..b50ffa2667f
--- /dev/null
+++ b/intern/cycles/util/util_types_uint2_impl.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UINT2_IMPL_H__
+#define __UTIL_TYPES_UINT2_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+__forceinline uint uint2::operator[](uint i) const
+{
+	util_assert(i < 2);
+	return *(&x + i);
+}
+
+__forceinline uint& uint2::operator[](uint i)
+{
+	util_assert(i < 2);
+	return *(&x + i);
+}
+
+ccl_device_inline uint2 make_uint2(uint x, uint y)
+{
+	uint2 a = {x, y};
+	return a;
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UINT2_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uint3.h b/intern/cycles/util/util_types_uint3.h
new file mode 100644
index 00000000000..aeeecd2df06
--- /dev/null
+++ b/intern/cycles/util/util_types_uint3.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UINT3_H__
+#define __UTIL_TYPES_UINT3_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct uint3 {
+	uint x, y, z;
+
+	__forceinline uint operator[](uint i) const;
+	__forceinline uint& operator[](uint i);
+};
+
+ccl_device_inline uint3 make_uint3(uint x, uint y, uint z);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UINT3_H__ */
diff --git a/intern/cycles/util/util_types_uint3_impl.h b/intern/cycles/util/util_types_uint3_impl.h
new file mode 100644
index 00000000000..26005d5baff
--- /dev/null
+++ b/intern/cycles/util/util_types_uint3_impl.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UINT3_IMPL_H__
+#define __UTIL_TYPES_UINT3_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+__forceinline uint uint3::operator[](uint i) const
+{
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+__forceinline uint& uint3::operator[](uint i)
+{
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+ccl_device_inline uint3 make_uint3(uint x, uint y, uint z)
+{
+	uint3 a = {x, y, z};
+	return a;
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UINT3_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_uint4.h b/intern/cycles/util/util_types_uint4.h
new file mode 100644
index 00000000000..2d3a7bb85e4
--- /dev/null
+++ b/intern/cycles/util/util_types_uint4.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UINT4_H__
+#define __UTIL_TYPES_UINT4_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+struct uint4 {
+	uint x, y, z, w;
+
+	__forceinline uint operator[](uint i) const;
+	__forceinline uint& operator[](uint i);
+};
+
+ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w);
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UINT4_H__ */
diff --git a/intern/cycles/util/util_types_uint4_impl.h b/intern/cycles/util/util_types_uint4_impl.h
new file mode 100644
index 00000000000..6d48131a446
--- /dev/null
+++ b/intern/cycles/util/util_types_uint4_impl.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_UINT4_IMPL_H__
+#define __UTIL_TYPES_UINT4_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+__forceinline uint uint4::operator[](uint i) const
+{
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+__forceinline uint& uint4::operator[](uint i)
+{
+	util_assert(i < 3);
+	return *(&x + i);
+}
+
+ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w)
+{
+	uint4 a = {x, y, z, w};
+	return a;
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_UINT4_IMPL_H__ */
diff --git a/intern/cycles/util/util_types_vector3.h b/intern/cycles/util/util_types_vector3.h
new file mode 100644
index 00000000000..12acf9dc959
--- /dev/null
+++ b/intern/cycles/util/util_types_vector3.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_VECTOR3_H__
+#define __UTIL_TYPES_VECTOR3_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+template<typename T>
+class vector3
+{
+public:
+	T x, y, z;
+
+	__forceinline vector3();
+	__forceinline vector3(const T& a);
+	__forceinline vector3(const T& x, const T& y, const T& z);
+};
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_VECTOR3_H__ */
diff --git a/intern/cycles/util/util_types_vector3_impl.h b/intern/cycles/util/util_types_vector3_impl.h
new file mode 100644
index 00000000000..2f6b8368540
--- /dev/null
+++ b/intern/cycles/util/util_types_vector3_impl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2011-2017 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_TYPES_VECTOR3_IMPL_H__
+#define __UTIL_TYPES_VECTOR3_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+template<typename T>
+ccl_always_inline vector3<T>::vector3()
+{
+}
+
+template<typename T>
+ccl_always_inline vector3<T>::vector3(const T& a)
+        : x(a), y(a), z(a)
+{
+}
+
+template<typename T>
+ccl_always_inline vector3<T>::vector3(const T& x, const T& y, const T& z)
+        : x(x), y(y), z(z)
+{
+}
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_VECTOR3_IMPL_H__ */
diff --git a/source/blender/alembic/intern/abc_archive.cc b/source/blender/alembic/intern/abc_archive.cc
index 5f8fc1a3739..cdbda0ace69 100644
--- a/source/blender/alembic/intern/abc_archive.cc
+++ b/source/blender/alembic/intern/abc_archive.cc
@@ -28,6 +28,8 @@
 #  include "utfconv.h"
 #endif
 
+#include <fstream>
+
 using Alembic::Abc::Exception;
 using Alembic::Abc::ErrorHandler;
 using Alembic::Abc::IArchive;
@@ -38,8 +40,9 @@ static IArchive open_archive(const std::string &filename,
                              const std::vector<std::istream *> &input_streams,
                              bool &is_hdf5)
 {
+	is_hdf5 = false;
+
 	try {
-		is_hdf5 = false;
 		Alembic::AbcCoreOgawa::ReadArchive archive_reader(input_streams);
 
 		return IArchive(archive_reader(filename),
@@ -63,6 +66,27 @@ static IArchive open_archive(const std::string &filename,
 			return IArchive();
 		}
 #else
+		/* Inspect the file to see whether it's really a HDF5 file. */
+		char header[4];  /* char(0x89) + "HDF" */
+		std::ifstream the_file(filename, std::ios::in | std::ios::binary);
+		if (!the_file) {
+			std::cerr << "Unable to open " << filename << std::endl;
+		}
+		else if (!the_file.read(header, sizeof(header))) {
+			std::cerr << "Unable to read from " << filename << std::endl;
+		}
+		else if (strncmp(header + 1, "HDF", 3)) {
+			std::cerr << filename << " has an unknown file format, unable to read." << std::endl;
+		}
+		else {
+			is_hdf5 = true;
+			std::cerr << filename << " is in the obsolete HDF5 format, unable to read." << std::endl;
+		}
+
+		if (the_file.is_open()) {
+			the_file.close();
+		}
+
 		return IArchive();
 #endif
 	}
@@ -83,16 +107,20 @@ ArchiveReader::ArchiveReader(const char *filename)
 
 	m_streams.push_back(&m_infile);
 
-	bool is_hdf5;
-	m_archive = open_archive(filename, m_streams, is_hdf5);
+	m_archive = open_archive(filename, m_streams, m_is_hdf5);
 
 	/* We can't open an HDF5 file from a stream, so close it. */
-	if (is_hdf5) {
+	if (m_is_hdf5) {
 		m_infile.close();
 		m_streams.clear();
 	}
 }
 
+bool ArchiveReader::is_hdf5() const
+{
+	return m_is_hdf5;
+}
+
 bool ArchiveReader::valid() const
 {
 	return m_archive.valid();
diff --git a/source/blender/alembic/intern/abc_archive.h b/source/blender/alembic/intern/abc_archive.h
index d412574b736..84309fbc9df 100644
--- a/source/blender/alembic/intern/abc_archive.h
+++ b/source/blender/alembic/intern/abc_archive.h
@@ -44,12 +44,21 @@ class ArchiveReader {
 	Alembic::Abc::IArchive m_archive;
 	std::ifstream m_infile;
 	std::vector<std::istream *> m_streams;
+	bool m_is_hdf5;
 
 public:
 	explicit ArchiveReader(const char *filename);
 
 	bool valid() const;
 
+	/**
+	 * Returns true when either Blender is compiled with HDF5 support and
+	 * the archive was succesfully opened (valid() will also return true),
+	 * or when Blender was built without HDF5 support but a HDF5 file was
+	 * detected (valid() will return false).
+	 */
+	bool is_hdf5() const;
+
 	Alembic::Abc::IObject getTop();
 };
 
diff --git a/source/blender/alembic/intern/abc_camera.cc b/source/blender/alembic/intern/abc_camera.cc
index 33344f6c9c5..4f70b2a972c 100644
--- a/source/blender/alembic/intern/abc_camera.cc
+++ b/source/blender/alembic/intern/abc_camera.cc
@@ -117,10 +117,11 @@ bool AbcCameraReader::valid() const
 	return m_schema.valid();
 }
 
-void AbcCameraReader::readObjectData(Main *bmain, const ISampleSelector &sample_sel)
+void AbcCameraReader::readObjectData(Main *bmain, float time)
 {
 	Camera *bcam = static_cast<Camera *>(BKE_camera_add(bmain, m_data_name.c_str()));
 
+	ISampleSelector sample_sel(time);
 	CameraSample cam_sample;
 	m_schema.get(cam_sample, sample_sel);
 
diff --git a/source/blender/alembic/intern/abc_camera.h b/source/blender/alembic/intern/abc_camera.h
index 0201dd9d6e1..1c4b7f77949 100644
--- a/source/blender/alembic/intern/abc_camera.h
+++ b/source/blender/alembic/intern/abc_camera.h
@@ -55,7 +55,7 @@ public:
 
 	bool valid() const;
 
-	void readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel);
+	void readObjectData(Main *bmain, float time);
 };
 
 #endif  /* __ABC_CAMERA_H__ */
diff --git a/source/blender/alembic/intern/abc_curves.cc b/source/blender/alembic/intern/abc_curves.cc
index d85d8df4d77..bf485c7ef61 100644
--- a/source/blender/alembic/intern/abc_curves.cc
+++ b/source/blender/alembic/intern/abc_curves.cc
@@ -211,7 +211,7 @@ bool AbcCurveReader::valid() const
 	return m_curves_schema.valid();
 }
 
-void AbcCurveReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel)
+void AbcCurveReader::readObjectData(Main *bmain, float time)
 {
 	Curve *cu = BKE_curve_add(bmain, m_data_name.c_str(), OB_CURVE);
 
@@ -219,6 +219,8 @@ void AbcCurveReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSele
 	cu->actvert = CU_ACT_NONE;
 	cu->resolu = 1;
 
+	const ISampleSelector sample_sel(time);
+
 	ICompoundProperty user_props = m_curves_schema.getUserProperties();
 	if (user_props) {
 		const PropertyHeader *header = user_props.getPropertyHeader(ABC_CURVE_RESOLUTION_U_PROPNAME);
@@ -387,11 +389,9 @@ void read_curve_sample(Curve *cu, const ICurvesSchema &schema, const ISampleSele
  * object directly and create a new DerivedMesh from that. Also we might need to
  * create new or delete existing NURBS in the curve.
  */
-DerivedMesh *AbcCurveReader::read_derivedmesh(DerivedMesh * /*dm*/,
-                                              const ISampleSelector &sample_sel,
-                                              int /*read_flag*/,
-                                              const char ** /*err_str*/)
+DerivedMesh *AbcCurveReader::read_derivedmesh(DerivedMesh * /*dm*/, const float time, int /*read_flag*/, const char ** /*err_str*/)
 {
+	ISampleSelector sample_sel(time);
 	const ICurvesSchema::Sample sample = m_curves_schema.getValue(sample_sel);
 
 	const P3fArraySamplePtr &positions = sample.getPositions();
diff --git a/source/blender/alembic/intern/abc_curves.h b/source/blender/alembic/intern/abc_curves.h
index 1ad6d0bf278..71b0d205820 100644
--- a/source/blender/alembic/intern/abc_curves.h
+++ b/source/blender/alembic/intern/abc_curves.h
@@ -55,11 +55,8 @@ public:
 
 	bool valid() const;
 
-	void readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel);
-	DerivedMesh *read_derivedmesh(DerivedMesh *dm,
-	                              const Alembic::Abc::ISampleSelector &sample_sel,
-	                              int read_flag,
-	                              const char **err_str);
+	void readObjectData(Main *bmain, float time);
+	DerivedMesh *read_derivedmesh(DerivedMesh *, const float time, int read_flag, const char **err_str);
 };
 
 /* ************************************************************************** */
diff --git a/source/blender/alembic/intern/abc_mesh.cc b/source/blender/alembic/intern/abc_mesh.cc
index fc3ad239c5d..a0b175dbcdc 100644
--- a/source/blender/alembic/intern/abc_mesh.cc
+++ b/source/blender/alembic/intern/abc_mesh.cc
@@ -1020,15 +1020,17 @@ bool AbcMeshReader::valid() const
 	return m_schema.valid();
 }
 
-void AbcMeshReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel)
+void AbcMeshReader::readObjectData(Main *bmain, float time)
 {
 	Mesh *mesh = BKE_mesh_add(bmain, m_data_name.c_str());
 
 	m_object = BKE_object_add_only_object(bmain, OB_MESH, m_object_name.c_str());
 	m_object->data = mesh;
 
+	const ISampleSelector sample_sel(time);
+
 	DerivedMesh *dm = CDDM_from_mesh(mesh);
-	DerivedMesh *ndm = this->read_derivedmesh(dm, sample_sel, MOD_MESHSEQ_READ_ALL, NULL);
+	DerivedMesh *ndm = this->read_derivedmesh(dm, time, MOD_MESHSEQ_READ_ALL, NULL);
 
 	if (ndm != dm) {
 		dm->release(dm);
@@ -1047,11 +1049,9 @@ void AbcMeshReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelec
 	}
 }
 
-DerivedMesh *AbcMeshReader::read_derivedmesh(DerivedMesh *dm,
-                                             const ISampleSelector &sample_sel,
-                                             int read_flag,
-                                             const char **err_str)
+DerivedMesh *AbcMeshReader::read_derivedmesh(DerivedMesh *dm, const float time, int read_flag, const char **err_str)
 {
+	ISampleSelector sample_sel(time);
 	const IPolyMeshSchema::Sample sample = m_schema.getValue(sample_sel);
 
 	const P3fArraySamplePtr &positions = sample.getPositions();
@@ -1091,7 +1091,7 @@ DerivedMesh *AbcMeshReader::read_derivedmesh(DerivedMesh *dm,
 	}
 
 	CDStreamConfig config = get_config(new_dm ? new_dm : dm);
-	config.time = sample_sel.getRequestedTime();
+	config.time = time;
 
 	bool do_normals = false;
 	read_mesh_sample(&settings, m_schema, sample_sel, config, do_normals);
@@ -1238,7 +1238,7 @@ bool AbcSubDReader::valid() const
 	return m_schema.valid();
 }
 
-void AbcSubDReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel)
+void AbcSubDReader::readObjectData(Main *bmain, float time)
 {
 	Mesh *mesh = BKE_mesh_add(bmain, m_data_name.c_str());
 
@@ -1246,7 +1246,7 @@ void AbcSubDReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelec
 	m_object->data = mesh;
 
 	DerivedMesh *dm = CDDM_from_mesh(mesh);
-	DerivedMesh *ndm = this->read_derivedmesh(dm, sample_sel, MOD_MESHSEQ_READ_ALL, NULL);
+	DerivedMesh *ndm = this->read_derivedmesh(dm, time, MOD_MESHSEQ_READ_ALL, NULL);
 
 	if (ndm != dm) {
 		dm->release(dm);
@@ -1254,6 +1254,7 @@ void AbcSubDReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelec
 
 	DM_to_mesh(ndm, mesh, m_object, CD_MASK_MESH, true);
 
+	const ISampleSelector sample_sel(time);
 	const ISubDSchema::Sample sample = m_schema.getValue(sample_sel);
 	Int32ArraySamplePtr indices = sample.getCreaseIndices();
 	Alembic::Abc::FloatArraySamplePtr sharpnesses = sample.getCreaseSharpnesses();
@@ -1284,11 +1285,9 @@ void AbcSubDReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelec
 	}
 }
 
-DerivedMesh *AbcSubDReader::read_derivedmesh(DerivedMesh *dm,
-                                             const ISampleSelector &sample_sel,
-                                             int read_flag,
-                                             const char **err_str)
+DerivedMesh *AbcSubDReader::read_derivedmesh(DerivedMesh *dm, const float time, int read_flag, const char **err_str)
 {
+	ISampleSelector sample_sel(time);
 	const ISubDSchema::Sample sample = m_schema.getValue(sample_sel);
 
 	const P3fArraySamplePtr &positions = sample.getPositions();
@@ -1328,7 +1327,7 @@ DerivedMesh *AbcSubDReader::read_derivedmesh(DerivedMesh *dm,
 
 	/* Only read point data when streaming meshes, unless we need to create new ones. */
 	CDStreamConfig config = get_config(new_dm ? new_dm : dm);
-	config.time = sample_sel.getRequestedTime();
+	config.time = time;
 	read_subd_sample(&settings, m_schema, sample_sel, config);
 
 	if (new_dm) {
diff --git a/source/blender/alembic/intern/abc_mesh.h b/source/blender/alembic/intern/abc_mesh.h
index 1b67c80cc72..64a3109232c 100644
--- a/source/blender/alembic/intern/abc_mesh.h
+++ b/source/blender/alembic/intern/abc_mesh.h
@@ -100,12 +100,9 @@ public:
 
 	bool valid() const;
 
-	void readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel);
+	void readObjectData(Main *bmain, float time);
 
-	DerivedMesh *read_derivedmesh(DerivedMesh *dm,
-	                              const Alembic::Abc::ISampleSelector &sample_sel,
-	                              int read_flag,
-	                              const char **err_str);
+	DerivedMesh *read_derivedmesh(DerivedMesh *dm, const float time, int read_flag, const char **err_str);
 
 private:
 	void readFaceSetsSample(Main *bmain, Mesh *mesh, size_t poly_start,
@@ -124,11 +121,8 @@ public:
 
 	bool valid() const;
 
-	void readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel);
-	DerivedMesh *read_derivedmesh(DerivedMesh *dm,
-	                              const Alembic::Abc::ISampleSelector &sample_sel,
-	                              int read_flag,
-	                              const char **err_str);
+	void readObjectData(Main *bmain, float time);
+	DerivedMesh *read_derivedmesh(DerivedMesh *dm, const float time, int read_flag, const char **err_str);
 };
 
 /* ************************************************************************** */
diff --git a/source/blender/alembic/intern/abc_nurbs.cc b/source/blender/alembic/intern/abc_nurbs.cc
index eaef06fd6d1..d0b9561f679 100644
--- a/source/blender/alembic/intern/abc_nurbs.cc
+++ b/source/blender/alembic/intern/abc_nurbs.cc
@@ -239,7 +239,7 @@ static bool set_knots(const FloatArraySamplePtr &knots, float *&nu_knots)
 	return true;
 }
 
-void AbcNurbsReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel)
+void AbcNurbsReader::readObjectData(Main *bmain, float time)
 {
 	Curve *cu = static_cast<Curve *>(BKE_curve_add(bmain, "abc_curve", OB_SURF));
 	cu->actvert = CU_ACT_NONE;
@@ -253,6 +253,7 @@ void AbcNurbsReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSele
 		nu->resolu = cu->resolu;
 		nu->resolv = cu->resolv;
 
+		const ISampleSelector sample_sel(time);
 		const INuPatchSchema &schema = it->first;
 		const INuPatchSchema::Sample smp = schema.getValue(sample_sel);
 
diff --git a/source/blender/alembic/intern/abc_nurbs.h b/source/blender/alembic/intern/abc_nurbs.h
index abe460a8988..1b2e7a8391f 100644
--- a/source/blender/alembic/intern/abc_nurbs.h
+++ b/source/blender/alembic/intern/abc_nurbs.h
@@ -54,7 +54,7 @@ public:
 
 	bool valid() const;
 
-	void readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel);
+	void readObjectData(Main *bmain, float time);
 
 private:
 	void getNurbsPatches(const Alembic::Abc::IObject &obj);
diff --git a/source/blender/alembic/intern/abc_object.cc b/source/blender/alembic/intern/abc_object.cc
index 972438ef3a6..ce0f9225228 100644
--- a/source/blender/alembic/intern/abc_object.cc
+++ b/source/blender/alembic/intern/abc_object.cc
@@ -159,15 +159,6 @@ void AbcObjectReader::object(Object *ob)
 	m_object = ob;
 }
 
-DerivedMesh *AbcObjectReader::read_derivedmesh(DerivedMesh *dm,
-                                               const Alembic::Abc::ISampleSelector &UNUSED(sample_sel),
-                                               int UNUSED(read_flag),
-                                               const char **UNUSED(err_str))
-{
-	return dm;
-}
-
-
 static Imath::M44d blend_matrices(const Imath::M44d &m0, const Imath::M44d &m1, const float weight)
 {
 	float mat0[4][4], mat1[4][4], ret[4][4];
@@ -223,6 +214,14 @@ Imath::M44d get_matrix(const IXformSchema &schema, const float time)
 	return s0.getMatrix();
 }
 
+DerivedMesh *AbcObjectReader::read_derivedmesh(DerivedMesh *dm,
+                                                       const float UNUSED(time),
+                                                       int UNUSED(read_flag),
+                                                       const char **UNUSED(err_str))
+{
+	return dm;
+}
+
 void AbcObjectReader::setupObjectTransform(const float time)
 {
 	bool is_constant = false;
diff --git a/source/blender/alembic/intern/abc_object.h b/source/blender/alembic/intern/abc_object.h
index a9dbaa89c97..ade232b11e7 100644
--- a/source/blender/alembic/intern/abc_object.h
+++ b/source/blender/alembic/intern/abc_object.h
@@ -170,12 +170,12 @@ public:
 
 	virtual bool valid() const = 0;
 
-	virtual void readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel) = 0;
+	virtual void readObjectData(Main *bmain, float time) = 0;
 
-	DerivedMesh *read_derivedmesh(DerivedMesh *dm,
-	                              const Alembic::Abc::ISampleSelector &sample_sel,
-	                              int read_flag,
-	                              const char **err_str);
+	virtual DerivedMesh *read_derivedmesh(DerivedMesh *dm,
+	                                      const float time,
+	                                      int read_flag,
+	                                      const char **err_str);
 
 	/** Reads the object matrix and sets up an object transform if animated. */
 	void setupObjectTransform(const float time);
diff --git a/source/blender/alembic/intern/abc_points.cc b/source/blender/alembic/intern/abc_points.cc
index d5ecf16b793..fc84759b1d9 100644
--- a/source/blender/alembic/intern/abc_points.cc
+++ b/source/blender/alembic/intern/abc_points.cc
@@ -151,12 +151,12 @@ bool AbcPointsReader::valid() const
 	return m_schema.valid();
 }
 
-void AbcPointsReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel)
+void AbcPointsReader::readObjectData(Main *bmain, float time)
 {
 	Mesh *mesh = BKE_mesh_add(bmain, m_data_name.c_str());
 
 	DerivedMesh *dm = CDDM_from_mesh(mesh);
-	DerivedMesh *ndm = this->read_derivedmesh(dm, sample_sel, 0, NULL);
+	DerivedMesh *ndm = this->read_derivedmesh(dm, time, 0, NULL);
 
 	if (ndm != dm) {
 		dm->release(dm);
@@ -178,7 +178,8 @@ void AbcPointsReader::readObjectData(Main *bmain, const Alembic::Abc::ISampleSel
 
 void read_points_sample(const IPointsSchema &schema,
                         const ISampleSelector &selector,
-                        CDStreamConfig &config)
+                        CDStreamConfig &config,
+                        float time)
 {
 	Alembic::AbcGeom::IPointsSchema::Sample sample = schema.getValue(selector);
 
@@ -188,7 +189,7 @@ void read_points_sample(const IPointsSchema &schema,
 	N3fArraySamplePtr vnormals;
 
 	if (has_property(prop, "N")) {
-		const Alembic::Util::uint32_t itime = static_cast<Alembic::Util::uint32_t>(selector.getRequestedTime());
+		const Alembic::Util::uint32_t itime = static_cast<Alembic::Util::uint32_t>(time);
 		const IN3fArrayProperty &normals_prop = IN3fArrayProperty(prop, "N", itime);
 
 		if (normals_prop) {
@@ -199,11 +200,9 @@ void read_points_sample(const IPointsSchema &schema,
 	read_mverts(config.mvert, positions, vnormals);
 }
 
-DerivedMesh *AbcPointsReader::read_derivedmesh(DerivedMesh *dm,
-                                               const ISampleSelector &sample_sel,
-                                               int /*read_flag*/,
-                                               const char ** /*err_str*/)
+DerivedMesh *AbcPointsReader::read_derivedmesh(DerivedMesh *dm, const float time, int /*read_flag*/, const char ** /*err_str*/)
 {
+	ISampleSelector sample_sel(time);
 	const IPointsSchema::Sample sample = m_schema.getValue(sample_sel);
 
 	const P3fArraySamplePtr &positions = sample.getPositions();
@@ -215,7 +214,7 @@ DerivedMesh *AbcPointsReader::read_derivedmesh(DerivedMesh *dm,
 	}
 
 	CDStreamConfig config = get_config(new_dm ? new_dm : dm);
-	read_points_sample(m_schema, sample_sel, config);
+	read_points_sample(m_schema, sample_sel, config, time);
 
 	return new_dm ? new_dm : dm;
 }
diff --git a/source/blender/alembic/intern/abc_points.h b/source/blender/alembic/intern/abc_points.h
index 1f74351d302..cb68dbca4d5 100644
--- a/source/blender/alembic/intern/abc_points.h
+++ b/source/blender/alembic/intern/abc_points.h
@@ -59,16 +59,14 @@ public:
 
 	bool valid() const;
 
-	void readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel);
+	void readObjectData(Main *bmain, float time);
 
-	DerivedMesh *read_derivedmesh(DerivedMesh *dm,
-	                              const Alembic::Abc::ISampleSelector &sample_sel,
-	                              int read_flag,
-	                              const char **err_str);
+	DerivedMesh *read_derivedmesh(DerivedMesh *dm, const float time, int read_flag, const char **err_str);
 };
 
 void read_points_sample(const Alembic::AbcGeom::IPointsSchema &schema,
                         const Alembic::AbcGeom::ISampleSelector &selector,
-                        CDStreamConfig &config);
+                        CDStreamConfig &config,
+                        float time);
 
 #endif  /* __ABC_POINTS_H__ */
diff --git a/source/blender/alembic/intern/abc_transform.cc b/source/blender/alembic/intern/abc_transform.cc
index c48be361877..6e218cac429 100644
--- a/source/blender/alembic/intern/abc_transform.cc
+++ b/source/blender/alembic/intern/abc_transform.cc
@@ -36,7 +36,6 @@ extern "C" {
 
 using Alembic::AbcGeom::OObject;
 using Alembic::AbcGeom::OXform;
-using Alembic::Abc::ISampleSelector;
 
 /* ************************************************************************** */
 
@@ -154,7 +153,7 @@ bool AbcEmptyReader::valid() const
 	return m_schema.valid();
 }
 
-void AbcEmptyReader::readObjectData(Main *bmain, const ISampleSelector &UNUSED(sample_sel))
+void AbcEmptyReader::readObjectData(Main *bmain, float /*time*/)
 {
 	m_object = BKE_object_add_only_object(bmain, OB_EMPTY,
 	                                      m_object_name.c_str());
diff --git a/source/blender/alembic/intern/abc_transform.h b/source/blender/alembic/intern/abc_transform.h
index f2e2edf8010..59388e155dc 100644
--- a/source/blender/alembic/intern/abc_transform.h
+++ b/source/blender/alembic/intern/abc_transform.h
@@ -69,7 +69,7 @@ public:
 
 	bool valid() const;
 
-	void readObjectData(Main *bmain, const Alembic::Abc::ISampleSelector &sample_sel);
+	void readObjectData(Main *bmain, float time);
 };
 
 #endif  /* __ABC_TRANSFORM_H__ */
diff --git a/source/blender/alembic/intern/alembic_capi.cc b/source/blender/alembic/intern/alembic_capi.cc
index 058c047e7d5..4aa4b8d548f 100644
--- a/source/blender/alembic/intern/alembic_capi.cc
+++ b/source/blender/alembic/intern/alembic_capi.cc
@@ -613,6 +613,7 @@ static std::pair<bool, AbcObjectReader *> visit_object(
 enum {
 	ABC_NO_ERROR = 0,
 	ABC_ARCHIVE_FAIL,
+	ABC_UNSUPPORTED_HDF5,
 };
 
 struct ImportJobData {
@@ -678,8 +679,12 @@ static void import_startjob(void *user_data, short *stop, short *do_update, floa
 	ArchiveReader *archive = new ArchiveReader(data->filename);
 
 	if (!archive->valid()) {
-		delete archive;
+#ifndef WITH_ALEMBIC_HDF5
+		data->error_code = archive->is_hdf5() ? ABC_UNSUPPORTED_HDF5 : ABC_ARCHIVE_FAIL;
+#else
 		data->error_code = ABC_ARCHIVE_FAIL;
+#endif
+		delete archive;
 		return;
 	}
 
@@ -723,13 +728,12 @@ static void import_startjob(void *user_data, short *stop, short *do_update, floa
 	chrono_t min_time = std::numeric_limits<chrono_t>::max();
 	chrono_t max_time = std::numeric_limits<chrono_t>::min();
 
-	ISampleSelector sample_sel(0.0f);
 	std::vector<AbcObjectReader *>::iterator iter;
 	for (iter = data->readers.begin(); iter != data->readers.end(); ++iter) {
 		AbcObjectReader *reader = *iter;
 
 		if (reader->valid()) {
-			reader->readObjectData(data->bmain, sample_sel);
+			reader->readObjectData(data->bmain, 0.0f);
 
 			min_time = std::min(min_time, reader->minTime());
 			max_time = std::max(max_time, reader->maxTime());
@@ -861,6 +865,9 @@ static void import_endjob(void *user_data)
 		case ABC_ARCHIVE_FAIL:
 			WM_report(RPT_ERROR, "Could not open Alembic archive for reading! See console for detail.");
 			break;
+		case ABC_UNSUPPORTED_HDF5:
+			WM_report(RPT_ERROR, "Alembic archive in obsolete HDF5 format is not supported.");
+			break;
 	}
 
 	WM_main_add_notifier(NC_SCENE | ND_FRAME, data->scene);
@@ -955,7 +962,6 @@ DerivedMesh *ABC_read_mesh(CacheReader *reader,
 	}
 
 	const ObjectHeader &header = iobject.getHeader();
-	ISampleSelector sample_sel(time);
 
 	if (IPolyMesh::matches(header)) {
 		if (ob->type != OB_MESH) {
@@ -963,7 +969,7 @@ DerivedMesh *ABC_read_mesh(CacheReader *reader,
 			return NULL;
 		}
 
-		return abc_reader->read_derivedmesh(dm, sample_sel, read_flag, err_str);
+		return abc_reader->read_derivedmesh(dm, time, read_flag, err_str);
 	}
 	else if (ISubD::matches(header)) {
 		if (ob->type != OB_MESH) {
@@ -971,7 +977,7 @@ DerivedMesh *ABC_read_mesh(CacheReader *reader,
 			return NULL;
 		}
 
-		return abc_reader->read_derivedmesh(dm, sample_sel, read_flag, err_str);
+		return abc_reader->read_derivedmesh(dm, time, read_flag, err_str);
 	}
 	else if (IPoints::matches(header)) {
 		if (ob->type != OB_MESH) {
@@ -979,7 +985,7 @@ DerivedMesh *ABC_read_mesh(CacheReader *reader,
 			return NULL;
 		}
 
-		return abc_reader->read_derivedmesh(dm, sample_sel, read_flag, err_str);
+		return abc_reader->read_derivedmesh(dm, time, read_flag, err_str);
 	}
 	else if (ICurves::matches(header)) {
 		if (ob->type != OB_CURVE) {
@@ -987,7 +993,7 @@ DerivedMesh *ABC_read_mesh(CacheReader *reader,
 			return NULL;
 		}
 
-		return abc_reader->read_derivedmesh(dm, sample_sel, read_flag, err_str);
+		return abc_reader->read_derivedmesh(dm, time, read_flag, err_str);
 	}
 
 	*err_str = "Unsupported object type: verify object path"; // or poke developer
diff --git a/source/blender/editors/mesh/editface.c b/source/blender/editors/mesh/editface.c
index c4e87614732..a478526dee0 100644
--- a/source/blender/editors/mesh/editface.c
+++ b/source/blender/editors/mesh/editface.c
@@ -306,31 +306,30 @@ void paintface_deselect_all_visible(Object *ob, int action, bool flush_flags)
 
 bool paintface_minmax(Object *ob, float r_min[3], float r_max[3])
 {
-	Mesh *me;
-	MPoly *mp;
-	MTexPoly *tf;
-	MLoop *ml;
-	MVert *mvert;
+	const Mesh *me;
+	const MPoly *mp;
+	const MLoop *ml;
+	const MVert *mvert;
 	int a, b;
 	bool ok = false;
 	float vec[3], bmat[3][3];
 
 	me = BKE_mesh_from_object(ob);
-	if (!me || !me->mtpoly) return ok;
+	if (!me || !me->mloopuv) {
+		return ok;
+	}
 	
 	copy_m3_m4(bmat, ob->obmat);
 
 	mvert = me->mvert;
 	mp = me->mpoly;
-	tf = me->mtpoly;
-	for (a = me->totpoly; a > 0; a--, mp++, tf++) {
+	for (a = me->totpoly; a > 0; a--, mp++) {
 		if (mp->flag & ME_HIDE || !(mp->flag & ME_FACE_SEL))
 			continue;
 
 		ml = me->mloop + mp->totloop;
 		for (b = 0; b < mp->totloop; b++, ml++) {
-			copy_v3_v3(vec, (mvert[ml->v].co));
-			mul_m3_v3(bmat, vec);
+			mul_v3_m3v3(vec, bmat, mvert[ml->v].co);
 			add_v3_v3v3(vec, vec, ob->obmat[3]);
 			minmax_v3v3_v3(r_min, r_max, vec);
 		}
diff --git a/source/blender/editors/uvedit/uvedit_draw.c b/source/blender/editors/uvedit/uvedit_draw.c
index 3149abc338b..16d430c9421 100644
--- a/source/blender/editors/uvedit/uvedit_draw.c
+++ b/source/blender/editors/uvedit/uvedit_draw.c
@@ -553,7 +553,7 @@ static void draw_uvs_texpaint(SpaceImage *sima, Scene *scene, SceneLayer *sl, Ob
 
 	ma = give_current_material(ob, ob->actcol);
 
-	if (me->mtpoly) {
+	if (me->mloopuv) {
 		MPoly *mpoly = me->mpoly;
 		MLoopUV *mloopuv, *mloopuv_base;
 		int a, b;
diff --git a/source/blender/makesdna/DNA_cachefile_types.h b/source/blender/makesdna/DNA_cachefile_types.h
index 46b1adf2725..e6fe8102b4f 100644
--- a/source/blender/makesdna/DNA_cachefile_types.h
+++ b/source/blender/makesdna/DNA_cachefile_types.h
@@ -50,7 +50,7 @@ enum {
 typedef struct AlembicObjectPath {
 	struct AlembicObjectPath *next, *prev;
 
-	char path[1024];  /* 1024 = FILE_MAX, might use PATH_MAX in the future. */
+	char path[4096];  /* 4096 = PATH_MAX */
 } AlembicObjectPath;
 
 typedef struct CacheFile {
@@ -64,7 +64,7 @@ typedef struct CacheFile {
 	 * CacheFile. */
 	ListBase object_paths;
 
-	char filepath[1024];  /* 1024 = FILE_MAX */
+	char filepath[4096];  /* 4096 = PATH_MAX */
 
 	char is_sequence;
 	char forward_axis;
diff --git a/source/blender/windowmanager/intern/wm_files.c b/source/blender/windowmanager/intern/wm_files.c
index f77b6c92029..99e0e28ab1b 100644
--- a/source/blender/windowmanager/intern/wm_files.c
+++ b/source/blender/windowmanager/intern/wm_files.c
@@ -725,7 +725,7 @@ int wm_homefile_read(
 	if (filepath_startup_override != NULL) {
 		/* pass */
 	}
-	else if (app_template_override) {
+	else if (app_template_override && app_template_override[0]) {
 		app_template = app_template_override;
 	}
 	else if (!use_factory_settings && U.app_template[0]) {
diff --git a/tests/python/alembic_tests.py b/tests/python/alembic_tests.py
index 1cdfd75426a..2a2dd0230cf 100755
--- a/tests/python/alembic_tests.py
+++ b/tests/python/alembic_tests.py
@@ -372,6 +372,95 @@ class HairParticlesExportTest(AbstractAlembicTest):
         self.assertIn('.faceIndices', abcprop)
 
 
+class LongNamesExportTest(AbstractAlembicTest):
+    @with_tempdir
+    def test_export_long_names(self, tempdir: pathlib.Path):
+        abc = tempdir / 'long-names.abc'
+        script = "import bpy; bpy.ops.wm.alembic_export(filepath='%s', start=1, end=1, " \
+                 "renderable_only=False, visible_layers_only=False, flatten=False)" % abc
+        self.run_blender('long-names.blend', script)
+
+        name_parts = [
+            'foG9aeLahgoh5goacee1dah6Hethaghohjaich5pasizairuWigee1ahPeekiGh',
+            'yoNgoisheedah2ua0eigh2AeCaiTee5bo0uphoo7Aixephah9racahvaingeeH4',
+            'zuthohnoi1thooS3eezoo8seuph2Boo5aefacaethuvee1aequoonoox1sookie',
+            'wugh4ciTh3dipiepeequait5uug7thiseek5ca7Eijei5ietaizokohhaecieto',
+            'up9aeheenein9oteiX6fohP3thiez6Ahvah0oohah1ep2Eesho4Beboechaipoh',
+            'coh4aehiacheTh0ue0eegho9oku1lohl4loht9ohPoongoow7dasiego6yimuis',
+            'lohtho8eigahfeipohviepajaix4it2peeQu6Iefee1nevihaes4cee2soh4noy',
+            'kaht9ahv0ieXaiyih7ohxe8bah7eeyicahjoa2ohbu7Choxua7oongah6sei4bu',
+            'deif0iPaechohkee5nahx6oi2uJeeN7ze3seunohJibe4shai0mah5Iesh3Quai',
+            'ChohDahshooNee0NeNohthah0eiDeese3Vu6ohShil1Iey9ja0uebi2quiShae6',
+            'Dee1kai7eiph2ahh2nufah3zai3eexeengohQue1caj0eeW0xeghi3eshuadoot',
+            'aeshiup3aengajoog0AhCoo5tiu3ieghaeGhie4Tu1ohh1thee8aepheingah1E',
+            'ooRa6ahciolohshaifoopeo9ZeiGhae2aech4raisheiWah9AaNga0uas9ahquo',
+            'thaepheip2aip6shief4EaXopei8ohPo0ighuiXah2ashowai9nohp4uach6Mei',
+            'ohph4yaev3quieji3phophiem3OoNuisheepahng4waithae3Naichai7aw3noo',
+            'aibeawaneBahmieyuph8ieng8iopheereeD2uu9Uyee5bei2phahXeir8eeJ8oo',
+            'ooshahphei2hoh3uth5chaen7ohsai6uutiesucheichai8ungah9Gie1Aiphie',
+            'eiwohchoo7ere2iebohn4Aapheichaelooriiyaoxaik7ooqua7aezahx0aeJei',
+            'Vah0ohgohphiefohTheshieghichaichahch5moshoo0zai5eeva7eisi4yae8T',
+            'EibeeN0fee0Gohnguz8iec6yeigh7shuNg4eingu3siph9joucahpeidoom4ree',
+            'iejiu3shohheeZahHusheimeefaihoh5eecachu5eeZie9ceisugu9taidohT3U',
+            'eex6dilakaix5Eetai7xiCh5Jaa8aiD4Ag3tuij1aijohv5fo0heevah8hohs3m',
+            'ohqueeNgahraew6uraemohtoo5qua3oojiex6ohqu6Aideibaithaiphuriquie',
+            'cei0eiN4Shiey7Aeluy3unohboo5choiphahc2mahbei5paephaiKeso1thoog1',
+            'ieghif4ohKequ7ong0jah5ooBah0eiGh1caechahnahThae9Shoo0phopashoo4',
+            'roh9er3thohwi5am8iequeequuSh3aic0voocai3ihi5nie2abahphupiegh7vu',
+            'uv3Quei7wujoo5beingei2aish5op4VaiX0aebai7iwoaPee5pei8ko9IepaPig',
+            'co7aegh5beitheesi9lu7jeeQu3johgeiphee9cheichi8aithuDehu2gaeNein',
+            'thai3Tiewoo4nuir1ohy4aithiuZ7shae1luuwei5phibohriepe2paeci1Ach8',
+            'phoi3ribah7ufuvoh8eigh1oB6deeBaiPohphaghiPieshahfah5EiCi3toogoo',
+            'aiM8geil7ooreinee4Cheiwea4yeec8eeshi7Sei4Shoo3wu6ohkaNgooQu1mai',
+            'agoo3faciewah9ZeesiXeereek7am0eigaeShie3Tisu8haReeNgoo0ci2Hae5u',
+            'Aesatheewiedohshaephaenohbooshee8eu7EiJ8isal1laech2eiHo0noaV3ta',
+            'liunguep3ooChoo4eir8ahSie8eenee0oo1TooXu8Cais8Aimo4eir6Phoo3xei',
+            'toe9heepeobein3teequachemei0Cejoomef9ujie3ohwae9AiNgiephi3ep0de',
+            'ua6xooY9uzaeB3of6sheiyaedohoiS5Eev0Aequ9ahm1zoa5Aegh3ooz9ChahDa',
+            'eevasah6Bu9wi7EiwiequumahkaeCheegh6lui8xoh4eeY4ieneavah8phaibun',
+            'AhNgei2sioZeeng6phaecheemeehiShie5eFeiTh6ooV8iiphabud0die4siep4',
+            'kushe6Xieg6ahQuoo9aex3aipheefiec1esa7OhBuG0ueziep9phai5eegh1vie',
+            'Jie5yu8aafuQuoh9shaep3moboh3Pooy7och8oC6obeik6jaew2aiLooweib3ch',
+            'ohohjajaivaiRail3odaimei6aekohVaicheip2wu7phieg5Gohsaing2ahxaiy',
+            'hahzaht6yaiYu9re9jah9loisiit4ahtoh2quoh9xohishioz4oo4phofu3ogha',
+            'pu4oorea0uh2tahB8aiZoonge1aophaes6ogaiK9ailaigeej4zoVou8ielotee',
+            'cae2thei3Luphuqu0zeeG8leeZuchahxaicai4ui4Eedohte9uW6gae8Geeh0ea',
+            'air7tuy7ohw5sho2Tahpai8aep4so5ria7eaShus5weaqu0Naquei2xaeyoo2ae',
+            'vohge4aeCh7ahwoo7Jaex6sohl0Koong4Iejisei8Coir0iemeiz9uru9Iebaep',
+            'aepeidie8aiw6waish9gie4Woolae2thuj5phae4phexux7gishaeph4Deu7ooS',
+            'vahc5ia0xohHooViT0uyuxookiaquu2ogueth0ahquoudeefohshai8aeThahba',
+            'mun3oagah2eequaenohfoo8DaigeghoozaV2eiveeQuee7kah0quaa6tiesheet',
+            'ooSet4IdieC4ugow3za0die4ohGoh1oopoh6luaPhaeng4Eechea1hae0eimie5',
+            'iedeimadaefu2NeiPaey2jooloov5iehiegeakoo4ueso7aeK9ahqu2Thahkaes',
+            'nahquah9Quuu2uuf0aJah7eishi2siegh8ue5eiJa2EeVu8ebohkepoh4dahNgo',
+            'io1bie7chioPiej5ae2oohe2fee6ooP2thaeJohjohb9Se8tang3eipaifeimai',
+            'oungoqu6dieneejiechez1xeD2Zi9iox2Ahchaiy9ithah3ohVoolu2euQuuawo',
+            'thaew0veigei4neishohd8mecaixuqu7eeshiex1chaigohmoThoghoitoTa0Eo',
+            'ahroob2phohvaiz0Ohteik2ohtakie6Iu1vitho8IyiyeeleeShae9defaiw9ki',
+            'DohHoothohzeaxolai3Toh5eJie7ahlah9reF0ohn1chaipoogain2aibahw4no',
+            'aif8lo5she4aich5cho2rie8ieJaujeem2Joongeedae4vie3tah1Leequaix1O',
+            'Aang0Shaih6chahthie1ahZ7aewei9thiethee7iuThah3yoongi8ahngiobaa5',
+            'iephoBuayoothah0Ru6aichai4aiw8deg1umongauvaixai3ohy6oowohlee8ei',
+            'ohn5shigoameer0aejohgoh8oChohlaecho9jie6shu0ahg9Bohngau6paevei9',
+            'edahghaishak0paigh1eecuich3aad7yeB0ieD6akeeliem2beifufaekee6eat',
+            'hiechahgheloh2zo7Ieghaiph0phahhu8aeyuiKie1xeipheech9zai4aeme0ee',
+            'Cube'
+        ]
+        name = '/' + '/'.join(name_parts)
+
+        # Now check the resulting Alembic file.
+        abcprop = self.abcprop(abc, '%s/.xform' % name)
+        self.assertEqual(abcprop['.vals'], [
+            1.0, 0.0, 0.0, 0.0,
+            0.0, 1.0, 0.0, 0.0,
+            0.0, 0.0, 1.0, 0.0,
+            0.0, 3.0, 0.0, 1.0,
+        ])
+
+        abcprop = self.abcprop(abc, '%s/CubeShape/.geom' % name)
+        self.assertIn('.faceCounts', abcprop)
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--blender', required=True)
diff --git a/tests/python/bl_alembic_import_test.py b/tests/python/bl_alembic_import_test.py
index e036157e59c..e83d38aec63 100644
--- a/tests/python/bl_alembic_import_test.py
+++ b/tests/python/bl_alembic_import_test.py
@@ -122,6 +122,15 @@ class SimpleImportTest(unittest.TestCase):
         self.assertAlmostEqual(y, 0)
         self.assertAlmostEqual(z, 0)
 
+    def test_import_long_names(self):
+        # This file contains very long names. The longest name is 4047 chars.
+        bpy.ops.wm.alembic_import(
+            filepath=str(self.testdir / "long-names.abc"),
+            as_background_job=False)
+
+        self.assertIn('Cube', bpy.data.objects)
+        self.assertEqual('CubeShape', bpy.data.objects['Cube'].data.name)
+
 
 def main():
     global args