Merge branch 'master' into temp-gpencil-drw-engine

Conflicts: source/blender/draw/engines/gpencil/gpencil_engine.c
author: Antonio Vazquez <blendergit@gmail.com> 2019-08-26 12:52:52 +0300
committer: Antonio Vazquez <blendergit@gmail.com> 2019-08-26 12:52:52 +0300
commit: 6129e20cec4639aebf335ff13b2ba0c59670662d (patch)
tree: a36691d3c487e376f0fa21676ca6e416f051e9a4 /intern/cycles
parent: 03bbd5f9dedf7b3dfea7119c172c61f0b50ae28c (diff)
parent: 27787549256410b6b2de1eca47a2719830af7f96 (diff)
108 files changed, 1904 insertions, 1006 deletions
diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt
index 87f88f7ed34..6a3ebd85378 100644
--- a/intern/cycles/CMakeLists.txt
+++ b/intern/cycles/CMakeLists.txt
@@ -178,11 +178,13 @@ if(CXX_HAS_AVX2)
 endif()
 
 if(WITH_CYCLES_OSL)
+  # LLVM and OSL need to build without RTTI
   if(WIN32 AND MSVC)
     set(RTTI_DISABLE_FLAGS "/GR- -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
   elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang"))
     set(RTTI_DISABLE_FLAGS "-fno-rtti -DBOOST_NO_RTTI -DBOOST_NO_TYPEID")
   endif()
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
 endif()
 
 # Definitions and Includes
diff --git a/intern/cycles/blender/blender_curves.cpp b/intern/cycles/blender/blender_curves.cpp
index 29a1408d85d..4dba8ffbe0e 100644
--- a/intern/cycles/blender/blender_curves.cpp
+++ b/intern/cycles/blender/blender_curves.cpp
@@ -656,7 +656,7 @@ static void ExportCurveSegments(Scene *scene, Mesh *mesh, ParticleCurveData *CDa
       }
 
       if (attr_random != NULL) {
-        attr_random->add(hash_int_01(num_curves));
+        attr_random->add(hash_uint2_to_float(num_curves, 0));
       }
 
       mesh->add_curve(num_keys, CData->psys_shader[sys]);
diff --git a/intern/cycles/blender/blender_mesh.cpp b/intern/cycles/blender/blender_mesh.cpp
index c672bc9f3e2..551866f7fce 100644
--- a/intern/cycles/blender/blender_mesh.cpp
+++ b/intern/cycles/blender/blender_mesh.cpp
@@ -1002,6 +1002,9 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph,
   oldcurve_keys.steal_data(mesh->curve_keys);
   oldcurve_radius.steal_data(mesh->curve_radius);
 
+  /* ensure bvh rebuild (instead of refit) if has_voxel_attributes() changed */
+  bool oldhas_voxel_attributes = mesh->has_voxel_attributes();
+
   mesh->clear();
   mesh->used_shaders = used_shaders;
   mesh->name = ustring(b_ob_data.name().c_str());
@@ -1050,7 +1053,8 @@ Mesh *BlenderSync::sync_mesh(BL::Depsgraph &b_depsgraph,
   /* tag update */
   bool rebuild = (oldtriangles != mesh->triangles) || (oldsubd_faces != mesh->subd_faces) ||
                  (oldsubd_face_corners != mesh->subd_face_corners) ||
-                 (oldcurve_keys != mesh->curve_keys) || (oldcurve_radius != mesh->curve_radius);
+                 (oldcurve_keys != mesh->curve_keys) || (oldcurve_radius != mesh->curve_radius) ||
+                 (oldhas_voxel_attributes != mesh->has_voxel_attributes());
 
   mesh->tag_update(scene, rebuild);
 
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index 7ccf8226e5b..b670922ac88 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -217,7 +217,7 @@ void BlenderSync::sync_light(BL::Object &b_parent,
     light->random_id = random_id;
   }
   else {
-    light->random_id = hash_int_2d(hash_string(b_ob.name().c_str()), 0);
+    light->random_id = hash_uint2(hash_string(b_ob.name().c_str()), 0);
   }
 
   if (light->type == LIGHT_AREA)
@@ -444,6 +444,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
   if (object_updated || (object->mesh && object->mesh->need_update) || tfm != object->tfm) {
     object->name = b_ob.name().c_str();
     object->pass_id = b_ob.pass_index();
+    object->color = get_float3(b_ob.color());
     object->tfm = tfm;
     object->motion.clear();
 
@@ -490,7 +491,7 @@ Object *BlenderSync::sync_object(BL::Depsgraph &b_depsgraph,
     else {
       object->dupli_generated = make_float3(0.0f, 0.0f, 0.0f);
       object->dupli_uv = make_float2(0.0f, 0.0f);
-      object->random_id = hash_int_2d(hash_string(object->name.c_str()), 0);
+      object->random_id = hash_uint2(hash_string(object->name.c_str()), 0);
     }
 
     object->tag_update(scene);
diff --git a/intern/cycles/blender/blender_session.cpp b/intern/cycles/blender/blender_session.cpp
index dcbb101b01d..9a798a4f979 100644
--- a/intern/cycles/blender/blender_session.cpp
+++ b/intern/cycles/blender/blender_session.cpp
@@ -527,7 +527,7 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
     builtin_images_load();
 
     /* Attempt to free all data which is held by Blender side, since at this
-     * point we knwo that we've got everything to render current view layer.
+     * point we know that we've got everything to render current view layer.
      */
     /* At the moment we only free if we are not doing multi-view
      * (or if we are rendering the last view). See T58142/D4239 for discussion.
@@ -539,8 +539,8 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
     /* Make sure all views have different noise patterns. - hardcoded value just to make it random
      */
     if (view_index != 0) {
-      scene->integrator->seed += hash_int_2d(scene->integrator->seed,
-                                             hash_int(view_index * 0xdeadbeef));
+      scene->integrator->seed += hash_uint2(scene->integrator->seed,
+                                            hash_uint2(view_index * 0xdeadbeef, 0));
       scene->integrator->tag_update(scene);
     }
 
@@ -1481,8 +1481,8 @@ void BlenderSession::update_resumable_tile_manager(int num_samples)
 
   /* Round after doing the multiplications with num_chunks and num_samples_per_chunk
    * to allow for many small chunks. */
-  int rounded_range_start_sample = (int)floor(range_start_sample + 0.5f);
-  int rounded_range_num_samples = max((int)floor(range_num_samples + 0.5f), 1);
+  int rounded_range_start_sample = (int)floorf(range_start_sample + 0.5f);
+  int rounded_range_num_samples = max((int)floorf(range_num_samples + 0.5f), 1);
 
   /* Make sure we don't overshoot. */
   if (rounded_range_start_sample + rounded_range_num_samples > num_samples) {
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index f952b3025f0..720f521c589 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -315,18 +315,27 @@ static ShaderNode *add_node(Scene *scene,
   else if (b_node.is_a(&RNA_ShaderNodeRGBToBW)) {
     node = new RGBToBWNode();
   }
+  else if (b_node.is_a(&RNA_ShaderNodeMapRange)) {
+    BL::ShaderNodeMapRange b_map_range_node(b_node);
+    MapRangeNode *map_range_node = new MapRangeNode();
+    map_range_node->clamp = b_map_range_node.clamp();
+    node = map_range_node;
+  }
+  else if (b_node.is_a(&RNA_ShaderNodeClamp)) {
+    node = new ClampNode();
+  }
   else if (b_node.is_a(&RNA_ShaderNodeMath)) {
     BL::ShaderNodeMath b_math_node(b_node);
-    MathNode *math = new MathNode();
-    math->type = (NodeMath)b_math_node.operation();
-    math->use_clamp = b_math_node.use_clamp();
-    node = math;
+    MathNode *math_node = new MathNode();
+    math_node->type = (NodeMathType)b_math_node.operation();
+    math_node->use_clamp = b_math_node.use_clamp();
+    node = math_node;
   }
   else if (b_node.is_a(&RNA_ShaderNodeVectorMath)) {
     BL::ShaderNodeVectorMath b_vector_math_node(b_node);
-    VectorMathNode *vmath = new VectorMathNode();
-    vmath->type = (NodeVectorMath)b_vector_math_node.operation();
-    node = vmath;
+    VectorMathNode *vector_math_node = new VectorMathNode();
+    vector_math_node->type = (NodeVectorMathType)b_vector_math_node.operation();
+    node = vector_math_node;
   }
   else if (b_node.is_a(&RNA_ShaderNodeVectorTransform)) {
     BL::ShaderNodeVectorTransform b_vector_transform_node(b_node);
@@ -598,6 +607,9 @@ static ShaderNode *add_node(Scene *scene,
   else if (b_node.is_a(&RNA_ShaderNodeHairInfo)) {
     node = new HairInfoNode();
   }
+  else if (b_node.is_a(&RNA_ShaderNodeVolumeInfo)) {
+    node = new VolumeInfoNode();
+  }
   else if (b_node.is_a(&RNA_ShaderNodeBump)) {
     BL::ShaderNodeBump b_bump_node(b_node);
     BumpNode *bump = new BumpNode();
@@ -835,6 +847,12 @@ static ShaderNode *add_node(Scene *scene,
     }
     node = ies;
   }
+  else if (b_node.is_a(&RNA_ShaderNodeTexWhiteNoise)) {
+    BL::ShaderNodeTexWhiteNoise b_tex_white_noise_node(b_node);
+    WhiteNoiseTextureNode *white_noise_node = new WhiteNoiseTextureNode();
+    white_noise_node->dimensions = b_tex_white_noise_node.dimensions();
+    node = white_noise_node;
+  }
   else if (b_node.is_a(&RNA_ShaderNodeNormalMap)) {
     BL::ShaderNodeNormalMap b_normal_map_node(b_node);
     NormalMapNode *nmap = new NormalMapNode();
diff --git a/intern/cycles/blender/blender_sync.cpp b/intern/cycles/blender/blender_sync.cpp
index bac571b02ce..aec21887088 100644
--- a/intern/cycles/blender/blender_sync.cpp
+++ b/intern/cycles/blender/blender_sync.cpp
@@ -255,13 +255,13 @@ void BlenderSync::sync_integrator()
 
   integrator->seed = get_int(cscene, "seed");
   if (get_boolean(cscene, "use_animated_seed")) {
-    integrator->seed = hash_int_2d(b_scene.frame_current(), get_int(cscene, "seed"));
+    integrator->seed = hash_uint2(b_scene.frame_current(), get_int(cscene, "seed"));
     if (b_scene.frame_subframe() != 0.0f) {
       /* TODO(sergey): Ideally should be some sort of hash_merge,
        * but this is good enough for now.
        */
-      integrator->seed += hash_int_2d((int)(b_scene.frame_subframe() * (float)INT_MAX),
-                                      get_int(cscene, "seed"));
+      integrator->seed += hash_uint2((int)(b_scene.frame_subframe() * (float)INT_MAX),
+                                     get_int(cscene, "seed"));
     }
   }
 
diff --git a/intern/cycles/blender/blender_util.h b/intern/cycles/blender/blender_util.h
index c9d1dc67e54..3625dd45ae2 100644
--- a/intern/cycles/blender/blender_util.h
+++ b/intern/cycles/blender/blender_util.h
@@ -594,7 +594,7 @@ template<typename K, typename T> class id_map {
 
   T *find(const BL::ID &id)
   {
-    return find(id.ptr.id.data);
+    return find(id.ptr.owner_id);
   }
 
   T *find(const K &key)
@@ -629,7 +629,7 @@ template<typename K, typename T> class id_map {
 
   bool sync(T **r_data, const BL::ID &id)
   {
-    return sync(r_data, id, id, id.ptr.id.data);
+    return sync(r_data, id, id, id.ptr.owner_id);
   }
 
   bool sync(T **r_data, const BL::ID &id, const BL::ID &parent, const K &key)
diff --git a/intern/cycles/bvh/bvh4.cpp b/intern/cycles/bvh/bvh4.cpp
index 850bdf5b8b4..b6df9024ffa 100644
--- a/intern/cycles/bvh/bvh4.cpp
+++ b/intern/cycles/bvh/bvh4.cpp
@@ -43,8 +43,7 @@ BVHNode *bvh_node_merge_children_recursively(const BVHNode *node)
   if (node->is_leaf()) {
     return new LeafNode(*reinterpret_cast<const LeafNode *>(node));
   }
-  /* Collect nodes of one layer deeper, allowing us to have more childrem in
-   * an inner layer. */
+  /* Collect nodes of one layer deeper, allowing us to have more children in an inner layer. */
   assert(node->num_children() <= 2);
   const BVHNode *children[4];
   const BVHNode *child0 = node->get_child(0);
diff --git a/intern/cycles/device/opencl/opencl_split.cpp b/intern/cycles/device/opencl/opencl_split.cpp
index 442b92100bb..79474fb0814 100644
--- a/intern/cycles/device/opencl/opencl_split.cpp
+++ b/intern/cycles/device/opencl/opencl_split.cpp
@@ -194,7 +194,7 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_
     DeviceRequestedFeatures features(requested_features);
     enable_default_features(features);
 
-    /* Always turn off baking at this point. Baking is only usefull when building the bake kernel.
+    /* Always turn off baking at this point. Baking is only useful when building the bake kernel.
      * this also makes sure that the kernels that are build during baking can be reused
      * when not doing any baking. */
     features.use_baking = false;
diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp
index cc40ad42b06..dc9b4072841 100644
--- a/intern/cycles/device/opencl/opencl_util.cpp
+++ b/intern/cycles/device/opencl/opencl_util.cpp
@@ -746,7 +746,7 @@ bool OpenCLInfo::device_supported(const string &platform_name, const cl_device_i
   }
   VLOG(3) << "OpenCL driver version " << driver_major << "." << driver_minor;
 
-  /* It is possible tyo have Iris GPU on AMD/Apple OpenCL framework
+  /* It is possible to have Iris GPU on AMD/Apple OpenCL framework
    * (aka, it will not be on Intel framework). This isn't supported
    * and needs an explicit blacklist.
    */
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 8a8fee108ae..48439a8b68f 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -179,6 +179,7 @@ set(SRC_SVM_HEADERS
   svm/svm_blackbody.h
   svm/svm_bump.h
   svm/svm_camera.h
+  svm/svm_clamp.h
   svm/svm_closure.h
   svm/svm_convert.h
   svm/svm_checker.h
@@ -198,6 +199,7 @@ set(SRC_SVM_HEADERS
   svm/svm_invert.h
   svm/svm_light_path.h
   svm/svm_magic.h
+  svm/svm_map_range.h
   svm/svm_mapping.h
   svm/svm_math.h
   svm/svm_math_util.h
@@ -219,6 +221,7 @@ set(SRC_SVM_HEADERS
   svm/svm_voronoi.h
   svm/svm_voxel.h
   svm/svm_wave.h
+  svm/svm_white_noise.h
 )
 
 set(SRC_GEOM_HEADERS
@@ -486,6 +489,19 @@ endif()
 include_directories(${INC})
 include_directories(SYSTEM ${INC_SYS})
 
+if(WITH_COMPILER_ASAN)
+  if(CMAKE_COMPILER_IS_GNUCC AND (NOT WITH_CYCLES_KERNEL_ASAN))
+    # GCC hangs compiling the big kernel files with asan and release, so disable by default.
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-sanitize=all")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-sanitize=vptr")
+  elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
+    # With OSL, Cycles disables rtti in some modules, which then breaks at linking
+    # when trying to use vptr sanitizer (included into 'undefined' general option).
+    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-sanitize=vptr")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fno-sanitize=vptr")
+  endif()
+endif()
+
 set_source_files_properties(kernels/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
 set_source_files_properties(kernels/cpu/kernel_split.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
 set_source_files_properties(kernels/cpu/filter.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
diff --git a/intern/cycles/kernel/bvh/bvh.h b/intern/cycles/kernel/bvh/bvh.h
index be0f05285e8..162b2fb5cdb 100644
--- a/intern/cycles/kernel/bvh/bvh.h
+++ b/intern/cycles/kernel/bvh/bvh.h
@@ -177,24 +177,23 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray)
   return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x);
 }
 
-/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
 ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
-                                          const Ray ray,
+                                          const Ray *ray,
                                           const uint visibility,
                                           Intersection *isect)
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT);
 
-  if (!scene_intersect_valid(&ray)) {
+  if (!scene_intersect_valid(ray)) {
     return false;
   }
 #ifdef __EMBREE__
   if (kernel_data.bvh.scene) {
-    isect->t = ray.t;
+    isect->t = ray->t;
     CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR);
     IntersectContext rtc_ctx(&ctx);
     RTCRayHit ray_hit;
-    kernel_embree_setup_rayhit(ray, ray_hit, visibility);
+    kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
     rtcIntersect1(kernel_data.bvh.scene, &rtc_ctx.context, &ray_hit);
     if (ray_hit.hit.geomID != RTC_INVALID_GEOMETRY_ID &&
         ray_hit.hit.primID != RTC_INVALID_GEOMETRY_ID) {
@@ -207,42 +206,43 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg,
 #ifdef __OBJECT_MOTION__
   if (kernel_data.bvh.have_motion) {
 #  ifdef __HAIR__
-    if (kernel_data.bvh.have_curves)
-      return bvh_intersect_hair_motion(kg, &ray, isect, visibility);
+    if (kernel_data.bvh.have_curves) {
+      return bvh_intersect_hair_motion(kg, ray, isect, visibility);
+    }
 #  endif /* __HAIR__ */
 
-    return bvh_intersect_motion(kg, &ray, isect, visibility);
+    return bvh_intersect_motion(kg, ray, isect, visibility);
   }
 #endif /* __OBJECT_MOTION__ */
 
 #ifdef __HAIR__
-  if (kernel_data.bvh.have_curves)
-    return bvh_intersect_hair(kg, &ray, isect, visibility);
+  if (kernel_data.bvh.have_curves) {
+    return bvh_intersect_hair(kg, ray, isect, visibility);
+  }
 #endif /* __HAIR__ */
 
 #ifdef __KERNEL_CPU__
 
 #  ifdef __INSTANCING__
-  if (kernel_data.bvh.have_instancing)
-    return bvh_intersect_instancing(kg, &ray, isect, visibility);
+  if (kernel_data.bvh.have_instancing) {
+    return bvh_intersect_instancing(kg, ray, isect, visibility);
+  }
 #  endif /* __INSTANCING__ */
-
-  return bvh_intersect(kg, &ray, isect, visibility);
+  return bvh_intersect(kg, ray, isect, visibility);
 #else /* __KERNEL_CPU__ */
 
 #  ifdef __INSTANCING__
-  return bvh_intersect_instancing(kg, &ray, isect, visibility);
+  return bvh_intersect_instancing(kg, ray, isect, visibility);
 #  else
-  return bvh_intersect(kg, &ray, isect, visibility);
+  return bvh_intersect(kg, ray, isect, visibility);
 #  endif /* __INSTANCING__ */
 
 #endif /* __KERNEL_CPU__ */
 }
 
 #ifdef __BVH_LOCAL__
-/* Note: ray is passed by value to work around a possible CUDA compiler bug. */
 ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
-                                                const Ray ray,
+                                                const Ray *ray,
                                                 LocalIntersection *local_isect,
                                                 int local_object,
                                                 uint *lcg_state,
@@ -250,7 +250,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
 {
   PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL);
 
-  if (!scene_intersect_valid(&ray)) {
+  if (!scene_intersect_valid(ray)) {
     local_isect->num_hits = 0;
     return false;
   }
@@ -264,19 +264,19 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
     ctx.sss_object_id = local_object;
     IntersectContext rtc_ctx(&ctx);
     RTCRay rtc_ray;
-    kernel_embree_setup_ray(ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
+    kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
 
     /* Get the Embree scene for this intersection. */
     RTCGeometry geom = rtcGetGeometry(kernel_data.bvh.scene, local_object * 2);
     if (geom) {
-      float3 P = ray.P;
-      float3 dir = ray.D;
-      float3 idir = ray.D;
+      float3 P = ray->P;
+      float3 dir = ray->D;
+      float3 idir = ray->D;
       const int object_flag = kernel_tex_fetch(__object_flag, local_object);
       if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
         Transform ob_itfm;
         rtc_ray.tfar = bvh_instance_motion_push(
-            kg, local_object, &ray, &P, &dir, &idir, ray.t, &ob_itfm);
+            kg, local_object, ray, &P, &dir, &idir, ray->t, &ob_itfm);
         /* bvh_instance_motion_push() returns the inverse transform but
          * it's not needed here. */
         (void)ob_itfm;
@@ -299,10 +299,10 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg,
 #  endif /* __EMBREE__ */
 #  ifdef __OBJECT_MOTION__
   if (kernel_data.bvh.have_motion) {
-    return bvh_intersect_local_motion(kg, &ray, local_isect, local_object, lcg_state, max_hits);
+    return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
   }
 #  endif /* __OBJECT_MOTION__ */
-  return bvh_intersect_local(kg, &ray, local_isect, local_object, lcg_state, max_hits);
+  return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
 }
 #endif
 
@@ -377,15 +377,18 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg,
   if (!scene_intersect_valid(ray)) {
     return false;
   }
+
 #  ifdef __OBJECT_MOTION__
   if (kernel_data.bvh.have_motion) {
     return bvh_intersect_volume_motion(kg, ray, isect, visibility);
   }
 #  endif /* __OBJECT_MOTION__ */
+
 #  ifdef __KERNEL_CPU__
 #    ifdef __INSTANCING__
-  if (kernel_data.bvh.have_instancing)
+  if (kernel_data.bvh.have_instancing) {
     return bvh_intersect_volume_instancing(kg, ray, isect, visibility);
+  }
 #    endif /* __INSTANCING__ */
   return bvh_intersect_volume(kg, ray, isect, visibility);
 #  else /* __KERNEL_CPU__ */
@@ -422,15 +425,18 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg,
     rtcOccluded1(kernel_data.bvh.scene, &rtc_ctx.context, &rtc_ray);
     return rtc_ray.tfar == -INFINITY;
   }
-#  endif
+#  endif /* __EMBREE__ */
+
 #  ifdef __OBJECT_MOTION__
   if (kernel_data.bvh.have_motion) {
     return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
   }
 #  endif /* __OBJECT_MOTION__ */
+
 #  ifdef __INSTANCING__
-  if (kernel_data.bvh.have_instancing)
+  if (kernel_data.bvh.have_instancing) {
     return bvh_intersect_volume_all_instancing(kg, ray, isect, max_hits, visibility);
+  }
 #  endif /* __INSTANCING__ */
   return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
 }
diff --git a/intern/cycles/kernel/bvh/bvh_nodes.h b/intern/cycles/kernel/bvh/bvh_nodes.h
index a33bc73e25b..db598d1c7fa 100644
--- a/intern/cycles/kernel/bvh/bvh_nodes.h
+++ b/intern/cycles/kernel/bvh/bvh_nodes.h
@@ -39,7 +39,9 @@ ccl_device_forceinline int bvh_aligned_node_intersect(KernelGlobals *kg,
 {
 
   /* fetch node data */
+#  ifdef __VISIBILITY_FLAG__
   float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+#  endif
   float4 node0 = kernel_tex_fetch(__bvh_nodes, node_addr + 1);
   float4 node1 = kernel_tex_fetch(__bvh_nodes, node_addr + 2);
   float4 node2 = kernel_tex_fetch(__bvh_nodes, node_addr + 3);
@@ -111,7 +113,9 @@ ccl_device_forceinline int bvh_unaligned_node_intersect(KernelGlobals *kg,
                                                         float dist[2])
 {
   int mask = 0;
+#  ifdef __VISIBILITY_FLAG__
   float4 cnodes = kernel_tex_fetch(__bvh_nodes, node_addr + 0);
+#  endif
   if (bvh_unaligned_node_intersect_child(kg, P, dir, t, node_addr, 0, &dist[0])) {
 #  ifdef __VISIBILITY_FLAG__
     if ((__float_as_uint(cnodes.x) & visibility))
diff --git a/intern/cycles/kernel/closure/bssrdf.h b/intern/cycles/kernel/closure/bssrdf.h
index 0da4d6bd060..4d88a822821 100644
--- a/intern/cycles/kernel/closure/bssrdf.h
+++ b/intern/cycles/kernel/closure/bssrdf.h
@@ -231,7 +231,7 @@ ccl_device float bssrdf_burley_eval(const float d, float r)
    * NOTES:
    * - Surface albedo is already included into sc->weight, no need to
    *   multiply by this term here.
-   * - This is normalized diffuse model, so the equation is mutliplied
+   * - This is normalized diffuse model, so the equation is multiplied
    *   by 2*pi, which also matches cdf().
    */
   float exp_r_3_d = expf(-r / (3.0f * d));
diff --git a/intern/cycles/kernel/filter/filter_transform.h b/intern/cycles/kernel/filter/filter_transform.h
index 585c4b33787..880a661214e 100644
--- a/intern/cycles/kernel/filter/filter_transform.h
+++ b/intern/cycles/kernel/filter/filter_transform.h
@@ -70,9 +70,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
   filter_calculate_scale(feature_scale, use_time);
 
   /* === Generate the feature transformation. ===
-   * This transformation maps the num_features-dimentional feature space to a reduced feature
-   * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent
-   * overfitting. */
+   * This transformation maps the num_features-dimensional feature space to a reduced feature
+   * (r-feature) space which generally has fewer dimensions.
+   * This mainly helps to prevent over-fitting. */
   float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES];
   math_matrix_zero(feature_matrix, num_features);
   FOR_PIXEL_WINDOW
@@ -85,7 +85,7 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff
 
   math_matrix_jacobi_eigendecomposition(feature_matrix, transform, num_features, 1);
   *rank = 0;
-  /* Prevent overfitting when a small window is used. */
+  /* Prevent over-fitting when a small window is used. */
   int max_rank = min(num_features, num_pixels / 3);
   if (pca_threshold < 0.0f) {
     float threshold_energy = 0.0f;
diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h
index f410e6e27e2..af4e6fbd89b 100644
--- a/intern/cycles/kernel/geom/geom_object.h
+++ b/intern/cycles/kernel/geom/geom_object.h
@@ -227,6 +227,17 @@ ccl_device_inline float object_surface_area(KernelGlobals *kg, int object)
   return kernel_tex_fetch(__objects, object).surface_area;
 }
 
+/* Color of the object */
+
+ccl_device_inline float3 object_color(KernelGlobals *kg, int object)
+{
+  if (object == OBJECT_NONE)
+    return make_float3(0.0f, 0.0f, 0.0f);
+
+  const ccl_global KernelObject *kobject = &kernel_tex_fetch(__objects, object);
+  return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]);
+}
+
 /* Pass ID number of object */
 
 ccl_device_inline float object_pass_id(KernelGlobals *kg, int object)
diff --git a/intern/cycles/kernel/kernel_accumulate.h b/intern/cycles/kernel/kernel_accumulate.h
index b9d723222a1..46a51f5a560 100644
--- a/intern/cycles/kernel/kernel_accumulate.h
+++ b/intern/cycles/kernel/kernel_accumulate.h
@@ -319,10 +319,12 @@ ccl_device_inline void path_radiance_accum_ao(PathRadiance *L,
                                               float3 bsdf,
                                               float3 ao)
 {
+#ifdef __PASSES__
   /* Store AO pass. */
   if (L->use_light_pass && state->bounce == 0) {
     L->ao += alpha * throughput * ao;
   }
+#endif
 
 #ifdef __SHADOW_TRICKS__
   /* For shadow catcher, accumulate ratio. */
diff --git a/intern/cycles/kernel/kernel_camera.h b/intern/cycles/kernel/kernel_camera.h
index 97cd3a3320c..62ce04ba48f 100644
--- a/intern/cycles/kernel/kernel_camera.h
+++ b/intern/cycles/kernel/kernel_camera.h
@@ -237,7 +237,9 @@ ccl_device void camera_sample_orthographic(KernelGlobals *kg,
 /* Panorama Camera */
 
 ccl_device_inline void camera_sample_panorama(ccl_constant KernelCamera *cam,
+#ifdef __CAMERA_MOTION__
                                               const ccl_global DecomposedTransform *cam_motion,
+#endif
                                               float raster_x,
                                               float raster_y,
                                               float lens_u,
@@ -413,8 +415,12 @@ ccl_device_inline void camera_sample(KernelGlobals *kg,
     camera_sample_orthographic(kg, raster_x, raster_y, lens_u, lens_v, ray);
   }
   else {
+#ifdef __CAMERA_MOTION__
     const ccl_global DecomposedTransform *cam_motion = kernel_tex_array(__camera_motion);
     camera_sample_panorama(&kernel_data.cam, cam_motion, raster_x, raster_y, lens_u, lens_v, ray);
+#else
+    camera_sample_panorama(&kernel_data.cam, raster_x, raster_y, lens_u, lens_v, ray);
+#endif
   }
 }
 
diff --git a/intern/cycles/kernel/kernel_compat_cuda.h b/intern/cycles/kernel/kernel_compat_cuda.h
index 469b81d120b..5075c434b10 100644
--- a/intern/cycles/kernel/kernel_compat_cuda.h
+++ b/intern/cycles/kernel/kernel_compat_cuda.h
@@ -58,6 +58,7 @@ __device__ half __float2half(const float f)
 #  define ccl_device_forceinline __device__ __forceinline__
 #endif
 #define ccl_device_noinline __device__ __noinline__
+#define ccl_device_noinline_cpu ccl_device
 #define ccl_global
 #define ccl_static_constant __constant__
 #define ccl_constant const
diff --git a/intern/cycles/kernel/kernel_compat_opencl.h b/intern/cycles/kernel/kernel_compat_opencl.h
index e040ea88d7c..1fe52c51ab0 100644
--- a/intern/cycles/kernel/kernel_compat_opencl.h
+++ b/intern/cycles/kernel/kernel_compat_opencl.h
@@ -35,6 +35,7 @@
 #define ccl_device_inline ccl_device
 #define ccl_device_forceinline ccl_device
 #define ccl_device_noinline ccl_device ccl_noinline
+#define ccl_device_noinline_cpu ccl_device
 #define ccl_may_alias
 #define ccl_static_constant static __constant
 #define ccl_constant __constant
diff --git a/intern/cycles/kernel/kernel_emission.h b/intern/cycles/kernel/kernel_emission.h
index 34300543f91..16d52b0c733 100644
--- a/intern/cycles/kernel/kernel_emission.h
+++ b/intern/cycles/kernel/kernel_emission.h
@@ -17,17 +17,17 @@
 CCL_NAMESPACE_BEGIN
 
 /* Direction Emission */
-ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
-                                                ShaderData *emission_sd,
-                                                LightSample *ls,
-                                                ccl_addr_space PathState *state,
-                                                float3 I,
-                                                differential3 dI,
-                                                float t,
-                                                float time)
+ccl_device_noinline_cpu float3 direct_emissive_eval(KernelGlobals *kg,
+                                                    ShaderData *emission_sd,
+                                                    LightSample *ls,
+                                                    ccl_addr_space PathState *state,
+                                                    float3 I,
+                                                    differential3 dI,
+                                                    float t,
+                                                    float time)
 {
   /* setup shading at emitter */
-  float3 eval;
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
   if (shader_constant_emission_eval(kg, ls->shader, &eval)) {
     if ((ls->prim != PRIM_NONE) && dot(ls->Ng, I) < 0.0f) {
@@ -98,15 +98,15 @@ ccl_device_noinline float3 direct_emissive_eval(KernelGlobals *kg,
   return eval;
 }
 
-ccl_device_noinline bool direct_emission(KernelGlobals *kg,
-                                         ShaderData *sd,
-                                         ShaderData *emission_sd,
-                                         LightSample *ls,
-                                         ccl_addr_space PathState *state,
-                                         Ray *ray,
-                                         BsdfEval *eval,
-                                         bool *is_lamp,
-                                         float rand_terminate)
+ccl_device_noinline_cpu bool direct_emission(KernelGlobals *kg,
+                                             ShaderData *sd,
+                                             ShaderData *emission_sd,
+                                             LightSample *ls,
+                                             ccl_addr_space PathState *state,
+                                             Ray *ray,
+                                             BsdfEval *eval,
+                                             bool *is_lamp,
+                                             float rand_terminate)
 {
   if (ls->pdf == 0.0f)
     return false;
@@ -208,7 +208,7 @@ ccl_device_noinline bool direct_emission(KernelGlobals *kg,
 
 /* Indirect Primitive Emission */
 
-ccl_device_noinline float3 indirect_primitive_emission(
+ccl_device_noinline_cpu float3 indirect_primitive_emission(
     KernelGlobals *kg, ShaderData *sd, float t, int path_flag, float bsdf_pdf)
 {
   /* evaluate emissive closure */
@@ -234,11 +234,11 @@ ccl_device_noinline float3 indirect_primitive_emission(
 
 /* Indirect Lamp Emission */
 
-ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
-                                                ShaderData *emission_sd,
-                                                ccl_addr_space PathState *state,
-                                                Ray *ray,
-                                                float3 *emission)
+ccl_device_noinline_cpu bool indirect_lamp_emission(KernelGlobals *kg,
+                                                    ShaderData *emission_sd,
+                                                    ccl_addr_space PathState *state,
+                                                    Ray *ray,
+                                                    float3 *emission)
 {
   bool hit_lamp = false;
 
@@ -293,10 +293,10 @@ ccl_device_noinline bool indirect_lamp_emission(KernelGlobals *kg,
 
 /* Indirect Background */
 
-ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
-                                               ShaderData *emission_sd,
-                                               ccl_addr_space PathState *state,
-                                               ccl_addr_space Ray *ray)
+ccl_device_noinline_cpu float3 indirect_background(KernelGlobals *kg,
+                                                   ShaderData *emission_sd,
+                                                   ccl_addr_space PathState *state,
+                                                   ccl_addr_space Ray *ray)
 {
 #ifdef __BACKGROUND__
   int shader = kernel_data.background.surface_shader;
@@ -314,7 +314,7 @@ ccl_device_noinline float3 indirect_background(KernelGlobals *kg,
   }
 
   /* Evaluate background shader. */
-  float3 L;
+  float3 L = make_float3(0.0f, 0.0f, 0.0f);
   if (!shader_constant_emission_eval(kg, shader, &L)) {
 #  ifdef __SPLIT_KERNEL__
     Ray priv_ray = *ray;
diff --git a/intern/cycles/kernel/kernel_light.h b/intern/cycles/kernel/kernel_light.h
index 9128bfa9d95..ce908ce0fe2 100644
--- a/intern/cycles/kernel/kernel_light.h
+++ b/intern/cycles/kernel/kernel_light.h
@@ -182,17 +182,7 @@ ccl_device float lamp_light_pdf(KernelGlobals *kg, const float3 Ng, const float3
 
 #ifdef __BACKGROUND_MIS__
 
-/* TODO(sergey): In theory it should be all fine to use noinline for all
- * devices, but we're so close to the release so better not screw things
- * up for CPU at least.
- */
-#  ifdef __KERNEL_GPU__
-ccl_device_noinline
-#  else
-ccl_device
-#  endif
-    float3
-    background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
+ccl_device float3 background_map_sample(KernelGlobals *kg, float randu, float randv, float *pdf)
 {
   /* for the following, the CDF values are actually a pair of floats, with the
    * function value as X and the actual CDF as Y.  The last entry's function
@@ -274,13 +264,7 @@ ccl_device
 /* TODO(sergey): Same as above, after the release we should consider using
  * 'noinline' for all devices.
  */
-#  ifdef __KERNEL_GPU__
-ccl_device_noinline
-#  else
-ccl_device
-#  endif
-    float
-    background_map_pdf(KernelGlobals *kg, float3 direction)
+ccl_device float background_map_pdf(KernelGlobals *kg, float3 direction)
 {
   float2 uv = direction_to_equirectangular(direction);
   int res_x = kernel_data.integrator.pdf_background_res_x;
@@ -1092,7 +1076,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
   int len = kernel_data.integrator.num_distribution + 1;
   float r = *randu;
 
-  while (len > 0) {
+  do {
     int half_len = len >> 1;
     int middle = first + half_len;
 
@@ -1103,7 +1087,7 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
       first = middle + 1;
       len = len - half_len - 1;
     }
-  }
+  } while (len > 0);
 
   /* Clamping should not be needed but float rounding errors seem to
    * make this fail on rare occasions. */
@@ -1120,42 +1104,49 @@ ccl_device int light_distribution_sample(KernelGlobals *kg, float *randu)
 
 /* Generic Light */
 
-ccl_device bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
+ccl_device_inline bool light_select_reached_max_bounces(KernelGlobals *kg, int index, int bounce)
 {
   return (bounce > kernel_tex_fetch(__lights, index).max_bounces);
 }
 
-ccl_device_noinline bool light_sample(
-    KernelGlobals *kg, float randu, float randv, float time, float3 P, int bounce, LightSample *ls)
+ccl_device_noinline bool light_sample(KernelGlobals *kg,
+                                      int lamp,
+                                      float randu,
+                                      float randv,
+                                      float time,
+                                      float3 P,
+                                      int bounce,
+                                      LightSample *ls)
 {
-  /* sample index */
-  int index = light_distribution_sample(kg, &randu);
-
-  /* fetch light data */
-  const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(__light_distribution,
-                                                                              index);
-  int prim = kdistribution->prim;
-
-  if (prim >= 0) {
-    int object = kdistribution->mesh_light.object_id;
-    int shader_flag = kdistribution->mesh_light.shader_flag;
+  if (lamp < 0) {
+    /* sample index */
+    int index = light_distribution_sample(kg, &randu);
+
+    /* fetch light data */
+    const ccl_global KernelLightDistribution *kdistribution = &kernel_tex_fetch(
+        __light_distribution, index);
+    int prim = kdistribution->prim;
+
+    if (prim >= 0) {
+      int object = kdistribution->mesh_light.object_id;
+      int shader_flag = kdistribution->mesh_light.shader_flag;
+
+      triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
+      ls->shader |= shader_flag;
+      return (ls->pdf > 0.0f);
+    }
 
-    triangle_light_sample(kg, prim, object, randu, randv, time, ls, P);
-    ls->shader |= shader_flag;
-    return (ls->pdf > 0.0f);
+    lamp = -prim - 1;
   }
-  else {
-    int lamp = -prim - 1;
 
-    if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
-      return false;
-    }
-
-    return lamp_light_sample(kg, lamp, randu, randv, P, ls);
+  if (UNLIKELY(light_select_reached_max_bounces(kg, lamp, bounce))) {
+    return false;
   }
+
+  return lamp_light_sample(kg, lamp, randu, randv, P, ls);
 }
 
-ccl_device int light_select_num_samples(KernelGlobals *kg, int index)
+ccl_device_inline int light_select_num_samples(KernelGlobals *kg, int index)
 {
   return kernel_tex_fetch(__lights, index).samples;
 }
diff --git a/intern/cycles/kernel/kernel_passes.h b/intern/cycles/kernel/kernel_passes.h
index e84937e2fd9..4a424866efe 100644
--- a/intern/cycles/kernel/kernel_passes.h
+++ b/intern/cycles/kernel/kernel_passes.h
@@ -114,14 +114,12 @@ ccl_device_inline void kernel_write_denoising_shadow(KernelGlobals *kg,
   float value = path_total_shaded / max(path_total, 1e-7f);
   kernel_write_pass_float(buffer + 2, value * value);
 }
-#endif /* __DENOISING_FEATURES__ */
 
 ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
                                                         ShaderData *sd,
                                                         ccl_addr_space PathState *state,
                                                         PathRadiance *L)
 {
-#ifdef __DENOISING_FEATURES__
   if (state->denoising_feature_weight == 0.0f) {
     return;
   }
@@ -162,13 +160,8 @@ ccl_device_inline void kernel_update_denoising_features(KernelGlobals *kg,
 
     state->denoising_feature_weight = 0.0f;
   }
-#else
-  (void)kg;
-  (void)sd;
-  (void)state;
-  (void)L;
-#endif /* __DENOISING_FEATURES__ */
 }
+#endif /* __DENOISING_FEATURES__ */
 
 #ifdef __KERNEL_DEBUG__
 ccl_device_inline void kernel_write_debug_passes(KernelGlobals *kg,
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index f3e2a8a234a..63be0a7f505 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -65,7 +65,7 @@ ccl_device_forceinline bool kernel_path_scene_intersect(KernelGlobals *kg,
     ray->t = kernel_data.background.ao_distance;
   }
 
-  bool hit = scene_intersect(kg, *ray, visibility, isect);
+  bool hit = scene_intersect(kg, ray, visibility, isect);
 
 #ifdef __KERNEL_DEBUG__
   if (state->flag & PATH_RAY_CAMERA) {
@@ -103,7 +103,7 @@ ccl_device_forceinline void kernel_path_lamp_emission(KernelGlobals *kg,
     light_ray.dP = ray->dP;
 
     /* intersect with lamp */
-    float3 emission;
+    float3 emission = make_float3(0.0f, 0.0f, 0.0f);
 
     if (indirect_lamp_emission(kg, emission_sd, state, &light_ray, &emission))
       path_radiance_accum_emission(L, state, throughput, emission);
@@ -474,12 +474,10 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
 #    endif /* __SUBSURFACE__ */
 
 #    if defined(__EMISSION__)
-        if (kernel_data.integrator.use_direct_light) {
-          int all = (kernel_data.integrator.sample_all_lights_indirect) ||
-                    (state->flag & PATH_RAY_SHADOW_CATCHER);
-          kernel_branched_path_surface_connect_light(
-              kg, sd, emission_sd, state, throughput, 1.0f, L, all);
-        }
+        int all = (kernel_data.integrator.sample_all_lights_indirect) ||
+                  (state->flag & PATH_RAY_SHADOW_CATCHER);
+        kernel_branched_path_surface_connect_light(
+            kg, sd, emission_sd, state, throughput, 1.0f, L, all);
 #    endif /* defined(__EMISSION__) */
 
 #    ifdef __VOLUME__
@@ -590,7 +588,9 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
           throughput /= probability;
         }
 
+#  ifdef __DENOISING_FEATURES__
         kernel_update_denoising_features(kg, &sd, state, L);
+#  endif
 
 #  ifdef __AO__
         /* ambient occlusion */
@@ -610,8 +610,10 @@ ccl_device_forceinline void kernel_path_integrate(KernelGlobals *kg,
         }
 #  endif /* __SUBSURFACE__ */
 
+#  ifdef __EMISSION__
         /* direct lighting */
         kernel_path_surface_connect_light(kg, &sd, emission_sd, throughput, state, L);
+#  endif /* __EMISSION__ */
 
 #  ifdef __VOLUME__
       }
diff --git a/intern/cycles/kernel/kernel_path_branched.h b/intern/cycles/kernel/kernel_path_branched.h
index f3a1ea3f4fd..ea6b23e7eb4 100644
--- a/intern/cycles/kernel/kernel_path_branched.h
+++ b/intern/cycles/kernel/kernel_path_branched.h
@@ -198,14 +198,14 @@ ccl_device_forceinline void kernel_branched_path_volume(KernelGlobals *kg,
 #    endif /* __VOLUME__ */
 
 /* bounce off surface and integrate indirect light */
-ccl_device_noinline void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
-                                                                     ShaderData *sd,
-                                                                     ShaderData *indirect_sd,
-                                                                     ShaderData *emission_sd,
-                                                                     float3 throughput,
-                                                                     float num_samples_adjust,
-                                                                     PathState *state,
-                                                                     PathRadiance *L)
+ccl_device_noinline_cpu void kernel_branched_path_surface_indirect_light(KernelGlobals *kg,
+                                                                         ShaderData *sd,
+                                                                         ShaderData *indirect_sd,
+                                                                         ShaderData *emission_sd,
+                                                                         float3 throughput,
+                                                                         float num_samples_adjust,
+                                                                         PathState *state,
+                                                                         PathRadiance *L)
 {
   float sum_sample_weight = 0.0f;
 #    ifdef __DENOISING_FEATURES__
@@ -445,7 +445,9 @@ ccl_device void kernel_branched_path_integrate(KernelGlobals *kg,
         }
       }
 
+#    ifdef __DENOISING_FEATURES__
       kernel_update_denoising_features(kg, &sd, &state, L);
+#    endif
 
 #    ifdef __AO__
       /* ambient occlusion */
diff --git a/intern/cycles/kernel/kernel_path_surface.h b/intern/cycles/kernel/kernel_path_surface.h
index a1ab4951565..d299106ea96 100644
--- a/intern/cycles/kernel/kernel_path_surface.h
+++ b/intern/cycles/kernel/kernel_path_surface.h
@@ -20,7 +20,7 @@ CCL_NAMESPACE_BEGIN
     defined(__BAKING__)
 /* branched path tracing: connect path directly to position on one or more lights and add it to L
  */
-ccl_device_noinline void kernel_branched_path_surface_connect_light(
+ccl_device_noinline_cpu void kernel_branched_path_surface_connect_light(
     KernelGlobals *kg,
     ShaderData *sd,
     ShaderData *emission_sd,
@@ -32,140 +32,100 @@ ccl_device_noinline void kernel_branched_path_surface_connect_light(
 {
 #  ifdef __EMISSION__
   /* sample illumination from lights to find path contribution */
-  if (!(sd->flag & SD_BSDF_HAS_EVAL))
-    return;
-
-  Ray light_ray;
   BsdfEval L_light;
-  bool is_lamp;
-
-#    ifdef __OBJECT_MOTION__
-  light_ray.time = sd->time;
-#    endif
-
-  if (sample_all_lights) {
-    /* lamp sampling */
-    for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-      if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
-        continue;
-
-      int num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i));
-      float num_samples_inv = num_samples_adjust /
-                              (num_samples * kernel_data.integrator.num_all_lights);
-      uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
-
-      for (int j = 0; j < num_samples; j++) {
-        float light_u, light_v;
-        path_branched_rng_2D(
-            kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-        float terminate = path_branched_rng_light_termination(
-            kg, lamp_rng_hash, state, j, num_samples);
 
-        LightSample ls;
-        if (lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
-          /* The sampling probability returned by lamp_light_sample assumes that all lights were
-           * sampled.
-           * However, this code only samples lamps, so if the scene also had mesh lights, the real
-           * probability is twice as high. */
-          if (kernel_data.integrator.pdf_triangles != 0.0f)
-            ls.pdf *= 2.0f;
+  int num_lights = 0;
+  if (kernel_data.integrator.use_direct_light) {
+    if (sample_all_lights) {
+      num_lights = kernel_data.integrator.num_all_lights;
+      if (kernel_data.integrator.pdf_triangles != 0.0f) {
+        num_lights += 1;
+      }
+    }
+    else {
+      num_lights = 1;
+    }
+  }
 
-          if (direct_emission(
-                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-            /* trace shadow ray */
-            float3 shadow;
-
-            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-              /* accumulate */
-              path_radiance_accum_light(L,
-                                        state,
-                                        throughput * num_samples_inv,
-                                        &L_light,
-                                        shadow,
-                                        num_samples_inv,
-                                        is_lamp);
-            }
-            else {
-              path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light);
-            }
-          }
+  for (int i = 0; i < num_lights; i++) {
+    /* sample one light at random */
+    int num_samples = 1;
+    int num_all_lights = 1;
+    uint lamp_rng_hash = state->rng_hash;
+    bool double_pdf = false;
+    bool is_mesh_light = false;
+    bool is_lamp = false;
+
+    if (sample_all_lights) {
+      /* lamp sampling */
+      is_lamp = i < kernel_data.integrator.num_all_lights;
+      if (is_lamp) {
+        if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) {
+          continue;
         }
+        num_samples = ceil_to_int(num_samples_adjust * light_select_num_samples(kg, i));
+        num_all_lights = kernel_data.integrator.num_all_lights;
+        lamp_rng_hash = cmj_hash(state->rng_hash, i);
+        double_pdf = kernel_data.integrator.pdf_triangles != 0.0f;
+      }
+      /* mesh light sampling */
+      else {
+        num_samples = ceil_to_int(num_samples_adjust * kernel_data.integrator.mesh_light_samples);
+        double_pdf = kernel_data.integrator.num_all_lights != 0;
+        is_mesh_light = true;
       }
     }
 
-    /* mesh light sampling */
-    if (kernel_data.integrator.pdf_triangles != 0.0f) {
-      int num_samples = ceil_to_int(num_samples_adjust *
-                                    kernel_data.integrator.mesh_light_samples);
-      float num_samples_inv = num_samples_adjust / num_samples;
+    float num_samples_inv = num_samples_adjust / (num_samples * num_all_lights);
 
-      for (int j = 0; j < num_samples; j++) {
+    for (int j = 0; j < num_samples; j++) {
+      Ray light_ray;
+      light_ray.t = 0.0f; /* reset ray */
+#    ifdef __OBJECT_MOTION__
+      light_ray.time = sd->time;
+#    endif
+      bool has_emission = false;
+
+      if (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)) {
         float light_u, light_v;
         path_branched_rng_2D(
-            kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+            kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
         float terminate = path_branched_rng_light_termination(
-            kg, state->rng_hash, state, j, num_samples);
+            kg, lamp_rng_hash, state, j, num_samples);
 
         /* only sample triangle lights */
-        if (kernel_data.integrator.num_all_lights)
+        if (is_mesh_light && double_pdf) {
           light_u = 0.5f * light_u;
+        }
 
         LightSample ls;
-        if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-          /* Same as above, probability needs to be corrected since the sampling was forced to
-           * select a mesh light. */
-          if (kernel_data.integrator.num_all_lights)
+        const int lamp = is_lamp ? i : -1;
+        if (light_sample(kg, lamp, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+          /* The sampling probability returned by lamp_light_sample assumes that all lights were
+           * sampled. However, this code only samples lamps, so if the scene also had mesh lights,
+           * the real probability is twice as high. */
+          if (double_pdf) {
             ls.pdf *= 2.0f;
-
-          if (direct_emission(
-                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-            /* trace shadow ray */
-            float3 shadow;
-
-            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-              /* accumulate */
-              path_radiance_accum_light(L,
-                                        state,
-                                        throughput * num_samples_inv,
-                                        &L_light,
-                                        shadow,
-                                        num_samples_inv,
-                                        is_lamp);
-            }
-            else {
-              path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light);
-            }
           }
+
+          has_emission = direct_emission(
+              kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate);
         }
       }
-    }
-  }
-  else {
-    /* sample one light at random */
-    float light_u, light_v;
-    path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-    float terminate = path_state_rng_light_termination(kg, state);
 
-    LightSample ls;
-    if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-      /* sample random light */
-      if (direct_emission(
-              kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-        /* trace shadow ray */
-        float3 shadow;
-
-        if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
+      /* trace shadow ray */
+      float3 shadow;
+
+      const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow);
+
+      if (has_emission) {
+        if (!blocked) {
           /* accumulate */
-          path_radiance_accum_light(L,
-                                    state,
-                                    throughput * num_samples_adjust,
-                                    &L_light,
-                                    shadow,
-                                    num_samples_adjust,
-                                    is_lamp);
+          path_radiance_accum_light(
+              L, state, throughput * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
         }
         else {
-          path_radiance_accum_total_light(L, state, throughput * num_samples_adjust, &L_light);
+          path_radiance_accum_total_light(L, state, throughput * num_samples_inv, &L_light);
         }
       }
     }
@@ -255,45 +215,48 @@ ccl_device_inline void kernel_path_surface_connect_light(KernelGlobals *kg,
   PROFILING_INIT(kg, PROFILING_CONNECT_LIGHT);
 
 #ifdef __EMISSION__
-  if (!(kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)))
-    return;
-
 #  ifdef __SHADOW_TRICKS__
-  if (state->flag & PATH_RAY_SHADOW_CATCHER) {
-    kernel_branched_path_surface_connect_light(kg, sd, emission_sd, state, throughput, 1.0f, L, 1);
-    return;
-  }
-#  endif
-
+  int all = (state->flag & PATH_RAY_SHADOW_CATCHER);
+  kernel_branched_path_surface_connect_light(kg, sd, emission_sd, state, throughput, 1.0f, L, all);
+#  else
   /* sample illumination from lights to find path contribution */
-  float light_u, light_v;
-  path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-
   Ray light_ray;
   BsdfEval L_light;
-  bool is_lamp;
+  bool is_lamp = false;
+  bool has_emission = false;
 
-#  ifdef __OBJECT_MOTION__
+  light_ray.t = 0.0f;
+#    ifdef __OBJECT_MOTION__
   light_ray.time = sd->time;
-#  endif
+#    endif
 
-  LightSample ls;
-  if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-    float terminate = path_state_rng_light_termination(kg, state);
-    if (direct_emission(
-            kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-      /* trace shadow ray */
-      float3 shadow;
+  if (kernel_data.integrator.use_direct_light && (sd->flag & SD_BSDF_HAS_EVAL)) {
+    float light_u, light_v;
+    path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
 
-      if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-        /* accumulate */
-        path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
-      }
-      else {
-        path_radiance_accum_total_light(L, state, throughput, &L_light);
-      }
+    LightSample ls;
+    if (light_sample(kg, -1, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+      float terminate = path_state_rng_light_termination(kg, state);
+      has_emission = direct_emission(
+          kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate);
+    }
+  }
+
+  /* trace shadow ray */
+  float3 shadow;
+
+  const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow);
+
+  if (has_emission) {
+    if (!blocked) {
+      /* accumulate */
+      path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+    }
+    else {
+      path_radiance_accum_total_light(L, state, throughput, &L_light);
     }
   }
+#  endif
 #endif
 }
 
diff --git a/intern/cycles/kernel/kernel_path_volume.h b/intern/cycles/kernel/kernel_path_volume.h
index fea4dfc159d..6b62005d19a 100644
--- a/intern/cycles/kernel/kernel_path_volume.h
+++ b/intern/cycles/kernel/kernel_path_volume.h
@@ -26,49 +26,48 @@ ccl_device_inline void kernel_path_volume_connect_light(KernelGlobals *kg,
                                                         PathRadiance *L)
 {
 #  ifdef __EMISSION__
-  if (!kernel_data.integrator.use_direct_light)
-    return;
-
   /* sample illumination from lights to find path contribution */
-  float light_u, light_v;
-  path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
-
   Ray light_ray;
   BsdfEval L_light;
-  LightSample ls;
-  bool is_lamp;
+  bool is_lamp = false;
+  bool has_emission = false;
 
+  light_ray.t = 0.0f;
+#    ifdef __OBJECT_MOTION__
   /* connect to light from given point where shader has been evaluated */
   light_ray.time = sd->time;
+#    endif
 
-  if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-    float terminate = path_state_rng_light_termination(kg, state);
-    if (direct_emission(
-            kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-      /* trace shadow ray */
-      float3 shadow;
+  if (kernel_data.integrator.use_direct_light) {
+    float light_u, light_v;
+    path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
 
-      if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-        /* accumulate */
-        path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
-      }
+    LightSample ls;
+    if (light_sample(kg, -1, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+      float terminate = path_state_rng_light_termination(kg, state);
+      has_emission = direct_emission(
+          kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate);
     }
   }
+
+  /* trace shadow ray */
+  float3 shadow;
+
+  const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow);
+
+  if (has_emission && !blocked) {
+    /* accumulate */
+    path_radiance_accum_light(L, state, throughput, &L_light, shadow, 1.0f, is_lamp);
+  }
 #  endif /* __EMISSION__ */
 }
 
-#  ifdef __KERNEL_GPU__
-ccl_device_noinline
-#  else
-ccl_device
-#  endif
-    bool
-    kernel_path_volume_bounce(KernelGlobals *kg,
-                              ShaderData *sd,
-                              ccl_addr_space float3 *throughput,
-                              ccl_addr_space PathState *state,
-                              PathRadianceState *L_state,
-                              ccl_addr_space Ray *ray)
+ccl_device_noinline_cpu bool kernel_path_volume_bounce(KernelGlobals *kg,
+                                                       ShaderData *sd,
+                                                       ccl_addr_space float3 *throughput,
+                                                       ccl_addr_space PathState *state,
+                                                       PathRadianceState *L_state,
+                                                       ccl_addr_space Ray *ray)
 {
   /* sample phase function */
   float phase_pdf;
@@ -128,7 +127,7 @@ ccl_device
   return true;
 }
 
-#  ifndef __SPLIT_KERNEL__
+#  if !defined(__SPLIT_KERNEL__) && (defined(__BRANCHED_PATH__) || defined(__VOLUME_DECOUPLED__))
 ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg,
                                                           ShaderData *sd,
                                                           ShaderData *emission_sd,
@@ -140,96 +139,71 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg,
                                                           const VolumeSegment *segment)
 {
 #    ifdef __EMISSION__
-  if (!kernel_data.integrator.use_direct_light)
-    return;
-
-  Ray light_ray;
   BsdfEval L_light;
-  bool is_lamp;
-
-  light_ray.time = sd->time;
 
+  int num_lights = 1;
   if (sample_all_lights) {
-    /* lamp sampling */
-    for (int i = 0; i < kernel_data.integrator.num_all_lights; i++) {
-      if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce)))
-        continue;
-
-      int num_samples = light_select_num_samples(kg, i);
-      float num_samples_inv = 1.0f / (num_samples * kernel_data.integrator.num_all_lights);
-      uint lamp_rng_hash = cmj_hash(state->rng_hash, i);
-
-      for (int j = 0; j < num_samples; j++) {
-        /* sample random position on given light */
-        float light_u, light_v;
-        path_branched_rng_2D(
-            kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
-
-        LightSample ls;
-        lamp_light_sample(kg, i, light_u, light_v, ray->P, &ls);
-
-        float3 tp = throughput;
-
-        /* sample position on volume segment */
-        float rphase = path_branched_rng_1D(
-            kg, state->rng_hash, state, j, num_samples, PRNG_PHASE_CHANNEL);
-        float rscatter = path_branched_rng_1D(
-            kg, state->rng_hash, state, j, num_samples, PRNG_SCATTER_DISTANCE);
-
-        VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-                                                                       state,
-                                                                       ray,
-                                                                       sd,
-                                                                       &tp,
-                                                                       rphase,
-                                                                       rscatter,
-                                                                       segment,
-                                                                       (ls.t != FLT_MAX) ? &ls.P :
-                                                                                           NULL,
-                                                                       false);
+    num_lights = kernel_data.integrator.num_all_lights;
+    if (kernel_data.integrator.pdf_triangles != 0.0f) {
+      num_lights += 1;
+    }
+  }
 
-        /* todo: split up light_sample so we don't have to call it again with new position */
-        if (result == VOLUME_PATH_SCATTERED &&
-            lamp_light_sample(kg, i, light_u, light_v, sd->P, &ls)) {
-          if (kernel_data.integrator.pdf_triangles != 0.0f)
-            ls.pdf *= 2.0f;
-
-          float terminate = path_branched_rng_light_termination(
-              kg, state->rng_hash, state, j, num_samples);
-          if (direct_emission(
-                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-            /* trace shadow ray */
-            float3 shadow;
-
-            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-              /* accumulate */
-              path_radiance_accum_light(
-                  L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
-            }
-          }
+  for (int i = 0; i < num_lights; ++i) {
+    /* sample one light at random */
+    int num_samples = 1;
+    int num_all_lights = 1;
+    uint lamp_rng_hash = state->rng_hash;
+    bool double_pdf = false;
+    bool is_mesh_light = false;
+    bool is_lamp = false;
+
+    if (sample_all_lights) {
+      /* lamp sampling */
+      is_lamp = i < kernel_data.integrator.num_all_lights;
+      if (is_lamp) {
+        if (UNLIKELY(light_select_reached_max_bounces(kg, i, state->bounce))) {
+          continue;
         }
+        num_samples = light_select_num_samples(kg, i);
+        num_all_lights = kernel_data.integrator.num_all_lights;
+        lamp_rng_hash = cmj_hash(state->rng_hash, i);
+        double_pdf = kernel_data.integrator.pdf_triangles != 0.0f;
+      }
+      /* mesh light sampling */
+      else {
+        num_samples = kernel_data.integrator.mesh_light_samples;
+        double_pdf = kernel_data.integrator.num_all_lights != 0;
+        is_mesh_light = true;
       }
     }
 
-    /* mesh light sampling */
-    if (kernel_data.integrator.pdf_triangles != 0.0f) {
-      int num_samples = kernel_data.integrator.mesh_light_samples;
-      float num_samples_inv = 1.0f / num_samples;
+    float num_samples_inv = 1.0f / (num_samples * num_all_lights);
+
+    for (int j = 0; j < num_samples; j++) {
+      Ray light_ray;
+      light_ray.t = 0.0f; /* reset ray */
+#      ifdef __OBJECT_MOTION__
+      light_ray.time = sd->time;
+#      endif
+      bool has_emission = false;
 
-      for (int j = 0; j < num_samples; j++) {
-        /* sample random position on random triangle */
+      float3 tp = throughput;
+
+      if (kernel_data.integrator.use_direct_light) {
+        /* sample random position on random light/triangle */
         float light_u, light_v;
         path_branched_rng_2D(
-            kg, state->rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
+            kg, lamp_rng_hash, state, j, num_samples, PRNG_LIGHT_U, &light_u, &light_v);
 
         /* only sample triangle lights */
-        if (kernel_data.integrator.num_all_lights)
+        if (is_mesh_light && double_pdf) {
           light_u = 0.5f * light_u;
+        }
 
         LightSample ls;
-        light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
-
-        float3 tp = throughput;
+        const int lamp = is_lamp ? i : -1;
+        light_sample(kg, lamp, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
 
         /* sample position on volume segment */
         float rphase = path_branched_rng_1D(
@@ -249,69 +223,31 @@ ccl_device void kernel_branched_path_volume_connect_light(KernelGlobals *kg,
                                                                                            NULL,
                                                                        false);
 
-        /* todo: split up light_sample so we don't have to call it again with new position */
-        if (result == VOLUME_PATH_SCATTERED &&
-            light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-          if (kernel_data.integrator.num_all_lights)
-            ls.pdf *= 2.0f;
-
-          float terminate = path_branched_rng_light_termination(
-              kg, state->rng_hash, state, j, num_samples);
-          if (direct_emission(
-                  kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-            /* trace shadow ray */
-            float3 shadow;
-
-            if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-              /* accumulate */
-              path_radiance_accum_light(
-                  L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
+        if (result == VOLUME_PATH_SCATTERED) {
+          /* todo: split up light_sample so we don't have to call it again with new position */
+          if (light_sample(kg, lamp, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
+            if (double_pdf) {
+              ls.pdf *= 2.0f;
             }
+
+            /* sample random light */
+            float terminate = path_branched_rng_light_termination(
+                kg, state->rng_hash, state, j, num_samples);
+            has_emission = direct_emission(
+                kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate);
           }
         }
       }
-    }
-  }
-  else {
-    /* sample random position on random light */
-    float light_u, light_v;
-    path_state_rng_2D(kg, state, PRNG_LIGHT_U, &light_u, &light_v);
 
-    LightSample ls;
-    light_sample(kg, light_u, light_v, sd->time, ray->P, state->bounce, &ls);
-
-    float3 tp = throughput;
-
-    /* sample position on volume segment */
-    float rphase = path_state_rng_1D(kg, state, PRNG_PHASE_CHANNEL);
-    float rscatter = path_state_rng_1D(kg, state, PRNG_SCATTER_DISTANCE);
-
-    VolumeIntegrateResult result = kernel_volume_decoupled_scatter(kg,
-                                                                   state,
-                                                                   ray,
-                                                                   sd,
-                                                                   &tp,
-                                                                   rphase,
-                                                                   rscatter,
-                                                                   segment,
-                                                                   (ls.t != FLT_MAX) ? &ls.P :
-                                                                                       NULL,
-                                                                   false);
-
-    /* todo: split up light_sample so we don't have to call it again with new position */
-    if (result == VOLUME_PATH_SCATTERED &&
-        light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-      /* sample random light */
-      float terminate = path_state_rng_light_termination(kg, state);
-      if (direct_emission(
-              kg, sd, emission_sd, &ls, state, &light_ray, &L_light, &is_lamp, terminate)) {
-        /* trace shadow ray */
-        float3 shadow;
-
-        if (!shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow)) {
-          /* accumulate */
-          path_radiance_accum_light(L, state, tp, &L_light, shadow, 1.0f, is_lamp);
-        }
+      /* trace shadow ray */
+      float3 shadow;
+
+      const bool blocked = shadow_blocked(kg, sd, emission_sd, state, &light_ray, &shadow);
+
+      if (has_emission && !blocked) {
+        /* accumulate */
+        path_radiance_accum_light(
+            L, state, tp * num_samples_inv, &L_light, shadow, num_samples_inv, is_lamp);
       }
     }
   }
diff --git a/intern/cycles/kernel/kernel_random.h b/intern/cycles/kernel/kernel_random.h
index 78eafbfe3cb..a5ae427c2d3 100644
--- a/intern/cycles/kernel/kernel_random.h
+++ b/intern/cycles/kernel/kernel_random.h
@@ -130,7 +130,7 @@ ccl_device_inline void path_rng_init(KernelGlobals *kg,
                                      float *fy)
 {
   /* load state */
-  *rng_hash = hash_int_2d(x, y);
+  *rng_hash = hash_uint2(x, y);
   *rng_hash ^= kernel_data.integrator.seed;
 
 #ifdef __DEBUG_CORRELATION__
diff --git a/intern/cycles/kernel/kernel_shader.h b/intern/cycles/kernel/kernel_shader.h
index 4963e012e15..4688857b718 100644
--- a/intern/cycles/kernel/kernel_shader.h
+++ b/intern/cycles/kernel/kernel_shader.h
@@ -686,8 +686,7 @@ ccl_device_inline const ShaderClosure *shader_bsdf_pick(ShaderData *sd, float *r
         if (r < next_sum) {
           sampled = i;
 
-          /* Rescale to reuse for direction sample, to better
-           * preserve stratification. */
+          /* Rescale to reuse for direction sample, to better preserve stratification. */
           *randu = (r - partial_sum) / sc->sample_weight;
           break;
         }
@@ -743,8 +742,7 @@ ccl_device_inline const ShaderClosure *shader_bssrdf_pick(ShaderData *sd,
             *throughput *= (sum_bsdf + sum_bssrdf) / sum_bssrdf;
             sampled = i;
 
-            /* Rescale to reuse for direction sample, to better
-             * preserve stratifaction. */
+            /* Rescale to reuse for direction sample, to better preserve stratification. */
             *randu = (r - partial_sum) / sc->sample_weight;
             break;
           }
@@ -780,7 +778,7 @@ ccl_device_inline int shader_bsdf_sample(KernelGlobals *kg,
   kernel_assert(CLOSURE_IS_BSDF(sc->type));
 
   int label;
-  float3 eval;
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
   *pdf = 0.0f;
   label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
@@ -810,7 +808,7 @@ ccl_device int shader_bsdf_sample_closure(KernelGlobals *kg,
   PROFILING_INIT(kg, PROFILING_CLOSURE_SAMPLE);
 
   int label;
-  float3 eval;
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
   *pdf = 0.0f;
   label = bsdf_sample(kg, sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
@@ -1223,7 +1221,7 @@ ccl_device int shader_volume_phase_sample(KernelGlobals *kg,
    * depending on color channels, even if this is perhaps not a common case */
   const ShaderClosure *sc = &sd->closure[sampled];
   int label;
-  float3 eval;
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
   *pdf = 0.0f;
   label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
@@ -1248,7 +1246,7 @@ ccl_device int shader_phase_sample_closure(KernelGlobals *kg,
   PROFILING_INIT(kg, PROFILING_CLOSURE_VOLUME_SAMPLE);
 
   int label;
-  float3 eval;
+  float3 eval = make_float3(0.0f, 0.0f, 0.0f);
 
   *pdf = 0.0f;
   label = volume_phase_sample(sd, sc, randu, randv, &eval, omega_in, domega_in, pdf);
diff --git a/intern/cycles/kernel/kernel_shadow.h b/intern/cycles/kernel/kernel_shadow.h
index 6640f64518a..c02d7d77faf 100644
--- a/intern/cycles/kernel/kernel_shadow.h
+++ b/intern/cycles/kernel/kernel_shadow.h
@@ -103,7 +103,7 @@ ccl_device bool shadow_blocked_opaque(KernelGlobals *kg,
                                       Intersection *isect,
                                       float3 *shadow)
 {
-  const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
+  const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
 #ifdef __VOLUME__
   if (!blocked && state->volume_stack[0].shader != SHADER_NONE) {
     /* Apply attenuation from current volume shader. */
@@ -318,7 +318,7 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg,
       if (bounce >= kernel_data.integrator.transparent_max_bounce) {
         return true;
       }
-      if (!scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) {
+      if (!scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_TRANSPARENT, isect)) {
         break;
       }
       if (!shader_transparent_shadow(kg, isect)) {
@@ -374,7 +374,7 @@ ccl_device bool shadow_blocked_transparent_stepped(KernelGlobals *kg,
                                                    Intersection *isect,
                                                    float3 *shadow)
 {
-  bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
+  bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, isect);
   bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, isect) : false;
   return shadow_blocked_transparent_stepped_loop(
       kg, sd, shadow_sd, state, visibility, ray, isect, blocked, is_transparent_isect, shadow);
@@ -433,7 +433,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg,
    * TODO(sergey): Check why using record-all behavior causes slowdown in such
    * cases. Could that be caused by a higher spill pressure?
    */
-  const bool blocked = scene_intersect(kg, *ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect);
+  const bool blocked = scene_intersect(kg, ray, visibility & PATH_RAY_SHADOW_OPAQUE, &isect);
   const bool is_transparent_isect = blocked ? shader_transparent_shadow(kg, &isect) : false;
   if (!blocked || !is_transparent_isect || max_hits + 1 >= SHADOW_STACK_MAX_HITS) {
     return shadow_blocked_transparent_stepped_loop(
diff --git a/intern/cycles/kernel/kernel_subsurface.h b/intern/cycles/kernel/kernel_subsurface.h
index 7510e50a962..8dc1904058d 100644
--- a/intern/cycles/kernel/kernel_subsurface.h
+++ b/intern/cycles/kernel/kernel_subsurface.h
@@ -222,7 +222,7 @@ ccl_device_inline int subsurface_scatter_disk(KernelGlobals *kg,
 
   /* intersect with the same object. if multiple intersections are found it
    * will use at most BSSRDF_MAX_HITS hits, a random subset of all hits */
-  scene_intersect_local(kg, *ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
+  scene_intersect_local(kg, ray, ss_isect, sd->object, lcg_state, BSSRDF_MAX_HITS);
   int num_eval_hits = min(ss_isect->num_hits, BSSRDF_MAX_HITS);
 
   for (int hit = 0; hit < num_eval_hits; hit++) {
@@ -418,7 +418,7 @@ ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg,
     float t = -logf(1.0f - rdist) / sample_sigma_t;
 
     ray->t = t;
-    scene_intersect_local(kg, *ray, ss_isect, sd->object, NULL, 1);
+    scene_intersect_local(kg, ray, ss_isect, sd->object, NULL, 1);
     hit = (ss_isect->num_hits > 0);
 
     if (hit) {
diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h
index a1d950bbc70..b3cb6ca7c19 100644
--- a/intern/cycles/kernel/kernel_types.h
+++ b/intern/cycles/kernel/kernel_types.h
@@ -115,7 +115,6 @@ CCL_NAMESPACE_BEGIN
 #  define __LAMP_MIS__
 #  define __CAMERA_MOTION__
 #  define __OBJECT_MOTION__
-#  define __HAIR__
 #  define __BAKING__
 #  define __PRINCIPLED__
 #  define __SUBSURFACE__
@@ -650,9 +649,8 @@ typedef struct Ray {
  * is fixed.
  */
 #ifndef __KERNEL_OPENCL_AMD__
-  float3 P; /* origin */
-  float3 D; /* direction */
-
+  float3 P;   /* origin */
+  float3 D;   /* direction */
   float t;    /* length of the ray */
   float time; /* time (for motion blur) */
 #else
@@ -1408,6 +1406,7 @@ typedef struct KernelObject {
   float surface_area;
   float pass_id;
   float random_number;
+  float color[3];
   int particle_index;
 
   float dupli_generated[3];
@@ -1420,11 +1419,9 @@ typedef struct KernelObject {
   uint patch_map_offset;
   uint attribute_map_offset;
   uint motion_offset;
-  uint pad1;
 
   float cryptomatte_object;
   float cryptomatte_asset;
-  float pad2, pad3;
 } KernelObject;
 static_assert_align(KernelObject, 16);
 
@@ -1523,7 +1520,7 @@ static_assert_align(KernelShader, 16);
  * Queue 1 - Active rays
  * Queue 2 - Background queue
  * Queue 3 - Shadow ray cast kernel - AO
- * Queeu 4 - Shadow ray cast kernel - direct lighting
+ * Queue 4 - Shadow ray cast kernel - direct lighting
  */
 
 /* Queue names */
diff --git a/intern/cycles/kernel/kernel_volume.h b/intern/cycles/kernel/kernel_volume.h
index cc85110bdd8..2705526abe4 100644
--- a/intern/cycles/kernel/kernel_volume.h
+++ b/intern/cycles/kernel/kernel_volume.h
@@ -187,7 +187,7 @@ ccl_device void kernel_volume_shadow_homogeneous(KernelGlobals *kg,
                                                  ShaderData *sd,
                                                  float3 *throughput)
 {
-  float3 sigma_t;
+  float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
 
   if (volume_shader_extinction_sample(kg, sd, state, ray->P, &sigma_t))
     *throughput *= volume_color_transmittance(sigma_t, ray->t);
@@ -225,7 +225,7 @@ ccl_device void kernel_volume_shadow_heterogeneous(KernelGlobals *kg,
     }
 
     float3 new_P = ray->P + ray->D * (t + step_offset);
-    float3 sigma_t;
+    float3 sigma_t = make_float3(0.0f, 0.0f, 0.0f);
 
     /* compute attenuation over segment */
     if (volume_shader_extinction_sample(kg, sd, state, new_P, &sigma_t)) {
@@ -621,6 +621,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
         new_tp = tp * transmittance;
       }
       else {
+        transmittance = make_float3(0.0f, 0.0f, 0.0f);
         new_tp = tp;
       }
 
@@ -671,7 +672,7 @@ kernel_volume_integrate_heterogeneous_distance(KernelGlobals *kg,
  * ray, with the assumption that there are no surfaces blocking light
  * between the endpoints. distance sampling is used to decide if we will
  * scatter or not. */
-ccl_device_noinline VolumeIntegrateResult
+ccl_device_noinline_cpu VolumeIntegrateResult
 kernel_volume_integrate(KernelGlobals *kg,
                         ccl_addr_space PathState *state,
                         ShaderData *sd,
@@ -1275,7 +1276,7 @@ ccl_device void kernel_volume_stack_init(KernelGlobals *kg,
    */
   if (stack_index == 0 && kernel_data.background.volume_shader == SHADER_NONE) {
     stack[0].shader = kernel_data.background.volume_shader;
-    stack[0].object = PRIM_NONE;
+    stack[0].object = OBJECT_NONE;
     stack[1].shader = SHADER_NONE;
   }
   else {
diff --git a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
index 4289e2bbb85..8f311baf010 100644
--- a/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
+++ b/intern/cycles/kernel/kernels/cpu/kernel_cpu_image.h
@@ -19,6 +19,10 @@
 
 CCL_NAMESPACE_BEGIN
 
+/* Make template functions private so symbols don't conflict between kernels with different
+ * instruction sets. */
+namespace {
+
 template<typename T> struct TextureInterpolator {
 #define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
   { \
@@ -523,6 +527,8 @@ ccl_device float4 kernel_tex_image_interp_3d(
   }
 }
 
+} /* Namespace. */
+
 CCL_NAMESPACE_END
 
 #endif  // __KERNEL_CPU_IMAGE_H__
diff --git a/intern/cycles/kernel/osl/CMakeLists.txt b/intern/cycles/kernel/osl/CMakeLists.txt
index 28d9ca854db..35cca2da8ad 100644
--- a/intern/cycles/kernel/osl/CMakeLists.txt
+++ b/intern/cycles/kernel/osl/CMakeLists.txt
@@ -29,8 +29,6 @@ set(LIB
   cycles_render
 )
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
-
 include_directories(${INC})
 include_directories(SYSTEM ${INC_SYS})
 
diff --git a/intern/cycles/kernel/osl/osl_services.cpp b/intern/cycles/kernel/osl/osl_services.cpp
index 316d24b0954..415de9cd66b 100644
--- a/intern/cycles/kernel/osl/osl_services.cpp
+++ b/intern/cycles/kernel/osl/osl_services.cpp
@@ -81,6 +81,7 @@ ustring OSLRenderServices::u_screen("screen");
 ustring OSLRenderServices::u_raster("raster");
 ustring OSLRenderServices::u_ndc("NDC");
 ustring OSLRenderServices::u_object_location("object:location");
+ustring OSLRenderServices::u_object_color("object:color");
 ustring OSLRenderServices::u_object_index("object:index");
 ustring OSLRenderServices::u_geom_dupli_generated("geom:dupli_generated");
 ustring OSLRenderServices::u_geom_dupli_uv("geom:dupli_uv");
@@ -668,6 +669,10 @@ bool OSLRenderServices::get_object_standard_attribute(
     float3 f = object_location(kg, sd);
     return set_attribute_float3(f, type, derivatives, val);
   }
+  else if (name == u_object_color) {
+    float3 f = object_color(kg, sd->object);
+    return set_attribute_float3(f, type, derivatives, val);
+  }
   else if (name == u_object_index) {
     float f = object_pass_id(kg, sd->object);
     return set_attribute_float(f, type, derivatives, val);
@@ -697,7 +702,7 @@ bool OSLRenderServices::get_object_standard_attribute(
   }
   else if (name == u_particle_random) {
     int particle_id = object_particle_id(kg, sd->object);
-    float f = hash_int_01(particle_index(kg, particle_id));
+    float f = hash_uint2_to_float(particle_index(kg, particle_id), 0);
     return set_attribute_float(f, type, derivatives, val);
   }
 
@@ -1401,7 +1406,7 @@ bool OSLRenderServices::trace(TraceOpt &options,
 
   /* Raytrace, leaving out shadow opaque to avoid early exit. */
   uint visibility = PATH_RAY_ALL_VISIBILITY - PATH_RAY_SHADOW_OPAQUE;
-  return scene_intersect(kg, ray, visibility, &tracedata->isect);
+  return scene_intersect(kg, &ray, visibility, &tracedata->isect);
 }
 
 bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
diff --git a/intern/cycles/kernel/osl/osl_services.h b/intern/cycles/kernel/osl/osl_services.h
index 024ef656be1..469c5188730 100644
--- a/intern/cycles/kernel/osl/osl_services.h
+++ b/intern/cycles/kernel/osl/osl_services.h
@@ -245,6 +245,7 @@ class OSLRenderServices : public OSL::RendererServices {
   static ustring u_raster;
   static ustring u_ndc;
   static ustring u_object_location;
+  static ustring u_object_color;
   static ustring u_object_index;
   static ustring u_geom_dupli_generated;
   static ustring u_geom_dupli_uv;
diff --git a/intern/cycles/kernel/shaders/CMakeLists.txt b/intern/cycles/kernel/shaders/CMakeLists.txt
index b42b9b2fe64..c50bffe27b2 100644
--- a/intern/cycles/kernel/shaders/CMakeLists.txt
+++ b/intern/cycles/kernel/shaders/CMakeLists.txt
@@ -13,6 +13,7 @@ set(SRC_OSL
   node_bump.osl
   node_camera.osl
   node_checker_texture.osl
+  node_clamp.osl
   node_combine_rgb.osl
   node_combine_hsv.osl
   node_combine_xyz.osl
@@ -46,6 +47,7 @@ set(SRC_OSL
   node_light_falloff.osl
   node_light_path.osl
   node_magic_texture.osl
+  node_map_range.osl
   node_mapping.osl
   node_math.osl
   node_mix.osl
@@ -83,6 +85,7 @@ set(SRC_OSL
   node_wavelength.osl
   node_blackbody.osl
   node_wave_texture.osl
+  node_white_noise_texture.osl
   node_wireframe.osl
   node_hair_bsdf.osl
   node_principled_hair_bsdf.osl
diff --git a/intern/cycles/kernel/shaders/node_clamp.osl b/intern/cycles/kernel/shaders/node_clamp.osl
new file mode 100644
index 00000000000..87dc1ccdb12
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_clamp.osl
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdosl.h"
+
+shader node_clamp(float Value = 1.0, float Min = 0.0, float Max = 1.0, output float Result = 0.0)
+{
+  Result = clamp(Value, Min, Max);
+}
diff --git a/intern/cycles/kernel/shaders/node_map_range.osl b/intern/cycles/kernel/shaders/node_map_range.osl
new file mode 100644
index 00000000000..8a28edf5f35
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_map_range.osl
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdosl.h"
+
+shader node_map_range(float Value = 1.0,
+                      float FromMin = 0.0,
+                      float FromMax = 1.0,
+                      float ToMin = 0.0,
+                      float ToMax = 1.0,
+                      output float Result = 0.0)
+{
+  if (FromMax != FromMin) {
+    Result = ToMin + ((Value - FromMin) / (FromMax - FromMin)) * (ToMax - ToMin);
+  }
+}
diff --git a/intern/cycles/kernel/shaders/node_math.osl b/intern/cycles/kernel/shaders/node_math.osl
index 8830339e05f..13e4c91ba10 100644
--- a/intern/cycles/kernel/shaders/node_math.osl
+++ b/intern/cycles/kernel/shaders/node_math.osl
@@ -18,56 +18,30 @@
 
 float safe_divide(float a, float b)
 {
-  float result;
-
-  if (b == 0.0)
-    result = 0.0;
-  else
-    result = a / b;
-
-  return result;
+  return (b != 0.0) ? a / b : 0.0;
 }
 
 float safe_modulo(float a, float b)
 {
-  float result;
-
-  if (b == 0.0)
-    result = 0.0;
-  else
-    result = fmod(a, b);
-
-  return result;
+  return (b != 0.0) ? fmod(a, b) : 0.0;
 }
 
 float safe_sqrt(float a)
 {
-  float result;
-
-  if (a > 0.0)
-    result = sqrt(a);
-  else
-    result = 0.0;
-
-  return result;
+  return (a > 0.0) ? sqrt(a) : 0.0;
 }
 
 float safe_log(float a, float b)
 {
-  if (a < 0.0 || b < 0.0)
-    return 0.0;
-
-  return log(a) / log(b);
+  return (a > 0.0 && b > 0.0) ? log(a) / log(b) : 0.0;
 }
 
+/* OSL asin, acos, and pow functions are safe by default. */
 shader node_math(string type = "add",
-                 int use_clamp = 0,
-                 float Value1 = 0.0,
-                 float Value2 = 0.0,
+                 float Value1 = 0.5,
+                 float Value2 = 0.5,
                  output float Value = 0.0)
 {
-  /* OSL asin, acos, pow check for values that could give rise to nan */
-
   if (type == "add")
     Value = Value1 + Value2;
   else if (type == "subtract")
@@ -76,47 +50,46 @@ shader node_math(string type = "add",
     Value = Value1 * Value2;
   else if (type == "divide")
     Value = safe_divide(Value1, Value2);
-  else if (type == "sine")
-    Value = sin(Value1);
-  else if (type == "cosine")
-    Value = cos(Value1);
-  else if (type == "tangent")
-    Value = tan(Value1);
-  else if (type == "arcsine")
-    Value = asin(Value1);
-  else if (type == "arccosine")
-    Value = acos(Value1);
-  else if (type == "arctangent")
-    Value = atan(Value1);
   else if (type == "power")
     Value = pow(Value1, Value2);
   else if (type == "logarithm")
     Value = safe_log(Value1, Value2);
+  else if (type == "sqrt")
+    Value = safe_sqrt(Value1);
+  else if (type == "absolute")
+    Value = fabs(Value1);
   else if (type == "minimum")
     Value = min(Value1, Value2);
   else if (type == "maximum")
     Value = max(Value1, Value2);
-  else if (type == "round")
-    Value = floor(Value1 + 0.5);
   else if (type == "less_than")
     Value = Value1 < Value2;
   else if (type == "greater_than")
     Value = Value1 > Value2;
-  else if (type == "modulo")
-    Value = safe_modulo(Value1, Value2);
-  else if (type == "absolute")
-    Value = fabs(Value1);
-  else if (type == "arctan2")
-    Value = atan2(Value1, Value2);
+  else if (type == "round")
+    Value = floor(Value1 + 0.5);
   else if (type == "floor")
     Value = floor(Value1);
   else if (type == "ceil")
     Value = ceil(Value1);
-  else if (type == "fract")
+  else if (type == "fraction")
     Value = Value1 - floor(Value1);
-  else if (type == "sqrt")
-    Value = safe_sqrt(Value1);
-
-  if (use_clamp)
-    Value = clamp(Value, 0.0, 1.0);
+  else if (type == "modulo")
+    Value = safe_modulo(Value1, Value2);
+  else if (type == "sine")
+    Value = sin(Value1);
+  else if (type == "cosine")
+    Value = cos(Value1);
+  else if (type == "tangent")
+    Value = tan(Value1);
+  else if (type == "arcsine")
+    Value = asin(Value1);
+  else if (type == "arccosine")
+    Value = acos(Value1);
+  else if (type == "arctangent")
+    Value = atan(Value1);
+  else if (type == "arctan2")
+    Value = atan2(Value1, Value2);
+  else
+    warning("%s", "Unknown math operator!");
 }
diff --git a/intern/cycles/kernel/shaders/node_object_info.osl b/intern/cycles/kernel/shaders/node_object_info.osl
index 0904a30a53f..350404bb747 100644
--- a/intern/cycles/kernel/shaders/node_object_info.osl
+++ b/intern/cycles/kernel/shaders/node_object_info.osl
@@ -17,11 +17,13 @@
 #include "stdosl.h"
 
 shader node_object_info(output point Location = point(0.0, 0.0, 0.0),
+                        output color Color = color(1.0, 1.0, 1.0),
                         output float ObjectIndex = 0.0,
                         output float MaterialIndex = 0.0,
                         output float Random = 0.0)
 {
   getattribute("object:location", Location);
+  getattribute("object:color", Color);
   getattribute("object:index", ObjectIndex);
   getattribute("material:index", MaterialIndex);
   getattribute("object:random", Random);
diff --git a/intern/cycles/kernel/shaders/node_vector_math.osl b/intern/cycles/kernel/shaders/node_vector_math.osl
index 10bb0c7283c..fd5e27aa144 100644
--- a/intern/cycles/kernel/shaders/node_vector_math.osl
+++ b/intern/cycles/kernel/shaders/node_vector_math.osl
@@ -16,34 +16,97 @@
 
 #include "stdosl.h"
 
+float safe_divide(float a, float b)
+{
+  return (b != 0.0) ? a / b : 0.0;
+}
+
+vector safe_divide(vector a, vector b)
+{
+  return vector((b[0] != 0.0) ? a[0] / b[0] : 0.0,
+                (b[1] != 0.0) ? a[1] / b[1] : 0.0,
+                (b[2] != 0.0) ? a[2] / b[2] : 0.0);
+}
+
+vector project(vector v, vector v_proj)
+{
+  float lenSquared = dot(v_proj, v_proj);
+  return (lenSquared != 0.0) ? (dot(v, v_proj) / lenSquared) * v_proj : vector(0.0);
+}
+
+vector snap(vector a, vector b)
+{
+  return floor(safe_divide(a, b)) * b;
+}
+
 shader node_vector_math(string type = "add",
                         vector Vector1 = vector(0.0, 0.0, 0.0),
                         vector Vector2 = vector(0.0, 0.0, 0.0),
+                        float Scale = 1.0,
                         output float Value = 0.0,
                         output vector Vector = vector(0.0, 0.0, 0.0))
 {
   if (type == "add") {
     Vector = Vector1 + Vector2;
-    Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
   }
   else if (type == "subtract") {
     Vector = Vector1 - Vector2;
-    Value = (abs(Vector[0]) + abs(Vector[1]) + abs(Vector[2])) / 3.0;
   }
-  else if (type == "average") {
-    Value = length(Vector1 + Vector2);
-    Vector = normalize(Vector1 + Vector2);
+  else if (type == "multiply") {
+    Vector = Vector1 * Vector2;
+  }
+  else if (type == "divide") {
+    Vector = safe_divide(Vector1, Vector2);
+  }
+  else if (type == "cross_product") {
+    Vector = cross(Vector1, Vector2);
+  }
+  else if (type == "project") {
+    Vector = project(Vector1, Vector2);
+  }
+  else if (type == "reflect") {
+    Vector = reflect(Vector1, normalize(Vector2));
   }
   else if (type == "dot_product") {
     Value = dot(Vector1, Vector2);
   }
-  else if (type == "cross_product") {
-    vector c = cross(Vector1, Vector2);
-    Value = length(c);
-    Vector = normalize(c);
+  else if (type == "distance") {
+    Value = distance(Vector1, Vector2);
   }
-  else if (type == "normalize") {
+  else if (type == "length") {
     Value = length(Vector1);
+  }
+  else if (type == "scale") {
+    Vector = Vector1 * Scale;
+  }
+  else if (type == "normalize") {
     Vector = normalize(Vector1);
   }
+  else if (type == "snap") {
+    Vector = snap(Vector1, Vector2);
+  }
+  else if (type == "floor") {
+    Vector = floor(Vector1);
+  }
+  else if (type == "ceil") {
+    Vector = ceil(Vector1);
+  }
+  else if (type == "modulo") {
+    Vector = mod(Vector1, Vector2);
+  }
+  else if (type == "fraction") {
+    Vector = Vector1 - floor(Vector1);
+  }
+  else if (type == "absolute") {
+    Vector = abs(Vector1);
+  }
+  else if (type == "minimum") {
+    Vector = min(Vector1, Vector2);
+  }
+  else if (type == "maximum") {
+    Vector = max(Vector1, Vector2);
+  }
+  else {
+    warning("%s", "Unknown vector math operator!");
+  }
 }
diff --git a/intern/cycles/kernel/shaders/node_white_noise_texture.osl b/intern/cycles/kernel/shaders/node_white_noise_texture.osl
new file mode 100644
index 00000000000..f026fb4ab39
--- /dev/null
+++ b/intern/cycles/kernel/shaders/node_white_noise_texture.osl
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stdosl.h"
+
+shader node_white_noise_texture(string dimensions = "3D",
+                                point Vector = point(0.0, 0.0, 0.0),
+                                float W = 0.0,
+                                output float Value = 0.0)
+{
+  if (dimensions == "1D") {
+    Value = noise("hash", W);
+  }
+  else if (dimensions == "2D") {
+    Value = noise("hash", Vector[0], Vector[1]);
+  }
+  else if (dimensions == "3D") {
+    Value = noise("hash", Vector);
+  }
+  else if (dimensions == "4D") {
+    Value = noise("hash", Vector, W);
+  }
+  else {
+    warning("%s", "Unknown dimension!");
+  }
+}
diff --git a/intern/cycles/kernel/split/kernel_direct_lighting.h b/intern/cycles/kernel/split/kernel_direct_lighting.h
index b2ca59d60cc..3be2b35812f 100644
--- a/intern/cycles/kernel/split/kernel_direct_lighting.h
+++ b/intern/cycles/kernel/split/kernel_direct_lighting.h
@@ -86,8 +86,7 @@ ccl_device void kernel_direct_lighting(KernelGlobals *kg,
       float terminate = path_state_rng_light_termination(kg, state);
 
       LightSample ls;
-      if (light_sample(kg, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
-
+      if (light_sample(kg, -1, light_u, light_v, sd->time, sd->P, state->bounce, &ls)) {
         Ray light_ray;
         light_ray.time = sd->time;
 
diff --git a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
index 5cd4131e2ae..56cdb22bba3 100644
--- a/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
+++ b/intern/cycles/kernel/split/kernel_holdout_emission_blurring_pathtermination_ao.h
@@ -132,10 +132,12 @@ ccl_device void kernel_holdout_emission_blurring_pathtermination_ao(
         }
       }
 
+#ifdef __DENOISING_FEATURES__
       if (IS_STATE(ray_state, ray_index, RAY_ACTIVE)) {
         PathRadiance *L = &kernel_split_state.path_radiance[ray_index];
         kernel_update_denoising_features(kg, sd, state, L);
       }
+#endif
     }
 
 #ifdef __AO__
diff --git a/intern/cycles/kernel/svm/svm.h b/intern/cycles/kernel/svm/svm.h
index 4a386afa5de..ab8570618ab 100644
--- a/intern/cycles/kernel/svm/svm.h
+++ b/intern/cycles/kernel/svm/svm.h
@@ -132,16 +132,25 @@ ccl_device_inline float4 fetch_node_float(KernelGlobals *kg, int offset)
                      __uint_as_float(node.w));
 }
 
-ccl_device_inline void decode_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
+ccl_device_forceinline void svm_unpack_node_uchar2(uint i, uint *x, uint *y)
 {
-  if (x)
-    *x = (i & 0xFF);
-  if (y)
-    *y = ((i >> 8) & 0xFF);
-  if (z)
-    *z = ((i >> 16) & 0xFF);
-  if (w)
-    *w = ((i >> 24) & 0xFF);
+  *x = (i & 0xFF);
+  *y = ((i >> 8) & 0xFF);
+}
+
+ccl_device_forceinline void svm_unpack_node_uchar3(uint i, uint *x, uint *y, uint *z)
+{
+  *x = (i & 0xFF);
+  *y = ((i >> 8) & 0xFF);
+  *z = ((i >> 16) & 0xFF);
+}
+
+ccl_device_forceinline void svm_unpack_node_uchar4(uint i, uint *x, uint *y, uint *z, uint *w)
+{
+  *x = (i & 0xFF);
+  *y = ((i >> 8) & 0xFF);
+  *z = ((i >> 16) & 0xFF);
+  *w = ((i >> 24) & 0xFF);
 }
 
 CCL_NAMESPACE_END
@@ -192,6 +201,9 @@ CCL_NAMESPACE_END
 #include "kernel/svm/svm_vector_transform.h"
 #include "kernel/svm/svm_voxel.h"
 #include "kernel/svm/svm_bump.h"
+#include "kernel/svm/svm_map_range.h"
+#include "kernel/svm/svm_clamp.h"
+#include "kernel/svm/svm_white_noise.h"
 
 #ifdef __SHADER_RAYTRACE__
 #  include "kernel/svm/svm_ao.h"
@@ -430,6 +442,9 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
       case NODE_TEX_BRICK:
         svm_node_tex_brick(kg, sd, stack, node, &offset);
         break;
+      case NODE_TEX_WHITE_NOISE:
+        svm_node_tex_white_noise(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
 #  endif /* __TEXTURES__ */
 #  ifdef __EXTRA_NODES__
       case NODE_NORMAL:
@@ -486,6 +501,12 @@ ccl_device_noinline void svm_eval_nodes(KernelGlobals *kg,
       case NODE_BLACKBODY:
         svm_node_blackbody(kg, sd, stack, node.y, node.z);
         break;
+      case NODE_MAP_RANGE:
+        svm_node_map_range(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
+      case NODE_CLAMP:
+        svm_node_clamp(kg, sd, stack, node.y, node.z, node.w, &offset);
+        break;
 #  endif /* __EXTRA_NODES__ */
 #  if NODES_FEATURE(NODE_FEATURE_VOLUME)
       case NODE_TEX_VOXEL:
diff --git a/intern/cycles/kernel/svm/svm_ao.h b/intern/cycles/kernel/svm/svm_ao.h
index 62413979201..4cb986b897a 100644
--- a/intern/cycles/kernel/svm/svm_ao.h
+++ b/intern/cycles/kernel/svm/svm_ao.h
@@ -16,6 +16,8 @@
 
 CCL_NAMESPACE_BEGIN
 
+#ifdef __SHADER_RAYTRACE__
+
 ccl_device_noinline float svm_ao(KernelGlobals *kg,
                                  ShaderData *sd,
                                  float3 N,
@@ -64,13 +66,13 @@ ccl_device_noinline float svm_ao(KernelGlobals *kg,
     ray.dD = differential3_zero();
 
     if (flags & NODE_AO_ONLY_LOCAL) {
-      if (!scene_intersect_local(kg, ray, NULL, sd->object, NULL, 0)) {
+      if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) {
         unoccluded++;
       }
     }
     else {
       Intersection isect;
-      if (!scene_intersect(kg, ray, PATH_RAY_SHADOW_OPAQUE, &isect)) {
+      if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) {
         unoccluded++;
       }
     }
@@ -83,10 +85,10 @@ ccl_device void svm_node_ao(
     KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
 {
   uint flags, dist_offset, normal_offset, out_ao_offset;
-  decode_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
+  svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
 
   uint color_offset, out_color_offset, samples;
-  decode_node_uchar4(node.z, &color_offset, &out_color_offset, &samples, NULL);
+  svm_unpack_node_uchar3(node.z, &color_offset, &out_color_offset, &samples);
 
   float dist = stack_load_float_default(stack, dist_offset, node.w);
   float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
@@ -102,4 +104,6 @@ ccl_device void svm_node_ao(
   }
 }
 
+#endif /* __SHADER_RAYTRACE__ */
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_attribute.h b/intern/cycles/kernel/svm/svm_attribute.h
index a67cfe91a30..eaee0f9e4ee 100644
--- a/intern/cycles/kernel/svm/svm_attribute.h
+++ b/intern/cycles/kernel/svm/svm_attribute.h
@@ -46,8 +46,8 @@ ccl_device AttributeDescriptor svm_node_attr_init(
 
 ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-  NodeAttributeType type;
-  uint out_offset;
+  NodeAttributeType type = NODE_ATTR_FLOAT;
+  uint out_offset = 0;
   AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
 
   /* fetch and store attribute */
@@ -80,16 +80,10 @@ ccl_device void svm_node_attr(KernelGlobals *kg, ShaderData *sd, float *stack, u
   }
 }
 
-#ifndef __KERNEL_CUDA__
-ccl_device
-#else
-ccl_device_noinline
-#endif
-    void
-    svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_attr_bump_dx(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-  NodeAttributeType type;
-  uint out_offset;
+  NodeAttributeType type = NODE_ATTR_FLOAT;
+  uint out_offset = 0;
   AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
 
   /* fetch and store attribute */
@@ -125,16 +119,10 @@ ccl_device_noinline
   }
 }
 
-#ifndef __KERNEL_CUDA__
-ccl_device
-#else
-ccl_device_noinline
-#endif
-    void
-    svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
+ccl_device void svm_node_attr_bump_dy(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
-  NodeAttributeType type;
-  uint out_offset;
+  NodeAttributeType type = NODE_ATTR_FLOAT;
+  uint out_offset = 0;
   AttributeDescriptor desc = svm_node_attr_init(kg, sd, node, &type, &out_offset);
 
   /* fetch and store attribute */
diff --git a/intern/cycles/kernel/svm/svm_bevel.h b/intern/cycles/kernel/svm/svm_bevel.h
index fcf28e96e98..434502f31f9 100644
--- a/intern/cycles/kernel/svm/svm_bevel.h
+++ b/intern/cycles/kernel/svm/svm_bevel.h
@@ -16,6 +16,8 @@
 
 CCL_NAMESPACE_BEGIN
 
+#ifdef __SHADER_RAYTRACE__
+
 /* Bevel shader averaging normals from nearby surfaces.
  *
  * Sampling strategy from: BSSRDF Importance Sampling, SIGGRAPH 2013
@@ -51,7 +53,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
   float3 sum_N = make_float3(0.0f, 0.0f, 0.0f);
 
   for (int sample = 0; sample < num_samples; sample++) {
-    float disk_u, disk_v;
+    float disk_u = 0.0f, disk_v = 0.0f;
     path_branched_rng_2D(
         kg, state->rng_hash, state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
 
@@ -110,7 +112,7 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
 
     /* Intersect with the same object. if multiple intersections are found it
      * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */
-    scene_intersect_local(kg, *ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
+    scene_intersect_local(kg, ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
 
     int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS);
 
@@ -120,14 +122,14 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
       if (sd->type & PRIMITIVE_TRIANGLE) {
         hit_P = triangle_refine_local(kg, sd, &isect.hits[hit], ray);
       }
-#ifdef __OBJECT_MOTION__
+#  ifdef __OBJECT_MOTION__
       else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
         float3 verts[3];
         motion_triangle_vertices(
             kg, sd->object, kernel_tex_fetch(__prim_index, isect.hits[hit].prim), sd->time, verts);
         hit_P = motion_triangle_refine_local(kg, sd, &isect.hits[hit], ray, verts);
       }
-#endif /* __OBJECT_MOTION__ */
+#  endif /* __OBJECT_MOTION__ */
 
       /* Get geometric normal. */
       float3 hit_Ng = isect.Ng[hit];
@@ -151,11 +153,11 @@ ccl_device_noinline float3 svm_bevel(KernelGlobals *kg,
         if (sd->type & PRIMITIVE_TRIANGLE) {
           N = triangle_smooth_normal(kg, N, prim, u, v);
         }
-#ifdef __OBJECT_MOTION__
+#  ifdef __OBJECT_MOTION__
         else if (sd->type & PRIMITIVE_MOTION_TRIANGLE) {
           N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time);
         }
-#endif /* __OBJECT_MOTION__ */
+#  endif /* __OBJECT_MOTION__ */
       }
 
       /* Transform normals to world space. */
@@ -200,7 +202,7 @@ ccl_device void svm_node_bevel(
     KernelGlobals *kg, ShaderData *sd, ccl_addr_space PathState *state, float *stack, uint4 node)
 {
   uint num_samples, radius_offset, normal_offset, out_offset;
-  decode_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
+  svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
 
   float radius = stack_load_float(stack, radius_offset);
   float3 bevel_N = svm_bevel(kg, sd, state, radius, num_samples);
@@ -214,4 +216,6 @@ ccl_device void svm_node_bevel(
   stack_store_float3(stack, out_offset, bevel_N);
 }
 
+#endif /* __SHADER_RAYTRACE__ */
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_brick.h b/intern/cycles/kernel/svm/svm_brick.h
index b5cbfcc72df..6984afa30a5 100644
--- a/intern/cycles/kernel/svm/svm_brick.h
+++ b/intern/cycles/kernel/svm/svm_brick.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Brick */
 
-ccl_device_noinline float brick_noise(uint n) /* fast integer noise */
+ccl_device_inline float brick_noise(uint n) /* fast integer noise */
 {
   uint nn;
   n = (n + 1013) & 0x7fffffff;
@@ -27,16 +27,16 @@ ccl_device_noinline float brick_noise(uint n) /* fast integer noise */
   return 0.5f * ((float)nn / 1073741824.0f);
 }
 
-ccl_device_noinline float2 svm_brick(float3 p,
-                                     float mortar_size,
-                                     float mortar_smooth,
-                                     float bias,
-                                     float brick_width,
-                                     float row_height,
-                                     float offset_amount,
-                                     int offset_frequency,
-                                     float squash_amount,
-                                     int squash_frequency)
+ccl_device_noinline_cpu float2 svm_brick(float3 p,
+                                         float mortar_size,
+                                         float mortar_smooth,
+                                         float bias,
+                                         float brick_width,
+                                         float row_height,
+                                         float offset_amount,
+                                         int offset_frequency,
+                                         float squash_amount,
+                                         int squash_frequency)
 {
   int bricknum, rownum;
   float offset = 0.0f;
@@ -87,13 +87,13 @@ ccl_device void svm_node_tex_brick(
   /* RNA properties */
   uint offset_frequency, squash_frequency;
 
-  decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
-  decode_node_uchar4(
+  svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &mortar_offset);
+  svm_unpack_node_uchar4(
       node.z, &scale_offset, &mortar_size_offset, &bias_offset, &brick_width_offset);
-  decode_node_uchar4(
+  svm_unpack_node_uchar4(
       node.w, &row_height_offset, &color_offset, &fac_offset, &mortar_smooth_offset);
 
-  decode_node_uchar4(node2.x, &offset_frequency, &squash_frequency, NULL, NULL);
+  svm_unpack_node_uchar2(node2.x, &offset_frequency, &squash_frequency);
 
   float3 co = stack_load_float3(stack, co_offset);
 
diff --git a/intern/cycles/kernel/svm/svm_brightness.h b/intern/cycles/kernel/svm/svm_brightness.h
index dcd75a2fe8f..9554b5946fb 100644
--- a/intern/cycles/kernel/svm/svm_brightness.h
+++ b/intern/cycles/kernel/svm/svm_brightness.h
@@ -22,7 +22,7 @@ ccl_device void svm_node_brightness(
   uint bright_offset, contrast_offset;
   float3 color = stack_load_float3(stack, in_color);
 
-  decode_node_uchar4(node, &bright_offset, &contrast_offset, NULL, NULL);
+  svm_unpack_node_uchar2(node, &bright_offset, &contrast_offset);
   float brightness = stack_load_float(stack, bright_offset);
   float contrast = stack_load_float(stack, contrast_offset);
 
diff --git a/intern/cycles/kernel/svm/svm_checker.h b/intern/cycles/kernel/svm/svm_checker.h
index 63b4d1e149b..d54cb73df91 100644
--- a/intern/cycles/kernel/svm/svm_checker.h
+++ b/intern/cycles/kernel/svm/svm_checker.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Checker */
 
-ccl_device_noinline float svm_checker(float3 p)
+ccl_device float svm_checker(float3 p)
 {
   /* avoid precision issues on unit coordinates */
   p.x = (p.x + 0.000001f) * 0.999999f;
@@ -37,8 +37,8 @@ ccl_device void svm_node_tex_checker(KernelGlobals *kg, ShaderData *sd, float *s
   uint co_offset, color1_offset, color2_offset, scale_offset;
   uint color_offset, fac_offset;
 
-  decode_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
-  decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+  svm_unpack_node_uchar4(node.y, &co_offset, &color1_offset, &color2_offset, &scale_offset);
+  svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset);
 
   float3 co = stack_load_float3(stack, co_offset);
   float3 color1 = stack_load_float3(stack, color1_offset);
diff --git a/intern/cycles/kernel/svm/svm_clamp.h b/intern/cycles/kernel/svm/svm_clamp.h
new file mode 100644
index 00000000000..a45e70a3f15
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_clamp.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Clamp Node */
+
+ccl_device void svm_node_clamp(KernelGlobals *kg,
+                               ShaderData *sd,
+                               float *stack,
+                               uint value_stack_offset,
+                               uint parameters_stack_offsets,
+                               uint result_stack_offset,
+                               int *offset)
+{
+  uint min_stack_offset, max_stack_offset;
+  svm_unpack_node_uchar2(parameters_stack_offsets, &min_stack_offset, &max_stack_offset);
+
+  uint4 defaults = read_node(kg, offset);
+
+  float value = stack_load_float(stack, value_stack_offset);
+  float min = stack_load_float_default(stack, min_stack_offset, defaults.x);
+  float max = stack_load_float_default(stack, max_stack_offset, defaults.y);
+
+  stack_store_float(stack, result_stack_offset, clamp(value, min, max));
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_closure.h b/intern/cycles/kernel/svm/svm_closure.h
index 270fe4c8615..1511fc65835 100644
--- a/intern/cycles/kernel/svm/svm_closure.h
+++ b/intern/cycles/kernel/svm/svm_closure.h
@@ -85,7 +85,7 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
   uint type, param1_offset, param2_offset;
 
   uint mix_weight_offset;
-  decode_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
+  svm_unpack_node_uchar4(node.y, &type, &param1_offset, &param2_offset, &mix_weight_offset);
   float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
                                                        1.0f);
 
@@ -122,21 +122,21 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
       uint4 data_node2 = read_node(kg, offset);
 
       float3 T = stack_load_float3(stack, data_node.y);
-      decode_node_uchar4(data_node.z,
-                         &specular_offset,
-                         &roughness_offset,
-                         &specular_tint_offset,
-                         &anisotropic_offset);
-      decode_node_uchar4(data_node.w,
-                         &sheen_offset,
-                         &sheen_tint_offset,
-                         &clearcoat_offset,
-                         &clearcoat_roughness_offset);
-      decode_node_uchar4(data_node2.x,
-                         &eta_offset,
-                         &transmission_offset,
-                         &anisotropic_rotation_offset,
-                         &transmission_roughness_offset);
+      svm_unpack_node_uchar4(data_node.z,
+                             &specular_offset,
+                             &roughness_offset,
+                             &specular_tint_offset,
+                             &anisotropic_offset);
+      svm_unpack_node_uchar4(data_node.w,
+                             &sheen_offset,
+                             &sheen_tint_offset,
+                             &clearcoat_offset,
+                             &clearcoat_roughness_offset);
+      svm_unpack_node_uchar4(data_node2.x,
+                             &eta_offset,
+                             &transmission_offset,
+                             &anisotropic_rotation_offset,
+                             &transmission_roughness_offset);
 
       // get Disney principled parameters
       float metallic = param1;
@@ -793,19 +793,19 @@ ccl_device void svm_node_closure_bsdf(KernelGlobals *kg,
       float3 weight = sd->svm_closure_weight * mix_weight;
 
       uint offset_ofs, ior_ofs, color_ofs, parametrization;
-      decode_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
+      svm_unpack_node_uchar4(data_node.y, &offset_ofs, &ior_ofs, &color_ofs, &parametrization);
       float alpha = stack_load_float_default(stack, offset_ofs, data_node.z);
       float ior = stack_load_float_default(stack, ior_ofs, data_node.w);
 
       uint coat_ofs, melanin_ofs, melanin_redness_ofs, absorption_coefficient_ofs;
-      decode_node_uchar4(data_node2.x,
-                         &coat_ofs,
-                         &melanin_ofs,
-                         &melanin_redness_ofs,
-                         &absorption_coefficient_ofs);
+      svm_unpack_node_uchar4(data_node2.x,
+                             &coat_ofs,
+                             &melanin_ofs,
+                             &melanin_redness_ofs,
+                             &absorption_coefficient_ofs);
 
       uint tint_ofs, random_ofs, random_color_ofs, random_roughness_ofs;
-      decode_node_uchar4(
+      svm_unpack_node_uchar4(
           data_node3.x, &tint_ofs, &random_ofs, &random_color_ofs, &random_roughness_ofs);
 
       const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node4.y);
@@ -982,7 +982,7 @@ ccl_device void svm_node_closure_volume(
   uint type, density_offset, anisotropy_offset;
 
   uint mix_weight_offset;
-  decode_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
+  svm_unpack_node_uchar4(node.y, &type, &density_offset, &anisotropy_offset, &mix_weight_offset);
   float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
                                                        1.0f);
 
@@ -1040,7 +1040,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg,
   }
 
   uint density_offset, anisotropy_offset, absorption_color_offset, mix_weight_offset;
-  decode_node_uchar4(
+  svm_unpack_node_uchar4(
       node.y, &density_offset, &anisotropy_offset, &absorption_color_offset, &mix_weight_offset);
   float mix_weight = (stack_valid(mix_weight_offset) ? stack_load_float(stack, mix_weight_offset) :
                                                        1.0f);
@@ -1099,7 +1099,7 @@ ccl_device void svm_node_principled_volume(KernelGlobals *kg,
   }
 
   uint emission_offset, emission_color_offset, blackbody_offset, temperature_offset;
-  decode_node_uchar4(
+  svm_unpack_node_uchar4(
       node.z, &emission_offset, &emission_color_offset, &blackbody_offset, &temperature_offset);
   float emission = (stack_valid(emission_offset)) ? stack_load_float(stack, emission_offset) :
                                                     __uint_as_float(value_node.z);
@@ -1229,7 +1229,8 @@ ccl_device void svm_node_mix_closure(ShaderData *sd, float *stack, uint4 node)
   /* fetch weight from blend input, previous mix closures,
    * and write to stack to be used by closure nodes later */
   uint weight_offset, in_weight_offset, weight1_offset, weight2_offset;
-  decode_node_uchar4(node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
+  svm_unpack_node_uchar4(
+      node.y, &weight_offset, &in_weight_offset, &weight1_offset, &weight2_offset);
 
   float weight = stack_load_float(stack, weight_offset);
   weight = saturate(weight);
diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h
index 12b59d2616b..3a6a5ba782f 100644
--- a/intern/cycles/kernel/svm/svm_color_util.h
+++ b/intern/cycles/kernel/svm/svm_color_util.h
@@ -264,7 +264,7 @@ ccl_device float3 svm_mix_clamp(float3 col)
   return outcol;
 }
 
-ccl_device_noinline float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
+ccl_device_noinline_cpu float3 svm_mix(NodeMix type, float fac, float3 c1, float3 c2)
 {
   float t = saturate(fac);
 
diff --git a/intern/cycles/kernel/svm/svm_displace.h b/intern/cycles/kernel/svm/svm_displace.h
index f16664a684c..250fac6bcb8 100644
--- a/intern/cycles/kernel/svm/svm_displace.h
+++ b/intern/cycles/kernel/svm/svm_displace.h
@@ -23,7 +23,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac
 #ifdef __RAY_DIFFERENTIALS__
   /* get normal input */
   uint normal_offset, scale_offset, invert, use_object_space;
-  decode_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space);
+  svm_unpack_node_uchar4(node.y, &normal_offset, &scale_offset, &invert, &use_object_space);
 
   float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
 
@@ -42,7 +42,7 @@ ccl_device void svm_node_set_bump(KernelGlobals *kg, ShaderData *sd, float *stac
 
   /* get bump values */
   uint c_offset, x_offset, y_offset, strength_offset;
-  decode_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset);
+  svm_unpack_node_uchar4(node.z, &c_offset, &x_offset, &y_offset, &strength_offset);
 
   float h_c = stack_load_float(stack, c_offset);
   float h_x = stack_load_float(stack, x_offset);
@@ -95,7 +95,7 @@ ccl_device void svm_node_set_displacement(KernelGlobals *kg,
 ccl_device void svm_node_displacement(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
   uint height_offset, midlevel_offset, scale_offset, normal_offset;
-  decode_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
+  svm_unpack_node_uchar4(node.y, &height_offset, &midlevel_offset, &scale_offset, &normal_offset);
 
   float height = stack_load_float(stack, height_offset);
   float midlevel = stack_load_float(stack, midlevel_offset);
@@ -126,7 +126,7 @@ ccl_device void svm_node_vector_displacement(
   uint space = data_node.x;
 
   uint vector_offset, midlevel_offset, scale_offset, displacement_offset;
-  decode_node_uchar4(
+  svm_unpack_node_uchar4(
       node.y, &vector_offset, &midlevel_offset, &scale_offset, &displacement_offset);
 
   float3 vector = stack_load_float3(stack, vector_offset);
diff --git a/intern/cycles/kernel/svm/svm_fresnel.h b/intern/cycles/kernel/svm/svm_fresnel.h
index 03119991597..96d602e35bf 100644
--- a/intern/cycles/kernel/svm/svm_fresnel.h
+++ b/intern/cycles/kernel/svm/svm_fresnel.h
@@ -22,7 +22,7 @@ ccl_device void svm_node_fresnel(
     ShaderData *sd, float *stack, uint ior_offset, uint ior_value, uint node)
 {
   uint normal_offset, out_offset;
-  decode_node_uchar4(node, &normal_offset, &out_offset, NULL, NULL);
+  svm_unpack_node_uchar2(node, &normal_offset, &out_offset);
   float eta = (stack_valid(ior_offset)) ? stack_load_float(stack, ior_offset) :
                                           __uint_as_float(ior_value);
   float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
@@ -43,7 +43,7 @@ ccl_device void svm_node_layer_weight(ShaderData *sd, float *stack, uint4 node)
   uint blend_value = node.z;
 
   uint type, normal_offset, out_offset;
-  decode_node_uchar4(node.w, &type, &normal_offset, &out_offset, NULL);
+  svm_unpack_node_uchar3(node.w, &type, &normal_offset, &out_offset);
 
   float blend = (stack_valid(blend_offset)) ? stack_load_float(stack, blend_offset) :
                                               __uint_as_float(blend_value);
diff --git a/intern/cycles/kernel/svm/svm_geometry.h b/intern/cycles/kernel/svm/svm_geometry.h
index a9104643299..019c6294082 100644
--- a/intern/cycles/kernel/svm/svm_geometry.h
+++ b/intern/cycles/kernel/svm/svm_geometry.h
@@ -113,6 +113,10 @@ ccl_device void svm_node_object_info(
       stack_store_float3(stack, out_offset, object_location(kg, sd));
       return;
     }
+    case NODE_INFO_OB_COLOR: {
+      stack_store_float3(stack, out_offset, object_color(kg, sd->object));
+      return;
+    }
     case NODE_INFO_OB_INDEX:
       data = object_pass_id(kg, sd->object);
       break;
@@ -149,7 +153,7 @@ ccl_device void svm_node_particle_info(
     }
     case NODE_INFO_PAR_RANDOM: {
       int particle_id = object_particle_id(kg, sd->object);
-      float random = hash_int_01(particle_index(kg, particle_id));
+      float random = hash_uint2_to_float(particle_index(kg, particle_id), 0);
       stack_store_float(stack, out_offset, random);
       break;
     }
diff --git a/intern/cycles/kernel/svm/svm_gradient.h b/intern/cycles/kernel/svm/svm_gradient.h
index c315564fbc2..08304bc47e8 100644
--- a/intern/cycles/kernel/svm/svm_gradient.h
+++ b/intern/cycles/kernel/svm/svm_gradient.h
@@ -64,7 +64,7 @@ ccl_device void svm_node_tex_gradient(ShaderData *sd, float *stack, uint4 node)
 {
   uint type, co_offset, color_offset, fac_offset;
 
-  decode_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset);
+  svm_unpack_node_uchar4(node.y, &type, &co_offset, &fac_offset, &color_offset);
 
   float3 co = stack_load_float3(stack, co_offset);
 
diff --git a/intern/cycles/kernel/svm/svm_hsv.h b/intern/cycles/kernel/svm/svm_hsv.h
index 72379fba870..1f7bd421869 100644
--- a/intern/cycles/kernel/svm/svm_hsv.h
+++ b/intern/cycles/kernel/svm/svm_hsv.h
@@ -24,8 +24,8 @@ ccl_device void svm_node_hsv(
 {
   uint in_color_offset, fac_offset, out_color_offset;
   uint hue_offset, sat_offset, val_offset;
-  decode_node_uchar4(node.y, &in_color_offset, &fac_offset, &out_color_offset, NULL);
-  decode_node_uchar4(node.z, &hue_offset, &sat_offset, &val_offset, NULL);
+  svm_unpack_node_uchar3(node.y, &in_color_offset, &fac_offset, &out_color_offset);
+  svm_unpack_node_uchar3(node.z, &hue_offset, &sat_offset, &val_offset);
 
   float fac = stack_load_float(stack, fac_offset);
   float3 in_color = stack_load_float3(stack, in_color_offset);
diff --git a/intern/cycles/kernel/svm/svm_ies.h b/intern/cycles/kernel/svm/svm_ies.h
index e57e54ef123..56c804b44d0 100644
--- a/intern/cycles/kernel/svm/svm_ies.h
+++ b/intern/cycles/kernel/svm/svm_ies.h
@@ -101,8 +101,8 @@ ccl_device_inline float kernel_ies_interp(KernelGlobals *kg,
 ccl_device void svm_node_ies(
     KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
-  uint vector_offset, strength_offset, fac_offset, dummy, slot = node.z;
-  decode_node_uchar4(node.y, &strength_offset, &vector_offset, &fac_offset, &dummy);
+  uint vector_offset, strength_offset, fac_offset, slot = node.z;
+  svm_unpack_node_uchar3(node.y, &strength_offset, &vector_offset, &fac_offset);
 
   float3 vector = stack_load_float3(stack, vector_offset);
   float strength = stack_load_float_default(stack, strength_offset, node.w);
diff --git a/intern/cycles/kernel/svm/svm_image.h b/intern/cycles/kernel/svm/svm_image.h
index 2ef64662d0e..64abdd2d8b3 100644
--- a/intern/cycles/kernel/svm/svm_image.h
+++ b/intern/cycles/kernel/svm/svm_image.h
@@ -16,6 +16,8 @@
 
 CCL_NAMESPACE_BEGIN
 
+#ifdef __TEXTURES__
+
 ccl_device float4 svm_image_texture(KernelGlobals *kg, int id, float x, float y, uint flags)
 {
   float4 r = kernel_tex_image_interp(kg, id, x, y);
@@ -48,7 +50,7 @@ ccl_device void svm_node_tex_image(KernelGlobals *kg, ShaderData *sd, float *sta
   uint id = node.y;
   uint co_offset, out_offset, alpha_offset, flags;
 
-  decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
+  svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
 
   float3 co = stack_load_float3(stack, co_offset);
   float2 tex_co;
@@ -143,7 +145,7 @@ ccl_device void svm_node_tex_image_box(KernelGlobals *kg, ShaderData *sd, float
 
   /* now fetch textures */
   uint co_offset, out_offset, alpha_offset, flags;
-  decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
+  svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
 
   float3 co = stack_load_float3(stack, co_offset);
   uint id = node.y;
@@ -179,7 +181,7 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg,
   uint co_offset, out_offset, alpha_offset, flags;
   uint projection = node.w;
 
-  decode_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
+  svm_unpack_node_uchar4(node.z, &co_offset, &out_offset, &alpha_offset, &flags);
 
   float3 co = stack_load_float3(stack, co_offset);
   float2 uv;
@@ -199,4 +201,6 @@ ccl_device void svm_node_tex_environment(KernelGlobals *kg,
     stack_store_float(stack, alpha_offset, f.w);
 }
 
+#endif /* __TEXTURES__ */
+
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_light_path.h b/intern/cycles/kernel/svm/svm_light_path.h
index 65a9a284a17..768c65918cd 100644
--- a/intern/cycles/kernel/svm/svm_light_path.h
+++ b/intern/cycles/kernel/svm/svm_light_path.h
@@ -84,7 +84,7 @@ ccl_device void svm_node_light_falloff(ShaderData *sd, float *stack, uint4 node)
 {
   uint strength_offset, out_offset, smooth_offset;
 
-  decode_node_uchar4(node.z, &strength_offset, &smooth_offset, &out_offset, NULL);
+  svm_unpack_node_uchar3(node.z, &strength_offset, &smooth_offset, &out_offset);
 
   float strength = stack_load_float(stack, strength_offset);
   uint type = node.y;
diff --git a/intern/cycles/kernel/svm/svm_magic.h b/intern/cycles/kernel/svm/svm_magic.h
index 115d2e2fe4b..9c160e6d8cc 100644
--- a/intern/cycles/kernel/svm/svm_magic.h
+++ b/intern/cycles/kernel/svm/svm_magic.h
@@ -18,7 +18,7 @@ CCL_NAMESPACE_BEGIN
 
 /* Magic */
 
-ccl_device_noinline float3 svm_magic(float3 p, int n, float distortion)
+ccl_device_noinline_cpu float3 svm_magic(float3 p, int n, float distortion)
 {
   float x = sinf((p.x + p.y + p.z) * 5.0f);
   float y = cosf((-p.x + p.y - p.z) * 5.0f);
@@ -93,8 +93,8 @@ ccl_device void svm_node_tex_magic(
   uint depth;
   uint scale_offset, distortion_offset, co_offset, fac_offset, color_offset;
 
-  decode_node_uchar4(node.y, &depth, &color_offset, &fac_offset, NULL);
-  decode_node_uchar4(node.z, &co_offset, &scale_offset, &distortion_offset, NULL);
+  svm_unpack_node_uchar3(node.y, &depth, &color_offset, &fac_offset);
+  svm_unpack_node_uchar3(node.z, &co_offset, &scale_offset, &distortion_offset);
 
   uint4 node2 = read_node(kg, offset);
   float3 co = stack_load_float3(stack, co_offset);
diff --git a/intern/cycles/kernel/svm/svm_map_range.h b/intern/cycles/kernel/svm/svm_map_range.h
new file mode 100644
index 00000000000..f2a68adbe61
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_map_range.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+/* Map Range Node: linearly remaps the value from the [from_min, from_max] range to the
+ * [to_min, to_max] range, without clamping. The result is 0 when from_min == from_max. */
+ccl_device void svm_node_map_range(KernelGlobals *kg,
+                                   ShaderData *sd,
+                                   float *stack,
+                                   uint value_stack_offset,
+                                   uint parameters_stack_offsets,
+                                   uint result_stack_offset,
+                                   int *offset)
+{
+  uint from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset;
+  svm_unpack_node_uchar4(parameters_stack_offsets,
+                         &from_min_stack_offset,
+                         &from_max_stack_offset,
+                         &to_min_stack_offset,
+                         &to_max_stack_offset);
+
+  uint4 defaults = read_node(kg, offset); /* Fallbacks for the four range inputs. */
+
+  float value = stack_load_float(stack, value_stack_offset);
+  float from_min = stack_load_float_default(stack, from_min_stack_offset, defaults.x);
+  float from_max = stack_load_float_default(stack, from_max_stack_offset, defaults.y);
+  float to_min = stack_load_float_default(stack, to_min_stack_offset, defaults.z);
+  float to_max = stack_load_float_default(stack, to_max_stack_offset, defaults.w);
+
+  float result;
+  if (from_max != from_min) {
+    result = to_min + ((value - from_min) / (from_max - from_min)) * (to_max - to_min);
+  }
+  else {
+    result = 0.0f; /* Empty source range: avoid dividing by zero. */
+  }
+  stack_store_float(stack, result_stack_offset, result);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_math.h b/intern/cycles/kernel/svm/svm_math.h
index 5920913825b..d156dec497c 100644
--- a/intern/cycles/kernel/svm/svm_math.h
+++ b/intern/cycles/kernel/svm/svm_math.h
@@ -16,48 +16,50 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Nodes */
-
 ccl_device void svm_node_math(KernelGlobals *kg,
                               ShaderData *sd,
                               float *stack,
-                              uint itype,
-                              uint f1_offset,
-                              uint f2_offset,
+                              uint type,                 /* NodeMathType, cast below */
+                              uint inputs_stack_offsets, /* packed stack offsets: a, b */
+                              uint result_stack_offset,  /* stack slot receiving the result */
                               int *offset)
 {
-  NodeMath type = (NodeMath)itype;
-  float f1 = stack_load_float(stack, f1_offset);
-  float f2 = stack_load_float(stack, f2_offset);
-  float f = svm_math(type, f1, f2);
+  uint a_stack_offset, b_stack_offset;
+  svm_unpack_node_uchar2(inputs_stack_offsets, &a_stack_offset, &b_stack_offset);
 
-  uint4 node1 = read_node(kg, offset);
+  float a = stack_load_float(stack, a_stack_offset);
+  float b = stack_load_float(stack, b_stack_offset);
+  float result = svm_math((NodeMathType)type, a, b);
 
-  stack_store_float(stack, node1.y, f);
+  stack_store_float(stack, result_stack_offset, result);
 }
 
 ccl_device void svm_node_vector_math(KernelGlobals *kg,
                                      ShaderData *sd,
                                      float *stack,
-                                     uint itype,
-                                     uint v1_offset,
-                                     uint v2_offset,
+                                     uint type,                  /* NodeVectorMathType */
+                                     uint inputs_stack_offsets,  /* packed: a, b, scale */
+                                     uint outputs_stack_offsets, /* packed: value, vector */
                                      int *offset)
 {
-  NodeVectorMath type = (NodeVectorMath)itype;
-  float3 v1 = stack_load_float3(stack, v1_offset);
-  float3 v2 = stack_load_float3(stack, v2_offset);
-  float f;
-  float3 v;
+  uint value_stack_offset, vector_stack_offset;
+  uint a_stack_offset, b_stack_offset, scale_stack_offset;
+  svm_unpack_node_uchar3(
+      inputs_stack_offsets, &a_stack_offset, &b_stack_offset, &scale_stack_offset);
+  svm_unpack_node_uchar2(outputs_stack_offsets, &value_stack_offset, &vector_stack_offset);
 
-  svm_vector_math(&f, &v, type, v1, v2);
+  float3 a = stack_load_float3(stack, a_stack_offset);
+  float3 b = stack_load_float3(stack, b_stack_offset);
+  float scale = stack_load_float(stack, scale_stack_offset);
 
-  uint4 node1 = read_node(kg, offset);
+  float value;   /* NOTE(review): some ops leave this unset; see svm_vector_math. */
+  float3 vector; /* NOTE(review): likewise unset by the float-output ops. */
+  svm_vector_math(&value, &vector, (NodeVectorMathType)type, a, b, scale);
 
-  if (stack_valid(node1.y))
-    stack_store_float(stack, node1.y, f);
-  if (stack_valid(node1.z))
-    stack_store_float3(stack, node1.z, v);
+  if (stack_valid(value_stack_offset))
+    stack_store_float(stack, value_stack_offset, value);
+  if (stack_valid(vector_stack_offset))
+    stack_store_float3(stack, vector_stack_offset, vector);
 }
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_math_util.h b/intern/cycles/kernel/svm/svm_math_util.h
index e3544515f1b..c07a1e4ed98 100644
--- a/intern/cycles/kernel/svm/svm_math_util.h
+++ b/intern/cycles/kernel/svm/svm_math_util.h
@@ -16,99 +16,130 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device float average_fac(float3 v)
-{
-  return (fabsf(v.x) + fabsf(v.y) + fabsf(v.z)) / 3.0f;
-}
-
 ccl_device void svm_vector_math(
-    float *Fac, float3 *Vector, NodeVectorMath type, float3 Vector1, float3 Vector2)
+    float *value, float3 *vector, NodeVectorMathType type, float3 a, float3 b, float scale)
 {
-  if (type == NODE_VECTOR_MATH_ADD) {
-    *Vector = Vector1 + Vector2;
-    *Fac = average_fac(*Vector);
-  }
-  else if (type == NODE_VECTOR_MATH_SUBTRACT) {
-    *Vector = Vector1 - Vector2;
-    *Fac = average_fac(*Vector);
-  }
-  else if (type == NODE_VECTOR_MATH_AVERAGE) {
-    *Vector = safe_normalize_len(Vector1 + Vector2, Fac);
-  }
-  else if (type == NODE_VECTOR_MATH_DOT_PRODUCT) {
-    *Fac = dot(Vector1, Vector2);
-    *Vector = make_float3(0.0f, 0.0f, 0.0f);
-  }
-  else if (type == NODE_VECTOR_MATH_CROSS_PRODUCT) {
-    *Vector = safe_normalize_len(cross(Vector1, Vector2), Fac);
-  }
-  else if (type == NODE_VECTOR_MATH_NORMALIZE) {
-    *Vector = safe_normalize_len(Vector1, Fac);
-  }
-  else {
-    *Fac = 0.0f;
-    *Vector = make_float3(0.0f, 0.0f, 0.0f);
+  switch (type) { /* Each case writes just one of *value / *vector; only default sets both. */
+    case NODE_VECTOR_MATH_ADD:
+      *vector = a + b;
+      break;
+    case NODE_VECTOR_MATH_SUBTRACT:
+      *vector = a - b;
+      break;
+    case NODE_VECTOR_MATH_MULTIPLY:
+      *vector = a * b;
+      break;
+    case NODE_VECTOR_MATH_DIVIDE:
+      *vector = safe_divide_float3_float3(a, b);
+      break;
+    case NODE_VECTOR_MATH_CROSS_PRODUCT:
+      *vector = cross(a, b);
+      break;
+    case NODE_VECTOR_MATH_PROJECT:
+      *vector = project(a, b);
+      break;
+    case NODE_VECTOR_MATH_REFLECT:
+      *vector = reflect(a, b);
+      break;
+    case NODE_VECTOR_MATH_DOT_PRODUCT:
+      *value = dot(a, b);
+      break;
+    case NODE_VECTOR_MATH_DISTANCE:
+      *value = distance(a, b);
+      break;
+    case NODE_VECTOR_MATH_LENGTH:
+      *value = len(a);
+      break;
+    case NODE_VECTOR_MATH_SCALE:
+      *vector = a * scale; /* The only case that reads scale. */
+      break;
+    case NODE_VECTOR_MATH_NORMALIZE:
+      *vector = safe_normalize(a);
+      break;
+    case NODE_VECTOR_MATH_SNAP:
+      *vector = floor(safe_divide_float3_float3(a, b)) * b; /* Snap a to a multiple of b. */
+      break;
+    case NODE_VECTOR_MATH_FLOOR:
+      *vector = floor(a);
+      break;
+    case NODE_VECTOR_MATH_CEIL:
+      *vector = ceil(a);
+      break;
+    case NODE_VECTOR_MATH_MODULO:
+      *vector = make_float3(safe_modulo(a.x, b.x), safe_modulo(a.y, b.y), safe_modulo(a.z, b.z));
+      break;
+    case NODE_VECTOR_MATH_FRACTION:
+      *vector = a - floor(a);
+      break;
+    case NODE_VECTOR_MATH_ABSOLUTE:
+      *vector = fabs(a);
+      break;
+    case NODE_VECTOR_MATH_MINIMUM:
+      *vector = min(a, b);
+      break;
+    case NODE_VECTOR_MATH_MAXIMUM:
+      *vector = max(a, b);
+      break;
+    default: /* Unknown type: zero both outputs. */
+      *vector = make_float3(0.0f, 0.0f, 0.0f);
+      *value = 0.0f;
   }
 }
 
-ccl_device float svm_math(NodeMath type, float Fac1, float Fac2)
+ccl_device float svm_math(NodeMathType type, float a, float b) /* b is unused by unary ops. */
 {
-  float Fac;
-
-  if (type == NODE_MATH_ADD)
-    Fac = Fac1 + Fac2;
-  else if (type == NODE_MATH_SUBTRACT)
-    Fac = Fac1 - Fac2;
-  else if (type == NODE_MATH_MULTIPLY)
-    Fac = Fac1 * Fac2;
-  else if (type == NODE_MATH_DIVIDE)
-    Fac = safe_divide(Fac1, Fac2);
-  else if (type == NODE_MATH_SINE)
-    Fac = sinf(Fac1);
-  else if (type == NODE_MATH_COSINE)
-    Fac = cosf(Fac1);
-  else if (type == NODE_MATH_TANGENT)
-    Fac = tanf(Fac1);
-  else if (type == NODE_MATH_ARCSINE)
-    Fac = safe_asinf(Fac1);
-  else if (type == NODE_MATH_ARCCOSINE)
-    Fac = safe_acosf(Fac1);
-  else if (type == NODE_MATH_ARCTANGENT)
-    Fac = atanf(Fac1);
-  else if (type == NODE_MATH_POWER)
-    Fac = safe_powf(Fac1, Fac2);
-  else if (type == NODE_MATH_LOGARITHM)
-    Fac = safe_logf(Fac1, Fac2);
-  else if (type == NODE_MATH_MINIMUM)
-    Fac = fminf(Fac1, Fac2);
-  else if (type == NODE_MATH_MAXIMUM)
-    Fac = fmaxf(Fac1, Fac2);
-  else if (type == NODE_MATH_ROUND)
-    Fac = floorf(Fac1 + 0.5f);
-  else if (type == NODE_MATH_LESS_THAN)
-    Fac = Fac1 < Fac2;
-  else if (type == NODE_MATH_GREATER_THAN)
-    Fac = Fac1 > Fac2;
-  else if (type == NODE_MATH_MODULO)
-    Fac = safe_modulo(Fac1, Fac2);
-  else if (type == NODE_MATH_ABSOLUTE)
-    Fac = fabsf(Fac1);
-  else if (type == NODE_MATH_ARCTAN2)
-    Fac = atan2f(Fac1, Fac2);
-  else if (type == NODE_MATH_FLOOR)
-    Fac = floorf(Fac1);
-  else if (type == NODE_MATH_CEIL)
-    Fac = ceilf(Fac1);
-  else if (type == NODE_MATH_FRACT)
-    Fac = Fac1 - floorf(Fac1);
-  else if (type == NODE_MATH_SQRT)
-    Fac = safe_sqrtf(Fac1);
-  else if (type == NODE_MATH_CLAMP)
-    Fac = saturate(Fac1);
-  else
-    Fac = 0.0f;
-
-  return Fac;
+  switch (type) {
+    case NODE_MATH_ADD:
+      return a + b;
+    case NODE_MATH_SUBTRACT:
+      return a - b;
+    case NODE_MATH_MULTIPLY:
+      return a * b;
+    case NODE_MATH_DIVIDE:
+      return safe_divide(a, b);
+    case NODE_MATH_POWER:
+      return safe_powf(a, b);
+    case NODE_MATH_LOGARITHM:
+      return safe_logf(a, b);
+    case NODE_MATH_SQRT:
+      return safe_sqrtf(a);
+    case NODE_MATH_ABSOLUTE:
+      return fabsf(a);
+    case NODE_MATH_MINIMUM:
+      return fminf(a, b);
+    case NODE_MATH_MAXIMUM:
+      return fmaxf(a, b);
+    case NODE_MATH_LESS_THAN:
+      return a < b; /* Comparison result converts to 1.0f or 0.0f. */
+    case NODE_MATH_GREATER_THAN:
+      return a > b; /* Comparison result converts to 1.0f or 0.0f. */
+    case NODE_MATH_ROUND:
+      return floorf(a + 0.5f); /* Halves round toward +infinity. */
+    case NODE_MATH_FLOOR:
+      return floorf(a);
+    case NODE_MATH_CEIL:
+      return ceilf(a);
+    case NODE_MATH_FRACTION:
+      return a - floorf(a); /* Non-negative for finite a, e.g. -0.25 -> 0.75. */
+    case NODE_MATH_MODULO:
+      return safe_modulo(a, b);
+    case NODE_MATH_SINE:
+      return sinf(a);
+    case NODE_MATH_COSINE:
+      return cosf(a);
+    case NODE_MATH_TANGENT:
+      return tanf(a);
+    case NODE_MATH_ARCSINE:
+      return safe_asinf(a);
+    case NODE_MATH_ARCCOSINE:
+      return safe_acosf(a);
+    case NODE_MATH_ARCTANGENT:
+      return atanf(a);
+    case NODE_MATH_ARCTAN2:
+      return atan2f(a, b);
+    default: /* Unhandled types evaluate to 0. */
+      return 0.0f;
+  }
 }
 
 /* Calculate color in range 800..12000 using an approximation
diff --git a/intern/cycles/kernel/svm/svm_musgrave.h b/intern/cycles/kernel/svm/svm_musgrave.h
index 67fb5ca6241..9291c7e7295 100644
--- a/intern/cycles/kernel/svm/svm_musgrave.h
+++ b/intern/cycles/kernel/svm/svm_musgrave.h
@@ -25,7 +25,10 @@ CCL_NAMESPACE_BEGIN
  * from "Texturing and Modelling: A procedural approach"
  */
 
-ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity, float octaves)
+ccl_device_noinline_cpu float noise_musgrave_fBm(float3 p,
+                                                 float H,
+                                                 float lacunarity,
+                                                 float octaves)
 {
   float rmd;
   float value = 0.0f;
@@ -53,10 +56,10 @@ ccl_device_noinline float noise_musgrave_fBm(float3 p, float H, float lacunarity
  * octaves: number of frequencies in the fBm
  */
 
-ccl_device_noinline float noise_musgrave_multi_fractal(float3 p,
-                                                       float H,
-                                                       float lacunarity,
-                                                       float octaves)
+ccl_device_noinline_cpu float noise_musgrave_multi_fractal(float3 p,
+                                                           float H,
+                                                           float lacunarity,
+                                                           float octaves)
 {
   float rmd;
   float value = 1.0f;
@@ -85,7 +88,7 @@ ccl_device_noinline float noise_musgrave_multi_fractal(float3 p,
  * offset: raises the terrain from `sea level'
  */
 
-ccl_device_noinline float noise_musgrave_hetero_terrain(
+ccl_device_noinline_cpu float noise_musgrave_hetero_terrain(
     float3 p, float H, float lacunarity, float octaves, float offset)
 {
   float value, increment, rmd;
@@ -121,7 +124,7 @@ ccl_device_noinline float noise_musgrave_hetero_terrain(
  * offset: raises the terrain from `sea level'
  */
 
-ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
+ccl_device_noinline_cpu float noise_musgrave_hybrid_multi_fractal(
     float3 p, float H, float lacunarity, float octaves, float offset, float gain)
 {
   float result, signal, weight, rmd;
@@ -159,7 +162,7 @@ ccl_device_noinline float noise_musgrave_hybrid_multi_fractal(
  * offset: raises the terrain from `sea level'
  */
 
-ccl_device_noinline float noise_musgrave_ridged_multi_fractal(
+ccl_device_noinline_cpu float noise_musgrave_ridged_multi_fractal(
     float3 p, float H, float lacunarity, float octaves, float offset, float gain)
 {
   float result, signal, weight;
@@ -222,10 +225,10 @@ ccl_device void svm_node_tex_musgrave(
   uint dimension_offset, lacunarity_offset, detail_offset, offset_offset;
   uint gain_offset, scale_offset;
 
-  decode_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset);
-  decode_node_uchar4(
+  svm_unpack_node_uchar4(node.y, &type, &co_offset, &color_offset, &fac_offset);
+  svm_unpack_node_uchar4(
       node.z, &dimension_offset, &lacunarity_offset, &detail_offset, &offset_offset);
-  decode_node_uchar4(node.w, &gain_offset, &scale_offset, NULL, NULL);
+  svm_unpack_node_uchar2(node.w, &gain_offset, &scale_offset);
 
   float3 co = stack_load_float3(stack, co_offset);
   float dimension = stack_load_float_default(stack, dimension_offset, node2.x);
diff --git a/intern/cycles/kernel/svm/svm_noise.h b/intern/cycles/kernel/svm/svm_noise.h
index 322579ccfe3..dd375af27e5 100644
--- a/intern/cycles/kernel/svm/svm_noise.h
+++ b/intern/cycles/kernel/svm/svm_noise.h
@@ -41,42 +41,6 @@ ccl_device_inline ssei quick_floor_sse(const ssef &x)
 }
 #endif
 
-ccl_device uint hash(uint kx, uint ky, uint kz)
-{
-  // define some handy macros
-#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
-#define final(a, b, c) \
-  { \
-    c ^= b; \
-    c -= rot(b, 14); \
-    a ^= c; \
-    a -= rot(c, 11); \
-    b ^= a; \
-    b -= rot(a, 25); \
-    c ^= b; \
-    c -= rot(b, 16); \
-    a ^= c; \
-    a -= rot(c, 4); \
-    b ^= a; \
-    b -= rot(a, 14); \
-    c ^= b; \
-    c -= rot(b, 24); \
-  }
-  // now hash the data!
-  uint a, b, c, len = 3;
-  a = b = c = 0xdeadbeef + (len << 2) + 13;
-
-  c += kz;
-  b += ky;
-  a += kx;
-  final(a, b, c);
-
-  return c;
-  // macros not needed anymore
-#undef rot
-#undef final
-}
-
 #ifdef __KERNEL_SSE2__
 ccl_device_inline ssei hash_sse(const ssei &kx, const ssei &ky, const ssei &kz)
 {
@@ -218,7 +182,7 @@ ccl_device_inline ssef scale3_sse(const ssef &result)
 #endif
 
 #ifndef __KERNEL_SSE2__
-ccl_device_noinline float perlin(float x, float y, float z)
+ccl_device_noinline_cpu float perlin(float x, float y, float z)
 {
   int X;
   float fx = floorfrac(x, &X);
@@ -236,17 +200,19 @@ ccl_device_noinline float perlin(float x, float y, float z)
   result = nerp(
       w,
       nerp(v,
-           nerp(u, grad(hash(X, Y, Z), fx, fy, fz), grad(hash(X + 1, Y, Z), fx - 1.0f, fy, fz)),
            nerp(u,
-                grad(hash(X, Y + 1, Z), fx, fy - 1.0f, fz),
-                grad(hash(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))),
+                grad(hash_uint3(X, Y, Z), fx, fy, fz),
+                grad(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz)),
+           nerp(u,
+                grad(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz),
+                grad(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz))),
       nerp(v,
            nerp(u,
-                grad(hash(X, Y, Z + 1), fx, fy, fz - 1.0f),
-                grad(hash(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)),
+                grad(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f),
+                grad(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f)),
            nerp(u,
-                grad(hash(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
-                grad(hash(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f))));
+                grad(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
+                grad(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f))));
   float r = scale3(result);
 
   /* can happen for big coordinates, things even out to 0.0 then anyway */
@@ -312,16 +278,16 @@ ccl_device float snoise(float3 p)
 ccl_device float cellnoise(float3 p)
 {
   int3 ip = quick_floor_to_int3(p);
-  return bits_to_01(hash(ip.x, ip.y, ip.z));
+  return hash_uint3_to_float(ip.x, ip.y, ip.z); /* Depends only on the integer cell ip. */
 }
 
 ccl_device float3 cellnoise3(float3 p)
 {
   int3 ip = quick_floor_to_int3(p);
 #ifndef __KERNEL_SSE__
-  float r = bits_to_01(hash(ip.x, ip.y, ip.z));
-  float g = bits_to_01(hash(ip.y, ip.x, ip.z));
-  float b = bits_to_01(hash(ip.y, ip.z, ip.x));
+  float r = hash_uint3_to_float(ip.x, ip.y, ip.z);
+  float g = hash_uint3_to_float(ip.y, ip.x, ip.z);
+  float b = hash_uint3_to_float(ip.y, ip.z, ip.x);
   return make_float3(r, g, b);
 #else
   ssei ip_yxz = shuffle<1, 0, 2, 3>(ssei(ip.m128));
diff --git a/intern/cycles/kernel/svm/svm_noisetex.h b/intern/cycles/kernel/svm/svm_noisetex.h
index 3324e86fcd8..91dc11691e6 100644
--- a/intern/cycles/kernel/svm/svm_noisetex.h
+++ b/intern/cycles/kernel/svm/svm_noisetex.h
@@ -23,8 +23,8 @@ ccl_device void svm_node_tex_noise(
 {
   uint co_offset, scale_offset, detail_offset, distortion_offset, fac_offset, color_offset;
 
-  decode_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
-  decode_node_uchar4(node.z, &color_offset, &fac_offset, NULL, NULL);
+  svm_unpack_node_uchar4(node.y, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
+  svm_unpack_node_uchar2(node.z, &color_offset, &fac_offset);
 
   uint4 node2 = read_node(kg, offset);
 
diff --git a/intern/cycles/kernel/svm/svm_ramp.h b/intern/cycles/kernel/svm/svm_ramp.h
index 6084ee35a1f..85ccf39144b 100644
--- a/intern/cycles/kernel/svm/svm_ramp.h
+++ b/intern/cycles/kernel/svm/svm_ramp.h
@@ -59,7 +59,7 @@ ccl_device void svm_node_rgb_ramp(
   uint fac_offset, color_offset, alpha_offset;
   uint interpolate = node.z;
 
-  decode_node_uchar4(node.y, &fac_offset, &color_offset, &alpha_offset, NULL);
+  svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &alpha_offset);
 
   uint table_size = read_node(kg, offset).x;
 
@@ -78,7 +78,7 @@ ccl_device void svm_node_curves(
     KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
   uint fac_offset, color_offset, out_offset;
-  decode_node_uchar4(node.y, &fac_offset, &color_offset, &out_offset, NULL);
+  svm_unpack_node_uchar3(node.y, &fac_offset, &color_offset, &out_offset);
 
   uint table_size = read_node(kg, offset).x;
 
diff --git a/intern/cycles/kernel/svm/svm_tex_coord.h b/intern/cycles/kernel/svm/svm_tex_coord.h
index 1fb3e20f9e0..a876d6bc916 100644
--- a/intern/cycles/kernel/svm/svm_tex_coord.h
+++ b/intern/cycles/kernel/svm/svm_tex_coord.h
@@ -257,7 +257,7 @@ ccl_device void svm_node_tex_coord_bump_dy(
 ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
   uint color_offset, strength_offset, normal_offset, space;
-  decode_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
+  svm_unpack_node_uchar4(node.y, &color_offset, &strength_offset, &normal_offset, &space);
 
   float3 color = stack_load_float3(stack, color_offset);
   color = 2.0f * make_float3(color.x - 0.5f, color.y - 0.5f, color.z - 0.5f);
@@ -349,7 +349,7 @@ ccl_device void svm_node_normal_map(KernelGlobals *kg, ShaderData *sd, float *st
 ccl_device void svm_node_tangent(KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node)
 {
   uint tangent_offset, direction_type, axis;
-  decode_node_uchar4(node.y, &tangent_offset, &direction_type, &axis, NULL);
+  svm_unpack_node_uchar3(node.y, &tangent_offset, &direction_type, &axis);
 
   float3 tangent;
   float3 attribute_value;
diff --git a/intern/cycles/kernel/svm/svm_types.h b/intern/cycles/kernel/svm/svm_types.h
index ea92fd7ce59..a3caa1ab68d 100644
--- a/intern/cycles/kernel/svm/svm_types.h
+++ b/intern/cycles/kernel/svm/svm_types.h
@@ -138,6 +138,9 @@ typedef enum ShaderNodeType {
   NODE_VECTOR_DISPLACEMENT,
   NODE_PRINCIPLED_VOLUME,
   NODE_IES,
+  NODE_MAP_RANGE,
+  NODE_CLAMP,
+  NODE_TEX_WHITE_NOISE,
 } ShaderNodeType;
 
 typedef enum NodeAttributeType {
@@ -158,6 +161,7 @@ typedef enum NodeGeometry {
 
 typedef enum NodeObjectInfo {
   NODE_INFO_OB_LOCATION,
+  NODE_INFO_OB_COLOR,
   NODE_INFO_OB_INDEX,
   NODE_INFO_MAT_INDEX,
   NODE_INFO_OB_RANDOM
@@ -242,7 +246,7 @@ typedef enum NodeMix {
   NODE_MIX_CLAMP /* used for the clamp UI option */
 } NodeMix;
 
-typedef enum NodeMath {
+typedef enum NodeMathType {
   NODE_MATH_ADD,
   NODE_MATH_SUBTRACT,
   NODE_MATH_MULTIPLY,
@@ -265,19 +269,35 @@ typedef enum NodeMath {
   NODE_MATH_ARCTAN2,
   NODE_MATH_FLOOR,
   NODE_MATH_CEIL,
-  NODE_MATH_FRACT,
+  NODE_MATH_FRACTION,
   NODE_MATH_SQRT,
-  NODE_MATH_CLAMP /* used for the clamp UI option */
-} NodeMath;
+} NodeMathType;
 
-typedef enum NodeVectorMath {
+typedef enum NodeVectorMathType { /* NOTE(review): entries renumbered; check stored values. */
   NODE_VECTOR_MATH_ADD,
   NODE_VECTOR_MATH_SUBTRACT,
-  NODE_VECTOR_MATH_AVERAGE,
-  NODE_VECTOR_MATH_DOT_PRODUCT,
+  NODE_VECTOR_MATH_MULTIPLY,
+  NODE_VECTOR_MATH_DIVIDE,
+
   NODE_VECTOR_MATH_CROSS_PRODUCT,
-  NODE_VECTOR_MATH_NORMALIZE
-} NodeVectorMath;
+  NODE_VECTOR_MATH_PROJECT,
+  NODE_VECTOR_MATH_REFLECT,
+  NODE_VECTOR_MATH_DOT_PRODUCT, /* float output in svm_vector_math */
+
+  NODE_VECTOR_MATH_DISTANCE, /* float output in svm_vector_math */
+  NODE_VECTOR_MATH_LENGTH,   /* float output in svm_vector_math */
+  NODE_VECTOR_MATH_SCALE,    /* uses the extra float scale input */
+  NODE_VECTOR_MATH_NORMALIZE,
+
+  NODE_VECTOR_MATH_SNAP,
+  NODE_VECTOR_MATH_FLOOR,
+  NODE_VECTOR_MATH_CEIL,
+  NODE_VECTOR_MATH_MODULO,
+  NODE_VECTOR_MATH_FRACTION,
+  NODE_VECTOR_MATH_ABSOLUTE,
+  NODE_VECTOR_MATH_MINIMUM,
+  NODE_VECTOR_MATH_MAXIMUM,
+} NodeVectorMathType;
 
 typedef enum NodeVectorTransformType {
   NODE_VECTOR_TRANSFORM_TYPE_VECTOR,
diff --git a/intern/cycles/kernel/svm/svm_vector_transform.h b/intern/cycles/kernel/svm/svm_vector_transform.h
index 7ec0f07f2e4..1e95492cf1b 100644
--- a/intern/cycles/kernel/svm/svm_vector_transform.h
+++ b/intern/cycles/kernel/svm/svm_vector_transform.h
@@ -26,8 +26,8 @@ ccl_device void svm_node_vector_transform(KernelGlobals *kg,
   uint itype, ifrom, ito;
   uint vector_in, vector_out;
 
-  decode_node_uchar4(node.y, &itype, &ifrom, &ito, NULL);
-  decode_node_uchar4(node.z, &vector_in, &vector_out, NULL, NULL);
+  svm_unpack_node_uchar3(node.y, &itype, &ifrom, &ito);
+  svm_unpack_node_uchar2(node.z, &vector_in, &vector_out);
 
   float3 in = stack_load_float3(stack, vector_in);
 
diff --git a/intern/cycles/kernel/svm/svm_voronoi.h b/intern/cycles/kernel/svm/svm_voronoi.h
index 3e28a316169..3d7fa523968 100644
--- a/intern/cycles/kernel/svm/svm_voronoi.h
+++ b/intern/cycles/kernel/svm/svm_voronoi.h
@@ -116,8 +116,8 @@ ccl_device void svm_node_tex_voronoi(
   uint co_offset, coloring, distance, feature;
   uint scale_offset, e_offset, fac_offset, color_offset;
 
-  decode_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature);
-  decode_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset);
+  svm_unpack_node_uchar4(node.y, &co_offset, &coloring, &distance, &feature);
+  svm_unpack_node_uchar4(node.z, &scale_offset, &e_offset, &fac_offset, &color_offset);
 
   float3 co = stack_load_float3(stack, co_offset);
   float scale = stack_load_float_default(stack, scale_offset, node2.x);
diff --git a/intern/cycles/kernel/svm/svm_voxel.h b/intern/cycles/kernel/svm/svm_voxel.h
index 26d8cc71d3b..b79be8e5bde 100644
--- a/intern/cycles/kernel/svm/svm_voxel.h
+++ b/intern/cycles/kernel/svm/svm_voxel.h
@@ -23,7 +23,7 @@ ccl_device void svm_node_tex_voxel(
     KernelGlobals *kg, ShaderData *sd, float *stack, uint4 node, int *offset)
 {
   uint co_offset, density_out_offset, color_out_offset, space;
-  decode_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
+  svm_unpack_node_uchar4(node.z, &co_offset, &density_out_offset, &color_out_offset, &space);
 #ifdef __VOLUME__
   int id = node.y;
   float3 co = stack_load_float3(stack, co_offset);
diff --git a/intern/cycles/kernel/svm/svm_wave.h b/intern/cycles/kernel/svm/svm_wave.h
index 003ad7dc63a..baaa89ab0cb 100644
--- a/intern/cycles/kernel/svm/svm_wave.h
+++ b/intern/cycles/kernel/svm/svm_wave.h
@@ -18,12 +18,12 @@ CCL_NAMESPACE_BEGIN
 
 /* Wave */
 
-ccl_device_noinline float svm_wave(NodeWaveType type,
-                                   NodeWaveProfile profile,
-                                   float3 p,
-                                   float detail,
-                                   float distortion,
-                                   float dscale)
+ccl_device_noinline_cpu float svm_wave(NodeWaveType type,
+                                       NodeWaveProfile profile,
+                                       float3 p,
+                                       float detail,
+                                       float distortion,
+                                       float dscale)
 {
   float n;
 
@@ -54,8 +54,8 @@ ccl_device void svm_node_tex_wave(
   uint co_offset, scale_offset, detail_offset, dscale_offset, distortion_offset, color_offset,
       fac_offset;
 
-  decode_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset);
-  decode_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
+  svm_unpack_node_uchar4(node.y, &type, &color_offset, &fac_offset, &dscale_offset);
+  svm_unpack_node_uchar4(node.z, &co_offset, &scale_offset, &detail_offset, &distortion_offset);
 
   float3 co = stack_load_float3(stack, co_offset);
   float scale = stack_load_float_default(stack, scale_offset, node2.x);
diff --git a/intern/cycles/kernel/svm/svm_white_noise.h b/intern/cycles/kernel/svm/svm_white_noise.h
new file mode 100644
index 00000000000..71d4591d25d
--- /dev/null
+++ b/intern/cycles/kernel/svm/svm_white_noise.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+/* White Noise Texture Node: hashes the vector and/or w input into a single float. */
+ccl_device void svm_node_tex_white_noise(KernelGlobals *kg,
+                                         ShaderData *sd,
+                                         float *stack,
+                                         uint dimensions,           /* handled values: 1 to 4 */
+                                         uint inputs_stack_offsets, /* packed: vector, w */
+                                         uint value_stack_offset,
+                                         int *offset)
+{
+  uint vector_stack_offset, w_stack_offset;
+  svm_unpack_node_uchar2(inputs_stack_offsets, &vector_stack_offset, &w_stack_offset);
+
+  float3 vector = stack_load_float3(stack, vector_stack_offset);
+  float w = stack_load_float(stack, w_stack_offset);
+
+  float value;
+  switch (dimensions) {
+    case 1: /* Only w is hashed. */
+      value = hash_float_to_float(w);
+      break;
+    case 2: /* Hashes vector.x and vector.y. */
+      value = hash_float2_to_float(make_float2(vector.x, vector.y));
+      break;
+    case 3: /* Hashes the full vector. */
+      value = hash_float3_to_float(vector);
+      break;
+    case 4: /* Hashes the vector together with w. */
+      value = hash_float4_to_float(make_float4(vector.x, vector.y, vector.z, w));
+      break;
+    default:
+      value = 0.0f;
+      kernel_assert(0); /* Any other dimension count is unexpected. */
+      break;
+  }
+  stack_store_float(stack, value_stack_offset, value);
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/kernel/svm/svm_wireframe.h b/intern/cycles/kernel/svm/svm_wireframe.h
index 55e61d0e8c7..49158bd86d5 100644
--- a/intern/cycles/kernel/svm/svm_wireframe.h
+++ b/intern/cycles/kernel/svm/svm_wireframe.h
@@ -93,7 +93,7 @@ ccl_device void svm_node_wireframe(KernelGlobals *kg, ShaderData *sd, float *sta
   uint in_size = node.y;
   uint out_fac = node.z;
   uint use_pixel_size, bump_offset;
-  decode_node_uchar4(node.w, &use_pixel_size, &bump_offset, NULL, NULL);
+  svm_unpack_node_uchar2(node.w, &use_pixel_size, &bump_offset);
 
   /* Input Data */
   float size = stack_load_float(stack, in_size);
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt
index c79e5a23ea1..53196b013f6 100644
--- a/intern/cycles/render/CMakeLists.txt
+++ b/intern/cycles/render/CMakeLists.txt
@@ -85,8 +85,6 @@ if(WITH_CYCLES_OSL)
   )
 endif()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${RTTI_DISABLE_FLAGS}")
-
 include_directories(${INC})
 include_directories(SYSTEM ${INC_SYS})
 
diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp
index 73893921500..b906357b7b5 100644
--- a/intern/cycles/render/bake.cpp
+++ b/intern/cycles/render/bake.cpp
@@ -124,7 +124,7 @@ BakeData *BakeManager::init(const int object, const size_t tri_offset, const siz
 void BakeManager::set_shader_limit(const size_t x, const size_t y)
 {
   m_shader_limit = x * y;
-  m_shader_limit = (size_t)pow(2, ceil(log(m_shader_limit) / log(2)));
+  m_shader_limit = (size_t)pow(2, std::ceil(log(m_shader_limit) / log(2))); /* round up to 2^k */
 }
 
 bool BakeManager::bake(Device *device,
diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp
index 7591d9dda0c..38306a63c74 100644
--- a/intern/cycles/render/camera.cpp
+++ b/intern/cycles/render/camera.cpp
@@ -565,8 +565,7 @@ float3 Camera::transform_raster_to_world(float raster_x, float raster_y)
 BoundBox Camera::viewplane_bounds_get()
 {
   /* TODO(sergey): This is all rather stupid, but is there a way to perform
-   * checks we need in a more clear and smart fasion?
-   */
+   * checks we need in a more clear and smart fashion? */
   BoundBox bounds = BoundBox::empty;
 
   if (type == CAMERA_PANORAMA) {
diff --git a/intern/cycles/render/constant_fold.cpp b/intern/cycles/render/constant_fold.cpp
index e475ff60eef..851d4b71df8 100644
--- a/intern/cycles/render/constant_fold.cpp
+++ b/intern/cycles/render/constant_fold.cpp
@@ -301,7 +301,7 @@ void ConstantFolder::fold_mix(NodeMix type, bool clamp) const
   }
 }
 
-void ConstantFolder::fold_math(NodeMath type, bool clamp) const
+void ConstantFolder::fold_math(NodeMathType type) const
 {
   ShaderInput *value1_in = node->input("Value1");
   ShaderInput *value2_in = node->input("Value2");
@@ -310,25 +310,25 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const
     case NODE_MATH_ADD:
       /* X + 0 == 0 + X == X */
       if (is_zero(value1_in)) {
-        try_bypass_or_make_constant(value2_in, clamp);
+        try_bypass_or_make_constant(value2_in);
       }
       else if (is_zero(value2_in)) {
-        try_bypass_or_make_constant(value1_in, clamp);
+        try_bypass_or_make_constant(value1_in);
       }
       break;
     case NODE_MATH_SUBTRACT:
       /* X - 0 == X */
       if (is_zero(value2_in)) {
-        try_bypass_or_make_constant(value1_in, clamp);
+        try_bypass_or_make_constant(value1_in);
       }
       break;
     case NODE_MATH_MULTIPLY:
       /* X * 1 == 1 * X == X */
       if (is_one(value1_in)) {
-        try_bypass_or_make_constant(value2_in, clamp);
+        try_bypass_or_make_constant(value2_in);
       }
       else if (is_one(value2_in)) {
-        try_bypass_or_make_constant(value1_in, clamp);
+        try_bypass_or_make_constant(value1_in);
       }
       /* X * 0 == 0 * X == 0 */
       else if (is_zero(value1_in) || is_zero(value2_in)) {
@@ -338,7 +338,7 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const
     case NODE_MATH_DIVIDE:
       /* X / 1 == X */
       if (is_one(value2_in)) {
-        try_bypass_or_make_constant(value1_in, clamp);
+        try_bypass_or_make_constant(value1_in);
       }
       /* 0 / X == 0 */
       else if (is_zero(value1_in)) {
@@ -352,17 +352,18 @@ void ConstantFolder::fold_math(NodeMath type, bool clamp) const
       }
       /* X ^ 1 == X */
       else if (is_one(value2_in)) {
-        try_bypass_or_make_constant(value1_in, clamp);
+        try_bypass_or_make_constant(value1_in);
       }
     default:
       break;
   }
 }
 
-void ConstantFolder::fold_vector_math(NodeVectorMath type) const
+void ConstantFolder::fold_vector_math(NodeVectorMathType type) const
 {
   ShaderInput *vector1_in = node->input("Vector1");
   ShaderInput *vector2_in = node->input("Vector2");
+  ShaderInput *scale_in = node->input("Scale");
 
   switch (type) {
     case NODE_VECTOR_MATH_ADD:
@@ -380,6 +381,27 @@ void ConstantFolder::fold_vector_math(NodeVectorMath type) const
         try_bypass_or_make_constant(vector1_in);
       }
       break;
+    case NODE_VECTOR_MATH_MULTIPLY:
+      /* X * 0 == 0 * X == 0 */
+      if (is_zero(vector1_in) || is_zero(vector2_in)) {
+        make_zero();
+      } /* X * 1 == 1 * X == X */
+      else if (is_one(vector1_in)) {
+        try_bypass_or_make_constant(vector2_in);
+      }
+      else if (is_one(vector2_in)) {
+        try_bypass_or_make_constant(vector1_in);
+      }
+      break;
+    case NODE_VECTOR_MATH_DIVIDE:
+      /* X / 0 == 0 / X == 0 */
+      if (is_zero(vector1_in) || is_zero(vector2_in)) {
+        make_zero();
+      } /* X / 1 == X */
+      else if (is_one(vector2_in)) {
+        try_bypass_or_make_constant(vector1_in);
+      }
+      break;
     case NODE_VECTOR_MATH_DOT_PRODUCT:
     case NODE_VECTOR_MATH_CROSS_PRODUCT:
       /* X * 0 == 0 * X == 0 */
@@ -387,6 +409,21 @@ void ConstantFolder::fold_vector_math(NodeVectorMath type) const
         make_zero();
       }
       break;
+    case NODE_VECTOR_MATH_LENGTH:
+    case NODE_VECTOR_MATH_ABSOLUTE:
+      if (is_zero(vector1_in)) {
+        make_zero();
+      }
+      break;
+    case NODE_VECTOR_MATH_SCALE:
+      /* X * 0 == 0 * X == 0 */
+      if (is_zero(vector1_in) || is_zero(scale_in)) {
+        make_zero();
+      } /* X * 1 == X */
+      else if (is_one(scale_in)) {
+        try_bypass_or_make_constant(vector1_in);
+      }
+      break;
     default:
       break;
   }
diff --git a/intern/cycles/render/constant_fold.h b/intern/cycles/render/constant_fold.h
index c14b94868dc..881636a9fe1 100644
--- a/intern/cycles/render/constant_fold.h
+++ b/intern/cycles/render/constant_fold.h
@@ -64,8 +64,8 @@ class ConstantFolder {
 
   /* Specific nodes. */
   void fold_mix(NodeMix type, bool clamp) const;
-  void fold_math(NodeMath type, bool clamp) const;
-  void fold_vector_math(NodeVectorMath type) const;
+  void fold_math(NodeMathType type) const;
+  void fold_vector_math(NodeVectorMathType type) const;
 };
 
 CCL_NAMESPACE_END
diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp
index 76258a292e8..b41b0b7b260 100644
--- a/intern/cycles/render/integrator.cpp
+++ b/intern/cycles/render/integrator.cpp
@@ -141,7 +141,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene
   kintegrator->caustics_refractive = caustics_refractive;
   kintegrator->filter_glossy = (filter_glossy == 0.0f) ? FLT_MAX : 1.0f / filter_glossy;
 
-  kintegrator->seed = hash_int(seed);
+  kintegrator->seed = hash_uint2(seed, 0);
 
   kintegrator->use_ambient_occlusion = ((Pass::contains(scene->film->passes, PASS_AO)) ||
                                         dscene->data.background.ao_factor != 0.0f);
diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp
index 5c3f1c35bdc..8c7a21da561 100644
--- a/intern/cycles/render/light.cpp
+++ b/intern/cycles/render/light.cpp
@@ -944,7 +944,7 @@ void LightManager::tag_update(Scene * /*scene*/)
   need_update = true;
 }
 
-int LightManager::add_ies_from_file(ustring filename)
+int LightManager::add_ies_from_file(const string &filename)
 {
   string content;
 
@@ -953,10 +953,10 @@ int LightManager::add_ies_from_file(ustring filename)
     content = "\n";
   }
 
-  return add_ies(ustring(content));
+  return add_ies(content);
 }
 
-int LightManager::add_ies(ustring content)
+int LightManager::add_ies(const string &content)
 {
   uint hash = hash_string(content.c_str());
 
diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h
index 79450ea5f8d..6dd23374818 100644
--- a/intern/cycles/render/light.h
+++ b/intern/cycles/render/light.h
@@ -92,8 +92,8 @@ class LightManager {
   ~LightManager();
 
   /* IES texture management */
-  int add_ies(ustring ies);
-  int add_ies_from_file(ustring filename);
+  int add_ies(const string &ies);
+  int add_ies_from_file(const string &filename);
   void remove_ies(int slot);
 
   void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress);
diff --git a/intern/cycles/render/mesh.cpp b/intern/cycles/render/mesh.cpp
index 91c3a772537..6ac66661859 100644
--- a/intern/cycles/render/mesh.cpp
+++ b/intern/cycles/render/mesh.cpp
@@ -1091,6 +1091,17 @@ bool Mesh::has_true_displacement() const
   return false;
 }
 
+bool Mesh::has_voxel_attributes() const
+{
+  foreach (const Attribute &attr, attributes.attributes) {
+    if (attr.element == ATTR_ELEMENT_VOXEL) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
 float Mesh::motion_time(int step) const
 {
   return (motion_steps > 1) ? 2.0f * step / (motion_steps - 1) - 1.0f : 0.0f;
@@ -2020,15 +2031,7 @@ void MeshManager::device_update_preprocess(Device *device, Scene *scene, Progres
 
     if (need_update && mesh->has_volume) {
       /* Create volume meshes if there is voxel data. */
-      bool has_voxel_attributes = false;
-
-      foreach (Attribute &attr, mesh->attributes.attributes) {
-        if (attr.element == ATTR_ELEMENT_VOXEL) {
-          has_voxel_attributes = true;
-        }
-      }
-
-      if (has_voxel_attributes) {
+      if (mesh->has_voxel_attributes()) {
         if (!volume_images_updated) {
           progress.set_status("Updating Meshes Volume Bounds");
           device_update_volume_images(device, scene, progress);
diff --git a/intern/cycles/render/mesh.h b/intern/cycles/render/mesh.h
index 05c67ccb3b7..5bb6ab328b7 100644
--- a/intern/cycles/render/mesh.h
+++ b/intern/cycles/render/mesh.h
@@ -318,6 +318,7 @@ class Mesh : public Node {
 
   bool has_motion_blur() const;
   bool has_true_displacement() const;
+  bool has_voxel_attributes() const;
 
   /* Convert between normalized -1..1 motion time and index
    * in the VERTEX_MOTION attribute. */
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 8e7969cfbaf..69c1c06f846 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -163,8 +163,10 @@ void TextureMapping::compile(SVMCompiler &compiler, int offset_in, int offset_ou
   }
 
   if (type == NORMAL) {
-    compiler.add_node(NODE_VECTOR_MATH, NODE_VECTOR_MATH_NORMALIZE, offset_out, offset_out);
-    compiler.add_node(NODE_VECTOR_MATH, SVM_STACK_INVALID, offset_out);
+    compiler.add_node(NODE_VECTOR_MATH,
+                      NODE_VECTOR_MATH_NORMALIZE,
+                      compiler.encode_uchar4(offset_out, offset_out, offset_out),
+                      compiler.encode_uchar4(SVM_STACK_INVALID, offset_out));
   }
 }
 
@@ -1067,10 +1069,10 @@ void IESLightNode::get_slot()
 
   if (slot == -1) {
     if (ies.empty()) {
-      slot = light_manager->add_ies_from_file(filename);
+      slot = light_manager->add_ies_from_file(filename.string());
     }
     else {
-      slot = light_manager->add_ies(ies);
+      slot = light_manager->add_ies(ies.string());
     }
   }
 }
@@ -1108,6 +1110,53 @@ void IESLightNode::compile(OSLCompiler &compiler)
   compiler.add(this, "node_ies_light");
 }
 
+/* White Noise Texture */
+
+NODE_DEFINE(WhiteNoiseTextureNode)
+{
+  NodeType *type = NodeType::add("white_noise_texture", create, NodeType::SHADER);
+
+  static NodeEnum dimensions_enum;
+  dimensions_enum.insert("1D", 1);
+  dimensions_enum.insert("2D", 2);
+  dimensions_enum.insert("3D", 3);
+  dimensions_enum.insert("4D", 4);
+  SOCKET_ENUM(dimensions, "Dimensions", dimensions_enum, 3);
+
+  SOCKET_IN_POINT(vector, "Vector", make_float3(0.0f, 0.0f, 0.0f));
+  SOCKET_IN_FLOAT(w, "W", 0.0f);
+
+  SOCKET_OUT_FLOAT(value, "Value");
+
+  return type;
+}
+
+WhiteNoiseTextureNode::WhiteNoiseTextureNode() : ShaderNode(node_type)
+{
+}
+
+void WhiteNoiseTextureNode::compile(SVMCompiler &compiler)
+{
+  ShaderInput *vector_in = input("Vector");
+  ShaderInput *w_in = input("W");
+  ShaderOutput *value_out = output("Value");
+
+  int vector_stack_offset = compiler.stack_assign(vector_in);
+  int w_stack_offset = compiler.stack_assign(w_in);
+  int value_stack_offset = compiler.stack_assign(value_out);
+
+  compiler.add_node(NODE_TEX_WHITE_NOISE,
+                    dimensions,
+                    compiler.encode_uchar4(vector_stack_offset, w_stack_offset),
+                    value_stack_offset);
+}
+
+void WhiteNoiseTextureNode::compile(OSLCompiler &compiler)
+{
+  compiler.parameter(this, "dimensions");
+  compiler.add(this, "node_white_noise_texture");
+}
+
 /* Musgrave Texture */
 
 NODE_DEFINE(MusgraveTextureNode)
@@ -3891,6 +3940,7 @@ NODE_DEFINE(ObjectInfoNode)
   NodeType *type = NodeType::add("object_info", create, NodeType::SHADER);
 
   SOCKET_OUT_VECTOR(location, "Location");
+  SOCKET_OUT_COLOR(color, "Color");
   SOCKET_OUT_FLOAT(object_index, "Object Index");
   SOCKET_OUT_FLOAT(material_index, "Material Index");
   SOCKET_OUT_FLOAT(random, "Random");
@@ -3909,6 +3959,11 @@ void ObjectInfoNode::compile(SVMCompiler &compiler)
     compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_LOCATION, compiler.stack_assign(out));
   }
 
+  out = output("Color");
+  if (!out->links.empty()) {
+    compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_COLOR, compiler.stack_assign(out));
+  }
+
   out = output("Object Index");
   if (!out->links.empty()) {
     compiler.add_node(NODE_OBJECT_INFO, NODE_INFO_OB_INDEX, compiler.stack_assign(out));
@@ -4119,6 +4174,90 @@ void HairInfoNode::compile(OSLCompiler &compiler)
   compiler.add(this, "node_hair_info");
 }
 
+/* Volume Info */
+
+NODE_DEFINE(VolumeInfoNode)
+{
+  NodeType *type = NodeType::add("volume_info", create, NodeType::SHADER);
+
+  SOCKET_OUT_COLOR(color, "Color");
+  SOCKET_OUT_FLOAT(density, "Density");
+  SOCKET_OUT_FLOAT(flame, "Flame");
+  SOCKET_OUT_FLOAT(temperature, "Temperature");
+
+  return type;
+}
+
+VolumeInfoNode::VolumeInfoNode() : ShaderNode(node_type)
+{
+}
+
+/* The requested attributes are not updated after node expansion.
+ * So we explicitly request the required attributes.
+ */
+void VolumeInfoNode::attributes(Shader *shader, AttributeRequestSet *attributes)
+{
+  if (shader->has_volume) {
+    if (!output("Color")->links.empty()) {
+      attributes->add(ATTR_STD_VOLUME_COLOR);
+    }
+    if (!output("Density")->links.empty()) {
+      attributes->add(ATTR_STD_VOLUME_DENSITY);
+    }
+    if (!output("Flame")->links.empty()) {
+      attributes->add(ATTR_STD_VOLUME_FLAME);
+    }
+    if (!output("Temperature")->links.empty()) {
+      attributes->add(ATTR_STD_VOLUME_TEMPERATURE);
+    }
+    attributes->add(ATTR_STD_GENERATED_TRANSFORM);
+  }
+  ShaderNode::attributes(shader, attributes);
+}
+
+void VolumeInfoNode::expand(ShaderGraph *graph)
+{
+  ShaderOutput *color_out = output("Color");
+  if (!color_out->links.empty()) {
+    AttributeNode *attr = new AttributeNode();
+    attr->attribute = "color";
+    graph->add(attr);
+    graph->relink(color_out, attr->output("Color"));
+  }
+
+  ShaderOutput *density_out = output("Density");
+  if (!density_out->links.empty()) {
+    AttributeNode *attr = new AttributeNode();
+    attr->attribute = "density";
+    graph->add(attr);
+    graph->relink(density_out, attr->output("Fac"));
+  }
+
+  ShaderOutput *flame_out = output("Flame");
+  if (!flame_out->links.empty()) {
+    AttributeNode *attr = new AttributeNode();
+    attr->attribute = "flame";
+    graph->add(attr);
+    graph->relink(flame_out, attr->output("Fac"));
+  }
+
+  ShaderOutput *temperature_out = output("Temperature");
+  if (!temperature_out->links.empty()) {
+    AttributeNode *attr = new AttributeNode();
+    attr->attribute = "temperature";
+    graph->add(attr);
+    graph->relink(temperature_out, attr->output("Fac"));
+  }
+}
+
+void VolumeInfoNode::compile(SVMCompiler &)
+{
+}
+
+void VolumeInfoNode::compile(OSLCompiler &)
+{
+}
+
 /* Value */
 
 NODE_DEFINE(ValueNode)
@@ -5259,6 +5398,140 @@ void OutputNode::compile(OSLCompiler &compiler)
     compiler.add(this, "node_output_displacement");
 }
 
+/* Map Range Node */
+
+NODE_DEFINE(MapRangeNode)
+{
+  NodeType *type = NodeType::add("map_range", create, NodeType::SHADER);
+
+  SOCKET_IN_FLOAT(value, "Value", 1.0f);
+  SOCKET_IN_FLOAT(from_min, "From Min", 0.0f);
+  SOCKET_IN_FLOAT(from_max, "From Max", 1.0f);
+  SOCKET_IN_FLOAT(to_min, "To Min", 0.0f);
+  SOCKET_IN_FLOAT(to_max, "To Max", 1.0f);
+
+  SOCKET_OUT_FLOAT(result, "Result");
+
+  return type;
+}
+
+MapRangeNode::MapRangeNode() : ShaderNode(node_type)
+{
+}
+
+void MapRangeNode::expand(ShaderGraph *graph)
+{
+  if (clamp) {
+    ShaderOutput *result_out = output("Result");
+    if (!result_out->links.empty()) {
+      ClampNode *clamp_node = new ClampNode();
+      clamp_node->min = to_min;
+      clamp_node->max = to_max;
+      graph->add(clamp_node);
+      graph->relink(result_out, clamp_node->output("Result"));
+      graph->connect(result_out, clamp_node->input("Value"));
+    }
+  }
+}
+
+void MapRangeNode::constant_fold(const ConstantFolder &folder)
+{
+  if (folder.all_inputs_constant()) {
+    float result;
+    if (from_max != from_min) {
+      result = to_min + ((value - from_min) / (from_max - from_min)) * (to_max - to_min);
+    }
+    else {
+      result = 0.0f;
+    }
+    folder.make_constant(result);
+  }
+}
+
+void MapRangeNode::compile(SVMCompiler &compiler)
+{
+  ShaderInput *value_in = input("Value");
+  ShaderInput *from_min_in = input("From Min");
+  ShaderInput *from_max_in = input("From Max");
+  ShaderInput *to_min_in = input("To Min");
+  ShaderInput *to_max_in = input("To Max");
+  ShaderOutput *result_out = output("Result");
+
+  int value_stack_offset = compiler.stack_assign(value_in);
+  int from_min_stack_offset = compiler.stack_assign_if_linked(from_min_in);
+  int from_max_stack_offset = compiler.stack_assign_if_linked(from_max_in);
+  int to_min_stack_offset = compiler.stack_assign_if_linked(to_min_in);
+  int to_max_stack_offset = compiler.stack_assign_if_linked(to_max_in);
+  int result_stack_offset = compiler.stack_assign(result_out);
+
+  compiler.add_node(
+      NODE_MAP_RANGE,
+      value_stack_offset,
+      compiler.encode_uchar4(
+          from_min_stack_offset, from_max_stack_offset, to_min_stack_offset, to_max_stack_offset),
+      result_stack_offset);
+
+  compiler.add_node(__float_as_int(from_min),
+                    __float_as_int(from_max),
+                    __float_as_int(to_min),
+                    __float_as_int(to_max));
+}
+
+void MapRangeNode::compile(OSLCompiler &compiler)
+{
+  compiler.add(this, "node_map_range");
+}
+
+/* Clamp Node */
+
+NODE_DEFINE(ClampNode)
+{
+  NodeType *type = NodeType::add("clamp", create, NodeType::SHADER);
+
+  SOCKET_IN_FLOAT(value, "Value", 1.0f);
+  SOCKET_IN_FLOAT(min, "Min", 0.0f);
+  SOCKET_IN_FLOAT(max, "Max", 1.0f);
+
+  SOCKET_OUT_FLOAT(result, "Result");
+
+  return type;
+}
+
+ClampNode::ClampNode() : ShaderNode(node_type)
+{
+}
+
+void ClampNode::constant_fold(const ConstantFolder &folder)
+{
+  if (folder.all_inputs_constant()) {
+    folder.make_constant(clamp(value, min, max));
+  }
+}
+
+void ClampNode::compile(SVMCompiler &compiler)
+{
+  ShaderInput *value_in = input("Value");
+  ShaderInput *min_in = input("Min");
+  ShaderInput *max_in = input("Max");
+  ShaderOutput *result_out = output("Result");
+
+  int value_stack_offset = compiler.stack_assign(value_in);
+  int min_stack_offset = compiler.stack_assign(min_in);
+  int max_stack_offset = compiler.stack_assign(max_in);
+  int result_stack_offset = compiler.stack_assign(result_out);
+
+  compiler.add_node(NODE_CLAMP,
+                    value_stack_offset,
+                    compiler.encode_uchar4(min_stack_offset, max_stack_offset),
+                    result_stack_offset);
+  compiler.add_node(__float_as_int(min), __float_as_int(max));
+}
+
+void ClampNode::compile(OSLCompiler &compiler)
+{
+  compiler.add(this, "node_clamp");
+}
+
 /* Math */
 
 NODE_DEFINE(MathNode)
@@ -5288,14 +5561,14 @@ NODE_DEFINE(MathNode)
   type_enum.insert("arctan2", NODE_MATH_ARCTAN2);
   type_enum.insert("floor", NODE_MATH_FLOOR);
   type_enum.insert("ceil", NODE_MATH_CEIL);
-  type_enum.insert("fract", NODE_MATH_FRACT);
+  type_enum.insert("fraction", NODE_MATH_FRACTION);
   type_enum.insert("sqrt", NODE_MATH_SQRT);
   SOCKET_ENUM(type, "Type", type_enum, NODE_MATH_ADD);
 
   SOCKET_BOOLEAN(use_clamp, "Use Clamp", false);
 
-  SOCKET_IN_FLOAT(value1, "Value1", 0.0f);
-  SOCKET_IN_FLOAT(value2, "Value2", 0.0f);
+  SOCKET_IN_FLOAT(value1, "Value1", 0.5f);
+  SOCKET_IN_FLOAT(value2, "Value2", 0.5f);
 
   SOCKET_OUT_FLOAT(value, "Value");
 
@@ -5306,13 +5579,28 @@ MathNode::MathNode() : ShaderNode(node_type)
 {
 }
 
+void MathNode::expand(ShaderGraph *graph)
+{
+  if (use_clamp) {
+    ShaderOutput *result_out = output("Value");
+    if (!result_out->links.empty()) {
+      ClampNode *clamp_node = new ClampNode();
+      clamp_node->min = 0.0f;
+      clamp_node->max = 1.0f;
+      graph->add(clamp_node);
+      graph->relink(result_out, clamp_node->output("Result"));
+      graph->connect(result_out, clamp_node->input("Value"));
+    }
+  }
+}
+
 void MathNode::constant_fold(const ConstantFolder &folder)
 {
   if (folder.all_inputs_constant()) {
-    folder.make_constant_clamp(svm_math(type, value1, value2), use_clamp);
+    folder.make_constant(svm_math(type, value1, value2));
   }
   else {
-    folder.fold_math(type, use_clamp);
+    folder.fold_math(type);
   }
 }
 
@@ -5322,20 +5610,19 @@ void MathNode::compile(SVMCompiler &compiler)
   ShaderInput *value2_in = input("Value2");
   ShaderOutput *value_out = output("Value");
 
-  compiler.add_node(
-      NODE_MATH, type, compiler.stack_assign(value1_in), compiler.stack_assign(value2_in));
-  compiler.add_node(NODE_MATH, compiler.stack_assign(value_out));
+  int value1_stack_offset = compiler.stack_assign(value1_in);
+  int value2_stack_offset = compiler.stack_assign(value2_in);
+  int value_stack_offset = compiler.stack_assign(value_out);
 
-  if (use_clamp) {
-    compiler.add_node(NODE_MATH, NODE_MATH_CLAMP, compiler.stack_assign(value_out));
-    compiler.add_node(NODE_MATH, compiler.stack_assign(value_out));
-  }
+  compiler.add_node(NODE_MATH,
+                    type,
+                    compiler.encode_uchar4(value1_stack_offset, value2_stack_offset),
+                    value_stack_offset);
 }
 
 void MathNode::compile(OSLCompiler &compiler)
 {
   compiler.parameter(this, "type");
-  compiler.parameter(this, "use_clamp");
   compiler.add(this, "node_math");
 }
 
@@ -5348,14 +5635,32 @@ NODE_DEFINE(VectorMathNode)
   static NodeEnum type_enum;
   type_enum.insert("add", NODE_VECTOR_MATH_ADD);
   type_enum.insert("subtract", NODE_VECTOR_MATH_SUBTRACT);
-  type_enum.insert("average", NODE_VECTOR_MATH_AVERAGE);
-  type_enum.insert("dot_product", NODE_VECTOR_MATH_DOT_PRODUCT);
+  type_enum.insert("multiply", NODE_VECTOR_MATH_MULTIPLY);
+  type_enum.insert("divide", NODE_VECTOR_MATH_DIVIDE);
+
   type_enum.insert("cross_product", NODE_VECTOR_MATH_CROSS_PRODUCT);
+  type_enum.insert("project", NODE_VECTOR_MATH_PROJECT);
+  type_enum.insert("reflect", NODE_VECTOR_MATH_REFLECT);
+  type_enum.insert("dot_product", NODE_VECTOR_MATH_DOT_PRODUCT);
+
+  type_enum.insert("distance", NODE_VECTOR_MATH_DISTANCE);
+  type_enum.insert("length", NODE_VECTOR_MATH_LENGTH);
+  type_enum.insert("scale", NODE_VECTOR_MATH_SCALE);
   type_enum.insert("normalize", NODE_VECTOR_MATH_NORMALIZE);
+
+  type_enum.insert("snap", NODE_VECTOR_MATH_SNAP);
+  type_enum.insert("floor", NODE_VECTOR_MATH_FLOOR);
+  type_enum.insert("ceil", NODE_VECTOR_MATH_CEIL);
+  type_enum.insert("modulo", NODE_VECTOR_MATH_MODULO);
+  type_enum.insert("fraction", NODE_VECTOR_MATH_FRACTION);
+  type_enum.insert("absolute", NODE_VECTOR_MATH_ABSOLUTE);
+  type_enum.insert("minimum", NODE_VECTOR_MATH_MINIMUM);
+  type_enum.insert("maximum", NODE_VECTOR_MATH_MAXIMUM);
   SOCKET_ENUM(type, "Type", type_enum, NODE_VECTOR_MATH_ADD);
 
   SOCKET_IN_VECTOR(vector1, "Vector1", make_float3(0.0f, 0.0f, 0.0f));
   SOCKET_IN_VECTOR(vector2, "Vector2", make_float3(0.0f, 0.0f, 0.0f));
+  SOCKET_IN_FLOAT(scale, "Scale", 1.0f);
 
   SOCKET_OUT_FLOAT(value, "Value");
   SOCKET_OUT_VECTOR(vector, "Vector");
@@ -5373,8 +5678,7 @@ void VectorMathNode::constant_fold(const ConstantFolder &folder)
   float3 vector;
 
   if (folder.all_inputs_constant()) {
-    svm_vector_math(&value, &vector, type, vector1, vector2);
-
+    svm_vector_math(&value, &vector, type, vector1, vector2, scale);
     if (folder.output == output("Value")) {
       folder.make_constant(value);
     }
@@ -5391,15 +5695,21 @@ void VectorMathNode::compile(SVMCompiler &compiler)
 {
   ShaderInput *vector1_in = input("Vector1");
   ShaderInput *vector2_in = input("Vector2");
+  ShaderInput *scale_in = input("Scale");
   ShaderOutput *value_out = output("Value");
   ShaderOutput *vector_out = output("Vector");
 
-  compiler.add_node(NODE_VECTOR_MATH,
-                    type,
-                    compiler.stack_assign(vector1_in),
-                    compiler.stack_assign(vector2_in));
+  int vector1_stack_offset = compiler.stack_assign(vector1_in);
+  int vector2_stack_offset = compiler.stack_assign(vector2_in);
+  int scale_stack_offset = compiler.stack_assign(scale_in);
+  int value_stack_offset = compiler.stack_assign_if_linked(value_out);
+  int vector_stack_offset = compiler.stack_assign_if_linked(vector_out);
+
   compiler.add_node(
-      NODE_VECTOR_MATH, compiler.stack_assign(value_out), compiler.stack_assign(vector_out));
+      NODE_VECTOR_MATH,
+      type,
+      compiler.encode_uchar4(vector1_stack_offset, vector2_stack_offset, scale_stack_offset),
+      compiler.encode_uchar4(value_stack_offset, vector_stack_offset));
 }
 
 void VectorMathNode::compile(OSLCompiler &compiler)
diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h
index 6b21be88663..fbed2ff0ef6 100644
--- a/intern/cycles/render/nodes.h
+++ b/intern/cycles/render/nodes.h
@@ -370,6 +370,19 @@ class IESLightNode : public TextureNode {
   void get_slot();
 };
 
+class WhiteNoiseTextureNode : public ShaderNode {
+ public:
+  SHADER_NODE_CLASS(WhiteNoiseTextureNode)
+  virtual int get_group()
+  {
+    return NODE_GROUP_LEVEL_2;
+  }
+
+  int dimensions;
+  float3 vector;
+  float w;
+};
+
 class MappingNode : public ShaderNode {
  public:
   SHADER_NODE_CLASS(MappingNode)
@@ -948,6 +961,21 @@ class HairInfoNode : public ShaderNode {
   }
 };
 
+class VolumeInfoNode : public ShaderNode {
+ public:
+  SHADER_NODE_CLASS(VolumeInfoNode)
+  void attributes(Shader *shader, AttributeRequestSet *attributes);
+  bool has_attribute_dependency()
+  {
+    return true;
+  }
+  bool has_spatial_varying()
+  {
+    return true;
+  }
+  void expand(ShaderGraph *graph);
+};
+
 class ValueNode : public ShaderNode {
  public:
   SHADER_NODE_CLASS(ValueNode)
@@ -1228,6 +1256,31 @@ class BlackbodyNode : public ShaderNode {
   float temperature;
 };
 
+class MapRangeNode : public ShaderNode {
+ public:
+  SHADER_NODE_CLASS(MapRangeNode)
+  void constant_fold(const ConstantFolder &folder);
+  virtual int get_group()
+  {
+    return NODE_GROUP_LEVEL_3;
+  }
+  void expand(ShaderGraph *graph);
+
+  float value, from_min, from_max, to_min, to_max;
+  bool clamp;
+};
+
+class ClampNode : public ShaderNode {
+ public:
+  SHADER_NODE_CLASS(ClampNode)
+  void constant_fold(const ConstantFolder &folder);
+  virtual int get_group()
+  {
+    return NODE_GROUP_LEVEL_3;
+  }
+  float value, min, max;
+};
+
 class MathNode : public ShaderNode {
  public:
   SHADER_NODE_CLASS(MathNode)
@@ -1235,11 +1288,12 @@ class MathNode : public ShaderNode {
   {
     return NODE_GROUP_LEVEL_1;
   }
+  void expand(ShaderGraph *graph);
   void constant_fold(const ConstantFolder &folder);
 
   float value1;
   float value2;
-  NodeMath type;
+  NodeMathType type;
   bool use_clamp;
 };
 
@@ -1266,7 +1320,8 @@ class VectorMathNode : public ShaderNode {
 
   float3 vector1;
   float3 vector2;
-  NodeVectorMath type;
+  float scale;
+  NodeVectorMathType type;
 };
 
 class VectorTransformNode : public ShaderNode {
diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp
index 6c6f8810412..849329a086d 100644
--- a/intern/cycles/render/object.cpp
+++ b/intern/cycles/render/object.cpp
@@ -90,6 +90,7 @@ NODE_DEFINE(Object)
   SOCKET_NODE(mesh, "Mesh", &Mesh::node_type);
   SOCKET_TRANSFORM(tfm, "Transform", transform_identity());
   SOCKET_UINT(visibility, "Visibility", ~0);
+  SOCKET_COLOR(color, "Color", make_float3(0.0f, 0.0f, 0.0f));
   SOCKET_UINT(random_id, "Random ID", 0);
   SOCKET_INT(pass_id, "Pass ID", 0);
   SOCKET_BOOLEAN(use_holdout, "Use Holdout", false);
@@ -371,6 +372,7 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
    */
   float uniform_scale;
   float surface_area = 0.0f;
+  float3 color = ob->color;
   float pass_id = ob->pass_id;
   float random_number = (float)ob->random_id * (1.0f / (float)0xFFFFFFFF);
   int particle_index = (ob->particle_system) ?
@@ -425,6 +427,9 @@ void ObjectManager::device_update_object_transform(UpdateObjectTransformState *s
   kobject.tfm = tfm;
   kobject.itfm = itfm;
   kobject.surface_area = surface_area;
+  kobject.color[0] = color.x;
+  kobject.color[1] = color.y;
+  kobject.color[2] = color.z;
   kobject.pass_id = pass_id;
   kobject.random_number = random_number;
   kobject.particle_index = particle_index;
diff --git a/intern/cycles/render/object.h b/intern/cycles/render/object.h
index 2fd43900da1..cbbff0d4c6d 100644
--- a/intern/cycles/render/object.h
+++ b/intern/cycles/render/object.h
@@ -51,6 +51,7 @@ class Object : public Node {
   BoundBox bounds;
   uint random_id;
   int pass_id;
+  float3 color;
   ustring asset_name;
   vector<ParamValue> attributes;
   uint visibility;
diff --git a/intern/cycles/render/sobol.cpp b/intern/cycles/render/sobol.cpp
index 5fb3531b03b..c821249b239 100644
--- a/intern/cycles/render/sobol.cpp
+++ b/intern/cycles/render/sobol.cpp
@@ -62,7 +62,7 @@ typedef struct SobolDirectionNumbers {
 
 /* Keep simple alignment. */
 /* clang-format off */
-static SobolDirectionNumbers SOBOL_NUMBERS[SOBOL_MAX_DIMENSIONS - 1] = {
+static const SobolDirectionNumbers SOBOL_NUMBERS[SOBOL_MAX_DIMENSIONS - 1] = {
 {2, 1, 0, {1}},
 {3, 2, 1, {1, 3}},
 {4, 3, 1, {1, 3, 1}},
@@ -21279,10 +21279,10 @@ void sobol_generate_direction_vectors(uint vectors[][SOBOL_BITS], int dimensions
     v[i] = 1 << (31 - i);  // all m's = 1
 
   for (int dim = 1; dim < dimensions; dim++) {
-    SobolDirectionNumbers *numbers = &SOBOL_NUMBERS[dim - 1];
-    uint s = numbers->s;
-    uint a = numbers->a;
-    uint *m = numbers->m;
+    const SobolDirectionNumbers *numbers = &SOBOL_NUMBERS[dim - 1];
+    const uint s = numbers->s;
+    const uint a = numbers->a;
+    const uint *m = numbers->m;
 
     v = vectors[dim];
 
diff --git a/intern/cycles/render/stats.h b/intern/cycles/render/stats.h
index f1bf1903483..e45403a3754 100644
--- a/intern/cycles/render/stats.h
+++ b/intern/cycles/render/stats.h
@@ -29,7 +29,7 @@ CCL_NAMESPACE_BEGIN
  * semantic around the units of size, it just should be the same for all
  * entries.
  *
- * This is a generic entry foi all size-related statistics, which helps
+ * This is a generic entry for all size-related statistics, which helps
  * avoiding duplicating code for things like sorting.
  */
 class NamedSizeEntry {
diff --git a/intern/cycles/subd/subd_dice.cpp b/intern/cycles/subd/subd_dice.cpp
index fb96be5065b..914b408911e 100644
--- a/intern/cycles/subd/subd_dice.cpp
+++ b/intern/cycles/subd/subd_dice.cpp
@@ -323,8 +323,8 @@ void QuadDice::dice(SubPatch &sub, EdgeFactors &ef)
   float S = 1.0f;
 #endif
 
-  Mu = max((int)ceil(S * Mu), 2);  // XXX handle 0 & 1?
-  Mv = max((int)ceil(S * Mv), 2);  // XXX handle 0 & 1?
+  Mu = max((int)ceilf(S * Mu), 2);  // XXX handle 0 & 1?
+  Mv = max((int)ceilf(S * Mv), 2);  // XXX handle 0 & 1?
 
   /* reserve space for new verts */
   int offset = params.mesh->verts.size();
diff --git a/intern/cycles/subd/subd_split.cpp b/intern/cycles/subd/subd_split.cpp
index e6603632ba7..e5b85fcfd60 100644
--- a/intern/cycles/subd/subd_split.cpp
+++ b/intern/cycles/subd/subd_split.cpp
@@ -80,9 +80,9 @@ int DiagSplit::T(Patch *patch, float2 Pstart, float2 Pend)
     Plast = P;
   }
 
-  int tmin = (int)ceil(Lsum / params.dicing_rate);
-  int tmax = (int)ceil((params.test_steps - 1) * Lmax /
-                       params.dicing_rate);  // XXX paper says N instead of N-1, seems wrong?
+  int tmin = (int)ceilf(Lsum / params.dicing_rate);
+  int tmax = (int)ceilf((params.test_steps - 1) * Lmax /
+                        params.dicing_rate);  // XXX paper says N instead of N-1, seems wrong?
 
   if (tmax - tmin > params.split_threshold)
     return DSPLIT_NON_UNIFORM;
@@ -99,7 +99,7 @@ void DiagSplit::partition_edge(
     *t1 = T(patch, *P, Pend);
   }
   else {
-    int I = (int)floor((float)t * 0.5f);
+    int I = (int)floorf((float)t * 0.5f);
     *P = interp(Pstart, Pend, (t == 0) ? 0 : I / (float)t); /* XXX is t faces or verts */
     *t0 = I;
     *t1 = t - I;
diff --git a/intern/cycles/test/render_graph_finalize_test.cpp b/intern/cycles/test/render_graph_finalize_test.cpp
index 7fb92bfb862..ca93f8b02d0 100644
--- a/intern/cycles/test/render_graph_finalize_test.cpp
+++ b/intern/cycles/test/render_graph_finalize_test.cpp
@@ -960,6 +960,13 @@ TEST_F(RenderGraph, constant_fold_blackbody)
   graph.finalize(scene);
 }
 
+/* A Note About The Math Node
+ *
+ * The clamp option is implemented using graph expansion, where a
+ * Clamp node named "clamp" is added and connected to the output.
+ * So the final result is actually from the node "clamp".
+ */
+
 /*
  * Tests: Math with all constant inputs (clamp false).
  */
@@ -985,7 +992,7 @@ TEST_F(RenderGraph, constant_fold_math)
 TEST_F(RenderGraph, constant_fold_math_clamp)
 {
   EXPECT_ANY_MESSAGE(log);
-  CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (1).");
+  CORRECT_INFO_MESSAGE(log, "Folding clamp::Result to constant (1).");
 
   builder
       .add_node(ShaderNodeBuilder<MathNode>("Math")
@@ -1003,7 +1010,7 @@ TEST_F(RenderGraph, constant_fold_math_clamp)
  * Includes 2 tests: constant on each side.
  */
 static void build_math_partial_test_graph(ShaderGraphBuilder &builder,
-                                          NodeMath type,
+                                          NodeMathType type,
                                           float constval)
 {
   builder
@@ -1038,7 +1045,7 @@ TEST_F(RenderGraph, constant_fold_part_math_add_0)
   /* X + 0 == 0 + X == X */
   CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac.");
   CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
-  INVALID_INFO_MESSAGE(log, "Folding Out::");
+  INVALID_INFO_MESSAGE(log, "Folding clamp::");
 
   build_math_partial_test_graph(builder, NODE_MATH_ADD, 0.0f);
   graph.finalize(scene);
@@ -1053,7 +1060,7 @@ TEST_F(RenderGraph, constant_fold_part_math_sub_0)
   /* X - 0 == X */
   INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
   CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
-  INVALID_INFO_MESSAGE(log, "Folding Out::");
+  INVALID_INFO_MESSAGE(log, "Folding clamp::");
 
   build_math_partial_test_graph(builder, NODE_MATH_SUBTRACT, 0.0f);
   graph.finalize(scene);
@@ -1068,7 +1075,7 @@ TEST_F(RenderGraph, constant_fold_part_math_mul_1)
   /* X * 1 == 1 * X == X */
   CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to socket Attribute::Fac.");
   CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
-  INVALID_INFO_MESSAGE(log, "Folding Out::");
+  INVALID_INFO_MESSAGE(log, "Folding clamp::");
 
   build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 1.0f);
   graph.finalize(scene);
@@ -1083,7 +1090,7 @@ TEST_F(RenderGraph, constant_fold_part_math_div_1)
   /* X / 1 == X */
   INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
   CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
-  INVALID_INFO_MESSAGE(log, "Folding Out::");
+  INVALID_INFO_MESSAGE(log, "Folding clamp::");
 
   build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 1.0f);
   graph.finalize(scene);
@@ -1098,7 +1105,7 @@ TEST_F(RenderGraph, constant_fold_part_math_mul_0)
   /* X * 0 == 0 * X == 0 */
   CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0).");
   CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (0).");
-  CORRECT_INFO_MESSAGE(log, "Folding Out::Value to constant (0)");
+  CORRECT_INFO_MESSAGE(log, "Folding clamp::Result to constant (0)");
   CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode.");
 
   build_math_partial_test_graph(builder, NODE_MATH_MULTIPLY, 0.0f);
@@ -1114,7 +1121,7 @@ TEST_F(RenderGraph, constant_fold_part_math_div_0)
   /* 0 / X == 0 */
   CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (0).");
   INVALID_INFO_MESSAGE(log, "Folding Math_xC::");
-  INVALID_INFO_MESSAGE(log, "Folding Out::");
+  INVALID_INFO_MESSAGE(log, "Folding clamp::");
 
   build_math_partial_test_graph(builder, NODE_MATH_DIVIDE, 0.0f);
   graph.finalize(scene);
@@ -1129,7 +1136,7 @@ TEST_F(RenderGraph, constant_fold_part_math_pow_0)
   /* X ^ 0 == 1 */
   INVALID_INFO_MESSAGE(log, "Folding Math_Cx::");
   CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to constant (1).");
-  INVALID_INFO_MESSAGE(log, "Folding Out::");
+  INVALID_INFO_MESSAGE(log, "Folding clamp::");
 
   build_math_partial_test_graph(builder, NODE_MATH_POWER, 0.0f);
   graph.finalize(scene);
@@ -1144,7 +1151,7 @@ TEST_F(RenderGraph, constant_fold_part_math_pow_1)
   /* 1 ^ X == 1; X ^ 1 == X */
   CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Value to constant (1)");
   CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Value to socket Attribute::Fac.");
-  INVALID_INFO_MESSAGE(log, "Folding Out::");
+  INVALID_INFO_MESSAGE(log, "Folding clamp::");
 
   build_math_partial_test_graph(builder, NODE_MATH_POWER, 1.0f);
   graph.finalize(scene);
@@ -1156,21 +1163,14 @@ TEST_F(RenderGraph, constant_fold_part_math_pow_1)
 TEST_F(RenderGraph, constant_fold_vector_math)
 {
   EXPECT_ANY_MESSAGE(log);
-  CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Value to constant (1).");
   CORRECT_INFO_MESSAGE(log, "Folding VectorMath::Vector to constant (3, 0, 0).");
-  CORRECT_INFO_MESSAGE(log, "Folding convert_vector_to_float::value_float to constant (1).");
-  CORRECT_INFO_MESSAGE(log, "Folding Math::Value to constant (2).");
-  CORRECT_INFO_MESSAGE(log, "Folding convert_float_to_color::value_color to constant (2, 2, 2).");
 
   builder
       .add_node(ShaderNodeBuilder<VectorMathNode>("VectorMath")
                     .set(&VectorMathNode::type, NODE_VECTOR_MATH_SUBTRACT)
                     .set("Vector1", make_float3(1.3f, 0.5f, 0.7f))
                     .set("Vector2", make_float3(-1.7f, 0.5f, 0.7f)))
-      .add_node(ShaderNodeBuilder<MathNode>("Math").set(&MathNode::type, NODE_MATH_ADD))
-      .add_connection("VectorMath::Vector", "Math::Value1")
-      .add_connection("VectorMath::Value", "Math::Value2")
-      .output_color("Math::Value");
+      .output_color("VectorMath::Vector");
 
   graph.finalize(scene);
 }
@@ -1180,7 +1180,7 @@ TEST_F(RenderGraph, constant_fold_vector_math)
  * Includes 2 tests: constant on each side.
  */
 static void build_vecmath_partial_test_graph(ShaderGraphBuilder &builder,
-                                             NodeVectorMath type,
+                                             NodeVectorMathType type,
                                              float3 constval)
 {
   builder
@@ -1234,22 +1234,6 @@ TEST_F(RenderGraph, constant_fold_part_vecmath_sub_0)
 }
 
 /*
- * Tests: partial folding for Vector Math Dot Product with known 0.
- */
-TEST_F(RenderGraph, constant_fold_part_vecmath_dot_0)
-{
-  EXPECT_ANY_MESSAGE(log);
-  /* X * 0 == 0 * X == X */
-  CORRECT_INFO_MESSAGE(log, "Folding Math_Cx::Vector to constant (0, 0, 0).");
-  CORRECT_INFO_MESSAGE(log, "Folding Math_xC::Vector to constant (0, 0, 0).");
-  CORRECT_INFO_MESSAGE(log, "Folding Out::Vector to constant (0, 0, 0).");
-  CORRECT_INFO_MESSAGE(log, "Discarding closure EmissionNode.");
-
-  build_vecmath_partial_test_graph(builder, NODE_VECTOR_MATH_DOT_PRODUCT, make_float3(0, 0, 0));
-  graph.finalize(scene);
-}
-
-/*
  * Tests: partial folding for Vector Math Cross Product with known 0.
  */
 TEST_F(RenderGraph, constant_fold_part_vecmath_cross_0)
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 7f3bead0a18..760985447a8 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -30,6 +30,7 @@
 #  ifndef __KERNEL_GPU__
 #    define ccl_device static inline
 #    define ccl_device_noinline static
+#    define ccl_device_noinline_cpu ccl_device_noinline
 #    define ccl_global
 #    define ccl_static_constant static const
 #    define ccl_constant const
diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h
index 785482967db..2b1f26de03d 100644
--- a/intern/cycles/util/util_hash.h
+++ b/intern/cycles/util/util_hash.h
@@ -21,39 +21,196 @@
 
 CCL_NAMESPACE_BEGIN
 
-ccl_device_inline uint hash_int_2d(uint kx, uint ky)
-{
+/* ***** Jenkins Lookup3 Hash Functions ***** */
+
+/* Source: http://burtleburtle.net/bob/c/lookup3.c */
+
 #define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
 
+#define mix(a, b, c) \
+  { \
+    a -= c; \
+    a ^= rot(c, 4); \
+    c += b; \
+    b -= a; \
+    b ^= rot(a, 6); \
+    a += c; \
+    c -= b; \
+    c ^= rot(b, 8); \
+    b += a; \
+    a -= c; \
+    a ^= rot(c, 16); \
+    c += b; \
+    b -= a; \
+    b ^= rot(a, 19); \
+    a += c; \
+    c -= b; \
+    c ^= rot(b, 4); \
+    b += a; \
+  }
+
+#define final(a, b, c) \
+  { \
+    c ^= b; \
+    c -= rot(b, 14); \
+    a ^= c; \
+    a -= rot(c, 11); \
+    b ^= a; \
+    b -= rot(a, 25); \
+    c ^= b; \
+    c -= rot(b, 16); \
+    a ^= c; \
+    a -= rot(c, 4); \
+    b ^= a; \
+    b -= rot(a, 14); \
+    c ^= b; \
+    c -= rot(b, 24); \
+  }
+
+ccl_device_inline uint hash_uint(uint kx)
+{
   uint a, b, c;
+  a = b = c = 0xdeadbeef + (1 << 2) + 13;
+
+  a += kx;
+  final(a, b, c);
 
+  return c;
+}
+
+ccl_device_inline uint hash_uint2(uint kx, uint ky)
+{
+  uint a, b, c;
   a = b = c = 0xdeadbeef + (2 << 2) + 13;
+
+  b += ky;
+  a += kx;
+  final(a, b, c);
+
+  return c;
+}
+
+ccl_device_inline uint hash_uint3(uint kx, uint ky, uint kz)
+{
+  uint a, b, c;
+  a = b = c = 0xdeadbeef + (3 << 2) + 13;
+
+  c += kz;
+  b += ky;
+  a += kx;
+  final(a, b, c);
+
+  return c;
+}
+
+ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw)
+{
+  uint a, b, c;
+  a = b = c = 0xdeadbeef + (4 << 2) + 13;
+
   a += kx;
   b += ky;
+  c += kz;
+  mix(a, b, c);
 
-  c ^= b;
-  c -= rot(b, 14);
-  a ^= c;
-  a -= rot(c, 11);
-  b ^= a;
-  b -= rot(a, 25);
-  c ^= b;
-  c -= rot(b, 16);
-  a ^= c;
-  a -= rot(c, 4);
-  b ^= a;
-  b -= rot(a, 14);
-  c ^= b;
-  c -= rot(b, 24);
+  a += kw;
+  final(a, b, c);
 
   return c;
+}
 
 #undef rot
+#undef final
+#undef mix
+
+/* Hashing uint or uint[234] into a float in the range [0, 1]. */
+
+ccl_device_inline float hash_uint_to_float(uint kx)
+{
+  return (float)hash_uint(kx) / (float)0xFFFFFFFFu;
+}
+
+ccl_device_inline float hash_uint2_to_float(uint kx, uint ky)
+{
+  return (float)hash_uint2(kx, ky) / (float)0xFFFFFFFFu;
+}
+
+ccl_device_inline float hash_uint3_to_float(uint kx, uint ky, uint kz)
+{
+  return (float)hash_uint3(kx, ky, kz) / (float)0xFFFFFFFFu;
+}
+
+ccl_device_inline float hash_uint4_to_float(uint kx, uint ky, uint kz, uint kw)
+{
+  return (float)hash_uint4(kx, ky, kz, kw) / (float)0xFFFFFFFFu;
+}
+
+/* Hashing float or float[234] into a float in the range [0, 1]. */
+
+ccl_device_inline float hash_float_to_float(float k)
+{
+  return hash_uint_to_float(__float_as_uint(k));
+}
+
+ccl_device_inline float hash_float2_to_float(float2 k)
+{
+  return hash_uint2_to_float(__float_as_uint(k.x), __float_as_uint(k.y));
+}
+
+ccl_device_inline float hash_float3_to_float(float3 k)
+{
+  return hash_uint3_to_float(__float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z));
+}
+
+ccl_device_inline float hash_float4_to_float(float4 k)
+{
+  return hash_uint4_to_float(
+      __float_as_uint(k.x), __float_as_uint(k.y), __float_as_uint(k.z), __float_as_uint(k.w));
+}
+
+/* Hashing float[234] into float[234] of components in the range [0, 1]. */
+
+ccl_device_inline float2 hash_float2_to_float2(float2 k)
+{
+  return make_float2(hash_float2_to_float(k), hash_float3_to_float(make_float3(k.x, k.y, 1.0)));
+}
+
+ccl_device_inline float3 hash_float3_to_float3(float3 k)
+{
+  return make_float3(hash_float3_to_float(k),
+                     hash_float4_to_float(make_float4(k.x, k.y, k.z, 1.0)),
+                     hash_float4_to_float(make_float4(k.x, k.y, k.z, 2.0)));
 }
 
-ccl_device_inline uint hash_int(uint k)
+ccl_device_inline float4 hash_float4_to_float4(float4 k)
+{
+  return make_float4(hash_float4_to_float(k),
+                     hash_float4_to_float(make_float4(k.w, k.x, k.y, k.z)),
+                     hash_float4_to_float(make_float4(k.z, k.w, k.x, k.y)),
+                     hash_float4_to_float(make_float4(k.y, k.z, k.w, k.x)));
+}
+
+/* Hashing float or float[234] into float3 of components in range [0, 1]. */
+
+ccl_device_inline float3 hash_float_to_float3(float k)
 {
-  return hash_int_2d(k, 0);
+  return make_float3(hash_float_to_float(k),
+                     hash_float2_to_float(make_float2(k, 1.0)),
+                     hash_float2_to_float(make_float2(k, 2.0)));
+}
+
+ccl_device_inline float3 hash_float2_to_float3(float2 k)
+{
+  return make_float3(hash_float2_to_float(k),
+                     hash_float3_to_float(make_float3(k.x, k.y, 1.0)),
+                     hash_float3_to_float(make_float3(k.x, k.y, 2.0)));
+}
+
+ccl_device_inline float3 hash_float4_to_float3(float4 k)
+{
+  return make_float3(hash_float4_to_float(k),
+                     hash_float4_to_float(make_float4(k.z, k.x, k.w, k.y)),
+                     hash_float4_to_float(make_float4(k.w, k.z, k.y, k.x)));
 }
 
 #ifndef __KERNEL_GPU__
@@ -68,11 +225,6 @@ static inline uint hash_string(const char *str)
 }
 #endif
 
-ccl_device_inline float hash_int_01(uint k)
-{
-  return (float)hash_int(k) * (1.0f / (float)0xFFFFFFFF);
-}
-
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_HASH_H__ */
diff --git a/intern/cycles/util/util_ies.cpp b/intern/cycles/util/util_ies.cpp
index 7c24a4ec28c..62d3d42186d 100644
--- a/intern/cycles/util/util_ies.cpp
+++ b/intern/cycles/util/util_ies.cpp
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <algorithm>
+
 #include "util/util_foreach.h"
 #include "util/util_ies.h"
 #include "util/util_math.h"
@@ -28,7 +30,7 @@ CCL_NAMESPACE_BEGIN
 // issue.
 template class GuardedAllocator<char>;
 
-bool IESFile::load(ustring ies)
+bool IESFile::load(const string &ies)
 {
   clear();
   if (!parse(ies) || !process()) {
@@ -76,7 +78,7 @@ class IESTextParser {
   vector<char> text;
   char *data;
 
-  IESTextParser(ustring str) : text(str.begin(), str.end())
+  IESTextParser(const string &str) : text(str.begin(), str.end())
   {
     std::replace(text.begin(), text.end(), ',', ' ');
     data = strstr(&text[0], "\nTILT=");
@@ -116,7 +118,7 @@ class IESTextParser {
   }
 };
 
-bool IESFile::parse(ustring ies)
+bool IESFile::parse(const string &ies)
 {
   if (ies.empty()) {
     return false;
diff --git a/intern/cycles/util/util_ies.h b/intern/cycles/util/util_ies.h
index ab1b9ea57cf..95473103614 100644
--- a/intern/cycles/util/util_ies.h
+++ b/intern/cycles/util/util_ies.h
@@ -17,7 +17,7 @@
 #ifndef __UTIL_IES_H__
 #define __UTIL_IES_H__
 
-#include "util/util_param.h"
+#include "util/util_string.h"
 #include "util/util_vector.h"
 
 CCL_NAMESPACE_BEGIN
@@ -32,11 +32,11 @@ class IESFile {
   int packed_size();
   void pack(float *data);
 
-  bool load(ustring ies);
+  bool load(const string &ies);
   void clear();
 
  protected:
-  bool parse(ustring ies);
+  bool parse(const string &ies);
   bool process();
   bool process_type_b();
   bool process_type_c();
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 554b7408148..c9a5b34aa58 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -47,6 +47,7 @@ ccl_device_inline float3 operator/=(float3 &a, float f);
 ccl_device_inline bool operator==(const float3 &a, const float3 &b);
 ccl_device_inline bool operator!=(const float3 &a, const float3 &b);
 
+ccl_device_inline float distance(const float3 &a, const float3 &b);
 ccl_device_inline float dot(const float3 &a, const float3 &b);
 ccl_device_inline float dot_xy(const float3 &a, const float3 &b);
 ccl_device_inline float3 cross(const float3 &a, const float3 &b);
@@ -58,6 +59,8 @@ ccl_device_inline float3 fabs(const float3 &a);
 ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t);
 ccl_device_inline float3 rcp(const float3 &a);
 ccl_device_inline float3 sqrt(const float3 &a);
+ccl_device_inline float3 floor(const float3 &a);
+ccl_device_inline float3 ceil(const float3 &a);
 #endif /* !__KERNEL_OPENCL__ */
 
 ccl_device_inline float min3(float3 a);
@@ -65,10 +68,15 @@ ccl_device_inline float max3(float3 a);
 ccl_device_inline float len(const float3 a);
 ccl_device_inline float len_squared(const float3 a);
 
+ccl_device_inline float3 reflect(const float3 incident, const float3 normal);
+ccl_device_inline float3 project(const float3 v, const float3 v_proj);
+
 ccl_device_inline float3 saturate3(float3 a);
 ccl_device_inline float3 safe_normalize(const float3 a);
 ccl_device_inline float3 normalize_len(const float3 a, float *t);
 ccl_device_inline float3 safe_normalize_len(const float3 a, float *t);
+ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b);
+ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b);
 ccl_device_inline float3 interp(float3 a, float3 b, float t);
 ccl_device_inline float3 sqr3(float3 a);
 
@@ -205,6 +213,11 @@ ccl_device_inline bool operator!=(const float3 &a, const float3 &b)
   return !(a == b);
 }
 
+ccl_device_inline float distance(const float3 &a, const float3 &b)
+{
+  return len(a - b);
+}
+
 ccl_device_inline float dot(const float3 &a, const float3 &b)
 {
 #  if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
@@ -281,6 +294,24 @@ ccl_device_inline float3 sqrt(const float3 &a)
 #  endif
 }
 
+ccl_device_inline float3 floor(const float3 &a)
+{
+#  ifdef __KERNEL_SSE__
+  return float3(_mm_floor_ps(a));
+#  else
+  return make_float3(floorf(a.x), floorf(a.y), floorf(a.z));
+#  endif
+}
+
+ccl_device_inline float3 ceil(const float3 &a)
+{
+#  ifdef __KERNEL_SSE__
+  return float3(_mm_ceil_ps(a));
+#  else
+  return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z));
+#  endif
+}
+
 ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t)
 {
   return a + t * (b - a);
@@ -321,6 +352,19 @@ ccl_device_inline float len_squared(const float3 a)
   return dot(a, a);
 }
 
+ccl_device_inline float3 reflect(const float3 incident, const float3 normal)
+{
+  float3 unit_normal = normalize(normal);
+  return incident - 2.0f * unit_normal * dot(incident, unit_normal);
+}
+
+ccl_device_inline float3 project(const float3 v, const float3 v_proj)
+{
+  float len_squared = dot(v_proj, v_proj);
+  return (len_squared != 0.0f) ? (dot(v, v_proj) / len_squared) * v_proj :
+                                 make_float3(0.0f, 0.0f, 0.0f);
+}
+
 ccl_device_inline float3 saturate3(float3 a)
 {
   return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
@@ -345,6 +389,18 @@ ccl_device_inline float3 safe_normalize_len(const float3 a, float *t)
   return (*t != 0.0f) ? a / (*t) : a;
 }
 
+ccl_device_inline float3 safe_divide_float3_float3(const float3 a, const float3 b)
+{
+  return make_float3((b.x != 0.0f) ? a.x / b.x : 0.0f,
+                     (b.y != 0.0f) ? a.y / b.y : 0.0f,
+                     (b.z != 0.0f) ? a.z / b.z : 0.0f);
+}
+
+ccl_device_inline float3 safe_divide_float3_float(const float3 a, const float b)
+{
+  return (b != 0.0f) ? a / b : make_float3(0.0f, 0.0f, 0.0f);
+}
+
 ccl_device_inline float3 interp(float3 a, float3 b, float t)
 {
   return a + t * (b - a);
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 8fcaadc5f53..f49cfb4184d 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -45,7 +45,7 @@
 
 #  endif
 
-#  if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
+#  if defined(__x86_64__) || defined(_M_X64)
 #    define SIMD_SET_FLUSH_TO_ZERO \
       _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \
       _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index fdbc3800806..24286116dfb 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -283,8 +283,8 @@ vector<int> distribute_threads_on_nodes(const int num_threads)
     }
     ++current_node_index;
   }
-  /* Second pass: keep scheduling threads to each node one by one, uniformly
-   * fillign them in.
+  /* Second pass: keep scheduling threads to each node one by one,
+   * uniformly filling them in.
    * This is where things becomes tricky to predict for the maximum
    * performance: on the one hand this avoids too much threading overhead on
    * few nodes, but for the final performance having all the overhead on one
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 437478d64d3..04fb33368d9 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -27,7 +27,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Own subclass-ed vestion of std::vector. Subclass is needed because:
+/* Own subclass-ed version of std::vector. Subclass is needed because:
  *
  * - Use own allocator which keeps track of used/peak memory.
  * - Have method to ensure capacity is re-set to 0.
author	Antonio Vazquez <blendergit@gmail.com>	2019-08-26 12:52:52 +0300
committer	Antonio Vazquez <blendergit@gmail.com>	2019-08-26 12:52:52 +0300
commit	6129e20cec4639aebf335ff13b2ba0c59670662d (patch)
tree	a36691d3c487e376f0fa21676ca6e416f051e9a4 /intern/cycles
parent	03bbd5f9dedf7b3dfea7119c172c61f0b50ae28c (diff)
parent	27787549256410b6b2de1eca47a2719830af7f96 (diff)