11 files changed, 167 insertions, 126 deletions
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp
index 59509d20fb2..5520cfd5ecf 100644
--- a/intern/cycles/blender/blender_object.cpp
+++ b/intern/cycles/blender/blender_object.cpp
@@ -541,7 +541,6 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
   const bool show_lights = BlenderViewportParameters(b_v3d).use_scene_lights;
 
   BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
-  const bool has_local_view = b_v3d && b_v3d.local_view();
 
   BL::Depsgraph::object_instances_iterator b_instance_iter;
   for (b_depsgraph.object_instances.begin(b_instance_iter);
@@ -555,10 +554,10 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
 
     /* test if object needs to be hidden */
     const bool show_self = b_instance.show_self();
-    const bool show_local_view = !has_local_view || b_ob.local_view_get(b_v3d);
     const bool show_particles = b_instance.show_particles();
+    const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d);
 
-    if (show_local_view && (show_self || show_particles)) {
+    if (show_in_viewport && (show_self || show_particles)) {
       /* object itself */
       sync_object(b_depsgraph,
                   b_view_layer,
diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp
index 362155f22ac..22dbc3fba79 100644
--- a/intern/cycles/blender/blender_shader.cpp
+++ b/intern/cycles/blender/blender_shader.cpp
@@ -1340,6 +1340,14 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
       graph->connect(background->output("Background"), out->input("Surface"));
     }
     else if (!new_viewport_parameters.use_scene_world) {
+      float3 world_color;
+      if (b_world) {
+        world_color = get_float3(b_world.color());
+      }
+      else {
+        world_color = make_float3(0.0f, 0.0f, 0.0f);
+      }
+
       BackgroundNode *background = new BackgroundNode();
       graph->add(background);
 
@@ -1347,7 +1355,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
       graph->add(light_path);
 
       MixNode *mix_scene_with_background = new MixNode();
-      mix_scene_with_background->color2 = get_float3(b_world.color());
+      mix_scene_with_background->color2 = world_color;
       graph->add(mix_scene_with_background);
 
       EnvironmentTextureNode *texture_environment = new EnvironmentTextureNode();
@@ -1369,7 +1377,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
 
       MixNode *mix_background_with_environment = new MixNode();
       mix_background_with_environment->fac = new_viewport_parameters.studiolight_background_alpha;
-      mix_background_with_environment->color1 = get_float3(b_world.color());
+      mix_background_with_environment->color1 = world_color;
       graph->add(mix_background_with_environment);
 
       ShaderNode *out = graph->output();
diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp
index 00dd37f089c..b5e10b0c2cb 100644
--- a/intern/cycles/device/device_cuda.cpp
+++ b/intern/cycles/device/device_cuda.cpp
@@ -454,6 +454,12 @@ class CUDADevice : public Device {
         VLOG(1) << "Using precompiled kernel.";
         return cubin;
       }
+      const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor));
+      VLOG(1) << "Testing for pre-compiled kernel " << ptx << ".";
+      if (path_exists(ptx)) {
+        VLOG(1) << "Using precompiled kernel.";
+        return ptx;
+      }
     }
 
     const string common_cflags = compile_kernel_get_common_cflags(
diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp
index b745235aed5..6f4734059da 100644
--- a/intern/cycles/device/device_optix.cpp
+++ b/intern/cycles/device/device_optix.cpp
@@ -169,6 +169,7 @@ class OptiXDevice : public Device {
   OptixModule optix_module = NULL;
   OptixPipeline pipelines[NUM_PIPELINES] = {};
 
+  bool motion_blur = false;
   bool need_texture_info = false;
   device_vector<SbtRecord> sbt_data;
   device_vector<TextureInfo> texture_info;
@@ -337,7 +338,12 @@ class OptiXDevice : public Device {
 #  endif
     pipeline_options.pipelineLaunchParamsVariableName = "__params";  // See kernel_globals.h
 
-    if (requested_features.use_object_motion) {
+    // Keep track of whether motion blur is enabled, so as to enable/disable motion in BVH builds.
+    // This is necessary since objects may be reported to have motion if the Vector pass is
+    // active, but may still need to be rendered without motion blur if it is not enabled as well.
+    motion_blur = requested_features.use_object_motion;
+
+    if (motion_blur) {
       pipeline_options.usesMotionBlur = true;
       // Motion blur can insert motion transforms into the traversal graph
       // It is no longer a two-level graph then, so need to set flags to allow any configuration
@@ -872,7 +878,7 @@ class OptiXDevice : public Device {
 
         size_t num_motion_steps = 1;
         Attribute *motion_keys = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-        if (mesh->use_motion_blur && motion_keys) {
+        if (motion_blur && mesh->use_motion_blur && motion_keys) {
           num_motion_steps = mesh->motion_steps;
         }
 
@@ -942,7 +948,7 @@ class OptiXDevice : public Device {
 
         size_t num_motion_steps = 1;
         Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
-        if (mesh->use_motion_blur && motion_keys) {
+        if (motion_blur && mesh->use_motion_blur && motion_keys) {
           num_motion_steps = mesh->motion_steps;
         }
 
@@ -1041,7 +1047,7 @@ class OptiXDevice : public Device {
         instance.visibilityMask = (ob->mesh->has_volume ? 3 : 1);
 
         // Insert motion traversable if object has motion
-        if (ob->use_motion()) {
+        if (motion_blur && ob->use_motion()) {
           blas.emplace_back(this, "motion_transform");
           device_only_memory<uint8_t> &motion_transform_gpu = blas.back();
           motion_transform_gpu.alloc_to_device(sizeof(OptixSRTMotionTransform) +
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index ea8aa197b6f..78da584e132 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -389,11 +389,20 @@ if(WITH_CYCLES_CUDA_BINARIES)
   set(cuda_cubins)
 
   macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
-    set(cuda_cubin ${name}_${arch}.cubin)
+    if(${arch} MATCHES "compute_.*")
+      set(format "ptx")
+    else()
+      set(format "cubin")
+    endif()
+    set(cuda_file ${name}_${arch}.${format})
 
     set(kernel_sources ${sources})
     if(NOT ${prev_arch} STREQUAL "none")
-      set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
+      if(${prev_arch} MATCHES "compute_.*")
+        set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx)
+      else()
+        set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin)
+      endif()
     endif()
 
     set(cuda_kernel_src "/kernels/cuda/${name}.cu")
@@ -406,7 +415,7 @@ if(WITH_CYCLES_CUDA_BINARIES)
       -I ${CMAKE_CURRENT_SOURCE_DIR}/..
       -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda
       --use_fast_math
-      -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin})
+      -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file})
 
     if(${experimental})
       set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__)
@@ -440,20 +449,21 @@ if(WITH_CYCLES_CUDA_BINARIES)
             -v
             -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}"
         DEPENDS ${kernel_sources} cycles_cubin_cc)
+      set(cuda_file ${cuda_cubin})
     else()
       add_custom_command(
-        OUTPUT ${cuda_cubin}
+        OUTPUT ${cuda_file}
         COMMAND ${CUDA_NVCC_EXECUTABLE}
             -arch=${arch}
             ${CUDA_NVCC_FLAGS}
-            --cubin
+            --${format}
             ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
             --ptxas-options="-v"
             ${cuda_flags}
         DEPENDS ${kernel_sources})
     endif()
-    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
-    list(APPEND cuda_cubins ${cuda_cubin})
+    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib)
+    list(APPEND cuda_cubins ${cuda_file})
 
     unset(cuda_debug_flags)
   endmacro()
diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h
index d45ffe9c7df..55abe39c465 100644
--- a/intern/cycles/kernel/kernel_path.h
+++ b/intern/cycles/kernel/kernel_path.h
@@ -459,7 +459,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg,
           throughput /= probability;
         }
 
+#    ifdef __DENOISING_FEATURES__
         kernel_update_denoising_features(kg, sd, state, L);
+#    endif
 
 #    ifdef __AO__
         /* ambient occlusion */
diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl
index 8caea6803ed..9fbd3391ade 100644
--- a/intern/cycles/kernel/shaders/node_mix.osl
+++ b/intern/cycles/kernel/shaders/node_mix.osl
@@ -91,12 +91,12 @@ color node_mix_diff(float t, color col1, color col2)
 
 color node_mix_dark(float t, color col1, color col2)
 {
-  return min(col1, col2) * t + col1 * (1.0 - t);
+  return mix(col1, min(col1, col2), t);
 }
 
 color node_mix_light(float t, color col1, color col2)
 {
-  return max(col1, col2 * t);
+  return mix(col1, max(col1, col2), t);
 }
 
 color node_mix_dodge(float t, color col1, color col2)
diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h
index 0f571eb7253..1a0fa03305e 100644
--- a/intern/cycles/kernel/svm/svm_color_util.h
+++ b/intern/cycles/kernel/svm/svm_color_util.h
@@ -92,12 +92,12 @@ ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2)
 
 ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2)
 {
-  return min(col1, col2) * t + col1 * (1.0f - t);
+  return interp(col1, min(col1, col2), t);
 }
 
 ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2)
 {
-  return max(col1, col2 * t);
+  return interp(col1, max(col1, col2), t);
 }
 
 ccl_device float3 svm_mix_dodge(float t, float3 col1, float3 col2)
diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp
index 71f1863ea49..b58e10a7b52 100644
--- a/intern/cycles/render/nodes.cpp
+++ b/intern/cycles/render/nodes.cpp
@@ -5567,11 +5567,21 @@ void MapRangeNode::expand(ShaderGraph *graph)
     ShaderOutput *result_out = output("Result");
     if (!result_out->links.empty()) {
       ClampNode *clamp_node = new ClampNode();
-      clamp_node->min = to_min;
-      clamp_node->max = to_max;
       graph->add(clamp_node);
       graph->relink(result_out, clamp_node->output("Result"));
       graph->connect(result_out, clamp_node->input("Value"));
+      if (input("To Min")->link) {
+        graph->connect(input("To Min")->link, clamp_node->input("Min"));
+      }
+      else {
+        clamp_node->min = to_min;
+      }
+      if (input("To Max")->link) {
+        graph->connect(input("To Max")->link, clamp_node->input("Max"));
+      }
+      else {
+        clamp_node->max = to_max;
+      }
     }
   }
 }
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 2778cffba3a..b29d4163133 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -16,127 +16,127 @@
  */
 
 #ifndef __UTIL_DEFINES_H__
-#  define __UTIL_DEFINES_H__
+#define __UTIL_DEFINES_H__
 
 /* Bitness */
 
-#  if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \
-      defined(_M_X64)
-#    define __KERNEL_64_BIT__
-#  endif
+#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \
+    defined(_M_X64)
+#  define __KERNEL_64_BIT__
+#endif
 
 /* Qualifiers for kernel code shared by CPU and GPU */
 
-#  ifndef __KERNEL_GPU__
-#    define ccl_device static inline
-#    define ccl_device_noinline static
-#    define ccl_device_noinline_cpu ccl_device_noinline
-#    define ccl_global
-#    define ccl_static_constant static const
-#    define ccl_constant const
-#    define ccl_local
-#    define ccl_local_param
-#    define ccl_private
-#    define ccl_restrict __restrict
-#    define ccl_ref &
-#    define ccl_optional_struct_init
-#    define __KERNEL_WITH_SSE_ALIGN__
-
-#    if defined(_WIN32) && !defined(FREE_WINDOWS)
-#      define ccl_device_inline static __forceinline
-#      define ccl_device_forceinline static __forceinline
-#      define ccl_align(...) __declspec(align(__VA_ARGS__))
-#      ifdef __KERNEL_64_BIT__
-#        define ccl_try_align(...) __declspec(align(__VA_ARGS__))
-#      else /* __KERNEL_64_BIT__ */
-#        undef __KERNEL_WITH_SSE_ALIGN__
+#ifndef __KERNEL_GPU__
+#  define ccl_device static inline
+#  define ccl_device_noinline static
+#  define ccl_device_noinline_cpu ccl_device_noinline
+#  define ccl_global
+#  define ccl_static_constant static const
+#  define ccl_constant const
+#  define ccl_local
+#  define ccl_local_param
+#  define ccl_private
+#  define ccl_restrict __restrict
+#  define ccl_ref &
+#  define ccl_optional_struct_init
+#  define __KERNEL_WITH_SSE_ALIGN__
+
+#  if defined(_WIN32) && !defined(FREE_WINDOWS)
+#    define ccl_device_inline static __forceinline
+#    define ccl_device_forceinline static __forceinline
+#    define ccl_align(...) __declspec(align(__VA_ARGS__))
+#    ifdef __KERNEL_64_BIT__
+#      define ccl_try_align(...) __declspec(align(__VA_ARGS__))
+#    else /* __KERNEL_64_BIT__ */
+#      undef __KERNEL_WITH_SSE_ALIGN__
 /* No support for function arguments (error C2719). */
-#        define ccl_try_align(...)
-#      endif /* __KERNEL_64_BIT__ */
-#      define ccl_may_alias
-#      define ccl_always_inline __forceinline
-#      define ccl_never_inline __declspec(noinline)
-#      define ccl_maybe_unused
-#    else /* _WIN32 && !FREE_WINDOWS */
-#      define ccl_device_inline static inline __attribute__((always_inline))
-#      define ccl_device_forceinline static inline __attribute__((always_inline))
-#      define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
-#      ifndef FREE_WINDOWS64
-#        define __forceinline inline __attribute__((always_inline))
-#      endif
-#      define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
-#      define ccl_may_alias __attribute__((__may_alias__))
-#      define ccl_always_inline __attribute__((always_inline))
-#      define ccl_never_inline __attribute__((noinline))
-#      define ccl_maybe_unused __attribute__((used))
-#    endif /* _WIN32 && !FREE_WINDOWS */
+#      define ccl_try_align(...)
+#    endif /* __KERNEL_64_BIT__ */
+#    define ccl_may_alias
+#    define ccl_always_inline __forceinline
+#    define ccl_never_inline __declspec(noinline)
+#    define ccl_maybe_unused
+#  else /* _WIN32 && !FREE_WINDOWS */
+#    define ccl_device_inline static inline __attribute__((always_inline))
+#    define ccl_device_forceinline static inline __attribute__((always_inline))
+#    define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
+#    ifndef FREE_WINDOWS64
+#      define __forceinline inline __attribute__((always_inline))
+#    endif
+#    define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__)))
+#    define ccl_may_alias __attribute__((__may_alias__))
+#    define ccl_always_inline __attribute__((always_inline))
+#    define ccl_never_inline __attribute__((noinline))
+#    define ccl_maybe_unused __attribute__((used))
+#  endif /* _WIN32 && !FREE_WINDOWS */
 
 /* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
-#    ifndef ATTR_FALLTHROUGH
-#      if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
-#        define ATTR_FALLTHROUGH __attribute__((fallthrough))
-#      else
-#        define ATTR_FALLTHROUGH ((void)0)
-#      endif
+#  ifndef ATTR_FALLTHROUGH
+#    if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
+#      define ATTR_FALLTHROUGH __attribute__((fallthrough))
+#    else
+#      define ATTR_FALLTHROUGH ((void)0)
 #    endif
-#  endif /* __KERNEL_GPU__ */
+#  endif
+#endif /* __KERNEL_GPU__ */
 
 /* macros */
 
 /* hints for branch prediction, only use in code that runs a _lot_ */
-#  if defined(__GNUC__) && defined(__KERNEL_CPU__)
-#    define LIKELY(x) __builtin_expect(!!(x), 1)
-#    define UNLIKELY(x) __builtin_expect(!!(x), 0)
-#  else
-#    define LIKELY(x) (x)
-#    define UNLIKELY(x) (x)
-#  endif
-
-#  if defined(__GNUC__) || defined(__clang__)
-#    if defined(__cplusplus)
+#if defined(__GNUC__) && defined(__KERNEL_CPU__)
+#  define LIKELY(x) __builtin_expect(!!(x), 1)
+#  define UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+#  define LIKELY(x) (x)
+#  define UNLIKELY(x) (x)
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#  if defined(__cplusplus)
 /* Some magic to be sure we don't have reference in the type. */
 template<typename T> static inline T decltype_helper(T x)
 {
   return x;
 }
-#      define TYPEOF(x) decltype(decltype_helper(x))
-#    else
-#      define TYPEOF(x) typeof(x)
-#    endif
+#    define TYPEOF(x) decltype(decltype_helper(x))
+#  else
+#    define TYPEOF(x) typeof(x)
 #  endif
+#endif
 
 /* Causes warning:
  * incompatible types when assigning to type 'Foo' from type 'Bar'
  * ... the compiler optimizes away the temp var */
-#  ifdef __GNUC__
-#    define CHECK_TYPE(var, type) \
-      { \
-        TYPEOF(var) * __tmp; \
-        __tmp = (type *)NULL; \
-        (void)__tmp; \
-      } \
-      (void)0
-
-#    define CHECK_TYPE_PAIR(var_a, var_b) \
-      { \
-        TYPEOF(var_a) * __tmp; \
-        __tmp = (typeof(var_b) *)NULL; \
-        (void)__tmp; \
-      } \
-      (void)0
-#  else
-#    define CHECK_TYPE(var, type)
-#    define CHECK_TYPE_PAIR(var_a, var_b)
-#  endif
+#ifdef __GNUC__
+#  define CHECK_TYPE(var, type) \
+    { \
+      TYPEOF(var) * __tmp; \
+      __tmp = (type *)NULL; \
+      (void)__tmp; \
+    } \
+    (void)0
+
+#  define CHECK_TYPE_PAIR(var_a, var_b) \
+    { \
+      TYPEOF(var_a) * __tmp; \
+      __tmp = (typeof(var_b) *)NULL; \
+      (void)__tmp; \
+    } \
+    (void)0
+#else
+#  define CHECK_TYPE(var, type)
+#  define CHECK_TYPE_PAIR(var_a, var_b)
+#endif
 
 /* can be used in simple macros */
-#  define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val)))
-
-#  ifndef __KERNEL_GPU__
-#    include <cassert>
-#    define util_assert(statement) assert(statement)
-#  else
-#    define util_assert(statement)
-#  endif
+#define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val)))
+
+#ifndef __KERNEL_GPU__
+#  include <cassert>
+#  define util_assert(statement) assert(statement)
+#else
+#  define util_assert(statement)
+#endif
 
 #endif /* __UTIL_DEFINES_H__ */
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
index b4b972a4036..ceb52830319 100644
--- a/intern/cycles/util/util_static_assert.h
+++ b/intern/cycles/util/util_static_assert.h
@@ -15,18 +15,18 @@
  */
 
 #ifndef __UTIL_STATIC_ASSERT_H__
-#  define __UTIL_STATIC_ASSERT_H__
+#define __UTIL_STATIC_ASSERT_H__
 
 CCL_NAMESPACE_BEGIN
 
 /* TODO(sergey): In theory CUDA might work with own static assert
  * implementation since it's just pure C++.
  */
-#  ifdef __KERNEL_GPU__
-#    ifndef static_assert
-#      define static_assert(statement, message)
-#    endif
-#  endif /* __KERNEL_GPU__ */
+#ifdef __KERNEL_GPU__
+#  ifndef static_assert
+#    define static_assert(statement, message)
+#  endif
+#endif /* __KERNEL_GPU__ */
 
 /* TODO(sergey): For until C++11 is a bare minimum for us,
  * we do a bit of a trickery to show meaningful message so
@@ -42,8 +42,8 @@ CCL_NAMESPACE_BEGIN
  * After C++11 bump it should be possible to glue structure
  * name to the error message,
  */
-#  define static_assert_align(st, align) \
-    static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned")  // NOLINT
+#define static_assert_align(st, align) \
+  static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned")  // NOLINT
 
 CCL_NAMESPACE_END