diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/blender/blender_object.cpp | 5 | ||||
-rw-r--r-- | intern/cycles/blender/blender_shader.cpp | 12 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 14 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 24 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_path.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/shaders/node_mix.osl | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/svm_color_util.h | 4 | ||||
-rw-r--r-- | intern/cycles/render/nodes.cpp | 14 | ||||
-rw-r--r-- | intern/cycles/util/util_defines.h | 192 | ||||
-rw-r--r-- | intern/cycles/util/util_static_assert.h | 16 |
11 files changed, 167 insertions, 126 deletions
diff --git a/intern/cycles/blender/blender_object.cpp b/intern/cycles/blender/blender_object.cpp index 59509d20fb2..5520cfd5ecf 100644 --- a/intern/cycles/blender/blender_object.cpp +++ b/intern/cycles/blender/blender_object.cpp @@ -541,7 +541,6 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, const bool show_lights = BlenderViewportParameters(b_v3d).use_scene_lights; BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval(); - const bool has_local_view = b_v3d && b_v3d.local_view(); BL::Depsgraph::object_instances_iterator b_instance_iter; for (b_depsgraph.object_instances.begin(b_instance_iter); @@ -555,10 +554,10 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph, /* test if object needs to be hidden */ const bool show_self = b_instance.show_self(); - const bool show_local_view = !has_local_view || b_ob.local_view_get(b_v3d); const bool show_particles = b_instance.show_particles(); + const bool show_in_viewport = !b_v3d || b_ob.visible_in_viewport_get(b_v3d); - if (show_local_view && (show_self || show_particles)) { + if (show_in_viewport && (show_self || show_particles)) { /* object itself */ sync_object(b_depsgraph, b_view_layer, diff --git a/intern/cycles/blender/blender_shader.cpp b/intern/cycles/blender/blender_shader.cpp index 362155f22ac..22dbc3fba79 100644 --- a/intern/cycles/blender/blender_shader.cpp +++ b/intern/cycles/blender/blender_shader.cpp @@ -1340,6 +1340,14 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, graph->connect(background->output("Background"), out->input("Surface")); } else if (!new_viewport_parameters.use_scene_world) { + float3 world_color; + if (b_world) { + world_color = get_float3(b_world.color()); + } + else { + world_color = make_float3(0.0f, 0.0f, 0.0f); + } + BackgroundNode *background = new BackgroundNode(); graph->add(background); @@ -1347,7 +1355,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, graph->add(light_path); MixNode *mix_scene_with_background = new MixNode(); - mix_scene_with_background->color2 = get_float3(b_world.color()); + mix_scene_with_background->color2 = world_color; graph->add(mix_scene_with_background); EnvironmentTextureNode *texture_environment = new EnvironmentTextureNode(); @@ -1369,7 +1377,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, MixNode *mix_background_with_environment = new MixNode(); mix_background_with_environment->fac = new_viewport_parameters.studiolight_background_alpha; - mix_background_with_environment->color1 = get_float3(b_world.color()); + mix_background_with_environment->color1 = world_color; graph->add(mix_background_with_environment); ShaderNode *out = graph->output(); diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 00dd37f089c..b5e10b0c2cb 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -454,6 +454,12 @@ class CUDADevice : public Device { VLOG(1) << "Using precompiled kernel."; return cubin; } + const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { + VLOG(1) << "Using precompiled kernel."; + return ptx; + } } const string common_cflags = compile_kernel_get_common_cflags( diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index b745235aed5..6f4734059da 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -169,6 +169,7 @@ class OptiXDevice : public Device { OptixModule optix_module = NULL; OptixPipeline pipelines[NUM_PIPELINES] = {}; + bool motion_blur = false; bool need_texture_info = false; device_vector<SbtRecord> sbt_data; device_vector<TextureInfo> texture_info; @@ -337,7 +338,12 @@ class OptiXDevice : public Device { # endif pipeline_options.pipelineLaunchParamsVariableName = "__params"; // See kernel_globals.h - if (requested_features.use_object_motion) { + // Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds + // This is necessary since objects may be reported to have motion if the Vector pass is + // active, but may still need to be rendered without motion blur if that isn't active as well + motion_blur = requested_features.use_object_motion; + + if (motion_blur) { pipeline_options.usesMotionBlur = true; // Motion blur can insert motion transforms into the traversal graph // It is no longer a two-level graph then, so need to set flags to allow any configuration @@ -872,7 +878,7 @@ class OptiXDevice : public Device { size_t num_motion_steps = 1; Attribute *motion_keys = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (mesh->use_motion_blur && motion_keys) { + if (motion_blur && mesh->use_motion_blur && motion_keys) { num_motion_steps = mesh->motion_steps; } @@ -942,7 +948,7 @@ class OptiXDevice : public Device { size_t num_motion_steps = 1; Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - if (mesh->use_motion_blur && motion_keys) { + if (motion_blur && mesh->use_motion_blur && motion_keys) { num_motion_steps = mesh->motion_steps; } @@ -1041,7 +1047,7 @@ class OptiXDevice : public Device { instance.visibilityMask = (ob->mesh->has_volume ? 3 : 1); // Insert motion traversable if object has motion - if (ob->use_motion()) { + if (motion_blur && ob->use_motion()) { blas.emplace_back(this, "motion_transform"); device_only_memory<uint8_t> &motion_transform_gpu = blas.back(); motion_transform_gpu.alloc_to_device(sizeof(OptixSRTMotionTransform) + diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index ea8aa197b6f..78da584e132 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -389,11 +389,20 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental) - set(cuda_cubin ${name}_${arch}.cubin) + if(${arch} MATCHES "compute_.*") + set(format "ptx") + else() + set(format "cubin") + endif() + set(cuda_file ${name}_${arch}.${format}) set(kernel_sources ${sources}) if(NOT ${prev_arch} STREQUAL "none") - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) + if(${prev_arch} MATCHES "compute_.*") + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx) + else() + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) + endif() endif() set(cuda_kernel_src "/kernels/cuda/${name}.cu") @@ -406,7 +415,7 @@ if(WITH_CYCLES_CUDA_BINARIES) -I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda --use_fast_math - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}) if(${experimental}) set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) @@ -440,20 +449,21 @@ if(WITH_CYCLES_CUDA_BINARIES) -v -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" DEPENDS ${kernel_sources} cycles_cubin_cc) + set(cuda_file ${cuda_cubin}) else() add_custom_command( - OUTPUT ${cuda_cubin} + OUTPUT ${cuda_file} COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} ${CUDA_NVCC_FLAGS} - --cubin + --${format} ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} --ptxas-options="-v" ${cuda_flags} DEPENDS ${kernel_sources}) endif() - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_cubin}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_file}) unset(cuda_debug_flags) endmacro() diff --git a/intern/cycles/kernel/kernel_path.h b/intern/cycles/kernel/kernel_path.h index d45ffe9c7df..55abe39c465 100644 --- a/intern/cycles/kernel/kernel_path.h +++ b/intern/cycles/kernel/kernel_path.h @@ -459,7 +459,9 @@ ccl_device void kernel_path_indirect(KernelGlobals *kg, throughput /= probability; } +# ifdef __DENOISING_FEATURES__ kernel_update_denoising_features(kg, sd, state, L); +# endif # ifdef __AO__ /* ambient occlusion */ diff --git a/intern/cycles/kernel/shaders/node_mix.osl b/intern/cycles/kernel/shaders/node_mix.osl index 8caea6803ed..9fbd3391ade 100644 --- a/intern/cycles/kernel/shaders/node_mix.osl +++ b/intern/cycles/kernel/shaders/node_mix.osl @@ -91,12 +91,12 @@ color node_mix_diff(float t, color col1, color col2) color node_mix_dark(float t, color col1, color col2) { - return min(col1, col2) * t + col1 * (1.0 - t); + return mix(col1, min(col1, col2), t); } color node_mix_light(float t, color col1, color col2) { - return max(col1, col2 * t); + return mix(col1, max(col1, col2), t); } color node_mix_dodge(float t, color col1, color col2) diff --git a/intern/cycles/kernel/svm/svm_color_util.h b/intern/cycles/kernel/svm/svm_color_util.h index 0f571eb7253..1a0fa03305e 100644 --- a/intern/cycles/kernel/svm/svm_color_util.h +++ b/intern/cycles/kernel/svm/svm_color_util.h @@ -92,12 +92,12 @@ ccl_device float3 svm_mix_diff(float t, float3 col1, float3 col2) ccl_device float3 svm_mix_dark(float t, float3 col1, float3 col2) { - return min(col1, col2) * t + col1 * (1.0f - t); + return interp(col1, min(col1, col2), t); } ccl_device float3 svm_mix_light(float t, float3 col1, float3 col2) { - return max(col1, col2 * t); + return interp(col1, max(col1, col2), t); } ccl_device float3 svm_mix_dodge(float t, float3 col1, float3 col2) diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 71f1863ea49..b58e10a7b52 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -5567,11 +5567,21 @@ void MapRangeNode::expand(ShaderGraph *graph) ShaderOutput *result_out = output("Result"); if (!result_out->links.empty()) { ClampNode *clamp_node = new ClampNode(); - clamp_node->min = to_min; - clamp_node->max = to_max; graph->add(clamp_node); graph->relink(result_out, clamp_node->output("Result")); graph->connect(result_out, clamp_node->input("Value")); + if (input("To Min")->link) { + graph->connect(input("To Min")->link, clamp_node->input("Min")); + } + else { + clamp_node->min = to_min; + } + if (input("To Max")->link) { + graph->connect(input("To Max")->link, clamp_node->input("Max")); + } + else { + clamp_node->max = to_max; + } } } } diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h index 2778cffba3a..b29d4163133 100644 --- a/intern/cycles/util/util_defines.h +++ b/intern/cycles/util/util_defines.h @@ -16,127 +16,127 @@ */ #ifndef __UTIL_DEFINES_H__ -# define __UTIL_DEFINES_H__ +#define __UTIL_DEFINES_H__ /* Bitness */ -# if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ - defined(_M_X64) -# define __KERNEL_64_BIT__ -# endif +#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ + defined(_M_X64) +# define __KERNEL_64_BIT__ +#endif /* Qualifiers for kernel code shared by CPU and GPU */ -# ifndef __KERNEL_GPU__ -# define ccl_device static inline -# define ccl_device_noinline static -# define ccl_device_noinline_cpu ccl_device_noinline -# define ccl_global -# define ccl_static_constant static const -# define ccl_constant const -# define ccl_local -# define ccl_local_param -# define ccl_private -# define ccl_restrict __restrict -# define ccl_ref & -# define ccl_optional_struct_init -# define __KERNEL_WITH_SSE_ALIGN__ - -# if defined(_WIN32) && !defined(FREE_WINDOWS) -# define ccl_device_inline static __forceinline -# define ccl_device_forceinline static __forceinline -# define ccl_align(...) __declspec(align(__VA_ARGS__)) -# ifdef __KERNEL_64_BIT__ -# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) -# else /* __KERNEL_64_BIT__ */ -# undef __KERNEL_WITH_SSE_ALIGN__ +#ifndef __KERNEL_GPU__ +# define ccl_device static inline +# define ccl_device_noinline static +# define ccl_device_noinline_cpu ccl_device_noinline +# define ccl_global +# define ccl_static_constant static const +# define ccl_constant const +# define ccl_local +# define ccl_local_param +# define ccl_private +# define ccl_restrict __restrict +# define ccl_ref & +# define ccl_optional_struct_init +# define __KERNEL_WITH_SSE_ALIGN__ + +# if defined(_WIN32) && !defined(FREE_WINDOWS) +# define ccl_device_inline static __forceinline +# define ccl_device_forceinline static __forceinline +# define ccl_align(...) __declspec(align(__VA_ARGS__)) +# ifdef __KERNEL_64_BIT__ +# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) +# else /* __KERNEL_64_BIT__ */ +# undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ -# define ccl_try_align(...) -# endif /* __KERNEL_64_BIT__ */ -# define ccl_may_alias -# define ccl_always_inline __forceinline -# define ccl_never_inline __declspec(noinline) -# define ccl_maybe_unused -# else /* _WIN32 && !FREE_WINDOWS */ -# define ccl_device_inline static inline __attribute__((always_inline)) -# define ccl_device_forceinline static inline __attribute__((always_inline)) -# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) -# ifndef FREE_WINDOWS64 -# define __forceinline inline __attribute__((always_inline)) -# endif -# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) -# define ccl_may_alias __attribute__((__may_alias__)) -# define ccl_always_inline __attribute__((always_inline)) -# define ccl_never_inline __attribute__((noinline)) -# define ccl_maybe_unused __attribute__((used)) -# endif /* _WIN32 && !FREE_WINDOWS */ +# define ccl_try_align(...) +# endif /* __KERNEL_64_BIT__ */ +# define ccl_may_alias +# define ccl_always_inline __forceinline +# define ccl_never_inline __declspec(noinline) +# define ccl_maybe_unused +# else /* _WIN32 && !FREE_WINDOWS */ +# define ccl_device_inline static inline __attribute__((always_inline)) +# define ccl_device_forceinline static inline __attribute__((always_inline)) +# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) +# ifndef FREE_WINDOWS64 +# define __forceinline inline __attribute__((always_inline)) +# endif +# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) +# define ccl_may_alias __attribute__((__may_alias__)) +# define ccl_always_inline __attribute__((always_inline)) +# define ccl_never_inline __attribute__((noinline)) +# define ccl_maybe_unused __attribute__((used)) +# endif /* _WIN32 && !FREE_WINDOWS */ /* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ -# ifndef ATTR_FALLTHROUGH -# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ -# define ATTR_FALLTHROUGH __attribute__((fallthrough)) -# else -# define ATTR_FALLTHROUGH ((void)0) -# endif +# ifndef ATTR_FALLTHROUGH +# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ +# define ATTR_FALLTHROUGH __attribute__((fallthrough)) +# else +# define ATTR_FALLTHROUGH ((void)0) # endif -# endif /* __KERNEL_GPU__ */ +# endif +#endif /* __KERNEL_GPU__ */ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ -# if defined(__GNUC__) && defined(__KERNEL_CPU__) -# define LIKELY(x) __builtin_expect(!!(x), 1) -# define UNLIKELY(x) __builtin_expect(!!(x), 0) -# else -# define LIKELY(x) (x) -# define UNLIKELY(x) (x) -# endif - -# if defined(__GNUC__) || defined(__clang__) -# if defined(__cplusplus) +#if defined(__GNUC__) && defined(__KERNEL_CPU__) +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +#endif + +#if defined(__GNUC__) || defined(__clang__) +# if defined(__cplusplus) /* Some magic to be sure we don't have reference in the type. */ template<typename T> static inline T decltype_helper(T x) { return x; } -# define TYPEOF(x) decltype(decltype_helper(x)) -# else -# define TYPEOF(x) typeof(x) -# endif +# define TYPEOF(x) decltype(decltype_helper(x)) +# else +# define TYPEOF(x) typeof(x) # endif +#endif /* Causes warning: * incompatible types when assigning to type 'Foo' from type 'Bar' * ... the compiler optimizes away the temp var */ -# ifdef __GNUC__ -# define CHECK_TYPE(var, type) \ - { \ - TYPEOF(var) * __tmp; \ - __tmp = (type *)NULL; \ - (void)__tmp; \ - } \ - (void)0 - -# define CHECK_TYPE_PAIR(var_a, var_b) \ - { \ - TYPEOF(var_a) * __tmp; \ - __tmp = (typeof(var_b) *)NULL; \ - (void)__tmp; \ - } \ - (void)0 -# else -# define CHECK_TYPE(var, type) -# define CHECK_TYPE_PAIR(var_a, var_b) -# endif +#ifdef __GNUC__ +# define CHECK_TYPE(var, type) \ + { \ + TYPEOF(var) * __tmp; \ + __tmp = (type *)NULL; \ + (void)__tmp; \ + } \ + (void)0 + +# define CHECK_TYPE_PAIR(var_a, var_b) \ + { \ + TYPEOF(var_a) * __tmp; \ + __tmp = (typeof(var_b) *)NULL; \ + (void)__tmp; \ + } \ + (void)0 +#else +# define CHECK_TYPE(var, type) +# define CHECK_TYPE_PAIR(var_a, var_b) +#endif /* can be used in simple macros */ -# define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) - -# ifndef __KERNEL_GPU__ -# include <cassert> -# define util_assert(statement) assert(statement) -# else -# define util_assert(statement) -# endif +#define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) + +#ifndef __KERNEL_GPU__ +# include <cassert> +# define util_assert(statement) assert(statement) +#else +# define util_assert(statement) +#endif #endif /* __UTIL_DEFINES_H__ */ diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h index b4b972a4036..ceb52830319 100644 --- a/intern/cycles/util/util_static_assert.h +++ b/intern/cycles/util/util_static_assert.h @@ -15,18 +15,18 @@ */ #ifndef __UTIL_STATIC_ASSERT_H__ -# define __UTIL_STATIC_ASSERT_H__ +#define __UTIL_STATIC_ASSERT_H__ CCL_NAMESPACE_BEGIN /* TODO(sergey): In theory CUDA might work with own static assert * implementation since it's just pure C++. */ -# ifdef __KERNEL_GPU__ -# ifndef static_assert -# define static_assert(statement, message) -# endif -# endif /* __KERNEL_GPU__ */ +#ifdef __KERNEL_GPU__ +# ifndef static_assert +# define static_assert(statement, message) +# endif +#endif /* __KERNEL_GPU__ */ /* TODO(sergey): For until C++11 is a bare minimum for us, * we do a bit of a trickery to show meaningful message so @@ -42,8 +42,8 @@ CCL_NAMESPACE_BEGIN * After C++11 bump it should be possible to glue structure * name to the error message, */ -# define static_assert_align(st, align) \ - static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT +#define static_assert_align(st, align) \ + static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT CCL_NAMESPACE_END |