diff options
Diffstat (limited to 'intern')
-rw-r--r-- | intern/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/device/device_cuda.cpp | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/CMakeLists.txt | 24 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_transform_gpu.h | 6 | ||||
-rw-r--r-- | intern/cycles/kernel/filter/filter_transform_sse.h | 6 | ||||
-rw-r--r-- | intern/cycles/util/util_defines.h | 192 | ||||
-rw-r--r-- | intern/cycles/util/util_static_assert.h | 16 | ||||
-rw-r--r-- | intern/ghost/intern/GHOST_WindowViewCocoa.h | 5 |
8 files changed, 138 insertions, 118 deletions
diff --git a/intern/CMakeLists.txt b/intern/CMakeLists.txt index 4493c68a88d..e6b561c39b9 100644 --- a/intern/CMakeLists.txt +++ b/intern/CMakeLists.txt @@ -80,4 +80,3 @@ endif() if(WITH_QUADRIFLOW) add_subdirectory(quadriflow) endif() - diff --git a/intern/cycles/device/device_cuda.cpp b/intern/cycles/device/device_cuda.cpp index 00dd37f089c..b5e10b0c2cb 100644 --- a/intern/cycles/device/device_cuda.cpp +++ b/intern/cycles/device/device_cuda.cpp @@ -454,6 +454,12 @@ class CUDADevice : public Device { VLOG(1) << "Using precompiled kernel."; return cubin; } + const string ptx = path_get(string_printf("lib/%s_compute_%d%d.ptx", name, major, minor)); + VLOG(1) << "Testing for pre-compiled kernel " << ptx << "."; + if (path_exists(ptx)) { + VLOG(1) << "Using precompiled kernel."; + return ptx; + } } const string common_cflags = compile_kernel_get_common_cflags( diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 782553e405c..4077a1ad516 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -389,11 +389,20 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_cubins) macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental) - set(cuda_cubin ${name}_${arch}.cubin) + if(${arch} MATCHES "compute_.*") + set(format "ptx") + else() + set(format "cubin") + endif() + set(cuda_file ${name}_${arch}.${format}) set(kernel_sources ${sources}) if(NOT ${prev_arch} STREQUAL "none") - set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) + if(${prev_arch} MATCHES "compute_.*") + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.ptx) + else() + set(kernel_sources ${kernel_sources} ${name}_${prev_arch}.cubin) + endif() endif() set(cuda_kernel_src "/kernels/cuda/${name}.cu") @@ -406,7 +415,7 @@ if(WITH_CYCLES_CUDA_BINARIES) -I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/kernels/cuda --use_fast_math - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin}) + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}) if(${experimental}) set(cuda_flags ${cuda_flags} -D __KERNEL_EXPERIMENTAL__) @@ -440,20 +449,21 @@ if(WITH_CYCLES_CUDA_BINARIES) -v -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" DEPENDS ${kernel_sources} cycles_cubin_cc) + set(cuda_file ${cuda_cubin}) else() add_custom_command( - OUTPUT ${cuda_cubin} + OUTPUT ${cuda_file} COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} ${CUDA_NVCC_FLAGS} - --cubin + --${format} ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} --ptxas-options="-v" ${cuda_flags} DEPENDS ${kernel_sources}) endif() - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_cubin}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_file}) unset(cuda_debug_flags) endmacro() diff --git a/intern/cycles/kernel/filter/filter_transform_gpu.h b/intern/cycles/kernel/filter/filter_transform_gpu.h index 41bbadb621d..adc85881fe5 100644 --- a/intern/cycles/kernel/filter/filter_transform_gpu.h +++ b/intern/cycles/kernel/filter/filter_transform_gpu.h @@ -76,9 +76,9 @@ ccl_device void kernel_filter_construct_transform(const ccl_global float *ccl_re filter_calculate_scale(feature_scale, use_time); /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature - * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent - * overfitting. */ + * This transformation maps the num_features-dimensional feature space to a reduced feature + * (r-feature) space which generally has fewer dimensions. + * This mainly helps to prevent overfitting. */ float feature_matrix[DENOISE_FEATURES * DENOISE_FEATURES]; math_matrix_zero(feature_matrix, num_features); FOR_PIXEL_WINDOW diff --git a/intern/cycles/kernel/filter/filter_transform_sse.h b/intern/cycles/kernel/filter/filter_transform_sse.h index 830444645d7..5a124b5d73b 100644 --- a/intern/cycles/kernel/filter/filter_transform_sse.h +++ b/intern/cycles/kernel/filter/filter_transform_sse.h @@ -73,9 +73,9 @@ ccl_device void kernel_filter_construct_transform(const float *ccl_restrict buff filter_calculate_scale_sse(feature_scale, use_time); /* === Generate the feature transformation. === - * This transformation maps the num_features-dimentional feature space to a reduced feature - * (r-feature) space which generally has fewer dimensions. This mainly helps to prevent - * overfitting. */ + * This transformation maps the num_features-dimensional feature space to a reduced feature + * (r-feature) space which generally has fewer dimensions. + * This mainly helps to prevent over-fitting. */ float4 feature_matrix_sse[DENOISE_FEATURES * DENOISE_FEATURES]; math_matrix_zero_sse(feature_matrix_sse, num_features); FOR_PIXEL_WINDOW_SSE diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h index 2778cffba3a..b29d4163133 100644 --- a/intern/cycles/util/util_defines.h +++ b/intern/cycles/util/util_defines.h @@ -16,127 +16,127 @@ */ #ifndef __UTIL_DEFINES_H__ -# define __UTIL_DEFINES_H__ +#define __UTIL_DEFINES_H__ /* Bitness */ -# if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ - defined(_M_X64) -# define __KERNEL_64_BIT__ -# endif +#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ + defined(_M_X64) +# define __KERNEL_64_BIT__ +#endif /* Qualifiers for kernel code shared by CPU and GPU */ -# ifndef __KERNEL_GPU__ -# define ccl_device static inline -# define ccl_device_noinline static -# define ccl_device_noinline_cpu ccl_device_noinline -# define ccl_global -# define ccl_static_constant static const -# define ccl_constant const -# define ccl_local -# define ccl_local_param -# define ccl_private -# define ccl_restrict __restrict -# define ccl_ref & -# define ccl_optional_struct_init -# define __KERNEL_WITH_SSE_ALIGN__ - -# if defined(_WIN32) && !defined(FREE_WINDOWS) -# define ccl_device_inline static __forceinline -# define ccl_device_forceinline static __forceinline -# define ccl_align(...) __declspec(align(__VA_ARGS__)) -# ifdef __KERNEL_64_BIT__ -# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) -# else /* __KERNEL_64_BIT__ */ -# undef __KERNEL_WITH_SSE_ALIGN__ +#ifndef __KERNEL_GPU__ +# define ccl_device static inline +# define ccl_device_noinline static +# define ccl_device_noinline_cpu ccl_device_noinline +# define ccl_global +# define ccl_static_constant static const +# define ccl_constant const +# define ccl_local +# define ccl_local_param +# define ccl_private +# define ccl_restrict __restrict +# define ccl_ref & +# define ccl_optional_struct_init +# define __KERNEL_WITH_SSE_ALIGN__ + +# if defined(_WIN32) && !defined(FREE_WINDOWS) +# define ccl_device_inline static __forceinline +# define ccl_device_forceinline static __forceinline +# define ccl_align(...) __declspec(align(__VA_ARGS__)) +# ifdef __KERNEL_64_BIT__ +# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) +# else /* __KERNEL_64_BIT__ */ +# undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ -# define ccl_try_align(...) -# endif /* __KERNEL_64_BIT__ */ -# define ccl_may_alias -# define ccl_always_inline __forceinline -# define ccl_never_inline __declspec(noinline) -# define ccl_maybe_unused -# else /* _WIN32 && !FREE_WINDOWS */ -# define ccl_device_inline static inline __attribute__((always_inline)) -# define ccl_device_forceinline static inline __attribute__((always_inline)) -# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) -# ifndef FREE_WINDOWS64 -# define __forceinline inline __attribute__((always_inline)) -# endif -# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) -# define ccl_may_alias __attribute__((__may_alias__)) -# define ccl_always_inline __attribute__((always_inline)) -# define ccl_never_inline __attribute__((noinline)) -# define ccl_maybe_unused __attribute__((used)) -# endif /* _WIN32 && !FREE_WINDOWS */ +# define ccl_try_align(...) +# endif /* __KERNEL_64_BIT__ */ +# define ccl_may_alias +# define ccl_always_inline __forceinline +# define ccl_never_inline __declspec(noinline) +# define ccl_maybe_unused +# else /* _WIN32 && !FREE_WINDOWS */ +# define ccl_device_inline static inline __attribute__((always_inline)) +# define ccl_device_forceinline static inline __attribute__((always_inline)) +# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) +# ifndef FREE_WINDOWS64 +# define __forceinline inline __attribute__((always_inline)) +# endif +# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) +# define ccl_may_alias __attribute__((__may_alias__)) +# define ccl_always_inline __attribute__((always_inline)) +# define ccl_never_inline __attribute__((noinline)) +# define ccl_maybe_unused __attribute__((used)) +# endif /* _WIN32 && !FREE_WINDOWS */ /* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ -# ifndef ATTR_FALLTHROUGH -# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ -# define ATTR_FALLTHROUGH __attribute__((fallthrough)) -# else -# define ATTR_FALLTHROUGH ((void)0) -# endif +# ifndef ATTR_FALLTHROUGH +# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ +# define ATTR_FALLTHROUGH __attribute__((fallthrough)) +# else +# define ATTR_FALLTHROUGH ((void)0) # endif -# endif /* __KERNEL_GPU__ */ +# endif +#endif /* __KERNEL_GPU__ */ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ -# if defined(__GNUC__) && defined(__KERNEL_CPU__) -# define LIKELY(x) __builtin_expect(!!(x), 1) -# define UNLIKELY(x) __builtin_expect(!!(x), 0) -# else -# define LIKELY(x) (x) -# define UNLIKELY(x) (x) -# endif - -# if defined(__GNUC__) || defined(__clang__) -# if defined(__cplusplus) +#if defined(__GNUC__) && defined(__KERNEL_CPU__) +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +#endif + +#if defined(__GNUC__) || defined(__clang__) +# if defined(__cplusplus) /* Some magic to be sure we don't have reference in the type. */ template<typename T> static inline T decltype_helper(T x) { return x; } -# define TYPEOF(x) decltype(decltype_helper(x)) -# else -# define TYPEOF(x) typeof(x) -# endif +# define TYPEOF(x) decltype(decltype_helper(x)) +# else +# define TYPEOF(x) typeof(x) # endif +#endif /* Causes warning: * incompatible types when assigning to type 'Foo' from type 'Bar' * ... the compiler optimizes away the temp var */ -# ifdef __GNUC__ -# define CHECK_TYPE(var, type) \ - { \ - TYPEOF(var) * __tmp; \ - __tmp = (type *)NULL; \ - (void)__tmp; \ - } \ - (void)0 - -# define CHECK_TYPE_PAIR(var_a, var_b) \ - { \ - TYPEOF(var_a) * __tmp; \ - __tmp = (typeof(var_b) *)NULL; \ - (void)__tmp; \ - } \ - (void)0 -# else -# define CHECK_TYPE(var, type) -# define CHECK_TYPE_PAIR(var_a, var_b) -# endif +#ifdef __GNUC__ +# define CHECK_TYPE(var, type) \ + { \ + TYPEOF(var) * __tmp; \ + __tmp = (type *)NULL; \ + (void)__tmp; \ + } \ + (void)0 + +# define CHECK_TYPE_PAIR(var_a, var_b) \ + { \ + TYPEOF(var_a) * __tmp; \ + __tmp = (typeof(var_b) *)NULL; \ + (void)__tmp; \ + } \ + (void)0 +#else +# define CHECK_TYPE(var, type) +# define CHECK_TYPE_PAIR(var_a, var_b) +#endif /* can be used in simple macros */ -# define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) - -# ifndef __KERNEL_GPU__ -# include <cassert> -# define util_assert(statement) assert(statement) -# else -# define util_assert(statement) -# endif +#define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) + +#ifndef __KERNEL_GPU__ +# include <cassert> +# define util_assert(statement) assert(statement) +#else +# define util_assert(statement) +#endif #endif /* __UTIL_DEFINES_H__ */ diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h index b4b972a4036..ceb52830319 100644 --- a/intern/cycles/util/util_static_assert.h +++ b/intern/cycles/util/util_static_assert.h @@ -15,18 +15,18 @@ */ #ifndef __UTIL_STATIC_ASSERT_H__ -# define __UTIL_STATIC_ASSERT_H__ +#define __UTIL_STATIC_ASSERT_H__ CCL_NAMESPACE_BEGIN /* TODO(sergey): In theory CUDA might work with own static assert * implementation since it's just pure C++. */ -# ifdef __KERNEL_GPU__ -# ifndef static_assert -# define static_assert(statement, message) -# endif -# endif /* __KERNEL_GPU__ */ +#ifdef __KERNEL_GPU__ +# ifndef static_assert +# define static_assert(statement, message) +# endif +#endif /* __KERNEL_GPU__ */ /* TODO(sergey): For until C++11 is a bare minimum for us, * we do a bit of a trickery to show meaningful message so @@ -42,8 +42,8 @@ CCL_NAMESPACE_BEGIN * After C++11 bump it should be possible to glue structure * name to the error message, */ -# define static_assert_align(st, align) \ - static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT +#define static_assert_align(st, align) \ + static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT CCL_NAMESPACE_END diff --git a/intern/ghost/intern/GHOST_WindowViewCocoa.h b/intern/ghost/intern/GHOST_WindowViewCocoa.h index 9ed339c9992..cee40924b73 100644 --- a/intern/ghost/intern/GHOST_WindowViewCocoa.h +++ b/intern/ghost/intern/GHOST_WindowViewCocoa.h @@ -58,6 +58,11 @@ return YES; } +- (BOOL)acceptsFirstMouse:(NSEvent *)event +{ + return YES; +} + // The trick to prevent Cocoa from complaining (beeping) - (void)keyDown:(NSEvent *)event { |