Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brecht@blender.org> 2021-09-20 18:59:20 +0300
committer: Brecht Van Lommel <brecht@blender.org> 2021-09-21 15:55:54 +0300
commit: 08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree: 6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/util
parent: fa6b1007bad065440950cd67deb16a04f368856f (diff)
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more.

Some features have also been removed or changed, breaking backwards compatibility. Including the removal of the OpenCL backend, for which alternatives are under development.

Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles

Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)

For the full commit history, see the cycles-x branch. This squashes together all the changes since intermediate changes would often fail building or tests.

Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- intern/cycles/util/util_atomic.h | 50
-rw-r--r-- intern/cycles/util/util_debug.cpp | 83
-rw-r--r-- intern/cycles/util/util_debug.h | 67
-rw-r--r-- intern/cycles/util/util_defines.h | 4
-rw-r--r-- intern/cycles/util/util_half.h | 46
-rw-r--r-- intern/cycles/util/util_logging.h | 1
-rw-r--r-- intern/cycles/util/util_math.h | 97
-rw-r--r-- intern/cycles/util/util_math_float2.h | 5
-rw-r--r-- intern/cycles/util/util_math_float3.h | 128
-rw-r--r-- intern/cycles/util/util_math_float4.h | 145
-rw-r--r-- intern/cycles/util/util_math_int2.h | 4
-rw-r--r-- intern/cycles/util/util_math_int3.h | 40
-rw-r--r-- intern/cycles/util/util_path.cpp | 184
-rw-r--r-- intern/cycles/util/util_path.h | 8
-rw-r--r-- intern/cycles/util/util_profiling.cpp | 8
-rw-r--r-- intern/cycles/util/util_profiling.h | 106
-rw-r--r-- intern/cycles/util/util_progress.h | 22
-rw-r--r-- intern/cycles/util/util_simd.h | 14
-rw-r--r-- intern/cycles/util/util_static_assert.h | 4
-rw-r--r-- intern/cycles/util/util_string.cpp | 36
-rw-r--r-- intern/cycles/util/util_string.h | 12
-rw-r--r-- intern/cycles/util/util_system.cpp | 9
-rw-r--r-- intern/cycles/util/util_system.h | 3
-rw-r--r-- intern/cycles/util/util_tbb.h | 1
-rw-r--r-- intern/cycles/util/util_texture.h | 2
-rw-r--r-- intern/cycles/util/util_transform.h | 34
-rw-r--r-- intern/cycles/util/util_types.h | 10
-rw-r--r-- intern/cycles/util/util_unique_ptr.h | 1
28 files changed, 371 insertions, 753 deletions
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index 13d177d2b25..de17efafcf2 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -34,56 +34,6 @@
#else /* __KERNEL_GPU__ */
-# ifdef __KERNEL_OPENCL__
-
-/* Float atomics implementation credits:
- * http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html
- */
-ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *source,
- const float operand)
-{
- union {
- unsigned int int_value;
- float float_value;
- } new_value;
- union {
- unsigned int int_value;
- float float_value;
- } prev_value;
- do {
- prev_value.float_value = *source;
- new_value.float_value = prev_value.float_value + operand;
- } while (atomic_cmpxchg((volatile ccl_global unsigned int *)source,
- prev_value.int_value,
- new_value.int_value) != prev_value.int_value);
- return new_value.float_value;
-}
-
-ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest,
- const float old_val,
- const float new_val)
-{
- union {
- unsigned int int_value;
- float float_value;
- } new_value, prev_value, result;
- prev_value.float_value = old_val;
- new_value.float_value = new_val;
- result.int_value = atomic_cmpxchg(
- (volatile ccl_global unsigned int *)dest, prev_value.int_value, new_value.int_value);
- return result.float_value;
-}
-
-# define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
-# define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
-# define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
-# define atomic_fetch_and_or_uint32(p, x) atomic_or((p), (x))
-
-# define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
-# define ccl_barrier(flags) barrier(flags)
-
-# endif /* __KERNEL_OPENCL__ */
-
# ifdef __KERNEL_CUDA__
# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x))
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 74ecefa1917..1d598725c84 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -26,13 +26,7 @@
CCL_NAMESPACE_BEGIN
DebugFlags::CPU::CPU()
- : avx2(true),
- avx(true),
- sse41(true),
- sse3(true),
- sse2(true),
- bvh_layout(BVH_LAYOUT_AUTO),
- split_kernel(false)
+ : avx2(true), avx(true), sse41(true), sse3(true), sse2(true), bvh_layout(BVH_LAYOUT_AUTO)
{
reset();
}
@@ -58,11 +52,9 @@ void DebugFlags::CPU::reset()
#undef CHECK_CPU_FLAGS
bvh_layout = BVH_LAYOUT_AUTO;
-
- split_kernel = false;
}
-DebugFlags::CUDA::CUDA() : adaptive_compile(false), split_kernel(false)
+DebugFlags::CUDA::CUDA() : adaptive_compile(false)
{
reset();
}
@@ -71,8 +63,6 @@ void DebugFlags::CUDA::reset()
{
if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL)
adaptive_compile = true;
-
- split_kernel = false;
}
DebugFlags::OptiX::OptiX()
@@ -82,42 +72,7 @@ DebugFlags::OptiX::OptiX()
void DebugFlags::OptiX::reset()
{
- cuda_streams = 1;
- curves_api = false;
-}
-
-DebugFlags::OpenCL::OpenCL() : device_type(DebugFlags::OpenCL::DEVICE_ALL), debug(false)
-{
- reset();
-}
-
-void DebugFlags::OpenCL::reset()
-{
- /* Initialize device type from environment variables. */
- device_type = DebugFlags::OpenCL::DEVICE_ALL;
- char *device = getenv("CYCLES_OPENCL_TEST");
- if (device) {
- if (strcmp(device, "NONE") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_NONE;
- }
- else if (strcmp(device, "ALL") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_ALL;
- }
- else if (strcmp(device, "DEFAULT") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_DEFAULT;
- }
- else if (strcmp(device, "CPU") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_CPU;
- }
- else if (strcmp(device, "GPU") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_GPU;
- }
- else if (strcmp(device, "ACCELERATOR") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_ACCELERATOR;
- }
- }
- /* Initialize other flags from environment variables. */
- debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL);
+ use_debug = false;
}
DebugFlags::DebugFlags() : viewport_static_bvh(false), running_inside_blender(false)
@@ -131,7 +86,6 @@ void DebugFlags::reset()
cpu.reset();
cuda.reset();
optix.reset();
- opencl.reset();
}
std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags)
@@ -142,40 +96,13 @@ std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags)
<< " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n"
<< " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n"
<< " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n"
- << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n"
- << " Split : " << string_from_bool(debug_flags.cpu.split_kernel) << "\n";
+ << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n";
os << "CUDA flags:\n"
<< " Adaptive Compile : " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n";
os << "OptiX flags:\n"
- << " CUDA streams : " << debug_flags.optix.cuda_streams << "\n";
-
- const char *opencl_device_type;
- switch (debug_flags.opencl.device_type) {
- case DebugFlags::OpenCL::DEVICE_NONE:
- opencl_device_type = "NONE";
- break;
- case DebugFlags::OpenCL::DEVICE_ALL:
- opencl_device_type = "ALL";
- break;
- case DebugFlags::OpenCL::DEVICE_DEFAULT:
- opencl_device_type = "DEFAULT";
- break;
- case DebugFlags::OpenCL::DEVICE_CPU:
- opencl_device_type = "CPU";
- break;
- case DebugFlags::OpenCL::DEVICE_GPU:
- opencl_device_type = "GPU";
- break;
- case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
- opencl_device_type = "ACCELERATOR";
- break;
- }
- os << "OpenCL flags:\n"
- << " Device type : " << opencl_device_type << "\n"
- << " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n"
- << " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n";
+ << " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n";
return os;
}
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index f7e53f90f74..99e2723180c 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -79,9 +79,6 @@ class DebugFlags {
* CPUs and GPUs can be selected here instead.
*/
BVHLayout bvh_layout;
-
- /* Whether split kernel is used */
- bool split_kernel;
};
/* Descriptor of CUDA feature-set to be used. */
@@ -94,9 +91,6 @@ class DebugFlags {
/* Whether adaptive feature based runtime compile is enabled or not.
* Requires the CUDA Toolkit and only works on Linux atm. */
bool adaptive_compile;
-
- /* Whether split kernel is used */
- bool split_kernel;
};
/* Descriptor of OptiX feature-set to be used. */
@@ -106,61 +100,9 @@ class DebugFlags {
/* Reset flags to their defaults. */
void reset();
- /* Number of CUDA streams to launch kernels concurrently from. */
- int cuda_streams;
-
- /* Use OptiX curves API for hair instead of custom implementation. */
- bool curves_api;
- };
-
- /* Descriptor of OpenCL feature-set to be used. */
- struct OpenCL {
- OpenCL();
-
- /* Reset flags to their defaults. */
- void reset();
-
- /* Available device types.
- * Only gives a hint which devices to let user to choose from, does not
- * try to use any sort of optimal device or so.
- */
- enum DeviceType {
- /* None of OpenCL devices will be used. */
- DEVICE_NONE,
- /* All OpenCL devices will be used. */
- DEVICE_ALL,
- /* Default system OpenCL device will be used. */
- DEVICE_DEFAULT,
- /* Host processor will be used. */
- DEVICE_CPU,
- /* GPU devices will be used. */
- DEVICE_GPU,
- /* Dedicated OpenCL accelerator device will be used. */
- DEVICE_ACCELERATOR,
- };
-
- /* Available kernel types. */
- enum KernelType {
- /* Do automated guess which kernel to use, based on the officially
- * supported GPUs and such.
- */
- KERNEL_DEFAULT,
- /* Force mega kernel to be used. */
- KERNEL_MEGA,
- /* Force split kernel to be used. */
- KERNEL_SPLIT,
- };
-
- /* Requested device type. */
- DeviceType device_type;
-
- /* Use debug version of the kernel. */
- bool debug;
-
- /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all
- * devices. */
- /* Artificial memory limit in bytes (0 if disabled). */
- size_t mem_limit;
+ /* Load OptiX module with debug capabilities. Will lower logging verbosity level, enable
+ * validations, and lower optimization level. */
+ bool use_debug;
};
/* Get instance of debug flags registry. */
@@ -182,9 +124,6 @@ class DebugFlags {
/* Requested OptiX flags. */
OptiX optix;
- /* Requested OpenCL flags. */
- OpenCL opencl;
-
private:
DebugFlags();
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 0a239a944a5..9b1698d461a 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -43,9 +43,9 @@
# define ccl_local_param
# define ccl_private
# define ccl_restrict __restrict
-# define ccl_ref &
# define ccl_optional_struct_init
# define ccl_loop_no_unroll
+# define ccl_attr_maybe_unused [[maybe_unused]]
# define __KERNEL_WITH_SSE_ALIGN__
# if defined(_WIN32) && !defined(FREE_WINDOWS)
@@ -62,7 +62,6 @@
# define ccl_may_alias
# define ccl_always_inline __forceinline
# define ccl_never_inline __declspec(noinline)
-# define ccl_maybe_unused
# else /* _WIN32 && !FREE_WINDOWS */
# define ccl_device_inline static inline __attribute__((always_inline))
# define ccl_device_forceinline static inline __attribute__((always_inline))
@@ -74,7 +73,6 @@
# define ccl_may_alias __attribute__((__may_alias__))
# define ccl_always_inline __attribute__((always_inline))
# define ccl_never_inline __attribute__((noinline))
-# define ccl_maybe_unused __attribute__((used))
# endif /* _WIN32 && !FREE_WINDOWS */
/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index a8d4ee75e20..d9edfec5da3 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -28,14 +28,8 @@ CCL_NAMESPACE_BEGIN
/* Half Floats */
-#ifdef __KERNEL_OPENCL__
-
-# define float4_store_half(h, f, scale) vstore_half4(f *(scale), 0, h);
-
-#else
-
/* CUDA has its own half data type, no need to define then */
-# ifndef __KERNEL_CUDA__
+#ifndef __KERNEL_CUDA__
/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
* unsigned shorts. */
class half {
@@ -59,27 +53,27 @@ class half {
private:
unsigned short v;
};
-# endif
+#endif
struct half4 {
half x, y, z, w;
};
-# ifdef __KERNEL_CUDA__
+#ifdef __KERNEL_CUDA__
-ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f)
{
- h[0] = __float2half(f.x * scale);
- h[1] = __float2half(f.y * scale);
- h[2] = __float2half(f.z * scale);
- h[3] = __float2half(f.w * scale);
+ h[0] = __float2half(f.x);
+ h[1] = __float2half(f.y);
+ h[2] = __float2half(f.z);
+ h[3] = __float2half(f.w);
}
-# else
+#else
-ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f)
{
-# ifndef __KERNEL_SSE2__
+# ifndef __KERNEL_SSE2__
for (int i = 0; i < 4; i++) {
/* optimized float to half for pixels:
* assumes no negative, no nan, no inf, and sets denormal to 0 */
@@ -87,8 +81,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
uint i;
float f;
} in;
- float fscale = f[i] * scale;
- in.f = (fscale > 0.0f) ? ((fscale < 65504.0f) ? fscale : 65504.0f) : 0.0f;
+ in.f = (f[i] > 0.0f) ? ((f[i] < 65504.0f) ? f[i] : 65504.0f) : 0.0f;
int x = in.i;
int absolute = x & 0x7FFFFFFF;
@@ -98,23 +91,22 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
h[i] = (rshift & 0x7FFF);
}
-# else
+# else
/* same as above with SSE */
- ssef fscale = load4f(f) * scale;
- ssef x = min(max(fscale, 0.0f), 65504.0f);
+ ssef x = min(max(load4f(f), 0.0f), 65504.0f);
-# ifdef __KERNEL_AVX2__
+# ifdef __KERNEL_AVX2__
ssei rpack = _mm_cvtps_ph(x, 0);
-# else
+# else
ssei absolute = cast(x) & 0x7FFFFFFF;
ssei Z = absolute + 0xC8000000;
ssei result = andnot(absolute < 0x38800000, Z);
ssei rshift = (result >> 13) & 0x7FFF;
ssei rpack = _mm_packs_epi32(rshift, rshift);
-# endif
+# endif
_mm_storel_pi((__m64 *)h, _mm_castsi128_ps(rpack));
-# endif
+# endif
}
ccl_device_inline float half_to_float(half h)
@@ -160,8 +152,6 @@ ccl_device_inline half float_to_half(float f)
return (value_bits | sign_bit);
}
-# endif
-
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h
index c161299acd0..35c2d436d09 100644
--- a/intern/cycles/util/util_logging.h
+++ b/intern/cycles/util/util_logging.h
@@ -49,6 +49,7 @@ class LogMessageVoidify {
# define LOG(severity) LOG_SUPPRESS()
# define VLOG(severity) LOG_SUPPRESS()
# define VLOG_IF(severity, condition) LOG_SUPPRESS()
+# define VLOG_IS_ON(severity) false
# define CHECK(expression) LOG_SUPPRESS()
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index c5996ebfcb6..6d728dde679 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -26,11 +26,9 @@
# include <cmath>
#endif
-#ifndef __KERNEL_OPENCL__
-# include <float.h>
-# include <math.h>
-# include <stdio.h>
-#endif /* __KERNEL_OPENCL__ */
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
#include "util/util_types.h"
@@ -86,7 +84,6 @@ CCL_NAMESPACE_BEGIN
/* Scalar */
#ifdef _WIN32
-# ifndef __KERNEL_OPENCL__
ccl_device_inline float fmaxf(float a, float b)
{
return (a > b) ? a : b;
@@ -96,8 +93,7 @@ ccl_device_inline float fminf(float a, float b)
{
return (a < b) ? a : b;
}
-# endif /* !__KERNEL_OPENCL__ */
-#endif /* _WIN32 */
+#endif /* _WIN32 */
#ifndef __KERNEL_GPU__
using std::isfinite;
@@ -119,6 +115,11 @@ ccl_device_inline int min(int a, int b)
return (a < b) ? a : b;
}
+ccl_device_inline uint min(uint a, uint b)
+{
+ return (a < b) ? a : b;
+}
+
ccl_device_inline float max(float a, float b)
{
return (a > b) ? a : b;
@@ -166,7 +167,6 @@ ccl_device_inline float max4(float a, float b, float c, float d)
return max(max(a, b), max(c, d));
}
-#ifndef __KERNEL_OPENCL__
/* Int/Float conversion */
ccl_device_inline int as_int(uint i)
@@ -241,24 +241,23 @@ ccl_device_inline float __uint_as_float(uint i)
ccl_device_inline int4 __float4_as_int4(float4 f)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(f.m128));
-# else
+#else
return make_int4(
__float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w));
-# endif
+#endif
}
ccl_device_inline float4 __int4_as_float4(int4 i)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_castsi128_ps(i.m128));
-# else
+#else
return make_float4(
__int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w));
-# endif
+#endif
}
-#endif /* __KERNEL_OPENCL__ */
/* Versions of functions which are safe for fast math. */
ccl_device_inline bool isnan_safe(float f)
@@ -279,7 +278,6 @@ ccl_device_inline float ensure_finite(float v)
return isfinite_safe(v) ? v : 0.0f;
}
-#ifndef __KERNEL_OPENCL__
ccl_device_inline int clamp(int a, int mn, int mx)
{
return min(max(a, mn), mx);
@@ -309,8 +307,6 @@ ccl_device_inline float smoothstep(float edge0, float edge1, float x)
return result;
}
-#endif /* __KERNEL_OPENCL__ */
-
#ifndef __KERNEL_CUDA__
ccl_device_inline float saturate(float a)
{
@@ -451,7 +447,6 @@ CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
-#ifndef __KERNEL_OPENCL__
/* Interpolation */
template<class A, class B> A lerp(const A &a, const A &b, const B &t)
@@ -459,15 +454,9 @@ template<class A, class B> A lerp(const A &a, const A &b, const B &t)
return (A)(a * ((B)1 - t) + b * t);
}
-#endif /* __KERNEL_OPENCL__ */
-
/* Triangle */
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float triangle_area(const float3 &v1, const float3 &v2, const float3 &v3)
-#else
-ccl_device_inline float triangle_area(const float3 v1, const float3 v2, const float3 v3)
-#endif
{
return len(cross(v3 - v2, v1 - v2)) * 0.5f;
}
@@ -665,11 +654,7 @@ ccl_device_inline float pow22(float a)
ccl_device_inline float beta(float x, float y)
{
-#ifndef __KERNEL_OPENCL__
return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y));
-#else
- return expf(lgamma(x) + lgamma(y) - lgamma(x + y));
-#endif
}
ccl_device_inline float xor_signmask(float x, int y)
@@ -686,8 +671,6 @@ ccl_device_inline uint count_leading_zeros(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
return __clz(x);
-#elif defined(__KERNEL_OPENCL__)
- return clz(x);
#else
assert(x != 0);
# ifdef _MSC_VER
@@ -704,8 +687,6 @@ ccl_device_inline uint count_trailing_zeros(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
return (__ffs(x) - 1);
-#elif defined(__KERNEL_OPENCL__)
- return (31 - count_leading_zeros(x & -x));
#else
assert(x != 0);
# ifdef _MSC_VER
@@ -722,8 +703,6 @@ ccl_device_inline uint find_first_set(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
return __ffs(x);
-#elif defined(__KERNEL_OPENCL__)
- return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
#else
# ifdef _MSC_VER
return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
@@ -797,6 +776,52 @@ ccl_device_inline float precise_angle(float3 a, float3 b)
return 2.0f * atan2f(len(a - b), len(a + b));
}
+/* Return value which is greater than the given one and is a power of two. */
+ccl_device_inline uint next_power_of_two(uint x)
+{
+ return x == 0 ? 1 : 1 << (32 - count_leading_zeros(x));
+}
+
+/* Return value which is lower than the given one and is a power of two. */
+ccl_device_inline uint prev_power_of_two(uint x)
+{
+ return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1));
+}
+
+#ifndef __has_builtin
+# define __has_builtin(v) 0
+#endif
+
+/* Reverses the bits of a 32 bit integer. */
+ccl_device_inline uint32_t reverse_integer_bits(uint32_t x)
+{
+ /* Use a native instruction if it exists. */
+#if defined(__arm__) || defined(__aarch64__)
+ __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x));
+ return x;
+#elif defined(__KERNEL_CUDA__)
+ return __brev(x);
+#elif __has_builtin(__builtin_bitreverse32)
+ return __builtin_bitreverse32(x);
+#else
+ /* Flip pairwise. */
+ x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1);
+ /* Flip pairs. */
+ x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2);
+ /* Flip nibbles. */
+ x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4);
+ /* Flip bytes. CPUs have an instruction for that, pretty fast one. */
+# ifdef _MSC_VER
+ return _byteswap_ulong(x);
+# elif defined(__INTEL_COMPILER)
+ return (uint32_t)_bswap((int)x);
+# else
+ /* Assuming gcc or clang. */
+ return __builtin_bswap32(x);
+# endif
+#endif
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_MATH_H__ */
diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h
index 17f6f3c9382..70b80c33544 100644
--- a/intern/cycles/util/util_math_float2.h
+++ b/intern/cycles/util/util_math_float2.h
@@ -27,7 +27,6 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float2 operator-(const float2 &a);
ccl_device_inline float2 operator*(const float2 &a, const float2 &b);
ccl_device_inline float2 operator*(const float2 &a, float f);
@@ -64,7 +63,6 @@ ccl_device_inline float2 fabs(const float2 &a);
ccl_device_inline float2 as_float2(const float4 &a);
ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t);
ccl_device_inline float2 floor(const float2 &a);
-#endif /* !__KERNEL_OPENCL__ */
ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b);
@@ -82,7 +80,6 @@ ccl_device_inline float2 one_float2()
return make_float2(1.0f, 1.0f);
}
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float2 operator-(const float2 &a)
{
return make_float2(-a.x, -a.y);
@@ -262,8 +259,6 @@ ccl_device_inline float2 floor(const float2 &a)
return make_float2(floorf(a.x), floorf(a.y));
}
-#endif /* !__KERNEL_OPENCL__ */
-
ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b)
{
return (b != 0.0f) ? a / b : zero_float2();
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 9673c043189..30a1b4c3f77 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -27,7 +27,6 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float3 operator-(const float3 &a);
ccl_device_inline float3 operator*(const float3 &a, const float3 &b);
ccl_device_inline float3 operator*(const float3 &a, const float f);
@@ -63,7 +62,6 @@ ccl_device_inline float3 rcp(const float3 &a);
ccl_device_inline float3 sqrt(const float3 &a);
ccl_device_inline float3 floor(const float3 &a);
ccl_device_inline float3 ceil(const float3 &a);
-#endif /* !__KERNEL_OPENCL__ */
ccl_device_inline float min3(float3 a);
ccl_device_inline float max3(float3 a);
@@ -105,50 +103,49 @@ ccl_device_inline float3 one_float3()
return make_float3(1.0f, 1.0f, 1.0f);
}
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float3 operator-(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
-# else
+#else
return make_float3(-a.x, -a.y, -a.z);
-# endif
+#endif
}
ccl_device_inline float3 operator*(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_mul_ps(a.m128, b.m128));
-# else
+#else
return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
-# endif
+#endif
}
ccl_device_inline float3 operator*(const float3 &a, const float f)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f)));
-# else
+#else
return make_float3(a.x * f, a.y * f, a.z * f);
-# endif
+#endif
}
ccl_device_inline float3 operator*(const float f, const float3 &a)
{
-# if defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE__)
return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
-# else
+#else
return make_float3(a.x * f, a.y * f, a.z * f);
-# endif
+#endif
}
ccl_device_inline float3 operator/(const float f, const float3 &a)
{
-# if defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(_mm_set1_ps(f), a.m128));
-# else
+#else
return make_float3(f / a.x, f / a.y, f / a.z);
-# endif
+#endif
}
ccl_device_inline float3 operator/(const float3 &a, const float f)
@@ -159,11 +156,11 @@ ccl_device_inline float3 operator/(const float3 &a, const float f)
ccl_device_inline float3 operator/(const float3 &a, const float3 &b)
{
-# if defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(a.m128, b.m128));
-# else
+#else
return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
-# endif
+#endif
}
ccl_device_inline float3 operator+(const float3 &a, const float f)
@@ -173,11 +170,11 @@ ccl_device_inline float3 operator+(const float3 &a, const float f)
ccl_device_inline float3 operator+(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_add_ps(a.m128, b.m128));
-# else
+#else
return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
-# endif
+#endif
}
ccl_device_inline float3 operator-(const float3 &a, const float f)
@@ -187,11 +184,11 @@ ccl_device_inline float3 operator-(const float3 &a, const float f)
ccl_device_inline float3 operator-(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_sub_ps(a.m128, b.m128));
-# else
+#else
return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
-# endif
+#endif
}
ccl_device_inline float3 operator+=(float3 &a, const float3 &b)
@@ -227,11 +224,11 @@ ccl_device_inline float3 operator/=(float3 &a, float f)
ccl_device_inline bool operator==(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7;
-# else
+#else
return (a.x == b.x && a.y == b.y && a.z == b.z);
-# endif
+#endif
}
ccl_device_inline bool operator!=(const float3 &a, const float3 &b)
@@ -246,20 +243,20 @@ ccl_device_inline float distance(const float3 &a, const float3 &b)
ccl_device_inline float dot(const float3 &a, const float3 &b)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
-# else
+#else
return a.x * b.x + a.y * b.y + a.z * b.z;
-# endif
+#endif
}
ccl_device_inline float dot_xy(const float3 &a, const float3 &b)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a, b), b));
-# else
+#else
return a.x * b.x + a.y * b.y;
-# endif
+#endif
}
ccl_device_inline float3 cross(const float3 &a, const float3 &b)
@@ -270,30 +267,30 @@ ccl_device_inline float3 cross(const float3 &a, const float3 &b)
ccl_device_inline float3 normalize(const float3 &a)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
return float3(_mm_div_ps(a.m128, norm));
-# else
+#else
return a / len(a);
-# endif
+#endif
}
ccl_device_inline float3 min(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_min_ps(a.m128, b.m128));
-# else
+#else
return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
-# endif
+#endif
}
ccl_device_inline float3 max(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_max_ps(a.m128, b.m128));
-# else
+#else
return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
-# endif
+#endif
}
ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx)
@@ -303,43 +300,43 @@ ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &
ccl_device_inline float3 fabs(const float3 &a)
{
-# ifdef __KERNEL_SSE__
-# ifdef __KERNEL_NEON__
+#ifdef __KERNEL_SSE__
+# ifdef __KERNEL_NEON__
return float3(vabsq_f32(a.m128));
-# else
+# else
__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return float3(_mm_and_ps(a.m128, mask));
-# endif
-# else
- return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
# endif
+#else
+ return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
+#endif
}
ccl_device_inline float3 sqrt(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_sqrt_ps(a));
-# else
+#else
return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z));
-# endif
+#endif
}
ccl_device_inline float3 floor(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_floor_ps(a));
-# else
+#else
return make_float3(floorf(a.x), floorf(a.y), floorf(a.z));
-# endif
+#endif
}
ccl_device_inline float3 ceil(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_ceil_ps(a));
-# else
+#else
return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z));
-# endif
+#endif
}
ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t)
@@ -349,14 +346,13 @@ ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t)
ccl_device_inline float3 rcp(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
/* Don't use _mm_rcp_ps due to poor precision. */
return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
-# else
+#else
return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z);
-# endif
+#endif
}
-#endif /* !__KERNEL_OPENCL__ */
ccl_device_inline float min3(float3 a)
{
@@ -483,11 +479,7 @@ ccl_device_inline float average(const float3 a)
ccl_device_inline bool isequal_float3(const float3 a, const float3 b)
{
-#ifdef __KERNEL_OPENCL__
- return all(a == b);
-#else
return a == b;
-#endif
}
ccl_device_inline float3 pow3(float3 v, float e)
diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h
index 0ba2bafa2f0..19af5c8c638 100644
--- a/intern/cycles/util/util_math_float4.h
+++ b/intern/cycles/util/util_math_float4.h
@@ -27,7 +27,6 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float4 operator-(const float4 &a);
ccl_device_inline float4 operator*(const float4 &a, const float4 &b);
ccl_device_inline float4 operator*(const float4 &a, float f);
@@ -66,7 +65,6 @@ ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &
ccl_device_inline float4 fabs(const float4 &a);
ccl_device_inline float4 floor(const float4 &a);
ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t);
-#endif /* !__KERNEL_OPENCL__*/
ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b);
@@ -112,33 +110,32 @@ ccl_device_inline float4 one_float4()
return make_float4(1.0f, 1.0f, 1.0f, 1.0f);
}
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float4 operator-(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return float4(_mm_xor_ps(a.m128, mask));
-# else
+#else
return make_float4(-a.x, -a.y, -a.z, -a.w);
-# endif
+#endif
}
ccl_device_inline float4 operator*(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_mul_ps(a.m128, b.m128));
-# else
+#else
return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-# endif
+#endif
}
ccl_device_inline float4 operator*(const float4 &a, float f)
{
-# if defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE__)
return a * make_float4(f);
-# else
+#else
return make_float4(a.x * f, a.y * f, a.z * f, a.w * f);
-# endif
+#endif
}
ccl_device_inline float4 operator*(float f, const float4 &a)
@@ -153,11 +150,11 @@ ccl_device_inline float4 operator/(const float4 &a, float f)
ccl_device_inline float4 operator/(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_div_ps(a.m128, b.m128));
-# else
+#else
return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
-# endif
+#endif
}
ccl_device_inline float4 operator+(const float4 &a, const float f)
@@ -167,11 +164,11 @@ ccl_device_inline float4 operator+(const float4 &a, const float f)
ccl_device_inline float4 operator+(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_add_ps(a.m128, b.m128));
-# else
+#else
return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-# endif
+#endif
}
ccl_device_inline float4 operator-(const float4 &a, const float f)
@@ -181,11 +178,11 @@ ccl_device_inline float4 operator-(const float4 &a, const float f)
ccl_device_inline float4 operator-(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_sub_ps(a.m128, b.m128));
-# else
+#else
return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-# endif
+#endif
}
ccl_device_inline float4 operator+=(float4 &a, const float4 &b)
@@ -215,38 +212,38 @@ ccl_device_inline float4 operator/=(float4 &a, float f)
ccl_device_inline int4 operator<(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128)));
-# else
+#else
return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
-# endif
+#endif
}
ccl_device_inline int4 operator>=(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128)));
-# else
+#else
return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
-# endif
+#endif
}
ccl_device_inline int4 operator<=(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128)));
-# else
+#else
return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w);
-# endif
+#endif
}
ccl_device_inline bool operator==(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15;
-# else
+#else
return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w);
-# endif
+#endif
}
ccl_device_inline float distance(const float4 &a, const float4 &b)
@@ -256,16 +253,16 @@ ccl_device_inline float distance(const float4 &a, const float4 &b)
ccl_device_inline float dot(const float4 &a, const float4 &b)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
-# if defined(__KERNEL_NEON__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
__m128 t = vmulq_f32(a, b);
return vaddvq_f32(t);
-# else
- return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
-# endif
# else
- return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w);
+ return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
# endif
+#else
+ return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w);
+#endif
}
ccl_device_inline float len_squared(const float4 &a)
@@ -275,21 +272,21 @@ ccl_device_inline float len_squared(const float4 &a)
ccl_device_inline float4 rcp(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
/* Don't use _mm_rcp_ps due to poor precision. */
return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
-# else
+#else
return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w);
-# endif
+#endif
}
ccl_device_inline float4 sqrt(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_sqrt_ps(a.m128));
-# else
+#else
return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
-# endif
+#endif
}
ccl_device_inline float4 sqr(const float4 &a)
@@ -299,39 +296,39 @@ ccl_device_inline float4 sqr(const float4 &a)
ccl_device_inline float4 cross(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) -
(shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b));
-# else
+#else
return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f);
-# endif
+#endif
}
ccl_device_inline bool is_zero(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return a == make_float4(0.0f);
-# else
+#else
return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
-# endif
+#endif
}
ccl_device_inline float4 reduce_add(const float4 &a)
{
-# if defined(__KERNEL_SSE__)
-# if defined(__KERNEL_NEON__)
+#if defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
return float4(vdupq_n_f32(vaddvq_f32(a)));
-# elif defined(__KERNEL_SSE3__)
+# elif defined(__KERNEL_SSE3__)
float4 h(_mm_hadd_ps(a.m128, a.m128));
return float4(_mm_hadd_ps(h.m128, h.m128));
-# else
+# else
float4 h(shuffle<1, 0, 3, 2>(a) + a);
return shuffle<2, 3, 0, 1>(h) + h;
-# endif
-# else
+# endif
+#else
float sum = (a.x + a.y) + (a.z + a.w);
return make_float4(sum, sum, sum, sum);
-# endif
+#endif
}
ccl_device_inline float average(const float4 &a)
@@ -357,20 +354,20 @@ ccl_device_inline float4 safe_normalize(const float4 &a)
ccl_device_inline float4 min(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_min_ps(a.m128, b.m128));
-# else
+#else
return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
-# endif
+#endif
}
ccl_device_inline float4 max(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_max_ps(a.m128, b.m128));
-# else
+#else
return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
-# endif
+#endif
}
ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx)
@@ -380,24 +377,24 @@ ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &
ccl_device_inline float4 fabs(const float4 &a)
{
-# if defined(__KERNEL_SSE__)
-# if defined(__KERNEL_NEON__)
+#if defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
return float4(vabsq_f32(a));
-# else
- return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))));
-# endif
# else
- return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+ return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))));
# endif
+#else
+ return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+#endif
}
ccl_device_inline float4 floor(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_floor_ps(a));
-# else
+#else
return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
-# endif
+#endif
}
ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t)
@@ -405,8 +402,6 @@ ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t)
return a + t * (b - a);
}
-#endif /* !__KERNEL_OPENCL__*/
-
#ifdef __KERNEL_SSE__
template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
__forceinline const float4 shuffle(const float4 &b)
diff --git a/intern/cycles/util/util_math_int2.h b/intern/cycles/util/util_math_int2.h
index 0295cd51f7e..5782b878801 100644
--- a/intern/cycles/util/util_math_int2.h
+++ b/intern/cycles/util/util_math_int2.h
@@ -27,20 +27,17 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline bool operator==(const int2 a, const int2 b);
ccl_device_inline int2 operator+(const int2 &a, const int2 &b);
ccl_device_inline int2 operator+=(int2 &a, const int2 &b);
ccl_device_inline int2 operator-(const int2 &a, const int2 &b);
ccl_device_inline int2 operator*(const int2 &a, const int2 &b);
ccl_device_inline int2 operator/(const int2 &a, const int2 &b);
-#endif /* !__KERNEL_OPENCL__ */
/*******************************************************************************
* Definition.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline bool operator==(const int2 a, const int2 b)
{
return (a.x == b.x && a.y == b.y);
@@ -70,7 +67,6 @@ ccl_device_inline int2 operator/(const int2 &a, const int2 &b)
{
return make_int2(a.x / b.x, a.y / b.y);
}
-#endif /* !__KERNEL_OPENCL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h
index d92ed895dc2..e0dfae7c015 100644
--- a/intern/cycles/util/util_math_int3.h
+++ b/intern/cycles/util/util_math_int3.h
@@ -27,52 +27,49 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline int3 min(int3 a, int3 b);
ccl_device_inline int3 max(int3 a, int3 b);
ccl_device_inline int3 clamp(const int3 &a, int mn, int mx);
ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx);
-#endif /* !__KERNEL_OPENCL__ */
/*******************************************************************************
* Definition.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline int3 min(int3 a, int3 b)
{
-# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
return int3(_mm_min_epi32(a.m128, b.m128));
-# else
+#else
return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
-# endif
+#endif
}
ccl_device_inline int3 max(int3 a, int3 b)
{
-# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
return int3(_mm_max_epi32(a.m128, b.m128));
-# else
+#else
return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
-# endif
+#endif
}
ccl_device_inline int3 clamp(const int3 &a, int mn, int mx)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return min(max(a, make_int3(mn)), make_int3(mx));
-# else
+#else
return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx));
-# endif
+#endif
}
ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return min(max(a, mn), make_int3(mx));
-# else
+#else
return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx));
-# endif
+#endif
}
ccl_device_inline bool operator==(const int3 &a, const int3 &b)
@@ -92,22 +89,21 @@ ccl_device_inline bool operator<(const int3 &a, const int3 &b)
ccl_device_inline int3 operator+(const int3 &a, const int3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int3(_mm_add_epi32(a.m128, b.m128));
-# else
+#else
return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
-# endif
+#endif
}
ccl_device_inline int3 operator-(const int3 &a, const int3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int3(_mm_sub_epi32(a.m128, b.m128));
-# else
+#else
return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
-# endif
+#endif
}
-#endif /* !__KERNEL_OPENCL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 8905c8bc7f0..c78f4615013 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -66,6 +66,7 @@ typedef struct stat path_stat_t;
static string cached_path = "";
static string cached_user_path = "";
+static string cached_temp_path = "";
static string cached_xdg_cache_path = "";
namespace {
@@ -335,10 +336,11 @@ static string path_xdg_cache_get()
}
#endif
-void path_init(const string &path, const string &user_path)
+void path_init(const string &path, const string &user_path, const string &temp_path)
{
cached_path = path;
cached_user_path = user_path;
+ cached_temp_path = temp_path;
#ifdef _MSC_VER
// workaround for https://svn.boost.org/trac/boost/ticket/6320
@@ -382,6 +384,15 @@ string path_cache_get(const string &sub)
#endif
}
+string path_temp_get(const string &sub)
+{
+ if (cached_temp_path == "") {
+ cached_temp_path = Filesystem::temp_directory_path();
+ }
+
+ return path_join(cached_temp_path, sub);
+}
+
#if defined(__linux__) || defined(__APPLE__)
string path_xdg_home_get(const string &sub = "");
#endif
@@ -739,177 +750,6 @@ bool path_remove(const string &path)
return remove(path.c_str()) == 0;
}
-struct SourceReplaceState {
- typedef map<string, string> ProcessedMapping;
- /* Base director for all relative include headers. */
- string base;
- /* Result of processed files. */
- ProcessedMapping processed_files;
- /* Set of files which are considered "precompiled" and which are replaced
- * with and empty string on a subsequent occurrence in include statement.
- */
- set<string> precompiled_headers;
-};
-
-static string path_source_replace_includes_recursive(const string &source,
- const string &source_filepath,
- SourceReplaceState *state);
-
-static string line_directive(const SourceReplaceState &state, const string &path, const int line)
-{
- string unescaped_path = path;
- /* First we make path relative. */
- if (string_startswith(unescaped_path, state.base.c_str())) {
- const string base_file = path_filename(state.base);
- const size_t base_len = state.base.length();
- unescaped_path = base_file +
- unescaped_path.substr(base_len, unescaped_path.length() - base_len);
- }
- /* Second, we replace all unsafe characters. */
- const size_t length = unescaped_path.length();
- string escaped_path = "";
- for (size_t i = 0; i < length; ++i) {
- const char ch = unescaped_path[i];
- if (strchr("\"\'\?\\", ch) != NULL) {
- escaped_path += "\\";
- }
- escaped_path += ch;
- }
- /* TODO(sergey): Check whether using std::to_string combined with several
- * concatenation operations is any faster.
- */
- return string_printf("#line %d \"%s\"", line, escaped_path.c_str());
-}
-
-static string path_source_handle_preprocessor(const string &preprocessor_line,
- const string &source_filepath,
- const size_t line_number,
- SourceReplaceState *state)
-{
- string result = preprocessor_line;
- string token = string_strip(preprocessor_line.substr(1, preprocessor_line.size() - 1));
- if (string_startswith(token, "include")) {
- token = string_strip(token.substr(7, token.size() - 7));
- if (token[0] == '"') {
- const size_t n_start = 1;
- const size_t n_end = token.find("\"", n_start);
- const string filename = token.substr(n_start, n_end - n_start);
- const bool is_precompiled = string_endswith(token, "// PRECOMPILED");
- string filepath = path_join(state->base, filename);
- if (!path_exists(filepath)) {
- filepath = path_join(path_dirname(source_filepath), filename);
- }
- if (is_precompiled) {
- state->precompiled_headers.insert(filepath);
- }
- string text;
- if (path_read_text(filepath, text)) {
- text = path_source_replace_includes_recursive(text, filepath, state);
- /* Use line directives for better error messages. */
- result = line_directive(*state, filepath, 1) + "\n" + text + "\n" +
- line_directive(*state, source_filepath, line_number + 1);
- }
- }
- }
- return result;
-}
-
-/* Our own little c preprocessor that replaces #includes with the file
- * contents, to work around issue of OpenCL drivers not supporting
- * include paths with spaces in them.
- */
-static string path_source_replace_includes_recursive(const string &source,
- const string &source_filepath,
- SourceReplaceState *state)
-{
- /* Try to re-use processed file without spending time on replacing all
- * include directives again.
- */
- SourceReplaceState::ProcessedMapping::iterator replaced_file = state->processed_files.find(
- source_filepath);
- if (replaced_file != state->processed_files.end()) {
- if (state->precompiled_headers.find(source_filepath) != state->precompiled_headers.end()) {
- return "";
- }
- return replaced_file->second;
- }
- /* Perform full file processing. */
- string result = "";
- const size_t source_length = source.length();
- size_t index = 0;
- /* Information about where we are in the source. */
- size_t line_number = 0, column_number = 1;
- /* Currently gathered non-preprocessor token.
- * Store as start/length rather than token itself to avoid overhead of
- * memory re-allocations on each character concatenation.
- */
- size_t token_start = 0, token_length = 0;
- /* Denotes whether we're inside of preprocessor line, together with
- * preprocessor line itself.
- *
- * TODO(sergey): Investigate whether using token start/end position
- * gives measurable speedup.
- */
- bool inside_preprocessor = false;
- string preprocessor_line = "";
- /* Actual loop over the whole source. */
- while (index < source_length) {
- const char ch = source[index];
- if (ch == '\n') {
- if (inside_preprocessor) {
- result += path_source_handle_preprocessor(
- preprocessor_line, source_filepath, line_number, state);
- /* Start gathering net part of the token. */
- token_start = index;
- token_length = 0;
- }
- inside_preprocessor = false;
- preprocessor_line = "";
- column_number = 0;
- ++line_number;
- }
- else if (ch == '#' && column_number == 1 && !inside_preprocessor) {
- /* Append all possible non-preprocessor token to the result. */
- if (token_length != 0) {
- result.append(source, token_start, token_length);
- token_start = index;
- token_length = 0;
- }
- inside_preprocessor = true;
- }
- if (inside_preprocessor) {
- preprocessor_line += ch;
- }
- else {
- ++token_length;
- }
- ++index;
- ++column_number;
- }
- /* Append possible tokens which happened before special events handled
- * above.
- */
- if (token_length != 0) {
- result.append(source, token_start, token_length);
- }
- if (inside_preprocessor) {
- result += path_source_handle_preprocessor(
- preprocessor_line, source_filepath, line_number, state);
- }
- /* Store result for further reuse. */
- state->processed_files[source_filepath] = result;
- return result;
-}
-
-string path_source_replace_includes(const string &source,
- const string &path,
- const string &source_filename)
-{
- SourceReplaceState state;
- state.base = path;
- return path_source_replace_includes_recursive(source, path_join(path, source_filename), &state);
-}
-
FILE *path_fopen(const string &path, const string &mode)
{
#ifdef _WIN32
diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h
index 7a83c2135a4..f899bc2e01c 100644
--- a/intern/cycles/util/util_path.h
+++ b/intern/cycles/util/util_path.h
@@ -32,9 +32,10 @@
CCL_NAMESPACE_BEGIN
/* program paths */
-void path_init(const string &path = "", const string &user_path = "");
+void path_init(const string &path = "", const string &user_path = "", const string &tmp_path = "");
string path_get(const string &sub = "");
string path_user_get(const string &sub = "");
+string path_temp_get(const string &sub = "");
string path_cache_get(const string &sub = "");
/* path string manipulation */
@@ -65,11 +66,6 @@ bool path_read_text(const string &path, string &text);
/* File manipulation. */
bool path_remove(const string &path);
-/* source code utility */
-string path_source_replace_includes(const string &source,
- const string &path,
- const string &source_filename = "");
-
/* cache utility */
void path_cache_clear_except(const string &name, const set<string> &except);
diff --git a/intern/cycles/util/util_profiling.cpp b/intern/cycles/util/util_profiling.cpp
index 073b09f719f..5343f076e22 100644
--- a/intern/cycles/util/util_profiling.cpp
+++ b/intern/cycles/util/util_profiling.cpp
@@ -48,13 +48,7 @@ void Profiler::run()
}
if (cur_shader >= 0 && cur_shader < shader_samples.size()) {
- /* Only consider the active shader during events whose runtime significantly depends on it.
- */
- if (((cur_event >= PROFILING_SHADER_EVAL) && (cur_event <= PROFILING_SUBSURFACE)) ||
- ((cur_event >= PROFILING_CLOSURE_EVAL) &&
- (cur_event <= PROFILING_CLOSURE_VOLUME_SAMPLE))) {
- shader_samples[cur_shader]++;
- }
+ shader_samples[cur_shader]++;
}
if (cur_object >= 0 && cur_object < object_samples.size()) {
diff --git a/intern/cycles/util/util_profiling.h b/intern/cycles/util/util_profiling.h
index ceec08ed894..96bb682c50e 100644
--- a/intern/cycles/util/util_profiling.h
+++ b/intern/cycles/util/util_profiling.h
@@ -28,38 +28,30 @@ CCL_NAMESPACE_BEGIN
enum ProfilingEvent : uint32_t {
PROFILING_UNKNOWN,
PROFILING_RAY_SETUP,
- PROFILING_PATH_INTEGRATE,
- PROFILING_SCENE_INTERSECT,
- PROFILING_INDIRECT_EMISSION,
- PROFILING_VOLUME,
- PROFILING_SHADER_SETUP,
- PROFILING_SHADER_EVAL,
- PROFILING_SHADER_APPLY,
- PROFILING_AO,
- PROFILING_SUBSURFACE,
- PROFILING_CONNECT_LIGHT,
- PROFILING_SURFACE_BOUNCE,
- PROFILING_WRITE_RESULT,
-
- PROFILING_INTERSECT,
- PROFILING_INTERSECT_LOCAL,
- PROFILING_INTERSECT_SHADOW_ALL,
- PROFILING_INTERSECT_VOLUME,
- PROFILING_INTERSECT_VOLUME_ALL,
-
- PROFILING_CLOSURE_EVAL,
- PROFILING_CLOSURE_SAMPLE,
- PROFILING_CLOSURE_VOLUME_EVAL,
- PROFILING_CLOSURE_VOLUME_SAMPLE,
-
- PROFILING_DENOISING,
- PROFILING_DENOISING_CONSTRUCT_TRANSFORM,
- PROFILING_DENOISING_RECONSTRUCT,
- PROFILING_DENOISING_DIVIDE_SHADOW,
- PROFILING_DENOISING_NON_LOCAL_MEANS,
- PROFILING_DENOISING_COMBINE_HALVES,
- PROFILING_DENOISING_GET_FEATURE,
- PROFILING_DENOISING_DETECT_OUTLIERS,
+
+ PROFILING_INTERSECT_CLOSEST,
+ PROFILING_INTERSECT_SUBSURFACE,
+ PROFILING_INTERSECT_SHADOW,
+ PROFILING_INTERSECT_VOLUME_STACK,
+
+ PROFILING_SHADE_SURFACE_SETUP,
+ PROFILING_SHADE_SURFACE_EVAL,
+ PROFILING_SHADE_SURFACE_DIRECT_LIGHT,
+ PROFILING_SHADE_SURFACE_INDIRECT_LIGHT,
+ PROFILING_SHADE_SURFACE_AO,
+ PROFILING_SHADE_SURFACE_PASSES,
+
+ PROFILING_SHADE_VOLUME_SETUP,
+ PROFILING_SHADE_VOLUME_INTEGRATE,
+ PROFILING_SHADE_VOLUME_DIRECT_LIGHT,
+ PROFILING_SHADE_VOLUME_INDIRECT_LIGHT,
+
+ PROFILING_SHADE_SHADOW_SETUP,
+ PROFILING_SHADE_SHADOW_SURFACE,
+ PROFILING_SHADE_SHADOW_VOLUME,
+
+ PROFILING_SHADE_LIGHT_SETUP,
+ PROFILING_SHADE_LIGHT_EVAL,
PROFILING_NUM_EVENTS,
};
@@ -136,37 +128,51 @@ class ProfilingHelper {
state->event = event;
}
+ ~ProfilingHelper()
+ {
+ state->event = previous_event;
+ }
+
inline void set_event(ProfilingEvent event)
{
state->event = event;
}
- inline void set_shader(int shader)
+ protected:
+ ProfilingState *state;
+ uint32_t previous_event;
+};
+
+class ProfilingWithShaderHelper : public ProfilingHelper {
+ public:
+ ProfilingWithShaderHelper(ProfilingState *state, ProfilingEvent event)
+ : ProfilingHelper(state, event)
{
- state->shader = shader;
- if (state->active) {
- assert(shader < state->shader_hits.size());
- state->shader_hits[shader]++;
- }
}
- inline void set_object(int object)
+ ~ProfilingWithShaderHelper()
{
- state->object = object;
- if (state->active) {
- assert(object < state->object_hits.size());
- state->object_hits[object]++;
- }
+ state->object = -1;
+ state->shader = -1;
}
- ~ProfilingHelper()
+ inline void set_shader(int object, int shader)
{
- state->event = previous_event;
+ if (state->active) {
+ state->shader = shader;
+ state->object = object;
+
+ if (shader >= 0) {
+ assert(shader < state->shader_hits.size());
+ state->shader_hits[shader]++;
+ }
+
+ if (object >= 0) {
+ assert(object < state->object_hits.size());
+ state->object_hits[object]++;
+ }
+ }
}
-
- private:
- ProfilingState *state;
- uint32_t previous_event;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 26534a29dfe..dca8d3d0ab5 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -46,7 +46,6 @@ class Progress {
substatus = "";
sync_status = "";
sync_substatus = "";
- kernel_status = "";
update_cb = function_null;
cancel = false;
cancel_message = "";
@@ -87,7 +86,6 @@ class Progress {
substatus = "";
sync_status = "";
sync_substatus = "";
- kernel_status = "";
cancel = false;
cancel_message = "";
error = false;
@@ -316,24 +314,6 @@ class Progress {
}
}
- /* kernel status */
-
- void set_kernel_status(const string &kernel_status_)
- {
- {
- thread_scoped_lock lock(progress_mutex);
- kernel_status = kernel_status_;
- }
-
- set_update();
- }
-
- void get_kernel_status(string &kernel_status_)
- {
- thread_scoped_lock lock(progress_mutex);
- kernel_status_ = kernel_status;
- }
-
/* callback */
void set_update()
@@ -378,8 +358,6 @@ class Progress {
string sync_status;
string sync_substatus;
- string kernel_status;
-
volatile bool cancel;
string cancel_message;
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 8e8caa98a1b..b4a153c329f 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -61,14 +61,14 @@ static struct TrueTy {
{
return true;
}
-} True ccl_maybe_unused;
+} True ccl_attr_maybe_unused;
static struct FalseTy {
__forceinline operator bool() const
{
return false;
}
-} False ccl_maybe_unused;
+} False ccl_attr_maybe_unused;
static struct ZeroTy {
__forceinline operator float() const
@@ -79,7 +79,7 @@ static struct ZeroTy {
{
return 0;
}
-} zero ccl_maybe_unused;
+} zero ccl_attr_maybe_unused;
static struct OneTy {
__forceinline operator float() const
@@ -90,7 +90,7 @@ static struct OneTy {
{
return 1;
}
-} one ccl_maybe_unused;
+} one ccl_attr_maybe_unused;
static struct NegInfTy {
__forceinline operator float() const
@@ -101,7 +101,7 @@ static struct NegInfTy {
{
return std::numeric_limits<int>::min();
}
-} neg_inf ccl_maybe_unused;
+} neg_inf ccl_attr_maybe_unused;
static struct PosInfTy {
__forceinline operator float() const
@@ -112,10 +112,10 @@ static struct PosInfTy {
{
return std::numeric_limits<int>::max();
}
-} inf ccl_maybe_unused, pos_inf ccl_maybe_unused;
+} inf ccl_attr_maybe_unused, pos_inf ccl_attr_maybe_unused;
static struct StepTy {
-} step ccl_maybe_unused;
+} step ccl_attr_maybe_unused;
#endif
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
index d809f2e06d7..7df52d462b7 100644
--- a/intern/cycles/util/util_static_assert.h
+++ b/intern/cycles/util/util_static_assert.h
@@ -24,9 +24,9 @@
CCL_NAMESPACE_BEGIN
-#if defined(__KERNEL_OPENCL__) || defined(CYCLES_CUBIN_CC)
+#if defined(CYCLES_CUBIN_CC)
# define static_assert(statement, message)
-#endif /* __KERNEL_OPENCL__ */
+#endif
#define static_assert_align(st, align) \
static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT
diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp
index 4dfebf14923..9c0b2ca50bb 100644
--- a/intern/cycles/util/util_string.cpp
+++ b/intern/cycles/util/util_string.cpp
@@ -17,6 +17,9 @@
#include <stdarg.h>
#include <stdio.h>
+#include <algorithm>
+#include <cctype>
+
#include "util/util_foreach.h"
#include "util/util_string.h"
#include "util/util_windows.h"
@@ -107,24 +110,26 @@ void string_split(vector<string> &tokens,
}
}
-bool string_startswith(const string &s, const char *start)
+bool string_startswith(const string_view s, const string_view start)
{
- size_t len = strlen(start);
+ const size_t len = start.size();
- if (len > s.size())
- return 0;
- else
- return strncmp(s.c_str(), start, len) == 0;
+ if (len > s.size()) {
+ return false;
+ }
+
+ return strncmp(s.c_str(), start.data(), len) == 0;
}
-bool string_endswith(const string &s, const string &end)
+bool string_endswith(const string_view s, const string_view end)
{
- size_t len = end.length();
+ const size_t len = end.size();
- if (len > s.size())
- return 0;
- else
- return s.compare(s.length() - len, len, end) == 0;
+ if (len > s.size()) {
+ return false;
+ }
+
+ return strncmp(s.c_str() + s.size() - len, end.data(), len) == 0;
}
string string_strip(const string &s)
@@ -172,6 +177,13 @@ string to_string(const char *str)
return string(str);
}
+string string_to_lower(const string &s)
+{
+ string r = s;
+ std::transform(r.begin(), r.end(), r.begin(), [](char c) { return std::tolower(c); });
+ return r;
+}
+
/* Wide char strings helpers for Windows. */
#ifdef _WIN32
diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h
index f2272819b2f..55462cfd8b8 100644
--- a/intern/cycles/util/util_string.h
+++ b/intern/cycles/util/util_string.h
@@ -21,6 +21,11 @@
#include <string.h>
#include <string>
+/* Use the string view implementation from OIIO.
+ * Ideally we would switch to `std::string_view`, but that first requires removing the
+ * `using namespace OIIO` declaration, as it causes symbol collisions. */
+#include <OpenImageIO/string_view.h>
+
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -31,6 +36,8 @@ using std::string;
using std::stringstream;
using std::to_string;
+using OIIO::string_view;
+
#ifdef __GNUC__
# define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2)))
#else
@@ -45,12 +52,13 @@ void string_split(vector<string> &tokens,
const string &separators = "\t ",
bool skip_empty_tokens = true);
void string_replace(string &haystack, const string &needle, const string &other);
-bool string_startswith(const string &s, const char *start);
-bool string_endswith(const string &s, const string &end);
+bool string_startswith(string_view s, string_view start);
+bool string_endswith(string_view s, string_view end);
string string_strip(const string &s);
string string_remove_trademark(const string &s);
string string_from_bool(const bool var);
string to_string(const char *str);
+string string_to_lower(const string &s);
/* Wide char strings are only used on Windows to deal with non-ASCII
* characters in file names and such. No reason to use such strings
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index b010881058b..be8c2fb505a 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -403,4 +403,13 @@ size_t system_physical_ram()
#endif
}
+uint64_t system_self_process_id()
+{
+#ifdef _WIN32
+ return GetCurrentProcessId();
+#else
+ return getpid();
+#endif
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index c4db8b74339..a1797e6ca44 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -65,6 +65,9 @@ size_t system_physical_ram();
/* Start a new process of the current application with the given arguments. */
bool system_call_self(const vector<string> &args);
+/* Get identifier of the currently running process. */
+uint64_t system_self_process_id();
+
CCL_NAMESPACE_END
#endif /* __UTIL_SYSTEM_H__ */
diff --git a/intern/cycles/util/util_tbb.h b/intern/cycles/util/util_tbb.h
index 73e0f92d19c..8f84377ac8c 100644
--- a/intern/cycles/util/util_tbb.h
+++ b/intern/cycles/util/util_tbb.h
@@ -23,6 +23,7 @@
#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
+#include <tbb/parallel_for_each.h>
#include <tbb/task_arena.h>
#include <tbb/task_group.h>
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index 71bf9c65911..4de66bf5f46 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -85,8 +85,6 @@ typedef struct TextureInfo {
uint64_t data;
/* Data Type */
uint data_type;
- /* Buffer number for OpenCL. */
- uint cl_buffer;
/* Interpolation and extension type. */
uint interpolation, extension;
/* Dimensions. */
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index f79eac4cbcf..e9cd3b0b483 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -498,36 +498,12 @@ Transform transform_from_viewplane(BoundBox2D &viewplane);
#endif
-/* TODO(sergey): This is only for until we've got OpenCL 2.0
- * on all devices we consider supported. It'll be replaced with
- * generic address space.
- */
+/* TODO: This can be removed once we know that no devices will require explicit
+ * address space qualifiers for this case. */
-#ifdef __KERNEL_OPENCL__
-
-# define OPENCL_TRANSFORM_ADDRSPACE_GLUE(a, b) a##b
-# define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(function) \
- ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(function, _addrspace)( \
- ccl_addr_space const Transform *t, const float3 a) \
- { \
- Transform private_tfm = *t; \
- return function(&private_tfm, a); \
- }
-
-OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_point)
-OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction)
-OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed)
-
-# undef OPENCL_TRANSFORM_ADDRSPACE_DECLARE
-# undef OPENCL_TRANSFORM_ADDRSPACE_GLUE
-# define transform_point_auto transform_point_addrspace
-# define transform_direction_auto transform_direction_addrspace
-# define transform_direction_transposed_auto transform_direction_transposed_addrspace
-#else
-# define transform_point_auto transform_point
-# define transform_direction_auto transform_direction
-# define transform_direction_transposed_auto transform_direction_transposed
-#endif
+#define transform_point_auto transform_point
+#define transform_direction_auto transform_direction
+#define transform_direction_transposed_auto transform_direction_transposed
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 87358877e3c..442c32b3a3d 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -17,9 +17,7 @@
#ifndef __UTIL_TYPES_H__
#define __UTIL_TYPES_H__
-#ifndef __KERNEL_OPENCL__
-# include <stdlib.h>
-#endif
+#include <stdlib.h>
/* Standard Integer Types */
@@ -44,18 +42,12 @@ CCL_NAMESPACE_BEGIN
/* Shorter Unsigned Names */
-#ifndef __KERNEL_OPENCL__
typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned short ushort;
-#endif
/* Fixed Bits Types */
-#ifdef __KERNEL_OPENCL__
-typedef unsigned long uint64_t;
-#endif
-
#ifndef __KERNEL_GPU__
/* Generic Memory Pointer */
diff --git a/intern/cycles/util/util_unique_ptr.h b/intern/cycles/util/util_unique_ptr.h
index 3aaaf083eff..3181eafd43d 100644
--- a/intern/cycles/util/util_unique_ptr.h
+++ b/intern/cycles/util/util_unique_ptr.h
@@ -21,6 +21,7 @@
CCL_NAMESPACE_BEGIN
+using std::make_unique;
using std::unique_ptr;
CCL_NAMESPACE_END