Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/util_atomic.h50
-rw-r--r--intern/cycles/util/util_debug.cpp83
-rw-r--r--intern/cycles/util/util_debug.h67
-rw-r--r--intern/cycles/util/util_defines.h4
-rw-r--r--intern/cycles/util/util_half.h46
-rw-r--r--intern/cycles/util/util_logging.h1
-rw-r--r--intern/cycles/util/util_math.h97
-rw-r--r--intern/cycles/util/util_math_float2.h5
-rw-r--r--intern/cycles/util/util_math_float3.h128
-rw-r--r--intern/cycles/util/util_math_float4.h145
-rw-r--r--intern/cycles/util/util_math_int2.h4
-rw-r--r--intern/cycles/util/util_math_int3.h40
-rw-r--r--intern/cycles/util/util_path.cpp184
-rw-r--r--intern/cycles/util/util_path.h8
-rw-r--r--intern/cycles/util/util_profiling.cpp8
-rw-r--r--intern/cycles/util/util_profiling.h106
-rw-r--r--intern/cycles/util/util_progress.h22
-rw-r--r--intern/cycles/util/util_simd.h14
-rw-r--r--intern/cycles/util/util_static_assert.h4
-rw-r--r--intern/cycles/util/util_string.cpp36
-rw-r--r--intern/cycles/util/util_string.h12
-rw-r--r--intern/cycles/util/util_system.cpp9
-rw-r--r--intern/cycles/util/util_system.h3
-rw-r--r--intern/cycles/util/util_tbb.h1
-rw-r--r--intern/cycles/util/util_texture.h2
-rw-r--r--intern/cycles/util/util_transform.h34
-rw-r--r--intern/cycles/util/util_types.h10
-rw-r--r--intern/cycles/util/util_unique_ptr.h1
28 files changed, 371 insertions, 753 deletions
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index 13d177d2b25..de17efafcf2 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -34,56 +34,6 @@
#else /* __KERNEL_GPU__ */
-# ifdef __KERNEL_OPENCL__
-
-/* Float atomics implementation credits:
- * http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html
- */
-ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *source,
- const float operand)
-{
- union {
- unsigned int int_value;
- float float_value;
- } new_value;
- union {
- unsigned int int_value;
- float float_value;
- } prev_value;
- do {
- prev_value.float_value = *source;
- new_value.float_value = prev_value.float_value + operand;
- } while (atomic_cmpxchg((volatile ccl_global unsigned int *)source,
- prev_value.int_value,
- new_value.int_value) != prev_value.int_value);
- return new_value.float_value;
-}
-
-ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest,
- const float old_val,
- const float new_val)
-{
- union {
- unsigned int int_value;
- float float_value;
- } new_value, prev_value, result;
- prev_value.float_value = old_val;
- new_value.float_value = new_val;
- result.int_value = atomic_cmpxchg(
- (volatile ccl_global unsigned int *)dest, prev_value.int_value, new_value.int_value);
- return result.float_value;
-}
-
-# define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
-# define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
-# define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
-# define atomic_fetch_and_or_uint32(p, x) atomic_or((p), (x))
-
-# define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE
-# define ccl_barrier(flags) barrier(flags)
-
-# endif /* __KERNEL_OPENCL__ */
-
# ifdef __KERNEL_CUDA__
# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x))
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 74ecefa1917..1d598725c84 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -26,13 +26,7 @@
CCL_NAMESPACE_BEGIN
DebugFlags::CPU::CPU()
- : avx2(true),
- avx(true),
- sse41(true),
- sse3(true),
- sse2(true),
- bvh_layout(BVH_LAYOUT_AUTO),
- split_kernel(false)
+ : avx2(true), avx(true), sse41(true), sse3(true), sse2(true), bvh_layout(BVH_LAYOUT_AUTO)
{
reset();
}
@@ -58,11 +52,9 @@ void DebugFlags::CPU::reset()
#undef CHECK_CPU_FLAGS
bvh_layout = BVH_LAYOUT_AUTO;
-
- split_kernel = false;
}
-DebugFlags::CUDA::CUDA() : adaptive_compile(false), split_kernel(false)
+DebugFlags::CUDA::CUDA() : adaptive_compile(false)
{
reset();
}
@@ -71,8 +63,6 @@ void DebugFlags::CUDA::reset()
{
if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL)
adaptive_compile = true;
-
- split_kernel = false;
}
DebugFlags::OptiX::OptiX()
@@ -82,42 +72,7 @@ DebugFlags::OptiX::OptiX()
void DebugFlags::OptiX::reset()
{
- cuda_streams = 1;
- curves_api = false;
-}
-
-DebugFlags::OpenCL::OpenCL() : device_type(DebugFlags::OpenCL::DEVICE_ALL), debug(false)
-{
- reset();
-}
-
-void DebugFlags::OpenCL::reset()
-{
- /* Initialize device type from environment variables. */
- device_type = DebugFlags::OpenCL::DEVICE_ALL;
- char *device = getenv("CYCLES_OPENCL_TEST");
- if (device) {
- if (strcmp(device, "NONE") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_NONE;
- }
- else if (strcmp(device, "ALL") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_ALL;
- }
- else if (strcmp(device, "DEFAULT") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_DEFAULT;
- }
- else if (strcmp(device, "CPU") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_CPU;
- }
- else if (strcmp(device, "GPU") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_GPU;
- }
- else if (strcmp(device, "ACCELERATOR") == 0) {
- device_type = DebugFlags::OpenCL::DEVICE_ACCELERATOR;
- }
- }
- /* Initialize other flags from environment variables. */
- debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL);
+ use_debug = false;
}
DebugFlags::DebugFlags() : viewport_static_bvh(false), running_inside_blender(false)
@@ -131,7 +86,6 @@ void DebugFlags::reset()
cpu.reset();
cuda.reset();
optix.reset();
- opencl.reset();
}
std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags)
@@ -142,40 +96,13 @@ std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags)
<< " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n"
<< " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n"
<< " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n"
- << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n"
- << " Split : " << string_from_bool(debug_flags.cpu.split_kernel) << "\n";
+ << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n";
os << "CUDA flags:\n"
<< " Adaptive Compile : " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n";
os << "OptiX flags:\n"
- << " CUDA streams : " << debug_flags.optix.cuda_streams << "\n";
-
- const char *opencl_device_type;
- switch (debug_flags.opencl.device_type) {
- case DebugFlags::OpenCL::DEVICE_NONE:
- opencl_device_type = "NONE";
- break;
- case DebugFlags::OpenCL::DEVICE_ALL:
- opencl_device_type = "ALL";
- break;
- case DebugFlags::OpenCL::DEVICE_DEFAULT:
- opencl_device_type = "DEFAULT";
- break;
- case DebugFlags::OpenCL::DEVICE_CPU:
- opencl_device_type = "CPU";
- break;
- case DebugFlags::OpenCL::DEVICE_GPU:
- opencl_device_type = "GPU";
- break;
- case DebugFlags::OpenCL::DEVICE_ACCELERATOR:
- opencl_device_type = "ACCELERATOR";
- break;
- }
- os << "OpenCL flags:\n"
- << " Device type : " << opencl_device_type << "\n"
- << " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n"
- << " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n";
+ << " Debug : " << string_from_bool(debug_flags.optix.use_debug) << "\n";
return os;
}
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index f7e53f90f74..99e2723180c 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -79,9 +79,6 @@ class DebugFlags {
* CPUs and GPUs can be selected here instead.
*/
BVHLayout bvh_layout;
-
- /* Whether split kernel is used */
- bool split_kernel;
};
/* Descriptor of CUDA feature-set to be used. */
@@ -94,9 +91,6 @@ class DebugFlags {
/* Whether adaptive feature based runtime compile is enabled or not.
* Requires the CUDA Toolkit and only works on Linux atm. */
bool adaptive_compile;
-
- /* Whether split kernel is used */
- bool split_kernel;
};
/* Descriptor of OptiX feature-set to be used. */
@@ -106,61 +100,9 @@ class DebugFlags {
/* Reset flags to their defaults. */
void reset();
- /* Number of CUDA streams to launch kernels concurrently from. */
- int cuda_streams;
-
- /* Use OptiX curves API for hair instead of custom implementation. */
- bool curves_api;
- };
-
- /* Descriptor of OpenCL feature-set to be used. */
- struct OpenCL {
- OpenCL();
-
- /* Reset flags to their defaults. */
- void reset();
-
- /* Available device types.
- * Only gives a hint which devices to let user to choose from, does not
- * try to use any sort of optimal device or so.
- */
- enum DeviceType {
- /* None of OpenCL devices will be used. */
- DEVICE_NONE,
- /* All OpenCL devices will be used. */
- DEVICE_ALL,
- /* Default system OpenCL device will be used. */
- DEVICE_DEFAULT,
- /* Host processor will be used. */
- DEVICE_CPU,
- /* GPU devices will be used. */
- DEVICE_GPU,
- /* Dedicated OpenCL accelerator device will be used. */
- DEVICE_ACCELERATOR,
- };
-
- /* Available kernel types. */
- enum KernelType {
- /* Do automated guess which kernel to use, based on the officially
- * supported GPUs and such.
- */
- KERNEL_DEFAULT,
- /* Force mega kernel to be used. */
- KERNEL_MEGA,
- /* Force split kernel to be used. */
- KERNEL_SPLIT,
- };
-
- /* Requested device type. */
- DeviceType device_type;
-
- /* Use debug version of the kernel. */
- bool debug;
-
- /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all
- * devices. */
- /* Artificial memory limit in bytes (0 if disabled). */
- size_t mem_limit;
+ /* Load OptiX module with debug capabilities. Will lower logging verbosity level, enable
+ * validations, and lower optimization level. */
+ bool use_debug;
};
/* Get instance of debug flags registry. */
@@ -182,9 +124,6 @@ class DebugFlags {
/* Requested OptiX flags. */
OptiX optix;
- /* Requested OpenCL flags. */
- OpenCL opencl;
-
private:
DebugFlags();
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 0a239a944a5..9b1698d461a 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -43,9 +43,9 @@
# define ccl_local_param
# define ccl_private
# define ccl_restrict __restrict
-# define ccl_ref &
# define ccl_optional_struct_init
# define ccl_loop_no_unroll
+# define ccl_attr_maybe_unused [[maybe_unused]]
# define __KERNEL_WITH_SSE_ALIGN__
# if defined(_WIN32) && !defined(FREE_WINDOWS)
@@ -62,7 +62,6 @@
# define ccl_may_alias
# define ccl_always_inline __forceinline
# define ccl_never_inline __declspec(noinline)
-# define ccl_maybe_unused
# else /* _WIN32 && !FREE_WINDOWS */
# define ccl_device_inline static inline __attribute__((always_inline))
# define ccl_device_forceinline static inline __attribute__((always_inline))
@@ -74,7 +73,6 @@
# define ccl_may_alias __attribute__((__may_alias__))
# define ccl_always_inline __attribute__((always_inline))
# define ccl_never_inline __attribute__((noinline))
-# define ccl_maybe_unused __attribute__((used))
# endif /* _WIN32 && !FREE_WINDOWS */
/* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index a8d4ee75e20..d9edfec5da3 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -28,14 +28,8 @@ CCL_NAMESPACE_BEGIN
/* Half Floats */
-#ifdef __KERNEL_OPENCL__
-
-# define float4_store_half(h, f, scale) vstore_half4(f *(scale), 0, h);
-
-#else
-
/* CUDA has its own half data type, no need to define then */
-# ifndef __KERNEL_CUDA__
+#ifndef __KERNEL_CUDA__
/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
* unsigned shorts. */
class half {
@@ -59,27 +53,27 @@ class half {
private:
unsigned short v;
};
-# endif
+#endif
struct half4 {
half x, y, z, w;
};
-# ifdef __KERNEL_CUDA__
+#ifdef __KERNEL_CUDA__
-ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f)
{
- h[0] = __float2half(f.x * scale);
- h[1] = __float2half(f.y * scale);
- h[2] = __float2half(f.z * scale);
- h[3] = __float2half(f.w * scale);
+ h[0] = __float2half(f.x);
+ h[1] = __float2half(f.y);
+ h[2] = __float2half(f.z);
+ h[3] = __float2half(f.w);
}
-# else
+#else
-ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
+ccl_device_inline void float4_store_half(half *h, float4 f)
{
-# ifndef __KERNEL_SSE2__
+# ifndef __KERNEL_SSE2__
for (int i = 0; i < 4; i++) {
/* optimized float to half for pixels:
* assumes no negative, no nan, no inf, and sets denormal to 0 */
@@ -87,8 +81,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
uint i;
float f;
} in;
- float fscale = f[i] * scale;
- in.f = (fscale > 0.0f) ? ((fscale < 65504.0f) ? fscale : 65504.0f) : 0.0f;
+ in.f = (f[i] > 0.0f) ? ((f[i] < 65504.0f) ? f[i] : 65504.0f) : 0.0f;
int x = in.i;
int absolute = x & 0x7FFFFFFF;
@@ -98,23 +91,22 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
h[i] = (rshift & 0x7FFF);
}
-# else
+# else
/* same as above with SSE */
- ssef fscale = load4f(f) * scale;
- ssef x = min(max(fscale, 0.0f), 65504.0f);
+ ssef x = min(max(load4f(f), 0.0f), 65504.0f);
-# ifdef __KERNEL_AVX2__
+# ifdef __KERNEL_AVX2__
ssei rpack = _mm_cvtps_ph(x, 0);
-# else
+# else
ssei absolute = cast(x) & 0x7FFFFFFF;
ssei Z = absolute + 0xC8000000;
ssei result = andnot(absolute < 0x38800000, Z);
ssei rshift = (result >> 13) & 0x7FFF;
ssei rpack = _mm_packs_epi32(rshift, rshift);
-# endif
+# endif
_mm_storel_pi((__m64 *)h, _mm_castsi128_ps(rpack));
-# endif
+# endif
}
ccl_device_inline float half_to_float(half h)
@@ -160,8 +152,6 @@ ccl_device_inline half float_to_half(float f)
return (value_bits | sign_bit);
}
-# endif
-
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h
index c161299acd0..35c2d436d09 100644
--- a/intern/cycles/util/util_logging.h
+++ b/intern/cycles/util/util_logging.h
@@ -49,6 +49,7 @@ class LogMessageVoidify {
# define LOG(severity) LOG_SUPPRESS()
# define VLOG(severity) LOG_SUPPRESS()
# define VLOG_IF(severity, condition) LOG_SUPPRESS()
+# define VLOG_IS_ON(severity) false
# define CHECK(expression) LOG_SUPPRESS()
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index c5996ebfcb6..6d728dde679 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -26,11 +26,9 @@
# include <cmath>
#endif
-#ifndef __KERNEL_OPENCL__
-# include <float.h>
-# include <math.h>
-# include <stdio.h>
-#endif /* __KERNEL_OPENCL__ */
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
#include "util/util_types.h"
@@ -86,7 +84,6 @@ CCL_NAMESPACE_BEGIN
/* Scalar */
#ifdef _WIN32
-# ifndef __KERNEL_OPENCL__
ccl_device_inline float fmaxf(float a, float b)
{
return (a > b) ? a : b;
@@ -96,8 +93,7 @@ ccl_device_inline float fminf(float a, float b)
{
return (a < b) ? a : b;
}
-# endif /* !__KERNEL_OPENCL__ */
-#endif /* _WIN32 */
+#endif /* _WIN32 */
#ifndef __KERNEL_GPU__
using std::isfinite;
@@ -119,6 +115,11 @@ ccl_device_inline int min(int a, int b)
return (a < b) ? a : b;
}
+/* Smaller of two unsigned integers; returns `b` when both are equal. */
+ccl_device_inline uint min(uint a, uint b)
+{
+  if (a < b) {
+    return a;
+  }
+  return b;
+}
+
ccl_device_inline float max(float a, float b)
{
return (a > b) ? a : b;
@@ -166,7 +167,6 @@ ccl_device_inline float max4(float a, float b, float c, float d)
return max(max(a, b), max(c, d));
}
-#ifndef __KERNEL_OPENCL__
/* Int/Float conversion */
ccl_device_inline int as_int(uint i)
@@ -241,24 +241,23 @@ ccl_device_inline float __uint_as_float(uint i)
ccl_device_inline int4 __float4_as_int4(float4 f)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(f.m128));
-# else
+#else
return make_int4(
__float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w));
-# endif
+#endif
}
ccl_device_inline float4 __int4_as_float4(int4 i)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_castsi128_ps(i.m128));
-# else
+#else
return make_float4(
__int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w));
-# endif
+#endif
}
-#endif /* __KERNEL_OPENCL__ */
/* Versions of functions which are safe for fast math. */
ccl_device_inline bool isnan_safe(float f)
@@ -279,7 +278,6 @@ ccl_device_inline float ensure_finite(float v)
return isfinite_safe(v) ? v : 0.0f;
}
-#ifndef __KERNEL_OPENCL__
ccl_device_inline int clamp(int a, int mn, int mx)
{
return min(max(a, mn), mx);
@@ -309,8 +307,6 @@ ccl_device_inline float smoothstep(float edge0, float edge1, float x)
return result;
}
-#endif /* __KERNEL_OPENCL__ */
-
#ifndef __KERNEL_CUDA__
ccl_device_inline float saturate(float a)
{
@@ -451,7 +447,6 @@ CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN
-#ifndef __KERNEL_OPENCL__
/* Interpolation */
template<class A, class B> A lerp(const A &a, const A &b, const B &t)
@@ -459,15 +454,9 @@ template<class A, class B> A lerp(const A &a, const A &b, const B &t)
return (A)(a * ((B)1 - t) + b * t);
}
-#endif /* __KERNEL_OPENCL__ */
-
/* Triangle */
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float triangle_area(const float3 &v1, const float3 &v2, const float3 &v3)
-#else
-ccl_device_inline float triangle_area(const float3 v1, const float3 v2, const float3 v3)
-#endif
{
return len(cross(v3 - v2, v1 - v2)) * 0.5f;
}
@@ -665,11 +654,7 @@ ccl_device_inline float pow22(float a)
ccl_device_inline float beta(float x, float y)
{
-#ifndef __KERNEL_OPENCL__
return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y));
-#else
- return expf(lgamma(x) + lgamma(y) - lgamma(x + y));
-#endif
}
ccl_device_inline float xor_signmask(float x, int y)
@@ -686,8 +671,6 @@ ccl_device_inline uint count_leading_zeros(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
return __clz(x);
-#elif defined(__KERNEL_OPENCL__)
- return clz(x);
#else
assert(x != 0);
# ifdef _MSC_VER
@@ -704,8 +687,6 @@ ccl_device_inline uint count_trailing_zeros(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
return (__ffs(x) - 1);
-#elif defined(__KERNEL_OPENCL__)
- return (31 - count_leading_zeros(x & -x));
#else
assert(x != 0);
# ifdef _MSC_VER
@@ -722,8 +703,6 @@ ccl_device_inline uint find_first_set(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
return __ffs(x);
-#elif defined(__KERNEL_OPENCL__)
- return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
#else
# ifdef _MSC_VER
return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
@@ -797,6 +776,52 @@ ccl_device_inline float precise_angle(float3 a, float3 b)
return 2.0f * atan2f(len(a - b), len(a + b));
}
+/* Return the smallest power of two which is strictly greater than the given value.
+ *
+ * Zero maps to 1. The result is only representable for inputs below 2^31: for larger inputs the
+ * shift amount becomes 32, which is undefined for a 32-bit `uint`, so callers must not pass
+ * such values. */
+ccl_device_inline uint next_power_of_two(uint x)
+{
+  /* Shift an unsigned one, so that setting bit 31 does not go through signed arithmetic. */
+  return x == 0 ? 1u : 1u << (32 - count_leading_zeros(x));
+}
+
+/* Return the largest power of two which is strictly lower than the given value.
+ *
+ * Values below 2 have no such power of two and are returned unchanged. */
+ccl_device_inline uint prev_power_of_two(uint x)
+{
+  /* The highest set bit of `x - 1` is the result. Shift an unsigned one, so that bit 31 can be
+   * produced without going through signed arithmetic. */
+  return x < 2 ? x : 1u << (31 - count_leading_zeros(x - 1));
+}
+
+#ifndef __has_builtin
+# define __has_builtin(v) 0
+#endif
+
+/* Reverses the bits of a 32 bit integer: bit 0 is exchanged with bit 31, bit 1 with bit 30, and
+ * so on. */
+ccl_device_inline uint32_t reverse_integer_bits(uint32_t x)
+{
+  /* Use a native instruction if it exists. */
+#if defined(__aarch64__)
+  /* The `%w` operand modifier selects the 32-bit view of a register and is only understood when
+   * targeting AArch64. */
+  __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x));
+  return x;
+#elif defined(__arm__) && (__ARM_ARCH >= 7)
+  /* 32-bit ARM: registers are already 32 bits wide, so the operands are used without a modifier.
+   * Older architecture levels (or compilers which do not define `__ARM_ARCH`) fall through to
+   * the generic code below. */
+  __asm__("rbit %0, %1" : "=r"(x) : "r"(x));
+  return x;
+#elif defined(__KERNEL_CUDA__)
+  return __brev(x);
+#elif __has_builtin(__builtin_bitreverse32)
+  return __builtin_bitreverse32(x);
+#else
+  /* Flip pairwise. */
+  x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1);
+  /* Flip pairs. */
+  x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2);
+  /* Flip nibbles. */
+  x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4);
+  /* Flip bytes. CPUs have an instruction for that, pretty fast one. */
+#  ifdef _MSC_VER
+  return _byteswap_ulong(x);
+#  elif defined(__INTEL_COMPILER)
+  return (uint32_t)_bswap((int)x);
+#  else
+  /* Assuming gcc or clang. */
+  return __builtin_bswap32(x);
+#  endif
+#endif
+}
+
CCL_NAMESPACE_END
#endif /* __UTIL_MATH_H__ */
diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h
index 17f6f3c9382..70b80c33544 100644
--- a/intern/cycles/util/util_math_float2.h
+++ b/intern/cycles/util/util_math_float2.h
@@ -27,7 +27,6 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float2 operator-(const float2 &a);
ccl_device_inline float2 operator*(const float2 &a, const float2 &b);
ccl_device_inline float2 operator*(const float2 &a, float f);
@@ -64,7 +63,6 @@ ccl_device_inline float2 fabs(const float2 &a);
ccl_device_inline float2 as_float2(const float4 &a);
ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t);
ccl_device_inline float2 floor(const float2 &a);
-#endif /* !__KERNEL_OPENCL__ */
ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b);
@@ -82,7 +80,6 @@ ccl_device_inline float2 one_float2()
return make_float2(1.0f, 1.0f);
}
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float2 operator-(const float2 &a)
{
return make_float2(-a.x, -a.y);
@@ -262,8 +259,6 @@ ccl_device_inline float2 floor(const float2 &a)
return make_float2(floorf(a.x), floorf(a.y));
}
-#endif /* !__KERNEL_OPENCL__ */
-
ccl_device_inline float2 safe_divide_float2_float(const float2 a, const float b)
{
return (b != 0.0f) ? a / b : zero_float2();
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 9673c043189..30a1b4c3f77 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -27,7 +27,6 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float3 operator-(const float3 &a);
ccl_device_inline float3 operator*(const float3 &a, const float3 &b);
ccl_device_inline float3 operator*(const float3 &a, const float f);
@@ -63,7 +62,6 @@ ccl_device_inline float3 rcp(const float3 &a);
ccl_device_inline float3 sqrt(const float3 &a);
ccl_device_inline float3 floor(const float3 &a);
ccl_device_inline float3 ceil(const float3 &a);
-#endif /* !__KERNEL_OPENCL__ */
ccl_device_inline float min3(float3 a);
ccl_device_inline float max3(float3 a);
@@ -105,50 +103,49 @@ ccl_device_inline float3 one_float3()
return make_float3(1.0f, 1.0f, 1.0f);
}
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float3 operator-(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
-# else
+#else
return make_float3(-a.x, -a.y, -a.z);
-# endif
+#endif
}
ccl_device_inline float3 operator*(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_mul_ps(a.m128, b.m128));
-# else
+#else
return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
-# endif
+#endif
}
ccl_device_inline float3 operator*(const float3 &a, const float f)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f)));
-# else
+#else
return make_float3(a.x * f, a.y * f, a.z * f);
-# endif
+#endif
}
ccl_device_inline float3 operator*(const float f, const float3 &a)
{
-# if defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE__)
return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
-# else
+#else
return make_float3(a.x * f, a.y * f, a.z * f);
-# endif
+#endif
}
ccl_device_inline float3 operator/(const float f, const float3 &a)
{
-# if defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(_mm_set1_ps(f), a.m128));
-# else
+#else
return make_float3(f / a.x, f / a.y, f / a.z);
-# endif
+#endif
}
ccl_device_inline float3 operator/(const float3 &a, const float f)
@@ -159,11 +156,11 @@ ccl_device_inline float3 operator/(const float3 &a, const float f)
ccl_device_inline float3 operator/(const float3 &a, const float3 &b)
{
-# if defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE__)
return float3(_mm_div_ps(a.m128, b.m128));
-# else
+#else
return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
-# endif
+#endif
}
ccl_device_inline float3 operator+(const float3 &a, const float f)
@@ -173,11 +170,11 @@ ccl_device_inline float3 operator+(const float3 &a, const float f)
ccl_device_inline float3 operator+(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_add_ps(a.m128, b.m128));
-# else
+#else
return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
-# endif
+#endif
}
ccl_device_inline float3 operator-(const float3 &a, const float f)
@@ -187,11 +184,11 @@ ccl_device_inline float3 operator-(const float3 &a, const float f)
ccl_device_inline float3 operator-(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_sub_ps(a.m128, b.m128));
-# else
+#else
return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
-# endif
+#endif
}
ccl_device_inline float3 operator+=(float3 &a, const float3 &b)
@@ -227,11 +224,11 @@ ccl_device_inline float3 operator/=(float3 &a, float f)
ccl_device_inline bool operator==(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7;
-# else
+#else
return (a.x == b.x && a.y == b.y && a.z == b.z);
-# endif
+#endif
}
ccl_device_inline bool operator!=(const float3 &a, const float3 &b)
@@ -246,20 +243,20 @@ ccl_device_inline float distance(const float3 &a, const float3 &b)
ccl_device_inline float dot(const float3 &a, const float3 &b)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
-# else
+#else
return a.x * b.x + a.y * b.y + a.z * b.z;
-# endif
+#endif
}
ccl_device_inline float dot_xy(const float3 &a, const float3 &b)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a, b), b));
-# else
+#else
return a.x * b.x + a.y * b.y;
-# endif
+#endif
}
ccl_device_inline float3 cross(const float3 &a, const float3 &b)
@@ -270,30 +267,30 @@ ccl_device_inline float3 cross(const float3 &a, const float3 &b)
ccl_device_inline float3 normalize(const float3 &a)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
__m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F));
return float3(_mm_div_ps(a.m128, norm));
-# else
+#else
return a / len(a);
-# endif
+#endif
}
ccl_device_inline float3 min(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_min_ps(a.m128, b.m128));
-# else
+#else
return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
-# endif
+#endif
}
ccl_device_inline float3 max(const float3 &a, const float3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_max_ps(a.m128, b.m128));
-# else
+#else
return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
-# endif
+#endif
}
ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx)
@@ -303,43 +300,43 @@ ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &
ccl_device_inline float3 fabs(const float3 &a)
{
-# ifdef __KERNEL_SSE__
-# ifdef __KERNEL_NEON__
+#ifdef __KERNEL_SSE__
+# ifdef __KERNEL_NEON__
return float3(vabsq_f32(a.m128));
-# else
+# else
__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
return float3(_mm_and_ps(a.m128, mask));
-# endif
-# else
- return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
# endif
+#else
+ return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z));
+#endif
}
ccl_device_inline float3 sqrt(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_sqrt_ps(a));
-# else
+#else
return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z));
-# endif
+#endif
}
ccl_device_inline float3 floor(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_floor_ps(a));
-# else
+#else
return make_float3(floorf(a.x), floorf(a.y), floorf(a.z));
-# endif
+#endif
}
ccl_device_inline float3 ceil(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float3(_mm_ceil_ps(a));
-# else
+#else
return make_float3(ceilf(a.x), ceilf(a.y), ceilf(a.z));
-# endif
+#endif
}
ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t)
@@ -349,14 +346,13 @@ ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t)
ccl_device_inline float3 rcp(const float3 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
/* Don't use _mm_rcp_ps due to poor precision. */
return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
-# else
+#else
return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z);
-# endif
+#endif
}
-#endif /* !__KERNEL_OPENCL__ */
ccl_device_inline float min3(float3 a)
{
@@ -483,11 +479,7 @@ ccl_device_inline float average(const float3 a)
ccl_device_inline bool isequal_float3(const float3 a, const float3 b)
{
-#ifdef __KERNEL_OPENCL__
- return all(a == b);
-#else
return a == b;
-#endif
}
ccl_device_inline float3 pow3(float3 v, float e)
diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h
index 0ba2bafa2f0..19af5c8c638 100644
--- a/intern/cycles/util/util_math_float4.h
+++ b/intern/cycles/util/util_math_float4.h
@@ -27,7 +27,6 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float4 operator-(const float4 &a);
ccl_device_inline float4 operator*(const float4 &a, const float4 &b);
ccl_device_inline float4 operator*(const float4 &a, float f);
@@ -66,7 +65,6 @@ ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &
ccl_device_inline float4 fabs(const float4 &a);
ccl_device_inline float4 floor(const float4 &a);
ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t);
-#endif /* !__KERNEL_OPENCL__*/
ccl_device_inline float4 safe_divide_float4_float(const float4 a, const float b);
@@ -112,33 +110,32 @@ ccl_device_inline float4 one_float4()
return make_float4(1.0f, 1.0f, 1.0f, 1.0f);
}
-#ifndef __KERNEL_OPENCL__
ccl_device_inline float4 operator-(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
__m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
return float4(_mm_xor_ps(a.m128, mask));
-# else
+#else
return make_float4(-a.x, -a.y, -a.z, -a.w);
-# endif
+#endif
}
ccl_device_inline float4 operator*(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_mul_ps(a.m128, b.m128));
-# else
+#else
return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
-# endif
+#endif
}
ccl_device_inline float4 operator*(const float4 &a, float f)
{
-# if defined(__KERNEL_SSE__)
+#if defined(__KERNEL_SSE__)
return a * make_float4(f);
-# else
+#else
return make_float4(a.x * f, a.y * f, a.z * f, a.w * f);
-# endif
+#endif
}
ccl_device_inline float4 operator*(float f, const float4 &a)
@@ -153,11 +150,11 @@ ccl_device_inline float4 operator/(const float4 &a, float f)
ccl_device_inline float4 operator/(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_div_ps(a.m128, b.m128));
-# else
+#else
return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
-# endif
+#endif
}
ccl_device_inline float4 operator+(const float4 &a, const float f)
@@ -167,11 +164,11 @@ ccl_device_inline float4 operator+(const float4 &a, const float f)
ccl_device_inline float4 operator+(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_add_ps(a.m128, b.m128));
-# else
+#else
return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-# endif
+#endif
}
ccl_device_inline float4 operator-(const float4 &a, const float f)
@@ -181,11 +178,11 @@ ccl_device_inline float4 operator-(const float4 &a, const float f)
ccl_device_inline float4 operator-(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_sub_ps(a.m128, b.m128));
-# else
+#else
return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-# endif
+#endif
}
ccl_device_inline float4 operator+=(float4 &a, const float4 &b)
@@ -215,38 +212,38 @@ ccl_device_inline float4 operator/=(float4 &a, float f)
ccl_device_inline int4 operator<(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128)));
-# else
+#else
return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
-# endif
+#endif
}
ccl_device_inline int4 operator>=(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128)));
-# else
+#else
return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
-# endif
+#endif
}
ccl_device_inline int4 operator<=(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128)));
-# else
+#else
return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w);
-# endif
+#endif
}
ccl_device_inline bool operator==(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15;
-# else
+#else
return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w);
-# endif
+#endif
}
ccl_device_inline float distance(const float4 &a, const float4 &b)
@@ -256,16 +253,16 @@ ccl_device_inline float distance(const float4 &a, const float4 &b)
ccl_device_inline float dot(const float4 &a, const float4 &b)
{
-# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
-# if defined(__KERNEL_NEON__)
+#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
__m128 t = vmulq_f32(a, b);
return vaddvq_f32(t);
-# else
- return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
-# endif
# else
- return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w);
+ return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF));
# endif
+#else
+ return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w);
+#endif
}
ccl_device_inline float len_squared(const float4 &a)
@@ -275,21 +272,21 @@ ccl_device_inline float len_squared(const float4 &a)
ccl_device_inline float4 rcp(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
/* Don't use _mm_rcp_ps due to poor precision. */
return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
-# else
+#else
return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w);
-# endif
+#endif
}
ccl_device_inline float4 sqrt(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_sqrt_ps(a.m128));
-# else
+#else
return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w));
-# endif
+#endif
}
ccl_device_inline float4 sqr(const float4 &a)
@@ -299,39 +296,39 @@ ccl_device_inline float4 sqr(const float4 &a)
ccl_device_inline float4 cross(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) -
(shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b));
-# else
+#else
return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f);
-# endif
+#endif
}
ccl_device_inline bool is_zero(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return a == make_float4(0.0f);
-# else
+#else
return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f);
-# endif
+#endif
}
ccl_device_inline float4 reduce_add(const float4 &a)
{
-# if defined(__KERNEL_SSE__)
-# if defined(__KERNEL_NEON__)
+#if defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
return float4(vdupq_n_f32(vaddvq_f32(a)));
-# elif defined(__KERNEL_SSE3__)
+# elif defined(__KERNEL_SSE3__)
float4 h(_mm_hadd_ps(a.m128, a.m128));
return float4(_mm_hadd_ps(h.m128, h.m128));
-# else
+# else
float4 h(shuffle<1, 0, 3, 2>(a) + a);
return shuffle<2, 3, 0, 1>(h) + h;
-# endif
-# else
+# endif
+#else
float sum = (a.x + a.y) + (a.z + a.w);
return make_float4(sum, sum, sum, sum);
-# endif
+#endif
}
ccl_device_inline float average(const float4 &a)
@@ -357,20 +354,20 @@ ccl_device_inline float4 safe_normalize(const float4 &a)
ccl_device_inline float4 min(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_min_ps(a.m128, b.m128));
-# else
+#else
return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
-# endif
+#endif
}
ccl_device_inline float4 max(const float4 &a, const float4 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_max_ps(a.m128, b.m128));
-# else
+#else
return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
-# endif
+#endif
}
ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx)
@@ -380,24 +377,24 @@ ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &
ccl_device_inline float4 fabs(const float4 &a)
{
-# if defined(__KERNEL_SSE__)
-# if defined(__KERNEL_NEON__)
+#if defined(__KERNEL_SSE__)
+# if defined(__KERNEL_NEON__)
return float4(vabsq_f32(a));
-# else
- return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))));
-# endif
# else
- return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+ return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))));
# endif
+#else
+ return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w));
+#endif
}
ccl_device_inline float4 floor(const float4 &a)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return float4(_mm_floor_ps(a));
-# else
+#else
return make_float4(floorf(a.x), floorf(a.y), floorf(a.z), floorf(a.w));
-# endif
+#endif
}
ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t)
@@ -405,8 +402,6 @@ ccl_device_inline float4 mix(const float4 &a, const float4 &b, float t)
return a + t * (b - a);
}
-#endif /* !__KERNEL_OPENCL__*/
-
#ifdef __KERNEL_SSE__
template<size_t index_0, size_t index_1, size_t index_2, size_t index_3>
__forceinline const float4 shuffle(const float4 &b)
diff --git a/intern/cycles/util/util_math_int2.h b/intern/cycles/util/util_math_int2.h
index 0295cd51f7e..5782b878801 100644
--- a/intern/cycles/util/util_math_int2.h
+++ b/intern/cycles/util/util_math_int2.h
@@ -27,20 +27,17 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline bool operator==(const int2 a, const int2 b);
ccl_device_inline int2 operator+(const int2 &a, const int2 &b);
ccl_device_inline int2 operator+=(int2 &a, const int2 &b);
ccl_device_inline int2 operator-(const int2 &a, const int2 &b);
ccl_device_inline int2 operator*(const int2 &a, const int2 &b);
ccl_device_inline int2 operator/(const int2 &a, const int2 &b);
-#endif /* !__KERNEL_OPENCL__ */
/*******************************************************************************
* Definition.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline bool operator==(const int2 a, const int2 b)
{
return (a.x == b.x && a.y == b.y);
@@ -70,7 +67,6 @@ ccl_device_inline int2 operator/(const int2 &a, const int2 &b)
{
return make_int2(a.x / b.x, a.y / b.y);
}
-#endif /* !__KERNEL_OPENCL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h
index d92ed895dc2..e0dfae7c015 100644
--- a/intern/cycles/util/util_math_int3.h
+++ b/intern/cycles/util/util_math_int3.h
@@ -27,52 +27,49 @@ CCL_NAMESPACE_BEGIN
* Declaration.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline int3 min(int3 a, int3 b);
ccl_device_inline int3 max(int3 a, int3 b);
ccl_device_inline int3 clamp(const int3 &a, int mn, int mx);
ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx);
-#endif /* !__KERNEL_OPENCL__ */
/*******************************************************************************
* Definition.
*/
-#ifndef __KERNEL_OPENCL__
ccl_device_inline int3 min(int3 a, int3 b)
{
-# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
return int3(_mm_min_epi32(a.m128, b.m128));
-# else
+#else
return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
-# endif
+#endif
}
ccl_device_inline int3 max(int3 a, int3 b)
{
-# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
+#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
return int3(_mm_max_epi32(a.m128, b.m128));
-# else
+#else
return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
-# endif
+#endif
}
ccl_device_inline int3 clamp(const int3 &a, int mn, int mx)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return min(max(a, make_int3(mn)), make_int3(mx));
-# else
+#else
return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx));
-# endif
+#endif
}
ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return min(max(a, mn), make_int3(mx));
-# else
+#else
return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx));
-# endif
+#endif
}
ccl_device_inline bool operator==(const int3 &a, const int3 &b)
@@ -92,22 +89,21 @@ ccl_device_inline bool operator<(const int3 &a, const int3 &b)
ccl_device_inline int3 operator+(const int3 &a, const int3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int3(_mm_add_epi32(a.m128, b.m128));
-# else
+#else
return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
-# endif
+#endif
}
ccl_device_inline int3 operator-(const int3 &a, const int3 &b)
{
-# ifdef __KERNEL_SSE__
+#ifdef __KERNEL_SSE__
return int3(_mm_sub_epi32(a.m128, b.m128));
-# else
+#else
return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
-# endif
+#endif
}
-#endif /* !__KERNEL_OPENCL__ */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 8905c8bc7f0..c78f4615013 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -66,6 +66,7 @@ typedef struct stat path_stat_t;
static string cached_path = "";
static string cached_user_path = "";
+static string cached_temp_path = "";
static string cached_xdg_cache_path = "";
namespace {
@@ -335,10 +336,11 @@ static string path_xdg_cache_get()
}
#endif
-void path_init(const string &path, const string &user_path)
+void path_init(const string &path, const string &user_path, const string &temp_path)
{
cached_path = path;
cached_user_path = user_path;
+ cached_temp_path = temp_path;
#ifdef _MSC_VER
// workaround for https://svn.boost.org/trac/boost/ticket/6320
@@ -382,6 +384,15 @@ string path_cache_get(const string &sub)
#endif
}
+string path_temp_get(const string &sub)
+{
+ if (cached_temp_path == "") {
+ cached_temp_path = Filesystem::temp_directory_path();
+ }
+
+ return path_join(cached_temp_path, sub);
+}
+
#if defined(__linux__) || defined(__APPLE__)
string path_xdg_home_get(const string &sub = "");
#endif
@@ -739,177 +750,6 @@ bool path_remove(const string &path)
return remove(path.c_str()) == 0;
}
-struct SourceReplaceState {
- typedef map<string, string> ProcessedMapping;
- /* Base director for all relative include headers. */
- string base;
- /* Result of processed files. */
- ProcessedMapping processed_files;
- /* Set of files which are considered "precompiled" and which are replaced
- * with and empty string on a subsequent occurrence in include statement.
- */
- set<string> precompiled_headers;
-};
-
-static string path_source_replace_includes_recursive(const string &source,
- const string &source_filepath,
- SourceReplaceState *state);
-
-static string line_directive(const SourceReplaceState &state, const string &path, const int line)
-{
- string unescaped_path = path;
- /* First we make path relative. */
- if (string_startswith(unescaped_path, state.base.c_str())) {
- const string base_file = path_filename(state.base);
- const size_t base_len = state.base.length();
- unescaped_path = base_file +
- unescaped_path.substr(base_len, unescaped_path.length() - base_len);
- }
- /* Second, we replace all unsafe characters. */
- const size_t length = unescaped_path.length();
- string escaped_path = "";
- for (size_t i = 0; i < length; ++i) {
- const char ch = unescaped_path[i];
- if (strchr("\"\'\?\\", ch) != NULL) {
- escaped_path += "\\";
- }
- escaped_path += ch;
- }
- /* TODO(sergey): Check whether using std::to_string combined with several
- * concatenation operations is any faster.
- */
- return string_printf("#line %d \"%s\"", line, escaped_path.c_str());
-}
-
-static string path_source_handle_preprocessor(const string &preprocessor_line,
- const string &source_filepath,
- const size_t line_number,
- SourceReplaceState *state)
-{
- string result = preprocessor_line;
- string token = string_strip(preprocessor_line.substr(1, preprocessor_line.size() - 1));
- if (string_startswith(token, "include")) {
- token = string_strip(token.substr(7, token.size() - 7));
- if (token[0] == '"') {
- const size_t n_start = 1;
- const size_t n_end = token.find("\"", n_start);
- const string filename = token.substr(n_start, n_end - n_start);
- const bool is_precompiled = string_endswith(token, "// PRECOMPILED");
- string filepath = path_join(state->base, filename);
- if (!path_exists(filepath)) {
- filepath = path_join(path_dirname(source_filepath), filename);
- }
- if (is_precompiled) {
- state->precompiled_headers.insert(filepath);
- }
- string text;
- if (path_read_text(filepath, text)) {
- text = path_source_replace_includes_recursive(text, filepath, state);
- /* Use line directives for better error messages. */
- result = line_directive(*state, filepath, 1) + "\n" + text + "\n" +
- line_directive(*state, source_filepath, line_number + 1);
- }
- }
- }
- return result;
-}
-
-/* Our own little c preprocessor that replaces #includes with the file
- * contents, to work around issue of OpenCL drivers not supporting
- * include paths with spaces in them.
- */
-static string path_source_replace_includes_recursive(const string &source,
- const string &source_filepath,
- SourceReplaceState *state)
-{
- /* Try to re-use processed file without spending time on replacing all
- * include directives again.
- */
- SourceReplaceState::ProcessedMapping::iterator replaced_file = state->processed_files.find(
- source_filepath);
- if (replaced_file != state->processed_files.end()) {
- if (state->precompiled_headers.find(source_filepath) != state->precompiled_headers.end()) {
- return "";
- }
- return replaced_file->second;
- }
- /* Perform full file processing. */
- string result = "";
- const size_t source_length = source.length();
- size_t index = 0;
- /* Information about where we are in the source. */
- size_t line_number = 0, column_number = 1;
- /* Currently gathered non-preprocessor token.
- * Store as start/length rather than token itself to avoid overhead of
- * memory re-allocations on each character concatenation.
- */
- size_t token_start = 0, token_length = 0;
- /* Denotes whether we're inside of preprocessor line, together with
- * preprocessor line itself.
- *
- * TODO(sergey): Investigate whether using token start/end position
- * gives measurable speedup.
- */
- bool inside_preprocessor = false;
- string preprocessor_line = "";
- /* Actual loop over the whole source. */
- while (index < source_length) {
- const char ch = source[index];
- if (ch == '\n') {
- if (inside_preprocessor) {
- result += path_source_handle_preprocessor(
- preprocessor_line, source_filepath, line_number, state);
- /* Start gathering net part of the token. */
- token_start = index;
- token_length = 0;
- }
- inside_preprocessor = false;
- preprocessor_line = "";
- column_number = 0;
- ++line_number;
- }
- else if (ch == '#' && column_number == 1 && !inside_preprocessor) {
- /* Append all possible non-preprocessor token to the result. */
- if (token_length != 0) {
- result.append(source, token_start, token_length);
- token_start = index;
- token_length = 0;
- }
- inside_preprocessor = true;
- }
- if (inside_preprocessor) {
- preprocessor_line += ch;
- }
- else {
- ++token_length;
- }
- ++index;
- ++column_number;
- }
- /* Append possible tokens which happened before special events handled
- * above.
- */
- if (token_length != 0) {
- result.append(source, token_start, token_length);
- }
- if (inside_preprocessor) {
- result += path_source_handle_preprocessor(
- preprocessor_line, source_filepath, line_number, state);
- }
- /* Store result for further reuse. */
- state->processed_files[source_filepath] = result;
- return result;
-}
-
-string path_source_replace_includes(const string &source,
- const string &path,
- const string &source_filename)
-{
- SourceReplaceState state;
- state.base = path;
- return path_source_replace_includes_recursive(source, path_join(path, source_filename), &state);
-}
-
FILE *path_fopen(const string &path, const string &mode)
{
#ifdef _WIN32
diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h
index 7a83c2135a4..f899bc2e01c 100644
--- a/intern/cycles/util/util_path.h
+++ b/intern/cycles/util/util_path.h
@@ -32,9 +32,10 @@
CCL_NAMESPACE_BEGIN
/* program paths */
-void path_init(const string &path = "", const string &user_path = "");
+void path_init(const string &path = "", const string &user_path = "", const string &tmp_path = "");
string path_get(const string &sub = "");
string path_user_get(const string &sub = "");
+string path_temp_get(const string &sub = "");
string path_cache_get(const string &sub = "");
/* path string manipulation */
@@ -65,11 +66,6 @@ bool path_read_text(const string &path, string &text);
/* File manipulation. */
bool path_remove(const string &path);
-/* source code utility */
-string path_source_replace_includes(const string &source,
- const string &path,
- const string &source_filename = "");
-
/* cache utility */
void path_cache_clear_except(const string &name, const set<string> &except);
diff --git a/intern/cycles/util/util_profiling.cpp b/intern/cycles/util/util_profiling.cpp
index 073b09f719f..5343f076e22 100644
--- a/intern/cycles/util/util_profiling.cpp
+++ b/intern/cycles/util/util_profiling.cpp
@@ -48,13 +48,7 @@ void Profiler::run()
}
if (cur_shader >= 0 && cur_shader < shader_samples.size()) {
- /* Only consider the active shader during events whose runtime significantly depends on it.
- */
- if (((cur_event >= PROFILING_SHADER_EVAL) && (cur_event <= PROFILING_SUBSURFACE)) ||
- ((cur_event >= PROFILING_CLOSURE_EVAL) &&
- (cur_event <= PROFILING_CLOSURE_VOLUME_SAMPLE))) {
- shader_samples[cur_shader]++;
- }
+ shader_samples[cur_shader]++;
}
if (cur_object >= 0 && cur_object < object_samples.size()) {
diff --git a/intern/cycles/util/util_profiling.h b/intern/cycles/util/util_profiling.h
index ceec08ed894..96bb682c50e 100644
--- a/intern/cycles/util/util_profiling.h
+++ b/intern/cycles/util/util_profiling.h
@@ -28,38 +28,30 @@ CCL_NAMESPACE_BEGIN
enum ProfilingEvent : uint32_t {
PROFILING_UNKNOWN,
PROFILING_RAY_SETUP,
- PROFILING_PATH_INTEGRATE,
- PROFILING_SCENE_INTERSECT,
- PROFILING_INDIRECT_EMISSION,
- PROFILING_VOLUME,
- PROFILING_SHADER_SETUP,
- PROFILING_SHADER_EVAL,
- PROFILING_SHADER_APPLY,
- PROFILING_AO,
- PROFILING_SUBSURFACE,
- PROFILING_CONNECT_LIGHT,
- PROFILING_SURFACE_BOUNCE,
- PROFILING_WRITE_RESULT,
-
- PROFILING_INTERSECT,
- PROFILING_INTERSECT_LOCAL,
- PROFILING_INTERSECT_SHADOW_ALL,
- PROFILING_INTERSECT_VOLUME,
- PROFILING_INTERSECT_VOLUME_ALL,
-
- PROFILING_CLOSURE_EVAL,
- PROFILING_CLOSURE_SAMPLE,
- PROFILING_CLOSURE_VOLUME_EVAL,
- PROFILING_CLOSURE_VOLUME_SAMPLE,
-
- PROFILING_DENOISING,
- PROFILING_DENOISING_CONSTRUCT_TRANSFORM,
- PROFILING_DENOISING_RECONSTRUCT,
- PROFILING_DENOISING_DIVIDE_SHADOW,
- PROFILING_DENOISING_NON_LOCAL_MEANS,
- PROFILING_DENOISING_COMBINE_HALVES,
- PROFILING_DENOISING_GET_FEATURE,
- PROFILING_DENOISING_DETECT_OUTLIERS,
+
+ PROFILING_INTERSECT_CLOSEST,
+ PROFILING_INTERSECT_SUBSURFACE,
+ PROFILING_INTERSECT_SHADOW,
+ PROFILING_INTERSECT_VOLUME_STACK,
+
+ PROFILING_SHADE_SURFACE_SETUP,
+ PROFILING_SHADE_SURFACE_EVAL,
+ PROFILING_SHADE_SURFACE_DIRECT_LIGHT,
+ PROFILING_SHADE_SURFACE_INDIRECT_LIGHT,
+ PROFILING_SHADE_SURFACE_AO,
+ PROFILING_SHADE_SURFACE_PASSES,
+
+ PROFILING_SHADE_VOLUME_SETUP,
+ PROFILING_SHADE_VOLUME_INTEGRATE,
+ PROFILING_SHADE_VOLUME_DIRECT_LIGHT,
+ PROFILING_SHADE_VOLUME_INDIRECT_LIGHT,
+
+ PROFILING_SHADE_SHADOW_SETUP,
+ PROFILING_SHADE_SHADOW_SURFACE,
+ PROFILING_SHADE_SHADOW_VOLUME,
+
+ PROFILING_SHADE_LIGHT_SETUP,
+ PROFILING_SHADE_LIGHT_EVAL,
PROFILING_NUM_EVENTS,
};
@@ -136,37 +128,51 @@ class ProfilingHelper {
state->event = event;
}
+ ~ProfilingHelper()
+ {
+ state->event = previous_event;
+ }
+
inline void set_event(ProfilingEvent event)
{
state->event = event;
}
- inline void set_shader(int shader)
+ protected:
+ ProfilingState *state;
+ uint32_t previous_event;
+};
+
+class ProfilingWithShaderHelper : public ProfilingHelper {
+ public:
+ ProfilingWithShaderHelper(ProfilingState *state, ProfilingEvent event)
+ : ProfilingHelper(state, event)
{
- state->shader = shader;
- if (state->active) {
- assert(shader < state->shader_hits.size());
- state->shader_hits[shader]++;
- }
}
- inline void set_object(int object)
+ ~ProfilingWithShaderHelper()
{
- state->object = object;
- if (state->active) {
- assert(object < state->object_hits.size());
- state->object_hits[object]++;
- }
+ state->object = -1;
+ state->shader = -1;
}
- ~ProfilingHelper()
+ inline void set_shader(int object, int shader)
{
- state->event = previous_event;
+ if (state->active) {
+ state->shader = shader;
+ state->object = object;
+
+ if (shader >= 0) {
+ assert(shader < state->shader_hits.size());
+ state->shader_hits[shader]++;
+ }
+
+ if (object >= 0) {
+ assert(object < state->object_hits.size());
+ state->object_hits[object]++;
+ }
+ }
}
-
- private:
- ProfilingState *state;
- uint32_t previous_event;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 26534a29dfe..dca8d3d0ab5 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -46,7 +46,6 @@ class Progress {
substatus = "";
sync_status = "";
sync_substatus = "";
- kernel_status = "";
update_cb = function_null;
cancel = false;
cancel_message = "";
@@ -87,7 +86,6 @@ class Progress {
substatus = "";
sync_status = "";
sync_substatus = "";
- kernel_status = "";
cancel = false;
cancel_message = "";
error = false;
@@ -316,24 +314,6 @@ class Progress {
}
}
- /* kernel status */
-
- void set_kernel_status(const string &kernel_status_)
- {
- {
- thread_scoped_lock lock(progress_mutex);
- kernel_status = kernel_status_;
- }
-
- set_update();
- }
-
- void get_kernel_status(string &kernel_status_)
- {
- thread_scoped_lock lock(progress_mutex);
- kernel_status_ = kernel_status;
- }
-
/* callback */
void set_update()
@@ -378,8 +358,6 @@ class Progress {
string sync_status;
string sync_substatus;
- string kernel_status;
-
volatile bool cancel;
string cancel_message;
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 8e8caa98a1b..b4a153c329f 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -61,14 +61,14 @@ static struct TrueTy {
{
return true;
}
-} True ccl_maybe_unused;
+} True ccl_attr_maybe_unused;
static struct FalseTy {
__forceinline operator bool() const
{
return false;
}
-} False ccl_maybe_unused;
+} False ccl_attr_maybe_unused;
static struct ZeroTy {
__forceinline operator float() const
@@ -79,7 +79,7 @@ static struct ZeroTy {
{
return 0;
}
-} zero ccl_maybe_unused;
+} zero ccl_attr_maybe_unused;
static struct OneTy {
__forceinline operator float() const
@@ -90,7 +90,7 @@ static struct OneTy {
{
return 1;
}
-} one ccl_maybe_unused;
+} one ccl_attr_maybe_unused;
static struct NegInfTy {
__forceinline operator float() const
@@ -101,7 +101,7 @@ static struct NegInfTy {
{
return std::numeric_limits<int>::min();
}
-} neg_inf ccl_maybe_unused;
+} neg_inf ccl_attr_maybe_unused;
static struct PosInfTy {
__forceinline operator float() const
@@ -112,10 +112,10 @@ static struct PosInfTy {
{
return std::numeric_limits<int>::max();
}
-} inf ccl_maybe_unused, pos_inf ccl_maybe_unused;
+} inf ccl_attr_maybe_unused, pos_inf ccl_attr_maybe_unused;
static struct StepTy {
-} step ccl_maybe_unused;
+} step ccl_attr_maybe_unused;
#endif
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
index d809f2e06d7..7df52d462b7 100644
--- a/intern/cycles/util/util_static_assert.h
+++ b/intern/cycles/util/util_static_assert.h
@@ -24,9 +24,9 @@
CCL_NAMESPACE_BEGIN
-#if defined(__KERNEL_OPENCL__) || defined(CYCLES_CUBIN_CC)
+#if defined(CYCLES_CUBIN_CC)
# define static_assert(statement, message)
-#endif /* __KERNEL_OPENCL__ */
+#endif
#define static_assert_align(st, align) \
static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT
diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp
index 4dfebf14923..9c0b2ca50bb 100644
--- a/intern/cycles/util/util_string.cpp
+++ b/intern/cycles/util/util_string.cpp
@@ -17,6 +17,9 @@
#include <stdarg.h>
#include <stdio.h>
+#include <algorithm>
+#include <cctype>
+
#include "util/util_foreach.h"
#include "util/util_string.h"
#include "util/util_windows.h"
@@ -107,24 +110,26 @@ void string_split(vector<string> &tokens,
}
}
-bool string_startswith(const string &s, const char *start)
+bool string_startswith(const string_view s, const string_view start)
{
- size_t len = strlen(start);
+ const size_t len = start.size();
- if (len > s.size())
- return 0;
- else
- return strncmp(s.c_str(), start, len) == 0;
+ if (len > s.size()) {
+ return false;
+ }
+
+ return strncmp(s.c_str(), start.data(), len) == 0;
}
-bool string_endswith(const string &s, const string &end)
+bool string_endswith(const string_view s, const string_view end)
{
- size_t len = end.length();
+ const size_t len = end.size();
- if (len > s.size())
- return 0;
- else
- return s.compare(s.length() - len, len, end) == 0;
+ if (len > s.size()) {
+ return false;
+ }
+
+ return strncmp(s.c_str() + s.size() - len, end.data(), len) == 0;
}
string string_strip(const string &s)
@@ -172,6 +177,13 @@ string to_string(const char *str)
return string(str);
}
+string string_to_lower(const string &s)
+{
+ string r = s;
+ std::transform(r.begin(), r.end(), r.begin(), [](char c) { return std::tolower(c); });
+ return r;
+}
+
/* Wide char strings helpers for Windows. */
#ifdef _WIN32
diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h
index f2272819b2f..55462cfd8b8 100644
--- a/intern/cycles/util/util_string.h
+++ b/intern/cycles/util/util_string.h
@@ -21,6 +21,11 @@
#include <string.h>
#include <string>
+/* Use string view implementation from OIIO.
+ * Ideally, need to switch to `std::string_view`, but this first requires getting rid of using
+ * namespace OIIO as it causes symbol collision. */
+#include <OpenImageIO/string_view.h>
+
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
@@ -31,6 +36,8 @@ using std::string;
using std::stringstream;
using std::to_string;
+using OIIO::string_view;
+
#ifdef __GNUC__
# define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2)))
#else
@@ -45,12 +52,13 @@ void string_split(vector<string> &tokens,
const string &separators = "\t ",
bool skip_empty_tokens = true);
void string_replace(string &haystack, const string &needle, const string &other);
-bool string_startswith(const string &s, const char *start);
-bool string_endswith(const string &s, const string &end);
+bool string_startswith(string_view s, string_view start);
+bool string_endswith(string_view s, string_view end);
string string_strip(const string &s);
string string_remove_trademark(const string &s);
string string_from_bool(const bool var);
string to_string(const char *str);
+string string_to_lower(const string &s);
/* Wide char strings are only used on Windows to deal with non-ASCII
* characters in file names and such. No reason to use such strings
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index b010881058b..be8c2fb505a 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -403,4 +403,13 @@ size_t system_physical_ram()
#endif
}
+uint64_t system_self_process_id()
+{
+#ifdef _WIN32
+ return GetCurrentProcessId();
+#else
+ return getpid();
+#endif
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index c4db8b74339..a1797e6ca44 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -65,6 +65,9 @@ size_t system_physical_ram();
/* Start a new process of the current application with the given arguments. */
bool system_call_self(const vector<string> &args);
+/* Get identifier of the currently running process. */
+uint64_t system_self_process_id();
+
CCL_NAMESPACE_END
#endif /* __UTIL_SYSTEM_H__ */
diff --git a/intern/cycles/util/util_tbb.h b/intern/cycles/util/util_tbb.h
index 73e0f92d19c..8f84377ac8c 100644
--- a/intern/cycles/util/util_tbb.h
+++ b/intern/cycles/util/util_tbb.h
@@ -23,6 +23,7 @@
#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
+#include <tbb/parallel_for_each.h>
#include <tbb/task_arena.h>
#include <tbb/task_group.h>
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index 71bf9c65911..4de66bf5f46 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -85,8 +85,6 @@ typedef struct TextureInfo {
uint64_t data;
/* Data Type */
uint data_type;
- /* Buffer number for OpenCL. */
- uint cl_buffer;
/* Interpolation and extension type. */
uint interpolation, extension;
/* Dimensions. */
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index f79eac4cbcf..e9cd3b0b483 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -498,36 +498,12 @@ Transform transform_from_viewplane(BoundBox2D &viewplane);
#endif
-/* TODO(sergey): This is only for until we've got OpenCL 2.0
- * on all devices we consider supported. It'll be replaced with
- * generic address space.
- */
+/* TODO: This can be removed when we know if no devices will require explicit
+ * address space qualifiers for this case. */
-#ifdef __KERNEL_OPENCL__
-
-# define OPENCL_TRANSFORM_ADDRSPACE_GLUE(a, b) a##b
-# define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(function) \
- ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(function, _addrspace)( \
- ccl_addr_space const Transform *t, const float3 a) \
- { \
- Transform private_tfm = *t; \
- return function(&private_tfm, a); \
- }
-
-OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_point)
-OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction)
-OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed)
-
-# undef OPENCL_TRANSFORM_ADDRSPACE_DECLARE
-# undef OPENCL_TRANSFORM_ADDRSPACE_GLUE
-# define transform_point_auto transform_point_addrspace
-# define transform_direction_auto transform_direction_addrspace
-# define transform_direction_transposed_auto transform_direction_transposed_addrspace
-#else
-# define transform_point_auto transform_point
-# define transform_direction_auto transform_direction
-# define transform_direction_transposed_auto transform_direction_transposed
-#endif
+#define transform_point_auto transform_point
+#define transform_direction_auto transform_direction
+#define transform_direction_transposed_auto transform_direction_transposed
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 87358877e3c..442c32b3a3d 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -17,9 +17,7 @@
#ifndef __UTIL_TYPES_H__
#define __UTIL_TYPES_H__
-#ifndef __KERNEL_OPENCL__
-# include <stdlib.h>
-#endif
+#include <stdlib.h>
/* Standard Integer Types */
@@ -44,18 +42,12 @@ CCL_NAMESPACE_BEGIN
/* Shorter Unsigned Names */
-#ifndef __KERNEL_OPENCL__
typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned short ushort;
-#endif
/* Fixed Bits Types */
-#ifdef __KERNEL_OPENCL__
-typedef unsigned long uint64_t;
-#endif
-
#ifndef __KERNEL_GPU__
/* Generic Memory Pointer */
diff --git a/intern/cycles/util/util_unique_ptr.h b/intern/cycles/util/util_unique_ptr.h
index 3aaaf083eff..3181eafd43d 100644
--- a/intern/cycles/util/util_unique_ptr.h
+++ b/intern/cycles/util/util_unique_ptr.h
@@ -21,6 +21,7 @@
CCL_NAMESPACE_BEGIN
+using std::make_unique;
using std::unique_ptr;
CCL_NAMESPACE_END