Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/CMakeLists.txt3
-rw-r--r--intern/cycles/util/util_algorithm.h2
-rw-r--r--intern/cycles/util/util_args.h2
-rw-r--r--intern/cycles/util/util_array.h289
-rw-r--r--intern/cycles/util/util_atomic.h32
-rw-r--r--intern/cycles/util/util_avxb.h72
-rw-r--r--intern/cycles/util/util_avxf.h10
-rw-r--r--intern/cycles/util/util_boundbox.h2
-rw-r--r--intern/cycles/util/util_color.h2
-rw-r--r--intern/cycles/util/util_debug.h2
-rw-r--r--intern/cycles/util/util_defines.h12
-rw-r--r--intern/cycles/util/util_foreach.h2
-rw-r--r--intern/cycles/util/util_function.h2
-rw-r--r--intern/cycles/util/util_guarded_allocator.cpp4
-rw-r--r--intern/cycles/util/util_guarded_allocator.h6
-rw-r--r--intern/cycles/util/util_half.h2
-rw-r--r--intern/cycles/util/util_hash.h2
-rw-r--r--intern/cycles/util/util_ies.cpp7
-rw-r--r--intern/cycles/util/util_ies.h2
-rw-r--r--intern/cycles/util/util_image.h2
-rw-r--r--intern/cycles/util/util_list.h2
-rw-r--r--intern/cycles/util/util_logging.cpp2
-rw-r--r--intern/cycles/util/util_logging.h6
-rw-r--r--intern/cycles/util/util_map.h2
-rw-r--r--intern/cycles/util/util_math.h30
-rw-r--r--intern/cycles/util/util_math_cdf.h2
-rw-r--r--intern/cycles/util/util_math_fast.h28
-rw-r--r--intern/cycles/util/util_math_float2.h2
-rw-r--r--intern/cycles/util/util_math_float3.h2
-rw-r--r--intern/cycles/util/util_math_float4.h14
-rw-r--r--intern/cycles/util/util_math_int2.h2
-rw-r--r--intern/cycles/util/util_math_int3.h2
-rw-r--r--intern/cycles/util/util_math_int4.h42
-rw-r--r--intern/cycles/util/util_math_intersect.h2
-rw-r--r--intern/cycles/util/util_md5.h2
-rw-r--r--intern/cycles/util/util_murmurhash.cpp127
-rw-r--r--intern/cycles/util/util_murmurhash.h30
-rw-r--r--intern/cycles/util/util_opengl.h2
-rw-r--r--intern/cycles/util/util_optimization.h2
-rw-r--r--intern/cycles/util/util_param.h2
-rw-r--r--intern/cycles/util/util_path.cpp2
-rw-r--r--intern/cycles/util/util_progress.h10
-rw-r--r--intern/cycles/util/util_projection.h4
-rw-r--r--intern/cycles/util/util_queue.h2
-rw-r--r--intern/cycles/util/util_rect.h2
-rw-r--r--intern/cycles/util/util_set.h2
-rw-r--r--intern/cycles/util/util_simd.h14
-rw-r--r--intern/cycles/util/util_sky_model.h2
-rw-r--r--intern/cycles/util/util_sseb.h2
-rw-r--r--intern/cycles/util/util_ssef.h12
-rw-r--r--intern/cycles/util/util_stack_allocator.h4
-rw-r--r--intern/cycles/util/util_static_assert.h2
-rw-r--r--intern/cycles/util/util_stats.h2
-rw-r--r--intern/cycles/util/util_string.h2
-rw-r--r--intern/cycles/util/util_system.cpp4
-rw-r--r--intern/cycles/util/util_system.h2
-rw-r--r--intern/cycles/util/util_texture.h2
-rw-r--r--intern/cycles/util/util_thread.cpp2
-rw-r--r--intern/cycles/util/util_thread.h6
-rw-r--r--intern/cycles/util/util_transform.h27
-rw-r--r--intern/cycles/util/util_types.h2
-rw-r--r--intern/cycles/util/util_types_float3.h4
-rw-r--r--intern/cycles/util/util_types_float3_impl.h4
-rw-r--r--intern/cycles/util/util_types_float4.h4
-rw-r--r--intern/cycles/util/util_types_float4_impl.h4
-rw-r--r--intern/cycles/util/util_types_float8.h6
-rw-r--r--intern/cycles/util/util_types_float8_impl.h4
-rw-r--r--intern/cycles/util/util_types_int3.h4
-rw-r--r--intern/cycles/util/util_types_int3_impl.h4
-rw-r--r--intern/cycles/util/util_types_int4.h6
-rw-r--r--intern/cycles/util/util_types_int4_impl.h14
-rw-r--r--intern/cycles/util/util_vector.h296
-rw-r--r--intern/cycles/util/util_version.h2
-rw-r--r--intern/cycles/util/util_view.cpp2
-rw-r--r--intern/cycles/util/util_view.h8
-rw-r--r--intern/cycles/util/util_windows.h2
-rw-r--r--intern/cycles/util/util_xml.h2
77 files changed, 772 insertions, 461 deletions
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 291f9a9fcae..77d47984ee7 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SRC
util_logging.cpp
util_math_cdf.cpp
util_md5.cpp
+ util_murmurhash.cpp
util_path.cpp
util_string.cpp
util_simd.cpp
@@ -36,6 +37,7 @@ set(SRC_HEADERS
util_algorithm.h
util_aligned_malloc.h
util_args.h
+ util_array.h
util_atomic.h
util_boundbox.h
util_debug.h
@@ -64,6 +66,7 @@ set(SRC_HEADERS
util_math_int4.h
util_math_matrix.h
util_md5.h
+ util_murmurhash.h
util_opengl.h
util_optimization.h
util_param.h
diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h
index eb874713d43..f9e6476cc52 100644
--- a/intern/cycles/util/util_algorithm.h
+++ b/intern/cycles/util/util_algorithm.h
@@ -29,4 +29,4 @@ using std::remove;
CCL_NAMESPACE_END
-#endif /* __UTIL_ALGORITHM_H__ */
+#endif /* __UTIL_ALGORITHM_H__ */
diff --git a/intern/cycles/util/util_args.h b/intern/cycles/util/util_args.h
index be6f2c2b9f1..9fe54b14d77 100644
--- a/intern/cycles/util/util_args.h
+++ b/intern/cycles/util/util_args.h
@@ -28,4 +28,4 @@ OIIO_NAMESPACE_USING
CCL_NAMESPACE_END
-#endif /* __UTIL_ARGS_H__ */
+#endif /* __UTIL_ARGS_H__ */
diff --git a/intern/cycles/util/util_array.h b/intern/cycles/util/util_array.h
new file mode 100644
index 00000000000..5f18d434c31
--- /dev/null
+++ b/intern/cycles/util/util_array.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright 2011-2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_ARRAY_H__
+#define __UTIL_ARRAY_H__
+
+#include <cassert>
+#include <cstring>
+
+#include "util/util_aligned_malloc.h"
+#include "util/util_guarded_allocator.h"
+#include "util/util_types.h"
+#include "util/util_vector.h"
+
+CCL_NAMESPACE_BEGIN
+
+/* Simplified version of vector, serving multiple purposes:
+ * - somewhat faster in that it does not clear memory on resize/alloc,
+ * this was actually showing up in profiles quite significantly. it
+ * also does not run any constructors/destructors
+ * - if this is used, we are not tempted to use inefficient operations
+ * - aligned allocation for CPU native data types */
+
+template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES>
+class array
+{
+public:
+ array()
+ : data_(NULL),
+ datasize_(0),
+ capacity_(0)
+ {}
+
+ explicit array(size_t newsize)
+ {
+ if(newsize == 0) {
+ data_ = NULL;
+ datasize_ = 0;
+ capacity_ = 0;
+ }
+ else {
+ data_ = mem_allocate(newsize);
+ datasize_ = newsize;
+ capacity_ = datasize_;
+ }
+ }
+
+ array(const array& from)
+ {
+ if(from.datasize_ == 0) {
+ data_ = NULL;
+ datasize_ = 0;
+ capacity_ = 0;
+ }
+ else {
+ data_ = mem_allocate(from.datasize_);
+ memcpy(data_, from.data_, from.datasize_*sizeof(T));
+ datasize_ = from.datasize_;
+ capacity_ = datasize_;
+ }
+ }
+
+ array& operator=(const array& from)
+ {
+ if(this != &from) {
+ resize(from.size());
+ memcpy((void*)data_, from.data_, datasize_*sizeof(T));
+ }
+
+ return *this;
+ }
+
+ array& operator=(const vector<T>& from)
+ {
+ resize(from.size());
+
+ if(from.size() > 0) {
+ memcpy(data_, &from[0], datasize_*sizeof(T));
+ }
+
+ return *this;
+ }
+
+ ~array()
+ {
+ mem_free(data_, capacity_);
+ }
+
+ bool operator==(const array<T>& other) const
+ {
+ if(datasize_ != other.datasize_) {
+ return false;
+ }
+
+ return memcmp(data_, other.data_, datasize_*sizeof(T)) == 0;
+ }
+
+ bool operator!=(const array<T>& other) const
+ {
+ return !(*this == other);
+ }
+
+ void steal_data(array& from)
+ {
+ if(this != &from) {
+ clear();
+
+ data_ = from.data_;
+ datasize_ = from.datasize_;
+ capacity_ = from.capacity_;
+
+ from.data_ = NULL;
+ from.datasize_ = 0;
+ from.capacity_ = 0;
+ }
+ }
+
+ T *steal_pointer()
+ {
+ T *ptr = data_;
+ data_ = NULL;
+ clear();
+ return ptr;
+ }
+
+ T* resize(size_t newsize)
+ {
+ if(newsize == 0) {
+ clear();
+ }
+ else if(newsize != datasize_) {
+ if(newsize > capacity_) {
+ T *newdata = mem_allocate(newsize);
+ if(newdata == NULL) {
+ /* Allocation failed, likely out of memory. */
+ clear();
+ return NULL;
+ }
+ else if(data_ != NULL) {
+ memcpy((void *)newdata,
+ data_,
+ ((datasize_ < newsize)? datasize_: newsize)*sizeof(T));
+ mem_free(data_, capacity_);
+ }
+ data_ = newdata;
+ capacity_ = newsize;
+ }
+ datasize_ = newsize;
+ }
+ return data_;
+ }
+
+ T* resize(size_t newsize, const T& value)
+ {
+ size_t oldsize = size();
+ resize(newsize);
+
+ for(size_t i = oldsize; i < size(); i++) {
+ data_[i] = value;
+ }
+
+ return data_;
+ }
+
+ void clear()
+ {
+ if(data_ != NULL) {
+ mem_free(data_, capacity_);
+ data_ = NULL;
+ }
+ datasize_ = 0;
+ capacity_ = 0;
+ }
+
+ size_t empty() const
+ {
+ return datasize_ == 0;
+ }
+
+ size_t size() const
+ {
+ return datasize_;
+ }
+
+ T* data()
+ {
+ return data_;
+ }
+
+ const T* data() const
+ {
+ return data_;
+ }
+
+ T& operator[](size_t i) const
+ {
+ assert(i < datasize_);
+ return data_[i];
+ }
+
+ void reserve(size_t newcapacity)
+ {
+ if(newcapacity > capacity_) {
+ T *newdata = mem_allocate(newcapacity);
+ if(data_ != NULL) {
+ memcpy(newdata, data_, ((datasize_ < newcapacity)? datasize_: newcapacity)*sizeof(T));
+ mem_free(data_, capacity_);
+ }
+ data_ = newdata;
+ capacity_ = newcapacity;
+ }
+ }
+
+ size_t capacity() const
+ {
+ return capacity_;
+ }
+
+ // do not use this method unless you are sure the code is not performance critical
+ void push_back_slow(const T& t)
+ {
+ if(capacity_ == datasize_)
+ {
+ reserve(datasize_ == 0 ? 1 : (size_t)((datasize_ + 1) * 1.2));
+ }
+
+ data_[datasize_++] = t;
+ }
+
+ void push_back_reserved(const T& t)
+ {
+ assert(datasize_ < capacity_);
+ push_back_slow(t);
+ }
+
+ void append(const array<T>& from)
+ {
+ if(from.size()) {
+ size_t old_size = size();
+ resize(old_size + from.size());
+ memcpy(data_ + old_size, from.data(), sizeof(T) * from.size());
+ }
+ }
+
+protected:
+ inline T* mem_allocate(size_t N)
+ {
+ if(N == 0) {
+ return NULL;
+ }
+ T *mem = (T*)util_aligned_malloc(sizeof(T)*N, alignment);
+ if(mem != NULL) {
+ util_guarded_mem_alloc(sizeof(T)*N);
+ }
+ else {
+ throw std::bad_alloc();
+ }
+ return mem;
+ }
+
+ inline void mem_free(T *mem, size_t N)
+ {
+ if(mem != NULL) {
+ util_guarded_mem_free(sizeof(T)*N);
+ util_aligned_free(mem);
+ }
+ }
+
+ T *data_;
+ size_t datasize_;
+ size_t capacity_;
+};
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_ARRAY_H__ */
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
index f3c7ae546a0..477b667a6fe 100644
--- a/intern/cycles/util/util_atomic.h
+++ b/intern/cycles/util/util_atomic.h
@@ -23,12 +23,13 @@
#include "atomic_ops.h"
#define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x))
+#define atomic_compare_and_swap_float(p, old_val, new_val) atomic_cas_float((p), (old_val), (new_val))
#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1)
#define CCL_LOCAL_MEM_FENCE 0
-#define ccl_barrier(flags) (void)0
+#define ccl_barrier(flags) ((void) 0)
#else /* __KERNEL_GPU__ */
@@ -57,6 +58,20 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so
return new_value.float_value;
}
+ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest,
+ const float old_val, const float new_val)
+{
+ union {
+ unsigned int int_value;
+ float float_value;
+ } new_value, prev_value, result;
+ prev_value.float_value = old_val;
+ new_value.float_value = new_val;
+ result.int_value = atomic_cmpxchg((volatile ccl_global unsigned int *)dest,
+ prev_value.int_value, new_value.int_value);
+ return result.float_value;
+}
+
#define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x))
#define atomic_fetch_and_inc_uint32(p) atomic_inc((p))
#define atomic_fetch_and_dec_uint32(p) atomic_dec((p))
@@ -75,6 +90,19 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so
#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
+ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest,
+ const float old_val, const float new_val)
+{
+ union {
+ unsigned int int_value;
+ float float_value;
+ } new_value, prev_value, result;
+ prev_value.float_value = old_val;
+ new_value.float_value = new_val;
+ result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value,new_value.int_value);
+ return result.float_value;
+}
+
#define CCL_LOCAL_MEM_FENCE
#define ccl_barrier(flags) __syncthreads()
@@ -82,4 +110,4 @@ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *so
#endif /* __KERNEL_GPU__ */
-#endif /* __UTIL_ATOMIC_H__ */
+#endif /* __UTIL_ATOMIC_H__ */
diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h
index 60d9bb44256..25ef39d39ae 100644
--- a/intern/cycles/util/util_avxb.h
+++ b/intern/cycles/util/util_avxb.h
@@ -44,23 +44,12 @@ struct avxb
__forceinline operator const __m256i( void ) const { return _mm256_castps_si256(m256); }
__forceinline operator const __m256d( void ) const { return _mm256_castps_pd(m256); }
- //__forceinline avxb ( bool a )
- // : m256(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
- //__forceinline avxb ( bool a, bool b)
- // : m256(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) {}
- //__forceinline avxb ( bool a, bool b, bool c, bool d)
- // : m256(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}
- //__forceinline avxb(int mask) {
- // assert(mask >= 0 && mask < 16);
- // m128 = _mm_lookupmask_ps[mask];
- //}
-
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline avxb( FalseTy ) : m256(_mm256_setzero_ps()) {}
- __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_cmpeq_epi32(_mm256_setzero_si256(), _mm256_setzero_si256()))) {}
+ __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
@@ -97,7 +86,21 @@ __forceinline const avxb operator ^=( avxb& a, const avxb& b ) { return a = a ^
////////////////////////////////////////////////////////////////////////////////
__forceinline const avxb operator !=( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); }
-__forceinline const avxb operator ==( const avxb& a, const avxb& b ) { return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); }
+__forceinline const avxb operator ==( const avxb& a, const avxb& b )
+{
+#ifdef __KERNEL_AVX2__
+ return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
+#else
+ __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
+ __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
+ __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
+ __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
+ __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
+ __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
+ __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
+ return _mm256_castsi256_ps(result);
+#endif
+}
__forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
#if defined(__KERNEL_SSE41__)
@@ -114,47 +117,6 @@ __forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
__forceinline const avxb unpacklo( const avxb& a, const avxb& b ) { return _mm256_unpacklo_ps(a, b); }
__forceinline const avxb unpackhi( const avxb& a, const avxb& b ) { return _mm256_unpackhi_ps(a, b); }
-#define _MM256_SHUFFLE(fp7,fp6,fp5,fp4,fp3,fp2,fp1,fp0) (((fp7) << 14) | ((fp6) << 12) | ((fp5) << 10) | ((fp4) << 8) | \
- ((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
-
-template<size_t i0, size_t i1, size_t i2, size_t i3, size_t i4, size_t i5, size_t i6, size_t i7>
-__forceinline const avxb shuffle( const avxb& a ) {
- return _mm256_cvtepi32_ps(_mm256_shuffle_epi32(a, _MM256_SHUFFLE(i7, i6, i5, i4, i3, i2, i1, i0)));
-}
-
-/*
-template<> __forceinline const avxb shuffle<0, 1, 0, 1, 0, 1, 0, 1>( const avxb& a ) {
- return _mm_movelh_ps(a, a);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a ) {
- return _mm_movehl_ps(a, a);
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a, const sseb& b ) {
- return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
-}
-
-template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a, const sseb& b ) {
- return _mm_movelh_ps(a, b);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a, const sseb& b ) {
- return _mm_movehl_ps(b, a);
-}
-
-#if defined(__KERNEL_SSE3__)
-template<> __forceinline const sseb shuffle<0, 0, 2, 2>( const sseb& a ) { return _mm_moveldup_ps(a); }
-template<> __forceinline const sseb shuffle<1, 1, 3, 3>( const sseb& a ) { return _mm_movehdup_ps(a); }
-#endif
-
-#if defined(__KERNEL_SSE41__)
-template<size_t dst, size_t src, size_t clr> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); }
-template<size_t dst, size_t src> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return insert<dst, src, 0>(a, b); }
-template<size_t dst> __forceinline const sseb insert( const sseb& a, const bool b ) { return insert<dst,0>(a, sseb(b)); }
-#endif
-*/
-
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
@@ -180,7 +142,7 @@ __forceinline size_t movemask( const avxb& a ) { return _mm256_movemask_ps(a); }
ccl_device_inline void print_avxb(const char *label, const avxb &a)
{
- printf("%s: %df %df %df %df %df %df %df %d\n",
+ printf("%s: %d %d %d %d %d %d %d %d\n",
label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
}
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 5596702ca20..f00c722f25b 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -40,8 +40,8 @@ struct avxf
__forceinline avxf(const __m256 a) : m256(a) {}
__forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps (a)) {}
- __forceinline operator const __m256&(void) const { return m256; }
- __forceinline operator __m256&(void) { return m256; }
+ __forceinline operator const __m256&() const { return m256; }
+ __forceinline operator __m256&() { return m256; }
__forceinline avxf (float a) : m256(_mm256_set1_ps(a)) {}
@@ -214,17 +214,19 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
#endif
}
__forceinline const avxf msub(const avxf& a, const avxf& b, const avxf& c) {
+#ifdef __KERNEL_AVX2__
return _mm256_fmsub_ps(a, b, c);
+#else
+ return (a*b) - c;
+#endif
}
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
-#ifdef __KERNEL_AVX2__
__forceinline const avxb operator <=(const avxf& a, const avxf& b) {
return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS);
}
-#endif
#endif
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
index b1bd5be0df3..fe89e398840 100644
--- a/intern/cycles/util/util_boundbox.h
+++ b/intern/cycles/util/util_boundbox.h
@@ -282,4 +282,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __UTIL_BOUNDBOX_H__ */
+#endif /* __UTIL_BOUNDBOX_H__ */
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 826db469d25..e6efc7d73fc 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -240,4 +240,4 @@ ccl_device float4 color_srgb_to_linear_v4(float4 c)
CCL_NAMESPACE_END
-#endif /* __UTIL_COLOR_H__ */
+#endif /* __UTIL_COLOR_H__ */
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index f17f8a560ee..864089bb118 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -182,4 +182,4 @@ std::ostream& operator <<(std::ostream &os,
CCL_NAMESPACE_END
-#endif /* __UTIL_DEBUG_H__ */
+#endif /* __UTIL_DEBUG_H__ */
diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h
index 8bce4aca699..429cfe647ef 100644
--- a/intern/cycles/util/util_defines.h
+++ b/intern/cycles/util/util_defines.h
@@ -72,7 +72,7 @@
# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */
# define ATTR_FALLTHROUGH __attribute__((fallthrough))
# else
-# define ATTR_FALLTHROUGH ((void)0)
+# define ATTR_FALLTHROUGH ((void) 0)
# endif
#endif /* __KERNEL_GPU__ */
@@ -104,14 +104,14 @@ template<typename T> static inline T decltype_helper(T x) { return x; }
#define CHECK_TYPE(var, type) { \
TYPEOF(var) *__tmp; \
__tmp = (type *)NULL; \
- (void)__tmp; \
-} (void)0
+ (void) __tmp; \
+} (void) 0
#define CHECK_TYPE_PAIR(var_a, var_b) { \
TYPEOF(var_a) *__tmp; \
__tmp = (typeof(var_b) *)NULL; \
- (void)__tmp; \
-} (void)0
+ (void) __tmp; \
+} (void) 0
#else
# define CHECK_TYPE(var, type)
# define CHECK_TYPE_PAIR(var_a, var_b)
@@ -128,4 +128,4 @@ template<typename T> static inline T decltype_helper(T x) { return x; }
# define util_assert(statement)
#endif
-#endif /* __UTIL_DEFINES_H__ */
+#endif /* __UTIL_DEFINES_H__ */
diff --git a/intern/cycles/util/util_foreach.h b/intern/cycles/util/util_foreach.h
index 2a74ff0a55d..fd106d58b43 100644
--- a/intern/cycles/util/util_foreach.h
+++ b/intern/cycles/util/util_foreach.h
@@ -21,4 +21,4 @@
#define foreach(x, y) for(x : y)
-#endif /* __UTIL_FOREACH_H__ */
+#endif /* __UTIL_FOREACH_H__ */
diff --git a/intern/cycles/util/util_function.h b/intern/cycles/util/util_function.h
index f3cc00329ad..72c7ce43073 100644
--- a/intern/cycles/util/util_function.h
+++ b/intern/cycles/util/util_function.h
@@ -36,4 +36,4 @@ using std::placeholders::_9;
CCL_NAMESPACE_END
-#endif /* __UTIL_FUNCTION_H__ */
+#endif /* __UTIL_FUNCTION_H__ */
diff --git a/intern/cycles/util/util_guarded_allocator.cpp b/intern/cycles/util/util_guarded_allocator.cpp
index 54fa6a80df5..ae1d217c54f 100644
--- a/intern/cycles/util/util_guarded_allocator.cpp
+++ b/intern/cycles/util/util_guarded_allocator.cpp
@@ -35,12 +35,12 @@ void util_guarded_mem_free(size_t n)
/* Public API. */
-size_t util_guarded_get_mem_used(void)
+size_t util_guarded_get_mem_used()
{
return global_stats.mem_used;
}
-size_t util_guarded_get_mem_peak(void)
+size_t util_guarded_get_mem_peak()
{
return global_stats.mem_peak;
}
diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h
index 87c1526dee4..2c6f1790fd0 100644
--- a/intern/cycles/util/util_guarded_allocator.h
+++ b/intern/cycles/util/util_guarded_allocator.h
@@ -47,7 +47,7 @@ public:
T *allocate(size_t n, const void *hint = 0)
{
- (void)hint;
+ (void) hint;
size_t size = n * sizeof(T);
util_guarded_mem_alloc(size);
if(n == 0) {
@@ -158,8 +158,8 @@ public:
};
/* Get memory usage and peak from the guarded STL allocator. */
-size_t util_guarded_get_mem_used(void);
-size_t util_guarded_get_mem_peak(void);
+size_t util_guarded_get_mem_used();
+size_t util_guarded_get_mem_peak();
/* Call given function and keep track if it runs out of memory.
*
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 53b7f2472bd..3868509c21b 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -148,4 +148,4 @@ ccl_device_inline half float_to_half(float f)
CCL_NAMESPACE_END
-#endif /* __UTIL_HALF_H__ */
+#endif /* __UTIL_HALF_H__ */
diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h
index a8a5076fbb3..f343252eaca 100644
--- a/intern/cycles/util/util_hash.h
+++ b/intern/cycles/util/util_hash.h
@@ -68,4 +68,4 @@ ccl_device_inline float hash_int_01(uint k)
CCL_NAMESPACE_END
-#endif /* __UTIL_HASH_H__ */
+#endif /* __UTIL_HASH_H__ */
diff --git a/intern/cycles/util/util_ies.cpp b/intern/cycles/util/util_ies.cpp
index e068957325b..e1de2e0c6e4 100644
--- a/intern/cycles/util/util_ies.cpp
+++ b/intern/cycles/util/util_ies.cpp
@@ -21,6 +21,13 @@
CCL_NAMESPACE_BEGIN
+// NOTE: For some reason gcc-7.2 does not instantiate this versio of allocator
+// gere (used in IESTextParser). Works fine for gcc-6, gcc-7.3 and gcc-8.
+//
+// TODO(sergey): Get to the root of this issue, or confirm this i a compiler
+// issue.
+template class GuardedAllocator<char>;
+
bool IESFile::load(ustring ies)
{
clear();
diff --git a/intern/cycles/util/util_ies.h b/intern/cycles/util/util_ies.h
index 5933cb3962a..663ad649a9c 100644
--- a/intern/cycles/util/util_ies.h
+++ b/intern/cycles/util/util_ies.h
@@ -58,4 +58,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __UTIL_IES_H__ */
+#endif /* __UTIL_IES_H__ */
diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h
index 85bdb0d8050..da5f56271c8 100644
--- a/intern/cycles/util/util_image.h
+++ b/intern/cycles/util/util_image.h
@@ -102,6 +102,6 @@ inline half util_image_cast_from_float(float value)
CCL_NAMESPACE_END
-#endif /* __UTIL_IMAGE_H__ */
+#endif /* __UTIL_IMAGE_H__ */
#include "util/util_image_impl.h"
diff --git a/intern/cycles/util/util_list.h b/intern/cycles/util/util_list.h
index f555b001186..fcf8e4f5c74 100644
--- a/intern/cycles/util/util_list.h
+++ b/intern/cycles/util/util_list.h
@@ -25,4 +25,4 @@ using std::list;
CCL_NAMESPACE_END
-#endif /* __UTIL_LIST_H__ */
+#endif /* __UTIL_LIST_H__ */
diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp
index f38683bf7de..b0922db32fb 100644
--- a/intern/cycles/util/util_logging.cpp
+++ b/intern/cycles/util/util_logging.cpp
@@ -45,7 +45,7 @@ void util_logging_init(const char *argv0)
#endif
}
-void util_logging_start(void)
+void util_logging_start()
{
#ifdef WITH_CYCLES_LOGGING
using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption;
diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h
index 5c84b6593d3..f66d7c92dcc 100644
--- a/intern/cycles/util/util_logging.h
+++ b/intern/cycles/util/util_logging.h
@@ -41,7 +41,7 @@ public:
void operator&(StubStream&) { }
};
-# define LOG_SUPPRESS() (true) ? (void) 0 : LogMessageVoidify() & StubStream()
+# define LOG_SUPPRESS() (true) ? ((void) 0) : LogMessageVoidify() & StubStream()
# define LOG(severity) LOG_SUPPRESS()
# define VLOG(severity) LOG_SUPPRESS()
#endif
@@ -52,7 +52,7 @@ struct int2;
struct float3;
void util_logging_init(const char *argv0);
-void util_logging_start(void);
+void util_logging_start();
void util_logging_verbosity_set(int verbosity);
std::ostream& operator <<(std::ostream &os,
@@ -62,4 +62,4 @@ std::ostream& operator <<(std::ostream &os,
CCL_NAMESPACE_END
-#endif /* __UTIL_LOGGING_H__ */
+#endif /* __UTIL_LOGGING_H__ */
diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h
index 3c9288417cf..1952d33ada8 100644
--- a/intern/cycles/util/util_map.h
+++ b/intern/cycles/util/util_map.h
@@ -28,4 +28,4 @@ using std::unordered_map;
CCL_NAMESPACE_END
-#endif /* __UTIL_MAP_H__ */
+#endif /* __UTIL_MAP_H__ */
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index 52aeb8d8599..6167119f873 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -157,7 +157,7 @@ ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d)
{
return max(max(a,b),max(c,d));
}
-#endif /* __KERNEL_GPU__ */
+#endif /* __KERNEL_GPU__ */
ccl_device_inline float min4(float a, float b, float c, float d)
{
@@ -220,7 +220,31 @@ ccl_device_inline float __uint_as_float(uint i)
u.i = i;
return u.f;
}
-#endif /* __KERNEL_OPENCL__ */
+
+ccl_device_inline int4 __float4_as_int4(float4 f)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_castps_si128(f.m128));
+ #else
+ return make_int4(__float_as_int(f.x),
+ __float_as_int(f.y),
+ __float_as_int(f.z),
+ __float_as_int(f.w));
+#endif
+}
+
+ccl_device_inline float4 __int4_as_float4(int4 i)
+{
+#ifdef __KERNEL_SSE__
+ return float4(_mm_castsi128_ps(i.m128));
+#else
+ return make_float4(__int_as_float(i.x),
+ __int_as_float(i.y),
+ __int_as_float(i.z),
+ __int_as_float(i.w));
+#endif
+}
+#endif /* __KERNEL_OPENCL__ */
/* Versions of functions which are safe for fast math. */
ccl_device_inline bool isnan_safe(float f)
@@ -615,4 +639,4 @@ ccl_device_inline float2 map_to_sphere(const float3 co)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_H__ */
+#endif /* __UTIL_MATH_H__ */
diff --git a/intern/cycles/util/util_math_cdf.h b/intern/cycles/util/util_math_cdf.h
index 79643fe26e3..983855e3e9b 100644
--- a/intern/cycles/util/util_math_cdf.h
+++ b/intern/cycles/util/util_math_cdf.h
@@ -75,4 +75,4 @@ void util_cdf_inverted(const int resolution,
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_H_CDF__ */
+#endif /* __UTIL_MATH_H_CDF__ */
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
index d3960deb3b4..323d40058e5 100644
--- a/intern/cycles/util/util_math_fast.h
+++ b/intern/cycles/util/util_math_fast.h
@@ -58,6 +58,11 @@ ccl_device_inline float madd(const float a, const float b, const float c)
return a * b + c;
}
+ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c)
+{
+ return a * b + c;
+}
+
/*
* FAST & APPROXIMATE MATH
*
@@ -438,6 +443,29 @@ ccl_device_inline float fast_expf(float x)
return fast_exp2f(x / M_LN2_F);
}
+#ifndef __KERNEL_GPU__
+ccl_device float4 fast_exp2f4(float4 x)
+{
+ const float4 one = make_float4(1.0f);
+ const float4 limit = make_float4(126.0f);
+ x = clamp(x, -limit, limit);
+ int4 m = make_int4(x);
+ x = one - (one - (x - make_float4(m)));
+ float4 r = make_float4(1.33336498402e-3f);
+ r = madd4(x, r, make_float4(9.810352697968e-3f));
+ r = madd4(x, r, make_float4(5.551834031939e-2f));
+ r = madd4(x, r, make_float4(0.2401793301105f));
+ r = madd4(x, r, make_float4(0.693144857883f));
+ r = madd4(x, r, make_float4(1.0f));
+ return __int4_as_float4(__float4_as_int4(r) + (m << 23));
+}
+
+ccl_device_inline float4 fast_expf4(float4 x)
+{
+ return fast_exp2f4(x / M_LN2_F);
+}
+#endif
+
ccl_device_inline float fast_exp10(float x)
{
/* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]:
diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h
index 6f9d0855d50..e937509367f 100644
--- a/intern/cycles/util/util_math_float2.h
+++ b/intern/cycles/util/util_math_float2.h
@@ -224,4 +224,4 @@ ccl_device_inline float2 interp(const float2& a, const float2& b, float t)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_FLOAT2_H__ */
+#endif /* __UTIL_MATH_FLOAT2_H__ */
diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h
index 75265c1c9a2..a54a3f3087c 100644
--- a/intern/cycles/util/util_math_float3.h
+++ b/intern/cycles/util/util_math_float3.h
@@ -419,4 +419,4 @@ ccl_device_inline float3 ensure_finite3(float3 v)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_FLOAT3_H__ */
+#endif /* __UTIL_MATH_FLOAT3_H__ */
diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h
index aa7e56fefe9..479ccf202ba 100644
--- a/intern/cycles/util/util_math_float4.h
+++ b/intern/cycles/util/util_math_float4.h
@@ -38,6 +38,7 @@ ccl_device_inline float4 operator+(const float4& a, const float4& b);
ccl_device_inline float4 operator-(const float4& a, const float4& b);
ccl_device_inline float4 operator+=(float4& a, const float4& b);
ccl_device_inline float4 operator*=(float4& a, const float4& b);
+ccl_device_inline float4 operator*=(float4& a, float f);
ccl_device_inline float4 operator/=(float4& a, float f);
ccl_device_inline int4 operator<(const float4& a, const float4& b);
@@ -58,6 +59,7 @@ ccl_device_inline float4 normalize(const float4& a);
ccl_device_inline float4 safe_normalize(const float4& a);
ccl_device_inline float4 min(const float4& a, const float4& b);
ccl_device_inline float4 max(const float4& a, const float4& b);
+ccl_device_inline float4 clamp(const float4& a, const float4& mn, const float4& mx);
ccl_device_inline float4 fabs(const float4& a);
#endif /* !__KERNEL_OPENCL__*/
@@ -168,6 +170,11 @@ ccl_device_inline float4 operator*=(float4& a, const float4& b)
return a = a * b;
}
+ccl_device_inline float4 operator*=(float4& a, float f)
+{
+ return a = a * f;
+}
+
ccl_device_inline float4 operator/=(float4& a, float f)
{
return a = a / f;
@@ -333,6 +340,11 @@ ccl_device_inline float4 max(const float4& a, const float4& b)
#endif
}
+ccl_device_inline float4 clamp(const float4& a, const float4& mn, const float4& mx)
+{
+ return min(max(a, mn), mx);
+}
+
ccl_device_inline float4 fabs(const float4& a)
{
#ifdef __KERNEL_SSE__
@@ -445,4 +457,4 @@ ccl_device_inline float4 load_float4(const float *v)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_FLOAT4_H__ */
+#endif /* __UTIL_MATH_FLOAT4_H__ */
diff --git a/intern/cycles/util/util_math_int2.h b/intern/cycles/util/util_math_int2.h
index 828c49a131c..dd401d9a091 100644
--- a/intern/cycles/util/util_math_int2.h
+++ b/intern/cycles/util/util_math_int2.h
@@ -74,4 +74,4 @@ ccl_device_inline int2 operator/(const int2 &a, const int2 &b)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_INT2_H__ */
+#endif /* __UTIL_MATH_INT2_H__ */
diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h
index 81b10f31f4a..2f4752f90f1 100644
--- a/intern/cycles/util/util_math_int3.h
+++ b/intern/cycles/util/util_math_int3.h
@@ -113,4 +113,4 @@ ccl_device_inline int3 operator-(const int3 &a, const int3 &b)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_INT3_H__ */
+#endif /* __UTIL_MATH_INT3_H__ */
diff --git a/intern/cycles/util/util_math_int4.h b/intern/cycles/util/util_math_int4.h
index 79a8c0841e7..763c42318d5 100644
--- a/intern/cycles/util/util_math_int4.h
+++ b/intern/cycles/util/util_math_int4.h
@@ -31,6 +31,10 @@ CCL_NAMESPACE_BEGIN
ccl_device_inline int4 operator+(const int4& a, const int4& b);
ccl_device_inline int4 operator+=(int4& a, const int4& b);
ccl_device_inline int4 operator>>(const int4& a, int i);
+ccl_device_inline int4 operator<<(const int4& a, int i);
+ccl_device_inline int4 operator<(const int4& a, const int4& b);
+ccl_device_inline int4 operator>=(const int4& a, const int4& b);
+ccl_device_inline int4 operator&(const int4& a, const int4& b);
ccl_device_inline int4 min(int4 a, int4 b);
ccl_device_inline int4 max(int4 a, int4 b);
ccl_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx);
@@ -65,6 +69,42 @@ ccl_device_inline int4 operator>>(const int4& a, int i)
#endif
}
+ccl_device_inline int4 operator<<(const int4& a, int i)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_slli_epi32(a.m128, i));
+#else
+ return make_int4(a.x << i, a.y << i, a.z << i, a.w << i);
+#endif
+}
+
+ccl_device_inline int4 operator<(const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_cmplt_epi32(a.m128, b.m128));
+#else
+ return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
+#endif
+}
+
+ccl_device_inline int4 operator>=(const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128)));
+#else
+ return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
+#endif
+}
+
+ccl_device_inline int4 operator&(const int4& a, const int4& b)
+{
+#ifdef __KERNEL_SSE__
+ return int4(_mm_and_si128(a.m128, b.m128));
+#else
+ return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
+#endif
+}
+
ccl_device_inline int4 min(int4 a, int4 b)
{
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
@@ -116,4 +156,4 @@ ccl_device_inline int4 load_int4(const int *v)
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_INT4_H__ */
+#endif /* __UTIL_MATH_INT4_H__ */
diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h
index b5fbb24091f..190c2f5d6b0 100644
--- a/intern/cycles/util/util_math_intersect.h
+++ b/intern/cycles/util/util_math_intersect.h
@@ -219,4 +219,4 @@ ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D,
CCL_NAMESPACE_END
-#endif /* __UTIL_MATH_INTERSECT_H__ */
+#endif /* __UTIL_MATH_INTERSECT_H__ */
diff --git a/intern/cycles/util/util_md5.h b/intern/cycles/util/util_md5.h
index 9023ccee4c2..f8c0115d8ce 100644
--- a/intern/cycles/util/util_md5.h
+++ b/intern/cycles/util/util_md5.h
@@ -58,4 +58,4 @@ string util_md5_string(const string& str);
CCL_NAMESPACE_END
-#endif /* __UTIL_MD5_H__ */
+#endif /* __UTIL_MD5_H__ */
diff --git a/intern/cycles/util/util_murmurhash.cpp b/intern/cycles/util/util_murmurhash.cpp
new file mode 100644
index 00000000000..68b2f2031be
--- /dev/null
+++ b/intern/cycles/util/util_murmurhash.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This is taken from alShaders/Cryptomatte/MurmurHash3.h:
+ *
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "util/util_algorithm.h"
+#include "util/util_murmurhash.h"
+
+#if defined(_MSC_VER)
+# define ROTL32(x,y) _rotl(x,y)
+# define ROTL64(x,y) _rotl64(x,y)
+# define BIG_CONSTANT(x) (x)
+#else
+ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r)
+{
+ return (x << r) | (x >> (32 - r));
+}
+# define ROTL32(x,y) rotl32(x,y)
+# define BIG_CONSTANT(x) (x##LLU)
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Block read - if your platform needs to do endian-swapping or can only
+ * handle aligned reads, do the conversion here. */
+ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i)
+{
+ return p[i];
+}
+
+/* Finalization mix - force all bits of a hash block to avalanche */
+ccl_device_inline uint32_t mm_hash_fmix32 ( uint32_t h )
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+ return h;
+}
+
+uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed)
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 4;
+
+ uint32_t h1 = seed;
+
+ const uint32_t c1 = 0xcc9e2d51;
+ const uint32_t c2 = 0x1b873593;
+
+ const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+ for(int i = -nblocks; i; i++) {
+ uint32_t k1 = mm_hash_getblock32(blocks,i);
+
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = ROTL32(h1,13);
+ h1 = h1 * 5 + 0xe6546b64;
+ }
+
+ const uint8_t *tail = (const uint8_t*)(data + nblocks*4);
+
+ uint32_t k1 = 0;
+
+ switch(len & 3) {
+ case 3:
+ k1 ^= tail[2] << 16;
+ ATTR_FALLTHROUGH;
+ case 2:
+ k1 ^= tail[1] << 8;
+ ATTR_FALLTHROUGH;
+ case 1:
+ k1 ^= tail[0];
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+ h1 ^= k1;
+ }
+
+ h1 ^= len;
+ h1 = mm_hash_fmix32(h1);
+ return h1;
+}
+
+/* This is taken from the cryptomatte specification 1.0 */
+float util_hash_to_float(uint32_t hash)
+{
+ uint32_t mantissa = hash & (( 1 << 23) - 1);
+ uint32_t exponent = (hash >> 23) & ((1 << 8) - 1);
+ exponent = max(exponent, (uint32_t) 1);
+ exponent = min(exponent, (uint32_t) 254);
+ exponent = exponent << 23;
+ uint32_t sign = (hash >> 31);
+ sign = sign << 31;
+ uint32_t float_bits = sign | exponent | mantissa;
+ float f;
+ memcpy(&f, &float_bits, sizeof(uint32_t));
+ return f;
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_murmurhash.h b/intern/cycles/util/util_murmurhash.h
new file mode 100644
index 00000000000..3e7897d3ae6
--- /dev/null
+++ b/intern/cycles/util/util_murmurhash.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2018 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef __UTIL_MURMURHASH_H__
+#define __UTIL_MURMURHASH_H__
+
+#include "util/util_types.h"
+
+CCL_NAMESPACE_BEGIN
+
+uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed);
+float util_hash_to_float(uint32_t hash);
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MURMURHASH_H__ */
diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h
index 0b5462e0a09..2ca7b7e4c87 100644
--- a/intern/cycles/util/util_opengl.h
+++ b/intern/cycles/util/util_opengl.h
@@ -28,4 +28,4 @@
# define mxMakeCurrentContext(x) (x)
#endif
-#endif /* __UTIL_OPENGL_H__ */
+#endif /* __UTIL_OPENGL_H__ */
diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h
index 3b3627242d5..5267bd9a97a 100644
--- a/intern/cycles/util/util_optimization.h
+++ b/intern/cycles/util/util_optimization.h
@@ -68,4 +68,4 @@
#endif
-#endif /* __UTIL_OPTIMIZATION_H__ */
+#endif /* __UTIL_OPTIMIZATION_H__ */
diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h
index 246b5cb7d63..4453c66aae2 100644
--- a/intern/cycles/util/util_param.h
+++ b/intern/cycles/util/util_param.h
@@ -30,4 +30,4 @@ OIIO_NAMESPACE_USING
CCL_NAMESPACE_END
-#endif /* __UTIL_PARAM_H__ */
+#endif /* __UTIL_PARAM_H__ */
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 51b7944705e..93080a6c80c 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -614,7 +614,7 @@ bool path_exists(const string& path)
return 0;
}
return st.st_mode != 0;
-#endif /* _WIN32 */
+#endif /* _WIN32 */
}
bool path_is_directory(const string& path)
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index 3ef15c5c09a..4ed9ebd60ff 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -114,7 +114,7 @@ public:
return cancel_message;
}
- void set_cancel_callback(function<void(void)> function)
+ void set_cancel_callback(function<void()> function)
{
cancel_cb = function;
}
@@ -323,7 +323,7 @@ public:
}
}
- void set_update_callback(function<void(void)> function)
+ void set_update_callback(function<void()> function)
{
update_cb = function;
}
@@ -331,8 +331,8 @@ public:
protected:
thread_mutex progress_mutex;
thread_mutex update_mutex;
- function<void(void)> update_cb;
- function<void(void)> cancel_cb;
+ function<void()> update_cb;
+ function<void()> cancel_cb;
/* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel.
* This makes the progress estimate more accurate when tiles with different sizes are used.
@@ -365,4 +365,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __UTIL_PROGRESS_H__ */
+#endif /* __UTIL_PROGRESS_H__ */
diff --git a/intern/cycles/util/util_projection.h b/intern/cycles/util/util_projection.h
index 26b4843928c..d1af013ae3a 100644
--- a/intern/cycles/util/util_projection.h
+++ b/intern/cycles/util/util_projection.h
@@ -169,8 +169,8 @@ ccl_device_inline ProjectionTransform projection_orthographic(float znear, float
return ProjectionTransform(t);
}
-#endif /* __KERNEL_GPU__ */
+#endif /* __KERNEL_GPU__ */
CCL_NAMESPACE_END
-#endif /* __UTIL_PROJECTION_H__ */
+#endif /* __UTIL_PROJECTION_H__ */
diff --git a/intern/cycles/util/util_queue.h b/intern/cycles/util/util_queue.h
index 622f4fe3e47..0a2b7718f57 100644
--- a/intern/cycles/util/util_queue.h
+++ b/intern/cycles/util/util_queue.h
@@ -25,4 +25,4 @@ using std::queue;
CCL_NAMESPACE_END
-#endif /* __UTIL_LIST_H__ */
+#endif /* __UTIL_LIST_H__ */
diff --git a/intern/cycles/util/util_rect.h b/intern/cycles/util/util_rect.h
index d13baefe85e..389669acf2e 100644
--- a/intern/cycles/util/util_rect.h
+++ b/intern/cycles/util/util_rect.h
@@ -69,4 +69,4 @@ ccl_device_inline int rect_size(int4 rect)
CCL_NAMESPACE_END
-#endif /* __UTIL_RECT_H__ */
+#endif /* __UTIL_RECT_H__ */
diff --git a/intern/cycles/util/util_set.h b/intern/cycles/util/util_set.h
index 298e1f7729a..a9c56bb4919 100644
--- a/intern/cycles/util/util_set.h
+++ b/intern/cycles/util/util_set.h
@@ -31,4 +31,4 @@ using std::unordered_set;
CCL_NAMESPACE_END
-#endif /* __UTIL_SET_H__ */
+#endif /* __UTIL_SET_H__ */
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index cc7f436c8fe..565ea768089 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -219,9 +219,9 @@ __forceinline size_t __bscf(size_t& v)
return i;
}
-#endif /* __KERNEL_64_BIT__ */
+#endif /* __KERNEL_64_BIT__ */
-#else /* _WIN32 */
+#else /* _WIN32 */
__forceinline unsigned int __popcnt(unsigned int in) {
int r = 0; asm ("popcnt %1,%0" : "=r"(r) : "r"(in)); return r;
@@ -344,7 +344,7 @@ __forceinline size_t __bscf(size_t& v)
}
#endif
-#endif /* _WIN32 */
+#endif /* _WIN32 */
/* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test
* __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other
@@ -442,7 +442,7 @@ __forceinline __m128 _mm_round_ps_emu( __m128 value, const int flags)
return value;
}
-#endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */
+#endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */
#else /* __KERNEL_SSE2__ */
@@ -470,7 +470,7 @@ ccl_device_inline int __bsr(int value)
return bit;
}
-#endif /* __KERNEL_SSE2__ */
+#endif /* __KERNEL_SSE2__ */
/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__) || \
@@ -484,6 +484,6 @@ ccl_device_inline int __bsr(int value)
CCL_NAMESPACE_END
-#endif /* __KERNEL_GPU__ */
+#endif /* __KERNEL_GPU__ */
-#endif /* __UTIL_SIMD_TYPES_H__ */
+#endif /* __UTIL_SIMD_TYPES_H__ */
diff --git a/intern/cycles/util/util_sky_model.h b/intern/cycles/util/util_sky_model.h
index 237e4e61bf5..2e593f58c39 100644
--- a/intern/cycles/util/util_sky_model.h
+++ b/intern/cycles/util/util_sky_model.h
@@ -437,6 +437,6 @@ double arhosekskymodel_solar_radiance(ArHosekSkyModelState* state,
double wavelength);
-#endif // _SKY_MODEL_H_
+#endif // _SKY_MODEL_H_
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h
index 115b133c662..f6810505126 100644
--- a/intern/cycles/util/util_sseb.h
+++ b/intern/cycles/util/util_sseb.h
@@ -177,7 +177,7 @@ __forceinline size_t movemask( const sseb& a ) { return _mm_movemask_ps(a); }
ccl_device_inline void print_sseb(const char *label, const sseb &a)
{
- printf("%s: %df %df %df %d\n",
+ printf("%s: %d %d %d %d\n",
label, a[0], a[1], a[2], a[3]);
}
diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h
index b5623860e33..66670c9a779 100644
--- a/intern/cycles/util/util_ssef.h
+++ b/intern/cycles/util/util_ssef.h
@@ -44,8 +44,8 @@ struct ssef
__forceinline ssef& operator=(const ssef& other) { m128 = other.m128; return *this; }
__forceinline ssef(const __m128 a) : m128(a) {}
- __forceinline operator const __m128&(void) const { return m128; }
- __forceinline operator __m128&(void) { return m128; }
+ __forceinline operator const __m128&() const { return m128; }
+ __forceinline operator __m128&() { return m128; }
__forceinline ssef (float a) : m128(_mm_set1_ps(a)) {}
__forceinline ssef (float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d)) {}
@@ -517,12 +517,12 @@ ccl_device_inline float len3(const ssef& a)
/* faster version for SSSE3 */
typedef ssei shuffle_swap_t;
-ccl_device_inline shuffle_swap_t shuffle_swap_identity(void)
+ccl_device_inline shuffle_swap_t shuffle_swap_identity()
{
return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
}
-ccl_device_inline shuffle_swap_t shuffle_swap_swap(void)
+ccl_device_inline shuffle_swap_t shuffle_swap_swap()
{
return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
}
@@ -537,12 +537,12 @@ ccl_device_inline const ssef shuffle_swap(const ssef& a, const shuffle_swap_t& s
/* somewhat slower version for SSE2 */
typedef int shuffle_swap_t;
-ccl_device_inline shuffle_swap_t shuffle_swap_identity(void)
+ccl_device_inline shuffle_swap_t shuffle_swap_identity()
{
return 0;
}
-ccl_device_inline shuffle_swap_t shuffle_swap_swap(void)
+ccl_device_inline shuffle_swap_t shuffle_swap_swap()
{
return 1;
}
diff --git a/intern/cycles/util/util_stack_allocator.h b/intern/cycles/util/util_stack_allocator.h
index 79a535bd170..4e978e18bee 100644
--- a/intern/cycles/util/util_stack_allocator.h
+++ b/intern/cycles/util/util_stack_allocator.h
@@ -53,7 +53,7 @@ public:
T *allocate(size_t n, const void *hint = 0)
{
- (void)hint;
+ (void) hint;
if(n == 0) {
return NULL;
}
@@ -164,4 +164,4 @@ private:
CCL_NAMESPACE_END
-#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */
+#endif /* __UTIL_STACK_ALLOCATOR_H__ */
diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h
index dc3cb3f6ecc..b1c6c374693 100644
--- a/intern/cycles/util/util_static_assert.h
+++ b/intern/cycles/util/util_static_assert.h
@@ -47,4 +47,4 @@ CCL_NAMESPACE_BEGIN
CCL_NAMESPACE_END
-#endif /* __UTIL_STATIC_ASSERT_H__ */
+#endif /* __UTIL_STATIC_ASSERT_H__ */
diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h
index 7667f58eb7d..0ba58422a67 100644
--- a/intern/cycles/util/util_stats.h
+++ b/intern/cycles/util/util_stats.h
@@ -44,4 +44,4 @@ public:
CCL_NAMESPACE_END
-#endif /* __UTIL_STATS_H__ */
+#endif /* __UTIL_STATS_H__ */
diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h
index 3a4f4398158..f17112c30d2 100644
--- a/intern/cycles/util/util_string.h
+++ b/intern/cycles/util/util_string.h
@@ -74,4 +74,4 @@ string string_human_readable_number(size_t num);
CCL_NAMESPACE_END
-#endif /* __UTIL_STRING_H__ */
+#endif /* __UTIL_STRING_H__ */
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 1b039888452..34f428f111c 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -52,14 +52,14 @@ int system_cpu_group_thread_count(int group)
util_windows_init_numa_groups();
return GetActiveProcessorCount(group);
#elif defined(__APPLE__)
- (void)group;
+ (void) group;
int count;
size_t len = sizeof(count);
int mib[2] = { CTL_HW, HW_NCPU };
sysctl(mib, 2, &count, &len, NULL, 0);
return count;
#else
- (void)group;
+ (void) group;
return sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index 42ad72356b9..241ac897157 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -46,4 +46,4 @@ size_t system_physical_ram();
CCL_NAMESPACE_END
-#endif /* __UTIL_SYSTEM_H__ */
+#endif /* __UTIL_SYSTEM_H__ */
diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h
index f752e81128d..233cfe33305 100644
--- a/intern/cycles/util/util_texture.h
+++ b/intern/cycles/util/util_texture.h
@@ -89,4 +89,4 @@ typedef struct TextureInfo {
CCL_NAMESPACE_END
-#endif /* __UTIL_TEXTURE_H__ */
+#endif /* __UTIL_TEXTURE_H__ */
diff --git a/intern/cycles/util/util_thread.cpp b/intern/cycles/util/util_thread.cpp
index 16a8591a8a9..37d8bdbd4b0 100644
--- a/intern/cycles/util/util_thread.cpp
+++ b/intern/cycles/util/util_thread.cpp
@@ -21,7 +21,7 @@
CCL_NAMESPACE_BEGIN
-thread::thread(function<void(void)> run_cb, int group)
+thread::thread(function<void()> run_cb, int group)
: run_cb_(run_cb),
joined_(false),
group_(group)
diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h
index f39fcfb4279..6250bb95dcf 100644
--- a/intern/cycles/util/util_thread.h
+++ b/intern/cycles/util/util_thread.h
@@ -46,14 +46,14 @@ typedef std::condition_variable thread_condition_variable;
class thread {
public:
- thread(function<void(void)> run_cb, int group = -1);
+ thread(function<void()> run_cb, int group = -1);
~thread();
static void *run(void *arg);
bool join();
protected:
- function<void(void)> run_cb_;
+ function<void()> run_cb_;
std::thread thread_;
bool joined_;
int group_;
@@ -138,4 +138,4 @@ protected:
CCL_NAMESPACE_END
-#endif /* __UTIL_THREAD_H__ */
+#endif /* __UTIL_THREAD_H__ */
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index e781f85dded..e4cadd3e81a 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -424,6 +424,31 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm,
#ifndef __KERNEL_GPU__
+#ifdef WITH_EMBREE
+ccl_device void transform_motion_array_interpolate_straight(Transform *tfm,
+ const ccl_global DecomposedTransform *motion,
+ uint numsteps,
+ float time)
+{
+ /* Figure out which steps we need to interpolate. */
+ int maxstep = numsteps - 1;
+ int step = min((int)(time*maxstep), maxstep - 1);
+ float t = time * maxstep - step;
+
+ const ccl_global DecomposedTransform *a = motion + step;
+ const ccl_global DecomposedTransform *b = motion + step + 1;
+ Transform step1, step2;
+
+ transform_compose(&step1, a);
+ transform_compose(&step2, b);
+
+ /* matrix lerp */
+ tfm->x = (1.0f - t) * step1.x + t * step2.x;
+ tfm->y = (1.0f - t) * step1.y + t * step2.y;
+ tfm->z = (1.0f - t) * step1.z + t * step2.z;
+}
+#endif
+
class BoundBox2D;
ccl_device_inline bool operator==(const DecomposedTransform& A, const DecomposedTransform& B)
@@ -470,4 +495,4 @@ OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed)
CCL_NAMESPACE_END
-#endif /* __UTIL_TRANSFORM_H__ */
+#endif /* __UTIL_TRANSFORM_H__ */
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 96c549b9be5..535048d8f8c 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -156,4 +156,4 @@ CCL_NAMESPACE_END
#endif
#endif
-#endif /* __UTIL_TYPES_H__ */
+#endif /* __UTIL_TYPES_H__ */
diff --git a/intern/cycles/util/util_types_float3.h b/intern/cycles/util/util_types_float3.h
index 28146ad04f7..ed2300e7996 100644
--- a/intern/cycles/util/util_types_float3.h
+++ b/intern/cycles/util/util_types_float3.h
@@ -35,8 +35,8 @@ struct ccl_try_align(16) float3 {
__forceinline float3(const float3& a);
__forceinline explicit float3(const __m128& a);
- __forceinline operator const __m128&(void) const;
- __forceinline operator __m128&(void);
+ __forceinline operator const __m128&() const;
+ __forceinline operator __m128&();
__forceinline float3& operator =(const float3& a);
#else /* __KERNEL_SSE__ */
diff --git a/intern/cycles/util/util_types_float3_impl.h b/intern/cycles/util/util_types_float3_impl.h
index 45f61767d3f..2e840a5c399 100644
--- a/intern/cycles/util/util_types_float3_impl.h
+++ b/intern/cycles/util/util_types_float3_impl.h
@@ -43,12 +43,12 @@ __forceinline float3::float3(const __m128& a)
{
}
-__forceinline float3::operator const __m128&(void) const
+__forceinline float3::operator const __m128&() const
{
return m128;
}
-__forceinline float3::operator __m128&(void)
+__forceinline float3::operator __m128&()
{
return m128;
}
diff --git a/intern/cycles/util/util_types_float4.h b/intern/cycles/util/util_types_float4.h
index 154391f6881..5c10d483c2e 100644
--- a/intern/cycles/util/util_types_float4.h
+++ b/intern/cycles/util/util_types_float4.h
@@ -36,8 +36,8 @@ struct ccl_try_align(16) float4 {
__forceinline float4();
__forceinline explicit float4(const __m128& a);
- __forceinline operator const __m128&(void) const;
- __forceinline operator __m128&(void);
+ __forceinline operator const __m128&() const;
+ __forceinline operator __m128&();
__forceinline float4& operator =(const float4& a);
diff --git a/intern/cycles/util/util_types_float4_impl.h b/intern/cycles/util/util_types_float4_impl.h
index a49fac65b10..a83148031f1 100644
--- a/intern/cycles/util/util_types_float4_impl.h
+++ b/intern/cycles/util/util_types_float4_impl.h
@@ -38,12 +38,12 @@ __forceinline float4::float4(const __m128& a)
{
}
-__forceinline float4::operator const __m128&(void) const
+__forceinline float4::operator const __m128&() const
{
return m128;
}
-__forceinline float4::operator __m128&(void)
+__forceinline float4::operator __m128&()
{
return m128;
}
diff --git a/intern/cycles/util/util_types_float8.h b/intern/cycles/util/util_types_float8.h
index 64ec5a71355..08720b8ff48 100644
--- a/intern/cycles/util/util_types_float8.h
+++ b/intern/cycles/util/util_types_float8.h
@@ -37,7 +37,7 @@ CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_GPU__
-struct ccl_try_align(16) float8 {
+struct ccl_try_align(32) float8 {
#ifdef __KERNEL_AVX2__
union {
__m256 m256;
@@ -48,8 +48,8 @@ struct ccl_try_align(16) float8 {
__forceinline float8(const float8& a);
__forceinline explicit float8(const __m256& a);
- __forceinline operator const __m256&(void) const;
- __forceinline operator __m256&(void);
+ __forceinline operator const __m256&() const;
+ __forceinline operator __m256&();
__forceinline float8& operator =(const float8& a);
diff --git a/intern/cycles/util/util_types_float8_impl.h b/intern/cycles/util/util_types_float8_impl.h
index ebf8260bc7c..84fe233c334 100644
--- a/intern/cycles/util/util_types_float8_impl.h
+++ b/intern/cycles/util/util_types_float8_impl.h
@@ -55,12 +55,12 @@ __forceinline float8::float8(const __m256& f)
{
}
-__forceinline float8::operator const __m256&(void) const
+__forceinline float8::operator const __m256&() const
{
return m256;
}
-__forceinline float8::operator __m256&(void)
+__forceinline float8::operator __m256&()
{
return m256;
}
diff --git a/intern/cycles/util/util_types_int3.h b/intern/cycles/util/util_types_int3.h
index 9d43b201c02..f68074b982b 100644
--- a/intern/cycles/util/util_types_int3.h
+++ b/intern/cycles/util/util_types_int3.h
@@ -35,8 +35,8 @@ struct ccl_try_align(16) int3 {
__forceinline int3(const int3& a);
__forceinline explicit int3(const __m128i& a);
- __forceinline operator const __m128i&(void) const;
- __forceinline operator __m128i&(void);
+ __forceinline operator const __m128i&() const;
+ __forceinline operator __m128i&();
__forceinline int3& operator =(const int3& a);
#else /* __KERNEL_SSE__ */
diff --git a/intern/cycles/util/util_types_int3_impl.h b/intern/cycles/util/util_types_int3_impl.h
index ada50c4812c..1b195ca753f 100644
--- a/intern/cycles/util/util_types_int3_impl.h
+++ b/intern/cycles/util/util_types_int3_impl.h
@@ -43,12 +43,12 @@ __forceinline int3::int3(const int3& a)
{
}
-__forceinline int3::operator const __m128i&(void) const
+__forceinline int3::operator const __m128i&() const
{
return m128;
}
-__forceinline int3::operator __m128i&(void)
+__forceinline int3::operator __m128i&()
{
return m128;
}
diff --git a/intern/cycles/util/util_types_int4.h b/intern/cycles/util/util_types_int4.h
index cdd0ecbdae5..52e6fed8c14 100644
--- a/intern/cycles/util/util_types_int4.h
+++ b/intern/cycles/util/util_types_int4.h
@@ -26,6 +26,7 @@ CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_GPU__
struct float3;
+struct float4;
struct ccl_try_align(16) int4 {
#ifdef __KERNEL_SSE__
@@ -38,8 +39,8 @@ struct ccl_try_align(16) int4 {
__forceinline int4(const int4& a);
__forceinline explicit int4(const __m128i& a);
- __forceinline operator const __m128i&(void) const;
- __forceinline operator __m128i&(void);
+ __forceinline operator const __m128i&() const;
+ __forceinline operator __m128i&();
__forceinline int4& operator=(const int4& a);
#else /* __KERNEL_SSE__ */
@@ -53,6 +54,7 @@ struct ccl_try_align(16) int4 {
ccl_device_inline int4 make_int4(int i);
ccl_device_inline int4 make_int4(int x, int y, int z, int w);
ccl_device_inline int4 make_int4(const float3& f);
+ccl_device_inline int4 make_int4(const float4& f);
ccl_device_inline void print_int4(const char *label, const int4& a);
#endif /* __KERNEL_GPU__ */
diff --git a/intern/cycles/util/util_types_int4_impl.h b/intern/cycles/util/util_types_int4_impl.h
index 07cdc88f2dc..c058f86c400 100644
--- a/intern/cycles/util/util_types_int4_impl.h
+++ b/intern/cycles/util/util_types_int4_impl.h
@@ -43,12 +43,12 @@ __forceinline int4::int4(const __m128i& a)
{
}
-__forceinline int4::operator const __m128i&(void) const
+__forceinline int4::operator const __m128i&() const
{
return m128;
}
-__forceinline int4::operator __m128i&(void)
+__forceinline int4::operator __m128i&()
{
return m128;
}
@@ -104,6 +104,16 @@ ccl_device_inline int4 make_int4(const float3& f)
return a;
}
+ccl_device_inline int4 make_int4(const float4& f)
+{
+#ifdef __KERNEL_SSE__
+ int4 a(_mm_cvtps_epi32(f.m128));
+#else
+ int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w};
+#endif
+ return a;
+}
+
ccl_device_inline void print_int4(const char *label, const int4& a)
{
printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w);
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index 0b33221ad4d..18fa231d6e7 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -17,8 +17,6 @@
#ifndef __UTIL_VECTOR_H__
#define __UTIL_VECTOR_H__
-/* Vector */
-
#include <cassert>
#include <cstring>
#include <vector>
@@ -29,12 +27,9 @@
CCL_NAMESPACE_BEGIN
-/* Vector
- *
- * Own subclass-ed vestion of std::vector. Subclass is needed because:
+/* Own subclass-ed vestion of std::vector. Subclass is needed because:
*
* - Use own allocator which keeps track of used/peak memory.
- *
* - Have method to ensure capacity is re-set to 0.
*/
template<typename value_type,
@@ -42,30 +37,16 @@ template<typename value_type,
class vector : public std::vector<value_type, allocator_type>
{
public:
- /* Default constructor. */
- explicit vector() : std::vector<value_type, allocator_type>() { }
-
- /* Fill constructor. */
- explicit vector(size_t n, const value_type& val = value_type())
- : std::vector<value_type, allocator_type>(n, val) { }
-
- /* Range constructor. */
- template <class InputIterator>
- vector(InputIterator first, InputIterator last)
- : std::vector<value_type, allocator_type>(first, last) { }
-
- /* Copy constructor. */
- vector(const vector &x) : std::vector<value_type, allocator_type>(x) { }
+ typedef std::vector<value_type, allocator_type> BaseClass;
- void shrink_to_fit(void)
- {
- std::vector<value_type, allocator_type>::shrink_to_fit();
- }
+ /* Inherit all constructors from base class. */
+ using BaseClass::vector;
- void free_memory(void)
+ /* Try as hard as possible to use zero memory. */
+ void free_memory()
{
- std::vector<value_type, allocator_type>::resize(0);
- shrink_to_fit();
+ BaseClass::resize(0);
+ BaseClass::shrink_to_fit();
}
/* Some external API might demand working with std::vector. */
@@ -75,265 +56,6 @@ public:
}
};
-/* Array
- *
- * Simplified version of vector, serving multiple purposes:
- * - somewhat faster in that it does not clear memory on resize/alloc,
- * this was actually showing up in profiles quite significantly. it
- * also does not run any constructors/destructors
- * - if this is used, we are not tempted to use inefficient operations
- * - aligned allocation for CPU native data types */
-
-template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES>
-class array
-{
-public:
- array()
- : data_(NULL),
- datasize_(0),
- capacity_(0)
- {}
-
- explicit array(size_t newsize)
- {
- if(newsize == 0) {
- data_ = NULL;
- datasize_ = 0;
- capacity_ = 0;
- }
- else {
- data_ = mem_allocate(newsize);
- datasize_ = newsize;
- capacity_ = datasize_;
- }
- }
-
- array(const array& from)
- {
- if(from.datasize_ == 0) {
- data_ = NULL;
- datasize_ = 0;
- capacity_ = 0;
- }
- else {
- data_ = mem_allocate(from.datasize_);
- memcpy(data_, from.data_, from.datasize_*sizeof(T));
- datasize_ = from.datasize_;
- capacity_ = datasize_;
- }
- }
-
- array& operator=(const array& from)
- {
- if(this != &from) {
- resize(from.size());
- memcpy((void*)data_, from.data_, datasize_*sizeof(T));
- }
-
- return *this;
- }
-
- array& operator=(const vector<T>& from)
- {
- resize(from.size());
-
- if(from.size() > 0) {
- memcpy(data_, &from[0], datasize_*sizeof(T));
- }
-
- return *this;
- }
-
- ~array()
- {
- mem_free(data_, capacity_);
- }
-
- bool operator==(const array<T>& other) const
- {
- if(datasize_ != other.datasize_) {
- return false;
- }
-
- return memcmp(data_, other.data_, datasize_*sizeof(T)) == 0;
- }
-
- bool operator!=(const array<T>& other) const
- {
- return !(*this == other);
- }
-
- void steal_data(array& from)
- {
- if(this != &from) {
- clear();
-
- data_ = from.data_;
- datasize_ = from.datasize_;
- capacity_ = from.capacity_;
-
- from.data_ = NULL;
- from.datasize_ = 0;
- from.capacity_ = 0;
- }
- }
-
- T *steal_pointer()
- {
- T *ptr = data_;
- data_ = NULL;
- clear();
- return ptr;
- }
-
- T* resize(size_t newsize)
- {
- if(newsize == 0) {
- clear();
- }
- else if(newsize != datasize_) {
- if(newsize > capacity_) {
- T *newdata = mem_allocate(newsize);
- if(newdata == NULL) {
- /* Allocation failed, likely out of memory. */
- clear();
- return NULL;
- }
- else if(data_ != NULL) {
- memcpy((void *)newdata,
- data_,
- ((datasize_ < newsize)? datasize_: newsize)*sizeof(T));
- mem_free(data_, capacity_);
- }
- data_ = newdata;
- capacity_ = newsize;
- }
- datasize_ = newsize;
- }
- return data_;
- }
-
- T* resize(size_t newsize, const T& value)
- {
- size_t oldsize = size();
- resize(newsize);
-
- for(size_t i = oldsize; i < size(); i++) {
- data_[i] = value;
- }
-
- return data_;
- }
-
- void clear()
- {
- if(data_ != NULL) {
- mem_free(data_, capacity_);
- data_ = NULL;
- }
- datasize_ = 0;
- capacity_ = 0;
- }
-
- size_t empty() const
- {
- return datasize_ == 0;
- }
-
- size_t size() const
- {
- return datasize_;
- }
-
- T* data()
- {
- return data_;
- }
-
- const T* data() const
- {
- return data_;
- }
-
- T& operator[](size_t i) const
- {
- assert(i < datasize_);
- return data_[i];
- }
-
- void reserve(size_t newcapacity)
- {
- if(newcapacity > capacity_) {
- T *newdata = mem_allocate(newcapacity);
- if(data_ != NULL) {
- memcpy(newdata, data_, ((datasize_ < newcapacity)? datasize_: newcapacity)*sizeof(T));
- mem_free(data_, capacity_);
- }
- data_ = newdata;
- capacity_ = newcapacity;
- }
- }
-
- size_t capacity() const
- {
- return capacity_;
- }
-
- // do not use this method unless you are sure the code is not performance critical
- void push_back_slow(const T& t)
- {
- if(capacity_ == datasize_)
- {
- reserve(datasize_ == 0 ? 1 : (size_t)((datasize_ + 1) * 1.2));
- }
-
- data_[datasize_++] = t;
- }
-
- void push_back_reserved(const T& t)
- {
- assert(datasize_ < capacity_);
- push_back_slow(t);
- }
-
- void append(const array<T>& from)
- {
- if(from.size()) {
- size_t old_size = size();
- resize(old_size + from.size());
- memcpy(data_ + old_size, from.data(), sizeof(T) * from.size());
- }
- }
-
-protected:
- inline T* mem_allocate(size_t N)
- {
- if(N == 0) {
- return NULL;
- }
- T *mem = (T*)util_aligned_malloc(sizeof(T)*N, alignment);
- if(mem != NULL) {
- util_guarded_mem_alloc(sizeof(T)*N);
- }
- else {
- throw std::bad_alloc();
- }
- return mem;
- }
-
- inline void mem_free(T *mem, size_t N)
- {
- if(mem != NULL) {
- util_guarded_mem_free(sizeof(T)*N);
- util_aligned_free(mem);
- }
- }
-
- T *data_;
- size_t datasize_;
- size_t capacity_;
-};
-
CCL_NAMESPACE_END
-#endif /* __UTIL_VECTOR_H__ */
+#endif /* __UTIL_VECTOR_H__ */
diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h
index 112255f447b..980c5a269e6 100644
--- a/intern/cycles/util/util_version.h
+++ b/intern/cycles/util/util_version.h
@@ -34,4 +34,4 @@ CCL_NAMESPACE_BEGIN
CCL_NAMESPACE_END
-#endif /* __UTIL_VERSION_H__ */
+#endif /* __UTIL_VERSION_H__ */
diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp
index 3836cc86ee0..9dffd7a80bd 100644
--- a/intern/cycles/util/util_view.cpp
+++ b/intern/cycles/util/util_view.cpp
@@ -215,7 +215,7 @@ static void view_motion(int x, int y)
V.mouseY = y;
}
-static void view_idle(void)
+static void view_idle()
{
if(V.redraw) {
V.redraw = false;
diff --git a/intern/cycles/util/util_view.h b/intern/cycles/util/util_view.h
index e603e605776..ae50b098b39 100644
--- a/intern/cycles/util/util_view.h
+++ b/intern/cycles/util/util_view.h
@@ -22,10 +22,10 @@
CCL_NAMESPACE_BEGIN
-typedef void (*ViewInitFunc)(void);
-typedef void (*ViewExitFunc)(void);
+typedef void (*ViewInitFunc)();
+typedef void (*ViewExitFunc)();
typedef void (*ViewResizeFunc)(int width, int height);
-typedef void (*ViewDisplayFunc)(void);
+typedef void (*ViewDisplayFunc)();
typedef void (*ViewKeyboardFunc)(unsigned char key);
typedef void (*ViewMotionFunc)(int x, int y, int button);
@@ -40,4 +40,4 @@ void view_redraw();
CCL_NAMESPACE_END
-#endif /*__UTIL_VIEW_H__*/
+#endif /*__UTIL_VIEW_H__*/
diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h
index 3b23ce8a3cb..9b9268fed7a 100644
--- a/intern/cycles/util/util_windows.h
+++ b/intern/cycles/util/util_windows.h
@@ -56,4 +56,4 @@ CCL_NAMESPACE_END
#endif /* WIN32 */
-#endif /* __UTIL_WINDOWS_H__ */
+#endif /* __UTIL_WINDOWS_H__ */
diff --git a/intern/cycles/util/util_xml.h b/intern/cycles/util/util_xml.h
index 6f06f17937b..c8a3a495f30 100644
--- a/intern/cycles/util/util_xml.h
+++ b/intern/cycles/util/util_xml.h
@@ -38,4 +38,4 @@ using PUGIXML_NAMESPACE::xml_parse_result;
CCL_NAMESPACE_END
-#endif /* __UTIL_XML_H__ */
+#endif /* __UTIL_XML_H__ */