diff options
Diffstat (limited to 'intern/cycles/util')
117 files changed, 14290 insertions, 13178 deletions
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index b8b38a531ea..1c7a6549253 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -1,30 +1,30 @@ set(INC - .. - ../../glew-mx + .. + ../../glew-mx ) set(INC_SYS - ${GLEW_INCLUDE_DIR} + ${GLEW_INCLUDE_DIR} ) set(SRC - util_aligned_malloc.cpp - util_debug.cpp - util_ies.cpp - util_logging.cpp - util_math_cdf.cpp - util_md5.cpp - util_murmurhash.cpp - util_path.cpp - util_profiling.cpp - util_string.cpp - util_simd.cpp - util_system.cpp - util_task.cpp - util_thread.cpp - util_time.cpp - util_transform.cpp + util_aligned_malloc.cpp + util_debug.cpp + util_ies.cpp + util_logging.cpp + util_math_cdf.cpp + util_md5.cpp + util_murmurhash.cpp + util_path.cpp + util_profiling.cpp + util_string.cpp + util_simd.cpp + util_system.cpp + util_task.cpp + util_thread.cpp + util_time.cpp + util_transform.cpp ) set(LIB @@ -32,117 +32,117 @@ set(LIB ) if(WITH_CYCLES_STANDALONE) - if (WITH_CYCLES_STANDALONE_GUI) - list(APPEND SRC - util_view.cpp - ) - endif() + if (WITH_CYCLES_STANDALONE_GUI) + list(APPEND SRC + util_view.cpp + ) + endif() endif() if(CYCLES_STANDALONE_REPOSITORY) - list(APPEND INC_SYS ../../third_party/numaapi/include) + list(APPEND INC_SYS ../../third_party/numaapi/include) else() - list(APPEND INC_SYS ../../numaapi/include) + list(APPEND INC_SYS ../../numaapi/include) endif() set(SRC_HEADERS - util_algorithm.h - util_aligned_malloc.h - util_args.h - util_array.h - util_atomic.h - util_boundbox.h - util_debug.h - util_defines.h - util_guarded_allocator.cpp - util_foreach.h - util_function.h - util_guarded_allocator.h - util_half.h - util_hash.h - util_ies.h - util_image.h - util_image_impl.h - util_list.h - util_logging.h - util_map.h - util_math.h - util_math_cdf.h - util_math_fast.h - util_math_intersect.h - util_math_float2.h - util_math_float3.h - util_math_float4.h - util_math_int2.h - util_math_int3.h - util_math_int4.h - util_math_matrix.h - util_md5.h - util_murmurhash.h - util_opengl.h - util_optimization.h - util_param.h - util_path.h - util_profiling.h - util_progress.h - util_projection.h - util_queue.h - util_rect.h - util_set.h - util_simd.h - util_sky_model.cpp - util_sky_model.h - util_sky_model_data.h - util_avxf.h - util_avxb.h - util_sseb.h - util_ssef.h - util_ssei.h - util_stack_allocator.h - util_static_assert.h - util_stats.h - util_string.h - util_system.h - util_task.h - util_texture.h - util_thread.h - util_time.h - util_transform.h - util_types.h - util_types_float2.h - util_types_float2_impl.h - util_types_float3.h - util_types_float3_impl.h - util_types_float4.h - util_types_float4_impl.h - util_types_float8.h - util_types_float8_impl.h - util_types_int2.h - util_types_int2_impl.h - util_types_int3.h - util_types_int3_impl.h - util_types_int4.h - util_types_int4_impl.h - util_types_uchar2.h - util_types_uchar2_impl.h - util_types_uchar3.h - util_types_uchar3_impl.h - util_types_uchar4.h - util_types_uchar4_impl.h - util_types_uint2.h - util_types_uint2_impl.h - util_types_uint3.h - util_types_uint3_impl.h - util_types_uint4.h - util_types_uint4_impl.h - util_types_ushort4.h - util_types_vector3.h - util_types_vector3_impl.h - util_vector.h - util_version.h - util_view.h - util_windows.h - util_xml.h + util_algorithm.h + util_aligned_malloc.h + util_args.h + util_array.h + util_atomic.h + util_boundbox.h + util_debug.h + util_defines.h + util_guarded_allocator.cpp + util_foreach.h + util_function.h + util_guarded_allocator.h + util_half.h + util_hash.h + util_ies.h + util_image.h + util_image_impl.h + util_list.h + util_logging.h + util_map.h + util_math.h + util_math_cdf.h + util_math_fast.h + util_math_intersect.h + util_math_float2.h + util_math_float3.h + util_math_float4.h + util_math_int2.h + util_math_int3.h + util_math_int4.h + util_math_matrix.h + util_md5.h + util_murmurhash.h + util_opengl.h + util_optimization.h + util_param.h + util_path.h + util_profiling.h + util_progress.h + util_projection.h + util_queue.h + util_rect.h + util_set.h + util_simd.h + util_sky_model.cpp + util_sky_model.h + util_sky_model_data.h + util_avxf.h + util_avxb.h + util_sseb.h + util_ssef.h + util_ssei.h + util_stack_allocator.h + util_static_assert.h + util_stats.h + util_string.h + util_system.h + util_task.h + util_texture.h + util_thread.h + util_time.h + util_transform.h + util_types.h + util_types_float2.h + util_types_float2_impl.h + util_types_float3.h + util_types_float3_impl.h + util_types_float4.h + util_types_float4_impl.h + util_types_float8.h + util_types_float8_impl.h + util_types_int2.h + util_types_int2_impl.h + util_types_int3.h + util_types_int3_impl.h + util_types_int4.h + util_types_int4_impl.h + util_types_uchar2.h + util_types_uchar2_impl.h + util_types_uchar3.h + util_types_uchar3_impl.h + util_types_uchar4.h + util_types_uchar4_impl.h + util_types_uint2.h + util_types_uint2_impl.h + util_types_uint3.h + util_types_uint3_impl.h + util_types_uint4.h + util_types_uint4_impl.h + util_types_ushort4.h + util_types_vector3.h + util_types_vector3_impl.h + util_vector.h + util_version.h + util_view.h + util_windows.h + util_xml.h ) include_directories(${INC}) diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h index f9e6476cc52..62093039625 100644 --- a/intern/cycles/util/util_algorithm.h +++ b/intern/cycles/util/util_algorithm.h @@ -21,12 +21,12 @@ CCL_NAMESPACE_BEGIN -using std::sort; -using std::swap; using std::max; using std::min; using std::remove; +using std::sort; +using std::swap; CCL_NAMESPACE_END -#endif /* __UTIL_ALGORITHM_H__ */ +#endif /* __UTIL_ALGORITHM_H__ */ diff --git a/intern/cycles/util/util_aligned_malloc.cpp b/intern/cycles/util/util_aligned_malloc.cpp index cc7252dcc58..104e6c5e3f4 100644 --- a/intern/cycles/util/util_aligned_malloc.cpp +++ b/intern/cycles/util/util_aligned_malloc.cpp @@ -29,7 +29,7 @@ # undef __MSVCRT_VERSION__ # endif # define __MSVCRT_VERSION__ 0x0700 -# endif /* FREE_WINDOWS */ +# endif /* FREE_WINDOWS */ # include <malloc.h> #else /* Apple's malloc is 16-byte aligned, and does not have malloc.h, so include @@ -43,39 +43,39 @@ CCL_NAMESPACE_BEGIN void *util_aligned_malloc(size_t size, int alignment) { #ifdef WITH_BLENDER_GUARDEDALLOC - return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc"); + return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc"); #elif defined(_WIN32) - return _aligned_malloc(size, alignment); + return _aligned_malloc(size, alignment); #elif defined(__APPLE__) - /* On Mac OS X, both the heap and the stack are guaranteed 16-byte aligned so - * they work natively with SSE types with no further work. - */ - assert(alignment == 16); - return malloc(size); + /* On Mac OS X, both the heap and the stack are guaranteed 16-byte aligned so + * they work natively with SSE types with no further work. + */ + assert(alignment == 16); + return malloc(size); #elif defined(__FreeBSD__) || defined(__NetBSD__) - void *result; - if(posix_memalign(&result, alignment, size)) { - /* Non-zero means allocation error - * either no allocation or bad alignment value. - */ - return NULL; - } - return result; -#else /* This is for Linux. */ - return memalign(alignment, size); + void *result; + if (posix_memalign(&result, alignment, size)) { + /* Non-zero means allocation error + * either no allocation or bad alignment value. + */ + return NULL; + } + return result; +#else /* This is for Linux. */ + return memalign(alignment, size); #endif } void util_aligned_free(void *ptr) { #if defined(WITH_BLENDER_GUARDEDALLOC) - if(ptr != NULL) { - MEM_freeN(ptr); - } + if (ptr != NULL) { + MEM_freeN(ptr); + } #elif defined(_WIN32) - _aligned_free(ptr); + _aligned_free(ptr); #else - free(ptr); + free(ptr); #endif } diff --git a/intern/cycles/util/util_aligned_malloc.h b/intern/cycles/util/util_aligned_malloc.h index 66d77c83454..0f006e95f6a 100644 --- a/intern/cycles/util/util_aligned_malloc.h +++ b/intern/cycles/util/util_aligned_malloc.h @@ -32,4 +32,4 @@ void util_aligned_free(void *ptr); CCL_NAMESPACE_END -#endif /* __UTIL_ALIGNED_MALLOC_H__ */ +#endif /* __UTIL_ALIGNED_MALLOC_H__ */ diff --git a/intern/cycles/util/util_args.h b/intern/cycles/util/util_args.h index 9fe54b14d77..be6f2c2b9f1 100644 --- a/intern/cycles/util/util_args.h +++ b/intern/cycles/util/util_args.h @@ -28,4 +28,4 @@ OIIO_NAMESPACE_USING CCL_NAMESPACE_END -#endif /* __UTIL_ARGS_H__ */ +#endif /* __UTIL_ARGS_H__ */ diff --git a/intern/cycles/util/util_array.h b/intern/cycles/util/util_array.h index 5f18d434c31..1d7e39344f6 100644 --- a/intern/cycles/util/util_array.h +++ b/intern/cycles/util/util_array.h @@ -34,256 +34,250 @@ CCL_NAMESPACE_BEGIN * - if this is used, we are not tempted to use inefficient operations * - aligned allocation for CPU native data types */ -template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> -class array -{ -public: - array() - : data_(NULL), - datasize_(0), - capacity_(0) - {} - - explicit array(size_t newsize) - { - if(newsize == 0) { - data_ = NULL; - datasize_ = 0; - capacity_ = 0; - } - else { - data_ = mem_allocate(newsize); - datasize_ = newsize; - capacity_ = datasize_; - } - } - - array(const array& from) - { - if(from.datasize_ == 0) { - data_ = NULL; - datasize_ = 0; - capacity_ = 0; - } - else { - data_ = mem_allocate(from.datasize_); - memcpy(data_, from.data_, from.datasize_*sizeof(T)); - datasize_ = from.datasize_; - capacity_ = datasize_; - } - } - - array& operator=(const array& from) - { - if(this != &from) { - resize(from.size()); - memcpy((void*)data_, from.data_, datasize_*sizeof(T)); - } - - return *this; - } - - array& operator=(const vector<T>& from) - { - resize(from.size()); - - if(from.size() > 0) { - memcpy(data_, &from[0], datasize_*sizeof(T)); - } - - return *this; - } - - ~array() - { - mem_free(data_, capacity_); - } - - bool operator==(const array<T>& other) const - { - if(datasize_ != other.datasize_) { - return false; - } - - return memcmp(data_, other.data_, datasize_*sizeof(T)) == 0; - } - - bool operator!=(const array<T>& other) const - { - return !(*this == other); - } - - void steal_data(array& from) - { - if(this != &from) { - clear(); - - data_ = from.data_; - datasize_ = from.datasize_; - capacity_ = from.capacity_; - - from.data_ = NULL; - from.datasize_ = 0; - from.capacity_ = 0; - } - } - - T *steal_pointer() - { - T *ptr = data_; - data_ = NULL; - clear(); - return ptr; - } - - T* resize(size_t newsize) - { - if(newsize == 0) { - clear(); - } - else if(newsize != datasize_) { - if(newsize > capacity_) { - T *newdata = mem_allocate(newsize); - if(newdata == NULL) { - /* Allocation failed, likely out of memory. */ - clear(); - return NULL; - } - else if(data_ != NULL) { - memcpy((void *)newdata, - data_, - ((datasize_ < newsize)? datasize_: newsize)*sizeof(T)); - mem_free(data_, capacity_); - } - data_ = newdata; - capacity_ = newsize; - } - datasize_ = newsize; - } - return data_; - } - - T* resize(size_t newsize, const T& value) - { - size_t oldsize = size(); - resize(newsize); - - for(size_t i = oldsize; i < size(); i++) { - data_[i] = value; - } - - return data_; - } - - void clear() - { - if(data_ != NULL) { - mem_free(data_, capacity_); - data_ = NULL; - } - datasize_ = 0; - capacity_ = 0; - } - - size_t empty() const - { - return datasize_ == 0; - } - - size_t size() const - { - return datasize_; - } - - T* data() - { - return data_; - } - - const T* data() const - { - return data_; - } - - T& operator[](size_t i) const - { - assert(i < datasize_); - return data_[i]; - } - - void reserve(size_t newcapacity) - { - if(newcapacity > capacity_) { - T *newdata = mem_allocate(newcapacity); - if(data_ != NULL) { - memcpy(newdata, data_, ((datasize_ < newcapacity)? datasize_: newcapacity)*sizeof(T)); - mem_free(data_, capacity_); - } - data_ = newdata; - capacity_ = newcapacity; - } - } - - size_t capacity() const - { - return capacity_; - } - - // do not use this method unless you are sure the code is not performance critical - void push_back_slow(const T& t) - { - if(capacity_ == datasize_) - { - reserve(datasize_ == 0 ? 1 : (size_t)((datasize_ + 1) * 1.2)); - } - - data_[datasize_++] = t; - } - - void push_back_reserved(const T& t) - { - assert(datasize_ < capacity_); - push_back_slow(t); - } - - void append(const array<T>& from) - { - if(from.size()) { - size_t old_size = size(); - resize(old_size + from.size()); - memcpy(data_ + old_size, from.data(), sizeof(T) * from.size()); - } - } - -protected: - inline T* mem_allocate(size_t N) - { - if(N == 0) { - return NULL; - } - T *mem = (T*)util_aligned_malloc(sizeof(T)*N, alignment); - if(mem != NULL) { - util_guarded_mem_alloc(sizeof(T)*N); - } - else { - throw std::bad_alloc(); - } - return mem; - } - - inline void mem_free(T *mem, size_t N) - { - if(mem != NULL) { - util_guarded_mem_free(sizeof(T)*N); - util_aligned_free(mem); - } - } - - T *data_; - size_t datasize_; - size_t capacity_; +template<typename T, size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES> class array { + public: + array() : data_(NULL), datasize_(0), capacity_(0) + { + } + + explicit array(size_t newsize) + { + if (newsize == 0) { + data_ = NULL; + datasize_ = 0; + capacity_ = 0; + } + else { + data_ = mem_allocate(newsize); + datasize_ = newsize; + capacity_ = datasize_; + } + } + + array(const array &from) + { + if (from.datasize_ == 0) { + data_ = NULL; + datasize_ = 0; + capacity_ = 0; + } + else { + data_ = mem_allocate(from.datasize_); + memcpy(data_, from.data_, from.datasize_ * sizeof(T)); + datasize_ = from.datasize_; + capacity_ = datasize_; + } + } + + array &operator=(const array &from) + { + if (this != &from) { + resize(from.size()); + memcpy((void *)data_, from.data_, datasize_ * sizeof(T)); + } + + return *this; + } + + array &operator=(const vector<T> &from) + { + resize(from.size()); + + if (from.size() > 0) { + memcpy(data_, &from[0], datasize_ * sizeof(T)); + } + + return *this; + } + + ~array() + { + mem_free(data_, capacity_); + } + + bool operator==(const array<T> &other) const + { + if (datasize_ != other.datasize_) { + return false; + } + + return memcmp(data_, other.data_, datasize_ * sizeof(T)) == 0; + } + + bool operator!=(const array<T> &other) const + { + return !(*this == other); + } + + void steal_data(array &from) + { + if (this != &from) { + clear(); + + data_ = from.data_; + datasize_ = from.datasize_; + capacity_ = from.capacity_; + + from.data_ = NULL; + from.datasize_ = 0; + from.capacity_ = 0; + } + } + + T *steal_pointer() + { + T *ptr = data_; + data_ = NULL; + clear(); + return ptr; + } + + T *resize(size_t newsize) + { + if (newsize == 0) { + clear(); + } + else if (newsize != datasize_) { + if (newsize > capacity_) { + T *newdata = mem_allocate(newsize); + if (newdata == NULL) { + /* Allocation failed, likely out of memory. */ + clear(); + return NULL; + } + else if (data_ != NULL) { + memcpy( + (void *)newdata, data_, ((datasize_ < newsize) ? datasize_ : newsize) * sizeof(T)); + mem_free(data_, capacity_); + } + data_ = newdata; + capacity_ = newsize; + } + datasize_ = newsize; + } + return data_; + } + + T *resize(size_t newsize, const T &value) + { + size_t oldsize = size(); + resize(newsize); + + for (size_t i = oldsize; i < size(); i++) { + data_[i] = value; + } + + return data_; + } + + void clear() + { + if (data_ != NULL) { + mem_free(data_, capacity_); + data_ = NULL; + } + datasize_ = 0; + capacity_ = 0; + } + + size_t empty() const + { + return datasize_ == 0; + } + + size_t size() const + { + return datasize_; + } + + T *data() + { + return data_; + } + + const T *data() const + { + return data_; + } + + T &operator[](size_t i) const + { + assert(i < datasize_); + return data_[i]; + } + + void reserve(size_t newcapacity) + { + if (newcapacity > capacity_) { + T *newdata = mem_allocate(newcapacity); + if (data_ != NULL) { + memcpy(newdata, data_, ((datasize_ < newcapacity) ? datasize_ : newcapacity) * sizeof(T)); + mem_free(data_, capacity_); + } + data_ = newdata; + capacity_ = newcapacity; + } + } + + size_t capacity() const + { + return capacity_; + } + + // do not use this method unless you are sure the code is not performance critical + void push_back_slow(const T &t) + { + if (capacity_ == datasize_) { + reserve(datasize_ == 0 ? 1 : (size_t)((datasize_ + 1) * 1.2)); + } + + data_[datasize_++] = t; + } + + void push_back_reserved(const T &t) + { + assert(datasize_ < capacity_); + push_back_slow(t); + } + + void append(const array<T> &from) + { + if (from.size()) { + size_t old_size = size(); + resize(old_size + from.size()); + memcpy(data_ + old_size, from.data(), sizeof(T) * from.size()); + } + } + + protected: + inline T *mem_allocate(size_t N) + { + if (N == 0) { + return NULL; + } + T *mem = (T *)util_aligned_malloc(sizeof(T) * N, alignment); + if (mem != NULL) { + util_guarded_mem_alloc(sizeof(T) * N); + } + else { + throw std::bad_alloc(); + } + return mem; + } + + inline void mem_free(T *mem, size_t N) + { + if (mem != NULL) { + util_guarded_mem_free(sizeof(T) * N); + util_aligned_free(mem); + } + } + + T *data_; + size_t datasize_; + size_t capacity_; }; CCL_NAMESPACE_END -#endif /* __UTIL_ARRAY_H__ */ +#endif /* __UTIL_ARRAY_H__ */ diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h index 477b667a6fe..a8ea1dc925e 100644 --- a/intern/cycles/util/util_atomic.h +++ b/intern/cycles/util/util_atomic.h @@ -20,94 +20,97 @@ #ifndef __KERNEL_GPU__ /* Using atomic ops header from Blender. */ -#include "atomic_ops.h" +# include "atomic_ops.h" -#define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) -#define atomic_compare_and_swap_float(p, old_val, new_val) atomic_cas_float((p), (old_val), (new_val)) +# define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x)) +# define atomic_compare_and_swap_float(p, old_val, new_val) \ + atomic_cas_float((p), (old_val), (new_val)) -#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) -#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) +# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) +# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1) -#define CCL_LOCAL_MEM_FENCE 0 -#define ccl_barrier(flags) ((void) 0) +# define CCL_LOCAL_MEM_FENCE 0 +# define ccl_barrier(flags) ((void)0) -#else /* __KERNEL_GPU__ */ +#else /* __KERNEL_GPU__ */ -#ifdef __KERNEL_OPENCL__ +# ifdef __KERNEL_OPENCL__ /* Float atomics implementation credits: * http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html */ ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *source, - const float operand) + const float operand) { - union { - unsigned int int_value; - float float_value; - } new_value; - union { - unsigned int int_value; - float float_value; - } prev_value; - do { - prev_value.float_value = *source; - new_value.float_value = prev_value.float_value + operand; - } while(atomic_cmpxchg((volatile ccl_global unsigned int *)source, - prev_value.int_value, - new_value.int_value) != prev_value.int_value); - return new_value.float_value; + union { + unsigned int int_value; + float float_value; + } new_value; + union { + unsigned int int_value; + float float_value; + } prev_value; + do { + prev_value.float_value = *source; + new_value.float_value = prev_value.float_value + operand; + } while (atomic_cmpxchg((volatile ccl_global unsigned int *)source, + prev_value.int_value, + new_value.int_value) != prev_value.int_value); + return new_value.float_value; } ccl_device_inline float atomic_compare_and_swap_float(volatile ccl_global float *dest, - const float old_val, const float new_val) + const float old_val, + const float new_val) { - union { - unsigned int int_value; - float float_value; - } new_value, prev_value, result; - prev_value.float_value = old_val; - new_value.float_value = new_val; - result.int_value = atomic_cmpxchg((volatile ccl_global unsigned int *)dest, - prev_value.int_value, new_value.int_value); - return result.float_value; + union { + unsigned int int_value; + float float_value; + } new_value, prev_value, result; + prev_value.float_value = old_val; + new_value.float_value = new_val; + result.int_value = atomic_cmpxchg( + (volatile ccl_global unsigned int *)dest, prev_value.int_value, new_value.int_value); + return result.float_value; } -#define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x)) -#define atomic_fetch_and_inc_uint32(p) atomic_inc((p)) -#define atomic_fetch_and_dec_uint32(p) atomic_dec((p)) +# define atomic_fetch_and_add_uint32(p, x) atomic_add((p), (x)) +# define atomic_fetch_and_inc_uint32(p) atomic_inc((p)) +# define atomic_fetch_and_dec_uint32(p) atomic_dec((p)) -#define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE -#define ccl_barrier(flags) barrier(flags) +# define CCL_LOCAL_MEM_FENCE CLK_LOCAL_MEM_FENCE +# define ccl_barrier(flags) barrier(flags) -#endif /* __KERNEL_OPENCL__ */ +# endif /* __KERNEL_OPENCL__ */ -#ifdef __KERNEL_CUDA__ +# ifdef __KERNEL_CUDA__ -#define atomic_add_and_fetch_float(p, x) (atomicAdd((float*)(p), (float)(x)) + (float)(x)) +# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x)) -#define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int*)(p), (unsigned int)(x)) -#define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int*)(p), (unsigned int)(x)) -#define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) -#define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) +# define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int *)(p), (unsigned int)(x)) +# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x)) +# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1) +# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1) ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest, - const float old_val, const float new_val) + const float old_val, + const float new_val) { - union { - unsigned int int_value; - float float_value; - } new_value, prev_value, result; - prev_value.float_value = old_val; - new_value.float_value = new_val; - result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value,new_value.int_value); - return result.float_value; + union { + unsigned int int_value; + float float_value; + } new_value, prev_value, result; + prev_value.float_value = old_val; + new_value.float_value = new_val; + result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value, new_value.int_value); + return result.float_value; } -#define CCL_LOCAL_MEM_FENCE -#define ccl_barrier(flags) __syncthreads() +# define CCL_LOCAL_MEM_FENCE +# define ccl_barrier(flags) __syncthreads() -#endif /* __KERNEL_CUDA__ */ +# endif /* __KERNEL_CUDA__ */ -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ -#endif /* __UTIL_ATOMIC_H__ */ +#endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h index 25ef39d39ae..54dd8068eca 100644 --- a/intern/cycles/util/util_avxb.h +++ b/intern/cycles/util/util_avxb.h @@ -16,125 +16,214 @@ */ #ifndef __UTIL_AVXB_H__ -#define __UTIL_AVXB_H__ +# define __UTIL_AVXB_H__ CCL_NAMESPACE_BEGIN struct avxf; /*! 4-wide SSE bool type. */ -struct avxb -{ - typedef avxb Mask; // mask type - typedef avxf Float; // float type - - enum { size = 8 }; // number of SIMD elements - union { __m256 m256; int32_t v[8]; }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxb ( ) {} - __forceinline avxb ( const avxb& other ) { m256 = other.m256; } - __forceinline avxb& operator=( const avxb& other ) { m256 = other.m256; return *this; } - - __forceinline avxb( const __m256 input ) : m256(input) {} - __forceinline operator const __m256&( void ) const { return m256; } - __forceinline operator const __m256i( void ) const { return _mm256_castps_si256(m256); } - __forceinline operator const __m256d( void ) const { return _mm256_castps_pd(m256); } - - //////////////////////////////////////////////////////////////////////////////// - /// Constants - //////////////////////////////////////////////////////////////////////////////// - - __forceinline avxb( FalseTy ) : m256(_mm256_setzero_ps()) {} - __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) {} - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline bool operator []( const size_t i ) const { assert(i < 8); return (_mm256_movemask_ps(m256) >> i) & 1; } - __forceinline int32_t& operator []( const size_t i ) { assert(i < 8); return v[i]; } +struct avxb { + typedef avxb Mask; // mask type + typedef avxf Float; // float type + + enum { size = 8 }; // number of SIMD elements + union { + __m256 m256; + int32_t v[8]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxb() + { + } + __forceinline avxb(const avxb &other) + { + m256 = other.m256; + } + __forceinline avxb &operator=(const avxb &other) + { + m256 = other.m256; + return *this; + } + + __forceinline avxb(const __m256 input) : m256(input) + { + } + __forceinline operator const __m256 &(void)const + { + return m256; + } + __forceinline operator const __m256i(void) const + { + return _mm256_castps_si256(m256); + } + __forceinline operator const __m256d(void) const + { + return _mm256_castps_pd(m256); + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline avxb(FalseTy) : m256(_mm256_setzero_ps()) + { + } + __forceinline avxb(TrueTy) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator[](const size_t i) const + { + assert(i < 8); + return (_mm256_movemask_ps(m256) >> i) & 1; + } + __forceinline int32_t &operator[](const size_t i) + { + assert(i < 8); + return v[i]; + } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxb operator !( const avxb& a ) { return _mm256_xor_ps(a, avxb(True)); } +__forceinline const avxb operator!(const avxb &a) +{ + return _mm256_xor_ps(a, avxb(True)); +} //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxb operator &( const avxb& a, const avxb& b ) { return _mm256_and_ps(a, b); } -__forceinline const avxb operator |( const avxb& a, const avxb& b ) { return _mm256_or_ps (a, b); } -__forceinline const avxb operator ^( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); } +__forceinline const avxb operator&(const avxb &a, const avxb &b) +{ + return _mm256_and_ps(a, b); +} +__forceinline const avxb operator|(const avxb &a, const avxb &b) +{ + return _mm256_or_ps(a, b); +} +__forceinline const avxb operator^(const avxb &a, const avxb &b) +{ + return _mm256_xor_ps(a, b); +} //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxb operator &=( avxb& a, const avxb& b ) { return a = a & b; } -__forceinline const avxb operator |=( avxb& a, const avxb& b ) { return a = a | b; } -__forceinline const avxb operator ^=( avxb& a, const avxb& b ) { return a = a ^ b; } +__forceinline const avxb operator&=(avxb &a, const avxb &b) +{ + return a = a & b; +} +__forceinline const avxb operator|=(avxb &a, const avxb &b) +{ + return a = a | b; +} +__forceinline const avxb operator^=(avxb &a, const avxb &b) +{ + return a = a ^ b; +} //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxb operator !=( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); } -__forceinline const avxb operator ==( const avxb& a, const avxb& b ) -{ -#ifdef __KERNEL_AVX2__ - return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); -#else - __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0)); - __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1)); - __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0)); - __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1)); - __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo); - __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi); - __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1); - return _mm256_castsi256_ps(result); -#endif +__forceinline const avxb operator!=(const avxb &a, const avxb &b) +{ + return _mm256_xor_ps(a, b); +} +__forceinline const avxb operator==(const avxb &a, const avxb &b) +{ +# ifdef __KERNEL_AVX2__ + return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); +# else + __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0)); + __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1)); + __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0)); + __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1)); + __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo); + __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi); + __m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1); + return _mm256_castsi256_ps(result); +# endif } -__forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) { -#if defined(__KERNEL_SSE41__) - return _mm256_blendv_ps(f, t, m); -#else - return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f)); -#endif +__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f) +{ +# if defined(__KERNEL_SSE41__) + return _mm256_blendv_ps(f, t, m); +# else + return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f)); +# endif } //////////////////////////////////////////////////////////////////////////////// /// Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxb unpacklo( const avxb& a, const avxb& b ) { return _mm256_unpacklo_ps(a, b); } -__forceinline const avxb unpackhi( const avxb& a, const avxb& b ) { return _mm256_unpackhi_ps(a, b); } +__forceinline const avxb unpacklo(const avxb &a, const avxb &b) +{ + return _mm256_unpacklo_ps(a, b); +} +__forceinline const avxb unpackhi(const avxb &a, const avxb &b) +{ + return _mm256_unpackhi_ps(a, b); +} //////////////////////////////////////////////////////////////////////////////// /// Reduction Operations //////////////////////////////////////////////////////////////////////////////// -#if defined(__KERNEL_SSE41__) -__forceinline size_t popcnt( const avxb& a ) { return __popcnt(_mm256_movemask_ps(a)); } -#else -__forceinline size_t popcnt( const avxb& a ) { return bool(a[0])+bool(a[1])+bool(a[2])+bool(a[3])+bool(a[4])+ - bool(a[5])+bool(a[6])+bool(a[7]); } -#endif +# if defined(__KERNEL_SSE41__) +__forceinline size_t popcnt(const avxb &a) +{ + return __popcnt(_mm256_movemask_ps(a)); +} +# else +__forceinline size_t popcnt(const avxb &a) +{ + return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]) + bool(a[4]) + bool(a[5]) + bool(a[6]) + + bool(a[7]); +} +# endif -__forceinline bool reduce_and( const avxb& a ) { return _mm256_movemask_ps(a) == 0xf; } -__forceinline bool reduce_or ( const avxb& a ) { return _mm256_movemask_ps(a) != 0x0; } -__forceinline bool all ( const avxb& b ) { return _mm256_movemask_ps(b) == 0xf; } -__forceinline bool any ( const avxb& b ) { return _mm256_movemask_ps(b) != 0x0; } -__forceinline bool none ( const avxb& b ) { return _mm256_movemask_ps(b) == 0x0; } +__forceinline bool reduce_and(const avxb &a) +{ + return _mm256_movemask_ps(a) == 0xf; +} +__forceinline bool reduce_or(const avxb &a) +{ + return _mm256_movemask_ps(a) != 0x0; +} +__forceinline bool all(const avxb &b) +{ + return _mm256_movemask_ps(b) == 0xf; +} +__forceinline bool any(const avxb &b) +{ + return _mm256_movemask_ps(b) != 0x0; +} +__forceinline bool none(const avxb &b) +{ + return _mm256_movemask_ps(b) == 0x0; +} -__forceinline size_t movemask( const avxb& a ) { return _mm256_movemask_ps(a); } +__forceinline size_t movemask(const avxb &a) +{ + return _mm256_movemask_ps(a); +} //////////////////////////////////////////////////////////////////////////////// /// Debug Functions @@ -142,8 +231,7 @@ __forceinline size_t movemask( const avxb& a ) { return _mm256_movemask_ps(a); } ccl_device_inline void print_avxb(const char *label, const avxb &a) { - printf("%s: %d %d %d %d %d %d %d %d\n", - label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); + printf("%s: %d %d %d %d %d %d %d %d\n", label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]); } #endif diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h index f00c722f25b..156607e65fb 100644 --- a/intern/cycles/util/util_avxf.h +++ b/intern/cycles/util/util_avxf.h @@ -15,217 +15,330 @@ */ #ifndef __UTIL_AVXF_H__ -#define __UTIL_AVXF_H__ +# define __UTIL_AVXF_H__ CCL_NAMESPACE_BEGIN struct avxb; -struct avxf -{ - typedef avxf Float; - - enum { size = 8 }; /* Number of SIMD elements. */ - - union { - __m256 m256; - float f[8]; - int i[8]; - }; - - __forceinline avxf () {} - __forceinline avxf (const avxf& other) { m256 = other.m256; } - __forceinline avxf& operator=(const avxf& other) { m256 = other.m256; return *this; } - - __forceinline avxf(const __m256 a) : m256(a) {} - __forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps (a)) {} - - __forceinline operator const __m256&() const { return m256; } - __forceinline operator __m256&() { return m256; } - - __forceinline avxf (float a) : m256(_mm256_set1_ps(a)) {} - - __forceinline avxf(float high32x4, float low32x4) : - m256(_mm256_set_ps(high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4)) {} - - __forceinline avxf(float a3, float a2, float a1, float a0) : - m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0)) {} - - __forceinline avxf(float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) : - m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) {} - - __forceinline avxf(float3 a) : - m256(_mm256_set_ps(a.w, a.z, a.y, a.x, a.w, a.z, a.y, a.x)) {} - - - __forceinline avxf(int a3, int a2, int a1, int a0) - { - const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0); - m256 = _mm256_castsi256_ps(foo); - } - - - __forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0) - { - const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0); - m256 = _mm256_castsi256_ps(foo); - } - - __forceinline avxf(__m128 a, __m128 b) - { - const __m256 foo = _mm256_castps128_ps256(a); - m256 = _mm256_insertf128_ps(foo, b, 1); - } - - __forceinline const float& operator [](const size_t i) const { assert(i < 8); return f[i]; } - __forceinline float& operator [](const size_t i) { assert(i < 8); return f[i]; } +struct avxf { + typedef avxf Float; + + enum { size = 8 }; /* Number of SIMD elements. */ + + union { + __m256 m256; + float f[8]; + int i[8]; + }; + + __forceinline avxf() + { + } + __forceinline avxf(const avxf &other) + { + m256 = other.m256; + } + __forceinline avxf &operator=(const avxf &other) + { + m256 = other.m256; + return *this; + } + + __forceinline avxf(const __m256 a) : m256(a) + { + } + __forceinline avxf(const __m256i a) : m256(_mm256_castsi256_ps(a)) + { + } + + __forceinline operator const __m256 &() const + { + return m256; + } + __forceinline operator __m256 &() + { + return m256; + } + + __forceinline avxf(float a) : m256(_mm256_set1_ps(a)) + { + } + + __forceinline avxf(float high32x4, float low32x4) + : m256(_mm256_set_ps( + high32x4, high32x4, high32x4, high32x4, low32x4, low32x4, low32x4, low32x4)) + { + } + + __forceinline avxf(float a3, float a2, float a1, float a0) + : m256(_mm256_set_ps(a3, a2, a1, a0, a3, a2, a1, a0)) + { + } + + __forceinline avxf( + float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) + : m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) + { + } + + __forceinline avxf(float3 a) : m256(_mm256_set_ps(a.w, a.z, a.y, a.x, a.w, a.z, a.y, a.x)) + { + } + + __forceinline avxf(int a3, int a2, int a1, int a0) + { + const __m256i foo = _mm256_set_epi32(a3, a2, a1, a0, a3, a2, a1, a0); + m256 = _mm256_castsi256_ps(foo); + } + + __forceinline avxf(int a7, int a6, int a5, int a4, int a3, int a2, int a1, int a0) + { + const __m256i foo = _mm256_set_epi32(a7, a6, a5, a4, a3, a2, a1, a0); + m256 = _mm256_castsi256_ps(foo); + } + + __forceinline avxf(__m128 a, __m128 b) + { + const __m256 foo = _mm256_castps128_ps256(a); + m256 = _mm256_insertf128_ps(foo, b, 1); + } + + __forceinline const float &operator[](const size_t i) const + { + assert(i < 8); + return f[i]; + } + __forceinline float &operator[](const size_t i) + { + assert(i < 8); + return f[i]; + } }; -__forceinline avxf cross(const avxf& a, const avxf& b) +__forceinline avxf cross(const avxf &a, const avxf &b) { - avxf r(0.0, a[4]*b[5] - a[5]*b[4], a[6]*b[4] - a[4]*b[6], a[5]*b[6] - a[6]*b[5], - 0.0, a[0]*b[1] - a[1]*b[0], a[2]*b[0] - a[0]*b[2], a[1]*b[2] - a[2]*b[1]); - return r; + avxf r(0.0, + a[4] * b[5] - a[5] * b[4], + a[6] * b[4] - a[4] * b[6], + a[5] * b[6] - a[6] * b[5], + 0.0, + a[0] * b[1] - a[1] * b[0], + a[2] * b[0] - a[0] * b[2], + a[1] * b[2] - a[2] * b[1]); + return r; } -__forceinline void dot3(const avxf& a, const avxf& b, float &den, float &den2) +__forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2) { - const avxf t = _mm256_mul_ps(a.m256, b.m256); - den = ((float*)&t)[0] + ((float*)&t)[1] + ((float*)&t)[2]; - den2 = ((float*)&t)[4] + ((float*)&t)[5] + ((float*)&t)[6]; + const avxf t = _mm256_mul_ps(a.m256, b.m256); + den = ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; + den2 = ((float *)&t)[4] + ((float *)&t)[5] + ((float *)&t)[6]; } //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxf mm256_sqrt(const avxf& a) { return _mm256_sqrt_ps(a.m256); } +__forceinline const avxf mm256_sqrt(const avxf &a) +{ + return _mm256_sqrt_ps(a.m256); +} //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxf operator +(const avxf& a, const avxf& b) { return _mm256_add_ps(a.m256, b.m256); } -__forceinline const avxf operator +(const avxf& a, const float& b) { return a + avxf(b); } -__forceinline const avxf operator +(const float& a, const avxf& b) { return avxf(a) + b; } +__forceinline const avxf operator+(const avxf &a, const avxf &b) +{ + return _mm256_add_ps(a.m256, b.m256); +} +__forceinline const avxf operator+(const avxf &a, const float &b) +{ + return a + avxf(b); +} +__forceinline const avxf operator+(const float &a, const avxf &b) +{ + return avxf(a) + b; +} -__forceinline const avxf operator -(const avxf& a, const avxf& b) { return _mm256_sub_ps(a.m256, b.m256); } -__forceinline const avxf operator -(const avxf& a, const float& b) { return a - avxf(b); } -__forceinline const avxf operator -(const float& a, const avxf& b) { return avxf(a) - b; } +__forceinline const avxf operator-(const avxf &a, const avxf &b) +{ + return _mm256_sub_ps(a.m256, b.m256); +} +__forceinline const avxf operator-(const avxf &a, const float &b) +{ + return a - avxf(b); +} +__forceinline const avxf operator-(const float &a, const avxf &b) +{ + return avxf(a) - b; +} -__forceinline const avxf operator *(const avxf& a, const avxf& b) { return _mm256_mul_ps(a.m256, b.m256); } -__forceinline const avxf operator *(const avxf& a, const float& b) { return a * avxf(b); } -__forceinline const avxf operator *(const float& a, const avxf& b) { return avxf(a) * b; } +__forceinline const avxf operator*(const avxf &a, const avxf &b) +{ + return _mm256_mul_ps(a.m256, b.m256); +} +__forceinline const avxf operator*(const avxf &a, const float &b) +{ + return a * avxf(b); +} +__forceinline const avxf operator*(const float &a, const avxf &b) +{ + return avxf(a) * b; +} -__forceinline const avxf operator /(const avxf& a, const avxf& b) { return _mm256_div_ps(a.m256,b.m256); } -__forceinline const avxf operator /(const avxf& a, const float& b) { return a/avxf(b); } -__forceinline const avxf operator /(const float& a, const avxf& b) { return avxf(a)/b; } +__forceinline const avxf operator/(const avxf &a, const avxf &b) +{ + return _mm256_div_ps(a.m256, b.m256); +} +__forceinline const avxf operator/(const avxf &a, const float &b) +{ + return a / avxf(b); +} +__forceinline const avxf operator/(const float &a, const avxf &b) +{ + return avxf(a) / b; +} -__forceinline const avxf operator|(const avxf& a, const avxf& b) { return _mm256_or_ps(a.m256,b.m256); } +__forceinline const avxf operator|(const avxf &a, const avxf &b) +{ + return _mm256_or_ps(a.m256, b.m256); +} -__forceinline const avxf operator^(const avxf& a, const avxf& b) { return _mm256_xor_ps(a.m256,b.m256); } +__forceinline const avxf operator^(const avxf &a, const avxf &b) +{ + return _mm256_xor_ps(a.m256, b.m256); +} -__forceinline const avxf operator&(const avxf& a, const avxf& b) { return _mm256_and_ps(a.m256,b.m256); } +__forceinline const avxf operator&(const avxf &a, const avxf &b) +{ + return _mm256_and_ps(a.m256, b.m256); +} -__forceinline const avxf max(const avxf& a, const avxf& b) { return _mm256_max_ps(a.m256, b.m256); } -__forceinline const avxf min(const avxf& a, const avxf& b) { return _mm256_min_ps(a.m256, b.m256); } +__forceinline const avxf max(const avxf &a, const avxf &b) +{ + return _mm256_max_ps(a.m256, b.m256); +} +__forceinline const avxf min(const avxf &a, const avxf &b) +{ + return _mm256_min_ps(a.m256, b.m256); +} //////////////////////////////////////////////////////////////////////////////// /// Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxf shuffle(const avxf& a, const __m256i &shuf) { - return _mm256_permutevar_ps(a, shuf); +__forceinline const avxf shuffle(const avxf &a, const __m256i &shuf) +{ + return _mm256_permutevar_ps(a, shuf); } -template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7> __forceinline const avxf shuffle(const avxf& a) { - return _mm256_permutevar_ps(a, _mm256_set_epi32( i7,i6,i5,i4 ,i3,i2,i1,i0)); +template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7> +__forceinline const avxf shuffle(const avxf &a) +{ + return _mm256_permutevar_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); } -template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const avxf shuffle(const avxf& a, const avxf& b) { - return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +template<size_t i0, size_t i1, size_t i2, size_t i3> +__forceinline const avxf shuffle(const avxf &a, const avxf &b) +{ + return _mm256_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } -template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const avxf shuffle(const avxf& a) { - return shuffle<i0,i1,i2,i3>(a,a); +template<size_t i0, size_t i1, size_t i2, size_t i3> +__forceinline const avxf shuffle(const avxf &a) +{ + return shuffle<i0, i1, i2, i3>(a, a); } -template<size_t i0> __forceinline const avxf shuffle(const avxf& a, const avxf& b) { - return shuffle<i0,i0,i0,i0>(a, b); +template<size_t i0> __forceinline const avxf shuffle(const avxf &a, const avxf &b) +{ + return shuffle<i0, i0, i0, i0>(a, b); } -template<size_t i0> __forceinline const avxf shuffle(const avxf& a) { - return shuffle<i0>(a,a); +template<size_t i0> __forceinline const avxf shuffle(const avxf &a) +{ + return shuffle<i0>(a, a); } -template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7> __forceinline const avxf permute(const avxf& a) { -#ifdef __KERNEL_AVX2__ - return _mm256_permutevar8x32_ps(a,_mm256_set_epi32( i7,i6,i5,i4 ,i3,i2,i1,i0)); -#else - float temp[8]; - _mm256_storeu_ps((float*)&temp, a); - return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]); -#endif +template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7> +__forceinline const avxf permute(const avxf &a) +{ +# ifdef __KERNEL_AVX2__ + return _mm256_permutevar8x32_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0)); +# else + float temp[8]; + _mm256_storeu_ps((float *)&temp, a); + return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]); +# endif } -template<int S0, int S1, int S2, int S3,int S4,int S5,int S6, int S7> +template<int S0, int S1, int S2, int S3, int S4, int S5, int S6, int S7> ccl_device_inline const avxf set_sign_bit(const avxf &a) { - return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31,S2 << 31,S1 << 31,S0 << 31); + return a ^ avxf(S7 << 31, S6 << 31, S5 << 31, S4 << 31, S3 << 31, S2 << 31, S1 << 31, S0 << 31); } -template<size_t S0, size_t S1, size_t S2, size_t S3,size_t S4,size_t S5,size_t S6, size_t S7> +template<size_t S0, size_t S1, size_t S2, size_t S3, size_t S4, size_t S5, size_t S6, size_t S7> ccl_device_inline const avxf blend(const avxf &a, const avxf &b) { - return _mm256_blend_ps(a,b,S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7); + return _mm256_blend_ps( + a, b, S7 << 0 | S6 << 1 | S5 << 2 | S4 << 3 | S3 << 4 | S2 << 5 | S1 << 6 | S0 << 7); } -template<size_t S0, size_t S1, size_t S2, size_t S3 > +template<size_t S0, size_t S1, size_t S2, size_t S3> ccl_device_inline const avxf blend(const avxf &a, const avxf &b) { - return blend<S0,S1,S2,S3,S0,S1,S2,S3>(a,b); + return blend<S0, S1, S2, S3, S0, S1, S2, S3>(a, b); } //#if defined(__KERNEL_SSE41__) -__forceinline avxf maxi(const avxf& a, const avxf& b) { - const avxf ci = _mm256_max_ps(a, b); - return ci; +__forceinline avxf maxi(const avxf &a, const avxf &b) +{ + const avxf ci = _mm256_max_ps(a, b); + return ci; } -__forceinline avxf mini(const avxf& a, const avxf& b) { - const avxf ci = _mm256_min_ps(a, b); - return ci; +__forceinline avxf mini(const avxf &a, const avxf &b) +{ + const avxf ci = _mm256_min_ps(a, b); + return ci; } //#endif //////////////////////////////////////////////////////////////////////////////// /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxf madd (const avxf& a, const avxf& b, const avxf& c) { -#ifdef __KERNEL_AVX2__ - return _mm256_fmadd_ps(a,b,c); -#else - return c+(a*b); -#endif +__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c) +{ +# ifdef __KERNEL_AVX2__ + return _mm256_fmadd_ps(a, b, c); +# else + return c + (a * b); +# endif } -__forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) { -#ifdef __KERNEL_AVX2__ - return _mm256_fnmadd_ps(a, b, c); -#else - return c-(a*b); -#endif +__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c) +{ +# ifdef __KERNEL_AVX2__ + return _mm256_fnmadd_ps(a, b, c); +# else + return c - (a * b); +# endif } -__forceinline const avxf msub(const avxf& a, const avxf& b, const avxf& c) { -#ifdef __KERNEL_AVX2__ - return _mm256_fmsub_ps(a, b, c); -#else - return (a*b) - c; -#endif +__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c) +{ +# ifdef __KERNEL_AVX2__ + return _mm256_fmsub_ps(a, b, c); +# else + return (a * b) - c; +# endif } //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const avxb operator <=(const avxf& a, const avxf& b) { - return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS); +__forceinline const avxb operator<=(const avxf &a, const avxf &b) +{ + return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS); } #endif @@ -236,6 +349,6 @@ __forceinline const avxb operator <=(const avxf& a, const avxf& b) { #endif #define _mm256_loadu2_m128(/* float const* */ hiaddr, /* float const* */ loaddr) \ - _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr)) + _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr)) CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h index fe89e398840..b5c3f1a8954 100644 --- a/intern/cycles/util/util_boundbox.h +++ b/intern/cycles/util/util_boundbox.h @@ -29,257 +29,254 @@ CCL_NAMESPACE_BEGIN /* 3D BoundBox */ -class BoundBox -{ -public: - float3 min, max; - - __forceinline BoundBox() - { - } - - __forceinline BoundBox(const float3& pt) - : min(pt), max(pt) - { - } - - __forceinline BoundBox(const float3& min_, const float3& max_) - : min(min_), max(max_) - { - } - - enum empty_t { empty = 0}; - - __forceinline BoundBox(empty_t) - : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) - { - } - - __forceinline void grow(const float3& pt) - { - /* the order of arguments to min is such that if pt is nan, it will not - * influence the resulting bounding box */ - min = ccl::min(pt, min); - max = ccl::max(pt, max); - } - - __forceinline void grow(const float3& pt, float border) - { - float3 shift = make_float3(border, border, border); - min = ccl::min(pt - shift, min); - max = ccl::max(pt + shift, max); - } - - __forceinline void grow(const BoundBox& bbox) - { - grow(bbox.min); - grow(bbox.max); - } - - __forceinline void grow_safe(const float3& pt) - { - /* the order of arguments to min is such that if pt is nan, it will not - * influence the resulting bounding box */ - if(isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z)) { - min = ccl::min(pt, min); - max = ccl::max(pt, max); - } - } - - __forceinline void grow_safe(const float3& pt, float border) - { - if(isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z) && isfinite(border)) { - float3 shift = make_float3(border, border, border); - min = ccl::min(pt - shift, min); - max = ccl::max(pt + shift, max); - } - } - - __forceinline void grow_safe(const BoundBox& bbox) - { - grow_safe(bbox.min); - grow_safe(bbox.max); - } - - __forceinline void intersect(const BoundBox& bbox) - { - min = ccl::max(min, bbox.min); - max = ccl::min(max, bbox.max); - } - - /* todo: avoid using this */ - __forceinline float safe_area() const - { - if(!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z))) - return 0.0f; - - return area(); - } - - __forceinline float area() const - { - return half_area()*2.0f; - } - - __forceinline float half_area() const - { - float3 d = max - min; - return (d.x*d.z + d.y*d.z + d.x*d.y); - } - - __forceinline float3 center() const - { - return 0.5f*(min + max); - } - - __forceinline float3 center2() const - { - return min + max; - } - - __forceinline float3 size() const - { - return max - min; - } - - __forceinline bool valid() const - { - return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) && - (isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) && - (isfinite(max.x) && isfinite(max.y) && isfinite(max.z)); - } - - BoundBox transformed(const Transform *tfm) const - { - BoundBox result = BoundBox::empty; - - for(int i = 0; i < 8; i++) { - float3 p; - - p.x = (i & 1)? min.x: max.x; - p.y = (i & 2)? min.y: max.y; - p.z = (i & 4)? min.z: max.z; - - result.grow(transform_point(tfm, p)); - } - - return result; - } - - __forceinline bool intersects(const BoundBox& other) - { - float3 center_diff = center() - other.center(), - total_size = (size() + other.size()) * 0.5f; - return fabsf(center_diff.x) <= total_size.x && - fabsf(center_diff.y) <= total_size.y && - fabsf(center_diff.z) <= total_size.z; - } +class BoundBox { + public: + float3 min, max; + + __forceinline BoundBox() + { + } + + __forceinline BoundBox(const float3 &pt) : min(pt), max(pt) + { + } + + __forceinline BoundBox(const float3 &min_, const float3 &max_) : min(min_), max(max_) + { + } + + enum empty_t { empty = 0 }; + + __forceinline BoundBox(empty_t) + : min(make_float3(FLT_MAX, FLT_MAX, FLT_MAX)), max(make_float3(-FLT_MAX, -FLT_MAX, -FLT_MAX)) + { + } + + __forceinline void grow(const float3 &pt) + { + /* the order of arguments to min is such that if pt is nan, it will not + * influence the resulting bounding box */ + min = ccl::min(pt, min); + max = ccl::max(pt, max); + } + + __forceinline void grow(const float3 &pt, float border) + { + float3 shift = make_float3(border, border, border); + min = ccl::min(pt - shift, min); + max = ccl::max(pt + shift, max); + } + + __forceinline void grow(const BoundBox &bbox) + { + grow(bbox.min); + grow(bbox.max); + } + + __forceinline void grow_safe(const float3 &pt) + { + /* the order of arguments to min is such that if pt is nan, it will not + * influence the resulting bounding box */ + if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z)) { + min = ccl::min(pt, min); + max = ccl::max(pt, max); + } + } + + __forceinline void grow_safe(const float3 &pt, float border) + { + if (isfinite(pt.x) && isfinite(pt.y) && isfinite(pt.z) && isfinite(border)) { + float3 shift = make_float3(border, border, border); + min = ccl::min(pt - shift, min); + max = ccl::max(pt + shift, max); + } + } + + __forceinline void grow_safe(const BoundBox &bbox) + { + grow_safe(bbox.min); + grow_safe(bbox.max); + } + + __forceinline void intersect(const BoundBox &bbox) + { + min = ccl::max(min, bbox.min); + max = ccl::min(max, bbox.max); + } + + /* todo: avoid using this */ + __forceinline float safe_area() const + { + if (!((min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z))) + return 0.0f; + + return area(); + } + + __forceinline float area() const + { + return half_area() * 2.0f; + } + + __forceinline float half_area() const + { + float3 d = max - min; + return (d.x * d.z + d.y * d.z + d.x * d.y); + } + + __forceinline float3 center() const + { + return 0.5f * (min + max); + } + + __forceinline float3 center2() const + { + return min + max; + } + + __forceinline float3 size() const + { + return max - min; + } + + __forceinline bool valid() const + { + return (min.x <= max.x) && (min.y <= max.y) && (min.z <= max.z) && + (isfinite(min.x) && isfinite(min.y) && isfinite(min.z)) && + (isfinite(max.x) && isfinite(max.y) && isfinite(max.z)); + } + + BoundBox transformed(const Transform *tfm) const + { + BoundBox result = BoundBox::empty; + + for (int i = 0; i < 8; i++) { + float3 p; + + p.x = (i & 1) ? min.x : max.x; + p.y = (i & 2) ? min.y : max.y; + p.z = (i & 4) ? min.z : max.z; + + result.grow(transform_point(tfm, p)); + } + + return result; + } + + __forceinline bool intersects(const BoundBox &other) + { + float3 center_diff = center() - other.center(), total_size = (size() + other.size()) * 0.5f; + return fabsf(center_diff.x) <= total_size.x && fabsf(center_diff.y) <= total_size.y && + fabsf(center_diff.z) <= total_size.z; + } }; -__forceinline BoundBox merge(const BoundBox& bbox, const float3& pt) +__forceinline BoundBox merge(const BoundBox &bbox, const float3 &pt) { - return BoundBox(min(bbox.min, pt), max(bbox.max, pt)); + return BoundBox(min(bbox.min, pt), max(bbox.max, pt)); } -__forceinline BoundBox merge(const BoundBox& a, const BoundBox& b) +__forceinline BoundBox merge(const BoundBox &a, const BoundBox &b) { - return BoundBox(min(a.min, b.min), max(a.max, b.max)); + return BoundBox(min(a.min, b.min), max(a.max, b.max)); } -__forceinline BoundBox merge(const BoundBox& a, const BoundBox& b, const BoundBox& c, const BoundBox& d) +__forceinline BoundBox merge(const BoundBox &a, + const BoundBox &b, + const BoundBox &c, + const BoundBox &d) { - return merge(merge(a, b), merge(c, d)); + return merge(merge(a, b), merge(c, d)); } -__forceinline BoundBox intersect(const BoundBox& a, const BoundBox& b) +__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b) { - return BoundBox(max(a.min, b.min), min(a.max, b.max)); + return BoundBox(max(a.min, b.min), min(a.max, b.max)); } -__forceinline BoundBox intersect(const BoundBox& a, const BoundBox& b, const BoundBox& c) +__forceinline BoundBox intersect(const BoundBox &a, const BoundBox &b, const BoundBox &c) { - return intersect(a, intersect(b, c)); + return intersect(a, intersect(b, c)); } /* 2D BoundBox */ class BoundBox2D { -public: - float left; - float right; - float bottom; - float top; - - BoundBox2D() - : left(0.0f), right(1.0f), bottom(0.0f), top(1.0f) - { - } - - bool operator==(const BoundBox2D& other) const - { - return (left == other.left && right == other.right && - bottom == other.bottom && top == other.top); - } - - float width() - { - return right - left; - } - - float height() - { - return top - bottom; - } - - BoundBox2D operator*(float f) const - { - BoundBox2D result; - - result.left = left*f; - result.right = right*f; - result.bottom = bottom*f; - result.top = top*f; - - return result; - } - - BoundBox2D subset(const BoundBox2D& other) const - { - BoundBox2D subset; - - subset.left = left + other.left*(right - left); - subset.right = left + other.right*(right - left); - subset.bottom = bottom + other.bottom*(top - bottom); - subset.top = bottom + other.top*(top - bottom); - - return subset; - } - - BoundBox2D make_relative_to(const BoundBox2D& other) const - { - BoundBox2D result; - - result.left = ((left - other.left) / (other.right - other.left)); - result.right = ((right - other.left) / (other.right - other.left)); - result.bottom = ((bottom - other.bottom) / (other.top - other.bottom)); - result.top = ((top - other.bottom) / (other.top - other.bottom)); - - return result; - } - - BoundBox2D clamp(float mn = 0.0f, float mx = 1.0f) - { - BoundBox2D result; - - result.left = ccl::clamp(left, mn, mx); - result.right = ccl::clamp(right, mn, mx); - result.bottom = ccl::clamp(bottom, mn, mx); - result.top = ccl::clamp(top, mn, mx); - - return result; - } + public: + float left; + float right; + float bottom; + float top; + + BoundBox2D() : left(0.0f), right(1.0f), bottom(0.0f), top(1.0f) + { + } + + bool operator==(const BoundBox2D &other) const + { + return (left == other.left && right == other.right && bottom == other.bottom && + top == other.top); + } + + float width() + { + return right - left; + } + + float height() + { + return top - bottom; + } + + BoundBox2D operator*(float f) const + { + BoundBox2D result; + + result.left = left * f; + result.right = right * f; + result.bottom = bottom * f; + result.top = top * f; + + return result; + } + + BoundBox2D subset(const BoundBox2D &other) const + { + BoundBox2D subset; + + subset.left = left + other.left * (right - left); + subset.right = left + other.right * (right - left); + subset.bottom = bottom + other.bottom * (top - bottom); + subset.top = bottom + other.top * (top - bottom); + + return subset; + } + + BoundBox2D make_relative_to(const BoundBox2D &other) const + { + BoundBox2D result; + + result.left = ((left - other.left) / (other.right - other.left)); + result.right = ((right - other.left) / (other.right - other.left)); + result.bottom = ((bottom - other.bottom) / (other.top - other.bottom)); + result.top = ((top - other.bottom) / (other.top - other.bottom)); + + return result; + } + + BoundBox2D clamp(float mn = 0.0f, float mx = 1.0f) + { + BoundBox2D result; + + result.left = ccl::clamp(left, mn, mx); + result.right = ccl::clamp(right, mn, mx); + result.bottom = ccl::clamp(bottom, mn, mx); + result.top = ccl::clamp(top, mn, mx); + + return result; + } }; CCL_NAMESPACE_END -#endif /* __UTIL_BOUNDBOX_H__ */ +#endif /* __UTIL_BOUNDBOX_H__ */ diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h index e6efc7d73fc..ca4c393f66e 100644 --- a/intern/cycles/util/util_color.h +++ b/intern/cycles/util/util_color.h @@ -21,133 +21,147 @@ #include "util/util_types.h" #ifdef __KERNEL_SSE2__ -#include "util/util_simd.h" +# include "util/util_simd.h" #endif CCL_NAMESPACE_BEGIN ccl_device uchar float_to_byte(float val) { - return ((val <= 0.0f) ? 0 : ((val > (1.0f - 0.5f / 255.0f)) ? 255 : (uchar)((255.0f * val) + 0.5f))); + return ((val <= 0.0f) ? 0 : + ((val > (1.0f - 0.5f / 255.0f)) ? 255 : (uchar)((255.0f * val) + 0.5f))); } ccl_device uchar4 color_float_to_byte(float3 c) { - uchar r, g, b; + uchar r, g, b; - r = float_to_byte(c.x); - g = float_to_byte(c.y); - b = float_to_byte(c.z); + r = float_to_byte(c.x); + g = float_to_byte(c.y); + b = float_to_byte(c.z); - return make_uchar4(r, g, b, 0); + return make_uchar4(r, g, b, 0); } ccl_device_inline float3 color_byte_to_float(uchar4 c) { - return make_float3(c.x*(1.0f/255.0f), c.y*(1.0f/255.0f), c.z*(1.0f/255.0f)); + return make_float3(c.x * (1.0f / 255.0f), c.y * (1.0f / 255.0f), c.z * (1.0f / 255.0f)); } ccl_device float color_srgb_to_linear(float c) { - if(c < 0.04045f) - return (c < 0.0f)? 0.0f: c * (1.0f/12.92f); - else - return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f); + if (c < 0.04045f) + return (c < 0.0f) ? 0.0f : c * (1.0f / 12.92f); + else + return powf((c + 0.055f) * (1.0f / 1.055f), 2.4f); } ccl_device float color_linear_to_srgb(float c) { - if(c < 0.0031308f) - return (c < 0.0f)? 0.0f: c * 12.92f; - else - return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f; + if (c < 0.0031308f) + return (c < 0.0f) ? 0.0f : c * 12.92f; + else + return 1.055f * powf(c, 1.0f / 2.4f) - 0.055f; } ccl_device float3 rgb_to_hsv(float3 rgb) { - float cmax, cmin, h, s, v, cdelta; - float3 c; - - cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z)); - cmin = min(rgb.x, min(rgb.y, rgb.z)); - cdelta = cmax - cmin; - - v = cmax; - - if(cmax != 0.0f) { - s = cdelta/cmax; - } - else { - s = 0.0f; - h = 0.0f; - } - - if(s != 0.0f) { - float3 cmax3 = make_float3(cmax, cmax, cmax); - c = (cmax3 - rgb)/cdelta; - - if (rgb.x == cmax) h = c.z - c.y; - else if(rgb.y == cmax) h = 2.0f + c.x - c.z; - else h = 4.0f + c.y - c.x; - - h /= 6.0f; - - if(h < 0.0f) - h += 1.0f; - } - else { - h = 0.0f; - } - - return make_float3(h, s, v); + float cmax, cmin, h, s, v, cdelta; + float3 c; + + cmax = fmaxf(rgb.x, fmaxf(rgb.y, rgb.z)); + cmin = min(rgb.x, min(rgb.y, rgb.z)); + cdelta = cmax - cmin; + + v = cmax; + + if (cmax != 0.0f) { + s = cdelta / cmax; + } + else { + s = 0.0f; + h = 0.0f; + } + + if (s != 0.0f) { + float3 cmax3 = make_float3(cmax, cmax, cmax); + c = (cmax3 - rgb) / cdelta; + + if (rgb.x == cmax) + h = c.z - c.y; + else if (rgb.y == cmax) + h = 2.0f + c.x - c.z; + else + h = 4.0f + c.y - c.x; + + h /= 6.0f; + + if (h < 0.0f) + h += 1.0f; + } + else { + h = 0.0f; + } + + return make_float3(h, s, v); } ccl_device float3 hsv_to_rgb(float3 hsv) { - float i, f, p, q, t, h, s, v; - float3 rgb; - - h = hsv.x; - s = hsv.y; - v = hsv.z; - - if(s != 0.0f) { - if(h == 1.0f) - h = 0.0f; - - h *= 6.0f; - i = floorf(h); - f = h - i; - rgb = make_float3(f, f, f); - p = v*(1.0f-s); - q = v*(1.0f-(s*f)); - t = v*(1.0f-(s*(1.0f-f))); - - if (i == 0.0f) rgb = make_float3(v, t, p); - else if(i == 1.0f) rgb = make_float3(q, v, p); - else if(i == 2.0f) rgb = make_float3(p, v, t); - else if(i == 3.0f) rgb = make_float3(p, q, v); - else if(i == 4.0f) rgb = make_float3(t, p, v); - else rgb = make_float3(v, p, q); - } - else { - rgb = make_float3(v, v, v); - } - - return rgb; + float i, f, p, q, t, h, s, v; + float3 rgb; + + h = hsv.x; + s = hsv.y; + v = hsv.z; + + if (s != 0.0f) { + if (h == 1.0f) + h = 0.0f; + + h *= 6.0f; + i = floorf(h); + f = h - i; + rgb = make_float3(f, f, f); + p = v * (1.0f - s); + q = v * (1.0f - (s * f)); + t = v * (1.0f - (s * (1.0f - f))); + + if (i == 0.0f) + rgb = make_float3(v, t, p); + else if (i == 1.0f) + rgb = make_float3(q, v, p); + else if (i == 2.0f) + rgb = make_float3(p, v, t); + else if (i == 3.0f) + rgb = make_float3(p, q, v); + else if (i == 4.0f) + rgb = make_float3(t, p, v); + else + rgb = make_float3(v, p, q); + } + else { + rgb = make_float3(v, v, v); + } + + return rgb; } ccl_device float3 xyY_to_xyz(float x, float y, float Y) { - float X, Z; + float X, Z; - if(y != 0.0f) X = (x / y) * Y; - else X = 0.0f; + if (y != 0.0f) + X = (x / y) * Y; + else + X = 0.0f; - if(y != 0.0f && Y != 0.0f) Z = (1.0f - x - y) / y * Y; - else Z = 0.0f; + if (y != 0.0f && Y != 0.0f) + Z = (1.0f - x - y) / y * Y; + else + Z = 0.0f; - return make_float3(X, Y, Z); + return make_float3(X, Y, Z); } #ifdef __KERNEL_SSE2__ @@ -158,86 +172,84 @@ ccl_device float3 xyY_to_xyz(float x, float y, float Y) * exp = exponent, encoded as uint32_t * e2coeff = 2^(127/exponent - 127) * bias_coeff^(1/exponent), encoded as uint32_t */ -template<unsigned exp, unsigned e2coeff> -ccl_device_inline ssef fastpow(const ssef &arg) +template<unsigned exp, unsigned e2coeff> ccl_device_inline ssef fastpow(const ssef &arg) { - ssef ret; - ret = arg * cast(ssei(e2coeff)); - ret = ssef(cast(ret)); - ret = ret * cast(ssei(exp)); - ret = cast(ssei(ret)); - return ret; + ssef ret; + ret = arg * cast(ssei(e2coeff)); + ret = ssef(cast(ret)); + ret = ret * cast(ssei(exp)); + ret = cast(ssei(ret)); + return ret; } /* Improve x ^ 1.0f/5.0f solution with Newton-Raphson method */ ccl_device_inline ssef improve_5throot_solution(const ssef &old_result, const ssef &x) { - ssef approx2 = old_result * old_result; - ssef approx4 = approx2 * approx2; - ssef t = x / approx4; - ssef summ = madd(ssef(4.0f), old_result, t); - return summ * ssef(1.0f/5.0f); + ssef approx2 = old_result * old_result; + ssef approx4 = approx2 * approx2; + ssef t = x / approx4; + ssef summ = madd(ssef(4.0f), old_result, t); + return summ * ssef(1.0f / 5.0f); } /* Calculate powf(x, 2.4). Working domain: 1e-10 < x < 1e+10 */ ccl_device_inline ssef fastpow24(const ssef &arg) { - /* max, avg and |avg| errors were calculated in gcc without FMA instructions - * The final precision should be better than powf in glibc */ - - /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */ - /* 0x3F4CCCCD = 4/5 */ - /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */ - ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05 - ssef arg2 = arg * arg; - ssef arg4 = arg2 * arg2; - x = improve_5throot_solution(x, arg4); /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */ - x = improve_5throot_solution(x, arg4); /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */ - x = improve_5throot_solution(x, arg4); /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */ - return x * (x * x); + /* max, avg and |avg| errors were calculated in gcc without FMA instructions + * The final precision should be better than powf in glibc */ + + /* Calculate x^4/5, coefficient 0.994 was constructed manually to minimize avg error */ + /* 0x3F4CCCCD = 4/5 */ + /* 0x4F55A7FB = 2^(127/(4/5) - 127) * 0.994^(1/(4/5)) */ + ssef x = fastpow<0x3F4CCCCD, 0x4F55A7FB>(arg); // error max = 0.17 avg = 0.0018 |avg| = 0.05 + ssef arg2 = arg * arg; + ssef arg4 = arg2 * arg2; + x = improve_5throot_solution(x, + arg4); /* error max = 0.018 avg = 0.0031 |avg| = 0.0031 */ + x = improve_5throot_solution(x, + arg4); /* error max = 0.00021 avg = 1.6e-05 |avg| = 1.6e-05 */ + x = improve_5throot_solution(x, + arg4); /* error max = 6.1e-07 avg = 5.2e-08 |avg| = 1.1e-07 */ + return x * (x * x); } ccl_device ssef color_srgb_to_linear(const ssef &c) { - sseb cmp = c < ssef(0.04045f); - ssef lt = max(c * ssef(1.0f/12.92f), ssef(0.0f)); - ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f/1.055f); /* fma */ - ssef gte = fastpow24(gtebase); - return select(cmp, lt, gte); + sseb cmp = c < ssef(0.04045f); + ssef lt = max(c * ssef(1.0f / 12.92f), ssef(0.0f)); + ssef gtebase = (c + ssef(0.055f)) * ssef(1.0f / 1.055f); /* fma */ + ssef gte = fastpow24(gtebase); + return select(cmp, lt, gte); } -#endif /* __KERNEL_SSE2__ */ +#endif /* __KERNEL_SSE2__ */ ccl_device float3 color_srgb_to_linear_v3(float3 c) { - return make_float3(color_srgb_to_linear(c.x), - color_srgb_to_linear(c.y), - color_srgb_to_linear(c.z)); + return make_float3( + color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z)); } ccl_device float3 color_linear_to_srgb_v3(float3 c) { - return make_float3(color_linear_to_srgb(c.x), - color_linear_to_srgb(c.y), - color_linear_to_srgb(c.z)); + return make_float3( + color_linear_to_srgb(c.x), color_linear_to_srgb(c.y), color_linear_to_srgb(c.z)); } ccl_device float4 color_srgb_to_linear_v4(float4 c) { #ifdef __KERNEL_SSE2__ - ssef r_ssef; - float4 &r = (float4 &)r_ssef; - r = c; - r_ssef = color_srgb_to_linear(r_ssef); - r.w = c.w; - return r; + ssef r_ssef; + float4 &r = (float4 &)r_ssef; + r = c; + r_ssef = color_srgb_to_linear(r_ssef); + r.w = c.w; + return r; #else - return make_float4(color_srgb_to_linear(c.x), - color_srgb_to_linear(c.y), - color_srgb_to_linear(c.z), - c.w); + return make_float4( + color_srgb_to_linear(c.x), color_srgb_to_linear(c.y), color_srgb_to_linear(c.z), c.w); #endif } CCL_NAMESPACE_END -#endif /* __UTIL_COLOR_H__ */ +#endif /* __UTIL_COLOR_H__ */ diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp index b4cc69411ed..aabfea7fc49 100644 --- a/intern/cycles/util/util_debug.cpp +++ b/intern/cycles/util/util_debug.cpp @@ -26,159 +26,153 @@ CCL_NAMESPACE_BEGIN DebugFlags::CPU::CPU() - : avx2(true), - avx(true), - sse41(true), - sse3(true), - sse2(true), - bvh_layout(BVH_LAYOUT_DEFAULT), - split_kernel(false) + : avx2(true), + avx(true), + sse41(true), + sse3(true), + sse2(true), + bvh_layout(BVH_LAYOUT_DEFAULT), + split_kernel(false) { - reset(); + reset(); } void DebugFlags::CPU::reset() { #define STRINGIFY(x) #x #define CHECK_CPU_FLAGS(flag, env) \ - do { \ - flag = (getenv(env) == NULL); \ - if(!flag) { \ - VLOG(1) << "Disabling " << STRINGIFY(flag) << " instruction set."; \ - } \ - } while(0) - - CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2"); - CHECK_CPU_FLAGS(avx, "CYCLES_CPU_NO_AVX"); - CHECK_CPU_FLAGS(sse41, "CYCLES_CPU_NO_SSE41"); - CHECK_CPU_FLAGS(sse3, "CYCLES_CPU_NO_SSE3"); - CHECK_CPU_FLAGS(sse2, "CYCLES_CPU_NO_SSE2"); + do { \ + flag = (getenv(env) == NULL); \ + if (!flag) { \ + VLOG(1) << "Disabling " << STRINGIFY(flag) << " instruction set."; \ + } \ + } while (0) + + CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2"); + CHECK_CPU_FLAGS(avx, "CYCLES_CPU_NO_AVX"); + CHECK_CPU_FLAGS(sse41, "CYCLES_CPU_NO_SSE41"); + CHECK_CPU_FLAGS(sse3, "CYCLES_CPU_NO_SSE3"); + CHECK_CPU_FLAGS(sse2, "CYCLES_CPU_NO_SSE2"); #undef STRINGIFY #undef CHECK_CPU_FLAGS - if(getenv("CYCLES_BVH2") != NULL) { - bvh_layout = BVH_LAYOUT_BVH2; - } - else if(getenv("CYCLES_BVH4") != NULL) { - bvh_layout = BVH_LAYOUT_BVH4; - } - else if(getenv("CYCLES_BVH8") != NULL) { - bvh_layout = BVH_LAYOUT_BVH8; - } - else { - bvh_layout = BVH_LAYOUT_DEFAULT; - } - - split_kernel = false; + if (getenv("CYCLES_BVH2") != NULL) { + bvh_layout = BVH_LAYOUT_BVH2; + } + else if (getenv("CYCLES_BVH4") != NULL) { + bvh_layout = BVH_LAYOUT_BVH4; + } + else if (getenv("CYCLES_BVH8") != NULL) { + bvh_layout = BVH_LAYOUT_BVH8; + } + else { + bvh_layout = BVH_LAYOUT_DEFAULT; + } + + split_kernel = false; } -DebugFlags::CUDA::CUDA() - : adaptive_compile(false), - split_kernel(false) +DebugFlags::CUDA::CUDA() : adaptive_compile(false), split_kernel(false) { - reset(); + reset(); } void DebugFlags::CUDA::reset() { - if(getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) - adaptive_compile = true; + if (getenv("CYCLES_CUDA_ADAPTIVE_COMPILE") != NULL) + adaptive_compile = true; - split_kernel = false; + split_kernel = false; } -DebugFlags::OpenCL::OpenCL() - : device_type(DebugFlags::OpenCL::DEVICE_ALL), - debug(false) +DebugFlags::OpenCL::OpenCL() : device_type(DebugFlags::OpenCL::DEVICE_ALL), debug(false) { - reset(); + reset(); } void DebugFlags::OpenCL::reset() { - /* Initialize device type from environment variables. */ - device_type = DebugFlags::OpenCL::DEVICE_ALL; - char *device = getenv("CYCLES_OPENCL_TEST"); - if(device) { - if(strcmp(device, "NONE") == 0) { - device_type = DebugFlags::OpenCL::DEVICE_NONE; - } - else if(strcmp(device, "ALL") == 0) { - device_type = DebugFlags::OpenCL::DEVICE_ALL; - } - else if(strcmp(device, "DEFAULT") == 0) { - device_type = DebugFlags::OpenCL::DEVICE_DEFAULT; - } - else if(strcmp(device, "CPU") == 0) { - device_type = DebugFlags::OpenCL::DEVICE_CPU; - } - else if(strcmp(device, "GPU") == 0) { - device_type = DebugFlags::OpenCL::DEVICE_GPU; - } - else if(strcmp(device, "ACCELERATOR") == 0) { - device_type = DebugFlags::OpenCL::DEVICE_ACCELERATOR; - } - } - /* Initialize other flags from environment variables. */ - debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL); + /* Initialize device type from environment variables. */ + device_type = DebugFlags::OpenCL::DEVICE_ALL; + char *device = getenv("CYCLES_OPENCL_TEST"); + if (device) { + if (strcmp(device, "NONE") == 0) { + device_type = DebugFlags::OpenCL::DEVICE_NONE; + } + else if (strcmp(device, "ALL") == 0) { + device_type = DebugFlags::OpenCL::DEVICE_ALL; + } + else if (strcmp(device, "DEFAULT") == 0) { + device_type = DebugFlags::OpenCL::DEVICE_DEFAULT; + } + else if (strcmp(device, "CPU") == 0) { + device_type = DebugFlags::OpenCL::DEVICE_CPU; + } + else if (strcmp(device, "GPU") == 0) { + device_type = DebugFlags::OpenCL::DEVICE_GPU; + } + else if (strcmp(device, "ACCELERATOR") == 0) { + device_type = DebugFlags::OpenCL::DEVICE_ACCELERATOR; + } + } + /* Initialize other flags from environment variables. */ + debug = (getenv("CYCLES_OPENCL_DEBUG") != NULL); } -DebugFlags::DebugFlags() -: viewport_static_bvh(false) +DebugFlags::DebugFlags() : viewport_static_bvh(false) { - /* Nothing for now. */ + /* Nothing for now. */ } void DebugFlags::reset() { - viewport_static_bvh = false; - cpu.reset(); - cuda.reset(); - opencl.reset(); + viewport_static_bvh = false; + cpu.reset(); + cuda.reset(); + opencl.reset(); } -std::ostream& operator <<(std::ostream &os, - DebugFlagsConstRef debug_flags) +std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags) { - os << "CPU flags:\n" - << " AVX2 : " << string_from_bool(debug_flags.cpu.avx2) << "\n" - << " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n" - << " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n" - << " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n" - << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n" - << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n" - << " Split : " << string_from_bool(debug_flags.cpu.split_kernel) << "\n"; - - os << "CUDA flags:\n" - << " Adaptive Compile: " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n"; - - const char *opencl_device_type; - switch(debug_flags.opencl.device_type) { - case DebugFlags::OpenCL::DEVICE_NONE: - opencl_device_type = "NONE"; - break; - case DebugFlags::OpenCL::DEVICE_ALL: - opencl_device_type = "ALL"; - break; - case DebugFlags::OpenCL::DEVICE_DEFAULT: - opencl_device_type = "DEFAULT"; - break; - case DebugFlags::OpenCL::DEVICE_CPU: - opencl_device_type = "CPU"; - break; - case DebugFlags::OpenCL::DEVICE_GPU: - opencl_device_type = "GPU"; - break; - case DebugFlags::OpenCL::DEVICE_ACCELERATOR: - opencl_device_type = "ACCELERATOR"; - break; - } - os << "OpenCL flags:\n" - << " Device type : " << opencl_device_type << "\n" - << " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n" - << " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n"; - return os; + os << "CPU flags:\n" + << " AVX2 : " << string_from_bool(debug_flags.cpu.avx2) << "\n" + << " AVX : " << string_from_bool(debug_flags.cpu.avx) << "\n" + << " SSE4.1 : " << string_from_bool(debug_flags.cpu.sse41) << "\n" + << " SSE3 : " << string_from_bool(debug_flags.cpu.sse3) << "\n" + << " SSE2 : " << string_from_bool(debug_flags.cpu.sse2) << "\n" + << " BVH layout : " << bvh_layout_name(debug_flags.cpu.bvh_layout) << "\n" + << " Split : " << string_from_bool(debug_flags.cpu.split_kernel) << "\n"; + + os << "CUDA flags:\n" + << " Adaptive Compile: " << string_from_bool(debug_flags.cuda.adaptive_compile) << "\n"; + + const char *opencl_device_type; + switch (debug_flags.opencl.device_type) { + case DebugFlags::OpenCL::DEVICE_NONE: + opencl_device_type = "NONE"; + break; + case DebugFlags::OpenCL::DEVICE_ALL: + opencl_device_type = "ALL"; + break; + case DebugFlags::OpenCL::DEVICE_DEFAULT: + opencl_device_type = "DEFAULT"; + break; + case DebugFlags::OpenCL::DEVICE_CPU: + opencl_device_type = "CPU"; + break; + case DebugFlags::OpenCL::DEVICE_GPU: + opencl_device_type = "GPU"; + break; + case DebugFlags::OpenCL::DEVICE_ACCELERATOR: + opencl_device_type = "ACCELERATOR"; + break; + } + os << "OpenCL flags:\n" + << " Device type : " << opencl_device_type << "\n" + << " Debug : " << string_from_bool(debug_flags.opencl.debug) << "\n" + << " Memory limit : " << string_human_readable_size(debug_flags.opencl.mem_limit) << "\n"; + return os; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h index e8c272cdb80..d668ddc6d6c 100644 --- a/intern/cycles/util/util_debug.h +++ b/intern/cycles/util/util_debug.h @@ -29,151 +29,166 @@ CCL_NAMESPACE_BEGIN * the interface. */ class DebugFlags { -public: - /* Use static BVH in viewport, to match final render exactly. */ - bool viewport_static_bvh; - - /* Descriptor of CPU feature-set to be used. */ - struct CPU { - CPU(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Flags describing which instructions sets are allowed for use. */ - bool avx2; - bool avx; - bool sse41; - bool sse3; - bool sse2; - - /* Check functions to see whether instructions up to the given one - * are allowed for use. - */ - bool has_avx2() { return has_avx() && avx2; } - bool has_avx() { return has_sse41() && avx; } - bool has_sse41() { return has_sse3() && sse41; } - bool has_sse3() { return has_sse2() && sse3; } - bool has_sse2() { return sse2; } - - /* Requested BVH size. - * - * Rendering will use widest possible BVH which is below or equal - * this one. - */ - BVHLayout bvh_layout; - - /* Whether split kernel is used */ - bool split_kernel; - }; - - /* Descriptor of CUDA feature-set to be used. */ - struct CUDA { - CUDA(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Whether adaptive feature based runtime compile is enabled or not. - * Requires the CUDA Toolkit and only works on Linux atm. */ - bool adaptive_compile; - - /* Whether split kernel is used */ - bool split_kernel; - }; - - /* Descriptor of OpenCL feature-set to be used. */ - struct OpenCL { - OpenCL(); - - /* Reset flags to their defaults. */ - void reset(); - - /* Available device types. - * Only gives a hint which devices to let user to choose from, does not - * try to use any sort of optimal device or so. - */ - enum DeviceType { - /* None of OpenCL devices will be used. */ - DEVICE_NONE, - /* All OpenCL devices will be used. */ - DEVICE_ALL, - /* Default system OpenCL device will be used. */ - DEVICE_DEFAULT, - /* Host processor will be used. */ - DEVICE_CPU, - /* GPU devices will be used. */ - DEVICE_GPU, - /* Dedicated OpenCL accelerator device will be used. */ - DEVICE_ACCELERATOR, - }; - - /* Available kernel types. */ - enum KernelType { - /* Do automated guess which kernel to use, based on the officially - * supported GPUs and such. - */ - KERNEL_DEFAULT, - /* Force mega kernel to be used. */ - KERNEL_MEGA, - /* Force split kernel to be used. */ - KERNEL_SPLIT, - }; - - /* Requested device type. */ - DeviceType device_type; - - /* Use debug version of the kernel. */ - bool debug; - - /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */ - /* Artificial memory limit in bytes (0 if disabled). */ - size_t mem_limit; - }; - - /* Get instance of debug flags registry. */ - static DebugFlags& get() - { - static DebugFlags instance; - return instance; - } - - /* Reset flags to their defaults. */ - void reset(); - - /* Requested CPU flags. */ - CPU cpu; - - /* Requested CUDA flags. */ - CUDA cuda; - - /* Requested OpenCL flags. */ - OpenCL opencl; - -private: - DebugFlags(); + public: + /* Use static BVH in viewport, to match final render exactly. */ + bool viewport_static_bvh; + + /* Descriptor of CPU feature-set to be used. */ + struct CPU { + CPU(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Flags describing which instructions sets are allowed for use. */ + bool avx2; + bool avx; + bool sse41; + bool sse3; + bool sse2; + + /* Check functions to see whether instructions up to the given one + * are allowed for use. + */ + bool has_avx2() + { + return has_avx() && avx2; + } + bool has_avx() + { + return has_sse41() && avx; + } + bool has_sse41() + { + return has_sse3() && sse41; + } + bool has_sse3() + { + return has_sse2() && sse3; + } + bool has_sse2() + { + return sse2; + } + + /* Requested BVH size. + * + * Rendering will use widest possible BVH which is below or equal + * this one. + */ + BVHLayout bvh_layout; + + /* Whether split kernel is used */ + bool split_kernel; + }; + + /* Descriptor of CUDA feature-set to be used. */ + struct CUDA { + CUDA(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Whether adaptive feature based runtime compile is enabled or not. + * Requires the CUDA Toolkit and only works on Linux atm. */ + bool adaptive_compile; + + /* Whether split kernel is used */ + bool split_kernel; + }; + + /* Descriptor of OpenCL feature-set to be used. */ + struct OpenCL { + OpenCL(); + + /* Reset flags to their defaults. */ + void reset(); + + /* Available device types. + * Only gives a hint which devices to let user to choose from, does not + * try to use any sort of optimal device or so. + */ + enum DeviceType { + /* None of OpenCL devices will be used. */ + DEVICE_NONE, + /* All OpenCL devices will be used. */ + DEVICE_ALL, + /* Default system OpenCL device will be used. */ + DEVICE_DEFAULT, + /* Host processor will be used. */ + DEVICE_CPU, + /* GPU devices will be used. */ + DEVICE_GPU, + /* Dedicated OpenCL accelerator device will be used. */ + DEVICE_ACCELERATOR, + }; + + /* Available kernel types. */ + enum KernelType { + /* Do automated guess which kernel to use, based on the officially + * supported GPUs and such. + */ + KERNEL_DEFAULT, + /* Force mega kernel to be used. */ + KERNEL_MEGA, + /* Force split kernel to be used. */ + KERNEL_SPLIT, + }; + + /* Requested device type. */ + DeviceType device_type; + + /* Use debug version of the kernel. */ + bool debug; + + /* TODO(mai): Currently this is only for OpenCL, but we should have it implemented for all devices. */ + /* Artificial memory limit in bytes (0 if disabled). */ + size_t mem_limit; + }; + + /* Get instance of debug flags registry. */ + static DebugFlags &get() + { + static DebugFlags instance; + return instance; + } + + /* Reset flags to their defaults. */ + void reset(); + + /* Requested CPU flags. */ + CPU cpu; + + /* Requested CUDA flags. */ + CUDA cuda; + + /* Requested OpenCL flags. */ + OpenCL opencl; + + private: + DebugFlags(); #if (__cplusplus > 199711L) -public: - explicit DebugFlags(DebugFlags const& /*other*/) = delete; - void operator=(DebugFlags const& /*other*/) = delete; + public: + explicit DebugFlags(DebugFlags const & /*other*/) = delete; + void operator=(DebugFlags const & /*other*/) = delete; #else -private: - explicit DebugFlags(DebugFlags const& /*other*/); - void operator=(DebugFlags const& /*other*/); + private: + explicit DebugFlags(DebugFlags const & /*other*/); + void operator=(DebugFlags const & /*other*/); #endif }; -typedef DebugFlags& DebugFlagsRef; -typedef const DebugFlags& DebugFlagsConstRef; +typedef DebugFlags &DebugFlagsRef; +typedef const DebugFlags &DebugFlagsConstRef; -inline DebugFlags& DebugFlags() { +inline DebugFlags &DebugFlags() +{ return DebugFlags::get(); } -std::ostream& operator <<(std::ostream &os, - DebugFlagsConstRef debug_flags); +std::ostream &operator<<(std::ostream &os, DebugFlagsConstRef debug_flags); CCL_NAMESPACE_END -#endif /* __UTIL_DEBUG_H__ */ +#endif /* __UTIL_DEBUG_H__ */ diff --git a/intern/cycles/util/util_defines.h b/intern/cycles/util/util_defines.h index 2cb42d9bd56..7f3bead0a18 100644 --- a/intern/cycles/util/util_defines.h +++ b/intern/cycles/util/util_defines.h @@ -16,118 +16,125 @@ */ #ifndef __UTIL_DEFINES_H__ -#define __UTIL_DEFINES_H__ +# define __UTIL_DEFINES_H__ /* Bitness */ -#if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) -# define __KERNEL_64_BIT__ -#endif +# if defined(__ppc64__) || defined(__PPC64__) || defined(__x86_64__) || defined(__ia64__) || \ + defined(_M_X64) +# define __KERNEL_64_BIT__ +# endif /* Qualifiers for kernel code shared by CPU and GPU */ -#ifndef __KERNEL_GPU__ -# define ccl_device static inline -# define ccl_device_noinline static -# define ccl_global -# define ccl_static_constant static const -# define ccl_constant const -# define ccl_local -# define ccl_local_param -# define ccl_private -# define ccl_restrict __restrict -# define ccl_ref & -# define __KERNEL_WITH_SSE_ALIGN__ - -# if defined(_WIN32) && !defined(FREE_WINDOWS) -# define ccl_device_inline static __forceinline -# define ccl_device_forceinline static __forceinline -# define ccl_align(...) __declspec(align(__VA_ARGS__)) -# ifdef __KERNEL_64_BIT__ -# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) -# else /* __KERNEL_64_BIT__ */ -# undef __KERNEL_WITH_SSE_ALIGN__ +# ifndef __KERNEL_GPU__ +# define ccl_device static inline +# define ccl_device_noinline static +# define ccl_global +# define ccl_static_constant static const +# define ccl_constant const +# define ccl_local +# define ccl_local_param +# define ccl_private +# define ccl_restrict __restrict +# define ccl_ref & +# define __KERNEL_WITH_SSE_ALIGN__ + +# if defined(_WIN32) && !defined(FREE_WINDOWS) +# define ccl_device_inline static __forceinline +# define ccl_device_forceinline static __forceinline +# define ccl_align(...) __declspec(align(__VA_ARGS__)) +# ifdef __KERNEL_64_BIT__ +# define ccl_try_align(...) __declspec(align(__VA_ARGS__)) +# else /* __KERNEL_64_BIT__ */ +# undef __KERNEL_WITH_SSE_ALIGN__ /* No support for function arguments (error C2719). */ -# define ccl_try_align(...) -# endif /* __KERNEL_64_BIT__ */ -# define ccl_may_alias -# define ccl_always_inline __forceinline -# define ccl_never_inline __declspec(noinline) -# define ccl_maybe_unused -# else /* _WIN32 && !FREE_WINDOWS */ -# define ccl_device_inline static inline __attribute__((always_inline)) -# define ccl_device_forceinline static inline __attribute__((always_inline)) -# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) -# ifndef FREE_WINDOWS64 -# define __forceinline inline __attribute__((always_inline)) -# endif -# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) -# define ccl_may_alias __attribute__((__may_alias__)) -# define ccl_always_inline __attribute__((always_inline)) -# define ccl_never_inline __attribute__((noinline)) -# define ccl_maybe_unused __attribute__((used)) -# endif /* _WIN32 && !FREE_WINDOWS */ +# define ccl_try_align(...) +# endif /* __KERNEL_64_BIT__ */ +# define ccl_may_alias +# define ccl_always_inline __forceinline +# define ccl_never_inline __declspec(noinline) +# define ccl_maybe_unused +# else /* _WIN32 && !FREE_WINDOWS */ +# define ccl_device_inline static inline __attribute__((always_inline)) +# define ccl_device_forceinline static inline __attribute__((always_inline)) +# define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) +# ifndef FREE_WINDOWS64 +# define __forceinline inline __attribute__((always_inline)) +# endif +# define ccl_try_align(...) __attribute__((aligned(__VA_ARGS__))) +# define ccl_may_alias __attribute__((__may_alias__)) +# define ccl_always_inline __attribute__((always_inline)) +# define ccl_never_inline __attribute__((noinline)) +# define ccl_maybe_unused __attribute__((used)) +# endif /* _WIN32 && !FREE_WINDOWS */ /* Use to suppress '-Wimplicit-fallthrough' (in place of 'break'). */ -# ifndef ATTR_FALLTHROUGH -# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ -# define ATTR_FALLTHROUGH __attribute__((fallthrough)) -# else -# define ATTR_FALLTHROUGH ((void) 0) +# ifndef ATTR_FALLTHROUGH +# if defined(__GNUC__) && (__GNUC__ >= 7) /* gcc7.0+ only */ +# define ATTR_FALLTHROUGH __attribute__((fallthrough)) +# else +# define ATTR_FALLTHROUGH ((void)0) +# endif # endif -# endif -#endif /* __KERNEL_GPU__ */ +# endif /* __KERNEL_GPU__ */ /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ -#if defined(__GNUC__) && defined(__KERNEL_CPU__) -# define LIKELY(x) __builtin_expect(!!(x), 1) -# define UNLIKELY(x) __builtin_expect(!!(x), 0) -#else -# define LIKELY(x) (x) -# define UNLIKELY(x) (x) -#endif - -#if defined(__GNUC__) || defined(__clang__) -# if defined(__cplusplus) -/* Some magic to be sure we don't have reference in the type. */ -template<typename T> static inline T decltype_helper(T x) { return x; } -# define TYPEOF(x) decltype(decltype_helper(x)) +# if defined(__GNUC__) && defined(__KERNEL_CPU__) +# define LIKELY(x) __builtin_expect(!!(x), 1) +# define UNLIKELY(x) __builtin_expect(!!(x), 0) # else -# define TYPEOF(x) typeof(x) +# define LIKELY(x) (x) +# define UNLIKELY(x) (x) +# endif + +# if defined(__GNUC__) || defined(__clang__) +# if defined(__cplusplus) +/* Some magic to be sure we don't have reference in the type. */ +template<typename T> static inline T decltype_helper(T x) +{ + return x; +} +# define TYPEOF(x) decltype(decltype_helper(x)) +# else +# define TYPEOF(x) typeof(x) +# endif # endif -#endif /* Causes warning: * incompatible types when assigning to type 'Foo' from type 'Bar' * ... the compiler optimizes away the temp var */ -#ifdef __GNUC__ -#define CHECK_TYPE(var, type) { \ - TYPEOF(var) *__tmp; \ - __tmp = (type *)NULL; \ - (void) __tmp; \ -} (void) 0 - -#define CHECK_TYPE_PAIR(var_a, var_b) { \ - TYPEOF(var_a) *__tmp; \ - __tmp = (typeof(var_b) *)NULL; \ - (void) __tmp; \ -} (void) 0 -#else -# define CHECK_TYPE(var, type) -# define CHECK_TYPE_PAIR(var_a, var_b) -#endif +# ifdef __GNUC__ +# define CHECK_TYPE(var, type) \ + { \ + TYPEOF(var) * __tmp; \ + __tmp = (type *)NULL; \ + (void)__tmp; \ + } \ + (void)0 + +# define CHECK_TYPE_PAIR(var_a, var_b) \ + { \ + TYPEOF(var_a) * __tmp; \ + __tmp = (typeof(var_b) *)NULL; \ + (void)__tmp; \ + } \ + (void)0 +# else +# define CHECK_TYPE(var, type) +# define CHECK_TYPE_PAIR(var_a, var_b) +# endif /* can be used in simple macros */ -#define CHECK_TYPE_INLINE(val, type) \ - ((void)(((type)0) != (val))) +# define CHECK_TYPE_INLINE(val, type) ((void)(((type)0) != (val))) -#ifndef __KERNEL_GPU__ -# include <cassert> -# define util_assert(statement) assert(statement) -#else -# define util_assert(statement) -#endif +# ifndef __KERNEL_GPU__ +# include <cassert> +# define util_assert(statement) assert(statement) +# else +# define util_assert(statement) +# endif -#endif /* __UTIL_DEFINES_H__ */ +#endif /* __UTIL_DEFINES_H__ */ diff --git a/intern/cycles/util/util_foreach.h b/intern/cycles/util/util_foreach.h index fd106d58b43..d907974be91 100644 --- a/intern/cycles/util/util_foreach.h +++ b/intern/cycles/util/util_foreach.h @@ -19,6 +19,6 @@ /* Nice foreach() loops for STL data structures. */ -#define foreach(x, y) for(x : y) +#define foreach(x, y) for (x : y) -#endif /* __UTIL_FOREACH_H__ */ +#endif /* __UTIL_FOREACH_H__ */ diff --git a/intern/cycles/util/util_function.h b/intern/cycles/util/util_function.h index 72c7ce43073..f3cc00329ad 100644 --- a/intern/cycles/util/util_function.h +++ b/intern/cycles/util/util_function.h @@ -36,4 +36,4 @@ using std::placeholders::_9; CCL_NAMESPACE_END -#endif /* __UTIL_FUNCTION_H__ */ +#endif /* __UTIL_FUNCTION_H__ */ diff --git a/intern/cycles/util/util_guarded_allocator.cpp b/intern/cycles/util/util_guarded_allocator.cpp index ae1d217c54f..1cb466a1ffa 100644 --- a/intern/cycles/util/util_guarded_allocator.cpp +++ b/intern/cycles/util/util_guarded_allocator.cpp @@ -25,25 +25,24 @@ static Stats global_stats(Stats::static_init); void util_guarded_mem_alloc(size_t n) { - global_stats.mem_alloc(n); + global_stats.mem_alloc(n); } void util_guarded_mem_free(size_t n) { - global_stats.mem_free(n); + global_stats.mem_free(n); } /* Public API. */ size_t util_guarded_get_mem_used() { - return global_stats.mem_used; + return global_stats.mem_used; } size_t util_guarded_get_mem_peak() { - return global_stats.mem_peak; + return global_stats.mem_peak; } - CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h index 99edf77e2c7..2d09326d2ca 100644 --- a/intern/cycles/util/util_guarded_allocator.h +++ b/intern/cycles/util/util_guarded_allocator.h @@ -31,117 +31,127 @@ void util_guarded_mem_alloc(size_t n); void util_guarded_mem_free(size_t n); /* Guarded allocator for the use with STL. */ -template <typename T> -class GuardedAllocator { -public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef T value_type; - - GuardedAllocator() {} - GuardedAllocator(const GuardedAllocator&) {} - - T *allocate(size_t n, const void *hint = 0) - { - (void) hint; - size_t size = n * sizeof(T); - util_guarded_mem_alloc(size); - if(n == 0) { - return NULL; - } - T *mem; +template<typename T> class GuardedAllocator { + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + + GuardedAllocator() + { + } + GuardedAllocator(const GuardedAllocator &) + { + } + + T *allocate(size_t n, const void *hint = 0) + { + (void)hint; + size_t size = n * sizeof(T); + util_guarded_mem_alloc(size); + if (n == 0) { + return NULL; + } + T *mem; #ifdef WITH_BLENDER_GUARDEDALLOC - /* C++ standard requires allocation functions to allocate memory suitably - * aligned for any standard type. This is 16 bytes for 64 bit platform as - * far as i concerned. We might over-align on 32bit here, but that should - * be all safe actually. - */ - mem = (T*)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); + /* C++ standard requires allocation functions to allocate memory suitably + * aligned for any standard type. This is 16 bytes for 64 bit platform as + * far as i concerned. We might over-align on 32bit here, but that should + * be all safe actually. + */ + mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); #else - mem = (T*)malloc(size); + mem = (T *)malloc(size); #endif - if(mem == NULL) { - throw std::bad_alloc(); - } - return mem; - } - - void deallocate(T *p, size_t n) - { - util_guarded_mem_free(n * sizeof(T)); - if(p != NULL) { + if (mem == NULL) { + throw std::bad_alloc(); + } + return mem; + } + + void deallocate(T *p, size_t n) + { + util_guarded_mem_free(n * sizeof(T)); + if (p != NULL) { #ifdef WITH_BLENDER_GUARDEDALLOC - MEM_freeN(p); + MEM_freeN(p); #else - free(p); + free(p); #endif - } - } - - T *address(T& x) const - { - return &x; - } - - const T *address(const T& x) const - { - return &x; - } - - GuardedAllocator<T>& operator=(const GuardedAllocator&) - { - return *this; - } - - size_t max_size() const - { - return size_t(-1); - } - - template <class U> - struct rebind { - typedef GuardedAllocator<U> other; - }; - - template <class U> - GuardedAllocator(const GuardedAllocator<U>&) {} - - template <class U> - GuardedAllocator& operator=(const GuardedAllocator<U>&) { return *this; } - - inline bool operator==(GuardedAllocator const& /*other*/) const { return true; } - inline bool operator!=(GuardedAllocator const& other) const { return !operator==(other); } + } + } + + T *address(T &x) const + { + return &x; + } + + const T *address(const T &x) const + { + return &x; + } + + GuardedAllocator<T> &operator=(const GuardedAllocator &) + { + return *this; + } + + size_t max_size() const + { + return size_t(-1); + } + + template<class U> struct rebind { + typedef GuardedAllocator<U> other; + }; + + template<class U> GuardedAllocator(const GuardedAllocator<U> &) + { + } + + template<class U> GuardedAllocator &operator=(const GuardedAllocator<U> &) + { + return *this; + } + + inline bool operator==(GuardedAllocator const & /*other*/) const + { + return true; + } + inline bool operator!=(GuardedAllocator const &other) const + { + return !operator==(other); + } #ifdef _MSC_VER - /* Welcome to the black magic here. - * - * The issue is that MSVC C++ allocates container proxy on any - * vector initialization, including static vectors which don't - * have any data yet. This leads to several issues: - * - * - Static objects initialization fiasco (global_stats from - * util_stats.h might not be initialized yet). - * - If main() function changes allocator type (for example, - * this might happen with `blender --debug-memory`) nobody - * will know how to convert already allocated memory to a new - * guarded allocator. - * - * Here we work this around by making it so container proxy does - * not use guarded allocation. A bit fragile, unfortunately. - */ - template<> - struct rebind<std::_Container_proxy> { - typedef std::allocator<std::_Container_proxy> other; - }; - - operator std::allocator<std::_Container_proxy>() const - { - return std::allocator<std::_Container_proxy>(); - } + /* Welcome to the black magic here. + * + * The issue is that MSVC C++ allocates container proxy on any + * vector initialization, including static vectors which don't + * have any data yet. This leads to several issues: + * + * - Static objects initialization fiasco (global_stats from + * util_stats.h might not be initialized yet). + * - If main() function changes allocator type (for example, + * this might happen with `blender --debug-memory`) nobody + * will know how to convert already allocated memory to a new + * guarded allocator. + * + * Here we work this around by making it so container proxy does + * not use guarded allocation. A bit fragile, unfortunately. + */ + template<> struct rebind<std::_Container_proxy> { + typedef std::allocator<std::_Container_proxy> other; + }; + + operator std::allocator<std::_Container_proxy>() const + { + return std::allocator<std::_Container_proxy>(); + } #endif }; @@ -158,17 +168,17 @@ size_t util_guarded_get_mem_peak(); * when running out of memory. */ #define MEM_GUARDED_CALL(progress, func, ...) \ - do { \ - try { \ - (func)(__VA_ARGS__); \ - } \ - catch (std::bad_alloc&) { \ - fprintf(stderr, "Error: run out of memory!\n"); \ - fflush(stderr); \ - (progress)->set_error("Out of memory"); \ - } \ - } while(false) + do { \ + try { \ + (func)(__VA_ARGS__); \ + } \ + catch (std::bad_alloc &) { \ + fprintf(stderr, "Error: run out of memory!\n"); \ + fflush(stderr); \ + (progress)->set_error("Out of memory"); \ + } \ + } while (false) CCL_NAMESPACE_END -#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */ +#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index 3868509c21b..9c40f5310c2 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -21,7 +21,7 @@ #include "util/util_math.h" #ifdef __KERNEL_SSE2__ -#include "util/util_simd.h" +# include "util/util_simd.h" #endif CCL_NAMESPACE_BEGIN @@ -30,122 +30,139 @@ CCL_NAMESPACE_BEGIN #ifdef __KERNEL_OPENCL__ -#define float4_store_half(h, f, scale) vstore_half4(f * (scale), 0, h); +# define float4_store_half(h, f, scale) vstore_half4(f *(scale), 0, h); #else /* CUDA has its own half data type, no need to define then */ -#ifndef __KERNEL_CUDA__ +# ifndef __KERNEL_CUDA__ /* Implementing this as a class rather than a typedef so that the compiler can tell it apart from unsigned shorts. */ class half { -public: - half() : v(0) {} - half(const unsigned short& i) : v(i) {} - operator unsigned short() { return v; } - half& operator =(const unsigned short& i) { v = i; return *this; } -private: - unsigned short v; + public: + half() : v(0) + { + } + half(const unsigned short &i) : v(i) + { + } + operator unsigned short() + { + return v; + } + half &operator=(const unsigned short &i) + { + v = i; + return *this; + } + + private: + unsigned short v; }; -#endif +# endif -struct half4 { half x, y, z, w; }; +struct half4 { + half x, y, z, w; +}; -#ifdef __KERNEL_CUDA__ +# ifdef __KERNEL_CUDA__ ccl_device_inline void float4_store_half(half *h, float4 f, float scale) { - h[0] = __float2half(f.x * scale); - h[1] = __float2half(f.y * scale); - h[2] = __float2half(f.z * scale); - h[3] = __float2half(f.w * scale); + h[0] = __float2half(f.x * scale); + h[1] = __float2half(f.y * scale); + h[2] = __float2half(f.z * scale); + h[3] = __float2half(f.w * scale); } -#else +# else ccl_device_inline void float4_store_half(half *h, float4 f, float scale) { -#ifndef __KERNEL_SSE2__ - for(int i = 0; i < 4; i++) { - /* optimized float to half for pixels: - * assumes no negative, no nan, no inf, and sets denormal to 0 */ - union { uint i; float f; } in; - float fscale = f[i] * scale; - in.f = (fscale > 0.0f)? ((fscale < 65504.0f)? fscale: 65504.0f): 0.0f; - int x = in.i; - - int absolute = x & 0x7FFFFFFF; - int Z = absolute + 0xC8000000; - int result = (absolute < 0x38800000)? 0: Z; - int rshift = (result >> 13); - - h[i] = (rshift & 0x7FFF); - } -#else - /* same as above with SSE */ - ssef fscale = load4f(f) * scale; - ssef x = min(max(fscale, 0.0f), 65504.0f); - -#ifdef __KERNEL_AVX2__ - ssei rpack = _mm_cvtps_ph(x, 0); -#else - ssei absolute = cast(x) & 0x7FFFFFFF; - ssei Z = absolute + 0xC8000000; - ssei result = andnot(absolute < 0x38800000, Z); - ssei rshift = (result >> 13) & 0x7FFF; - ssei rpack = _mm_packs_epi32(rshift, rshift); -#endif - - _mm_storel_pi((__m64*)h, _mm_castsi128_ps(rpack)); -#endif +# ifndef __KERNEL_SSE2__ + for (int i = 0; i < 4; i++) { + /* optimized float to half for pixels: + * assumes no negative, no nan, no inf, and sets denormal to 0 */ + union { + uint i; + float f; + } in; + float fscale = f[i] * scale; + in.f = (fscale > 0.0f) ? ((fscale < 65504.0f) ? fscale : 65504.0f) : 0.0f; + int x = in.i; + + int absolute = x & 0x7FFFFFFF; + int Z = absolute + 0xC8000000; + int result = (absolute < 0x38800000) ? 0 : Z; + int rshift = (result >> 13); + + h[i] = (rshift & 0x7FFF); + } +# else + /* same as above with SSE */ + ssef fscale = load4f(f) * scale; + ssef x = min(max(fscale, 0.0f), 65504.0f); + +# ifdef __KERNEL_AVX2__ + ssei rpack = _mm_cvtps_ph(x, 0); +# else + ssei absolute = cast(x) & 0x7FFFFFFF; + ssei Z = absolute + 0xC8000000; + ssei result = andnot(absolute < 0x38800000, Z); + ssei rshift = (result >> 13) & 0x7FFF; + ssei rpack = _mm_packs_epi32(rshift, rshift); +# endif + + _mm_storel_pi((__m64 *)h, _mm_castsi128_ps(rpack)); +# endif } ccl_device_inline float half_to_float(half h) { - float f; + float f; - *((int*) &f) = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); + *((int *)&f) = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13); - return f; + return f; } ccl_device_inline float4 half4_to_float4(half4 h) { - float4 f; + float4 f; - f.x = half_to_float(h.x); - f.y = half_to_float(h.y); - f.z = half_to_float(h.z); - f.w = half_to_float(h.w); + f.x = half_to_float(h.x); + f.y = half_to_float(h.y); + f.z = half_to_float(h.z); + f.w = half_to_float(h.w); - return f; + return f; } ccl_device_inline half float_to_half(float f) { - const uint u = __float_as_uint(f); - /* Sign bit, shifted to it's position. */ - uint sign_bit = u & 0x80000000; - sign_bit >>= 16; - /* Exponent. */ - uint exponent_bits = u & 0x7f800000; - /* Non-sign bits. */ - uint value_bits = u & 0x7fffffff; - value_bits >>= 13; /* Align mantissa on MSB. */ - value_bits -= 0x1c000; /* Adjust bias. */ - /* Flush-to-zero. */ - value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits; - /* Clamp-to-max. */ - value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; - /* Denormals-as-zero. */ - value_bits = (exponent_bits == 0 ? 0 : value_bits); - /* Re-insert sign bit and return. */ - return (value_bits | sign_bit); + const uint u = __float_as_uint(f); + /* Sign bit, shifted to it's position. */ + uint sign_bit = u & 0x80000000; + sign_bit >>= 16; + /* Exponent. */ + uint exponent_bits = u & 0x7f800000; + /* Non-sign bits. */ + uint value_bits = u & 0x7fffffff; + value_bits >>= 13; /* Align mantissa on MSB. */ + value_bits -= 0x1c000; /* Adjust bias. */ + /* Flush-to-zero. */ + value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits; + /* Clamp-to-max. */ + value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits; + /* Denormals-as-zero. */ + value_bits = (exponent_bits == 0 ? 0 : value_bits); + /* Re-insert sign bit and return. */ + return (value_bits | sign_bit); } -#endif +# endif #endif CCL_NAMESPACE_END -#endif /* __UTIL_HALF_H__ */ +#endif /* __UTIL_HALF_H__ */ diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h index f343252eaca..785482967db 100644 --- a/intern/cycles/util/util_hash.h +++ b/intern/cycles/util/util_hash.h @@ -23,49 +23,56 @@ CCL_NAMESPACE_BEGIN ccl_device_inline uint hash_int_2d(uint kx, uint ky) { -#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) - - uint a, b, c; - - a = b = c = 0xdeadbeef + (2 << 2) + 13; - a += kx; - b += ky; - - c ^= b; c -= rot(b,14); - a ^= c; a -= rot(c,11); - b ^= a; b -= rot(a,25); - c ^= b; c -= rot(b,16); - a ^= c; a -= rot(c,4); - b ^= a; b -= rot(a,14); - c ^= b; c -= rot(b,24); - - return c; +#define rot(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) + + uint a, b, c; + + a = b = c = 0xdeadbeef + (2 << 2) + 13; + a += kx; + b += ky; + + c ^= b; + c -= rot(b, 14); + a ^= c; + a -= rot(c, 11); + b ^= a; + b -= rot(a, 25); + c ^= b; + c -= rot(b, 16); + a ^= c; + a -= rot(c, 4); + b ^= a; + b -= rot(a, 14); + c ^= b; + c -= rot(b, 24); + + return c; #undef rot } ccl_device_inline uint hash_int(uint k) { - return hash_int_2d(k, 0); + return hash_int_2d(k, 0); } #ifndef __KERNEL_GPU__ static inline uint hash_string(const char *str) { - uint i = 0, c; + uint i = 0, c; - while((c = *str++)) - i = i * 37 + c; + while ((c = *str++)) + i = i * 37 + c; - return i; + return i; } #endif ccl_device_inline float hash_int_01(uint k) { - return (float)hash_int(k) * (1.0f/(float)0xFFFFFFFF); + return (float)hash_int(k) * (1.0f / (float)0xFFFFFFFF); } CCL_NAMESPACE_END -#endif /* __UTIL_HASH_H__ */ +#endif /* __UTIL_HASH_H__ */ diff --git a/intern/cycles/util/util_ies.cpp b/intern/cycles/util/util_ies.cpp index 277045d9bc4..ff5c709b406 100644 --- a/intern/cycles/util/util_ies.cpp +++ b/intern/cycles/util/util_ies.cpp @@ -30,374 +30,376 @@ template class GuardedAllocator<char>; bool IESFile::load(ustring ies) { - clear(); - if(!parse(ies) || !process()) { - clear(); - return false; - } - return true; + clear(); + if (!parse(ies) || !process()) { + clear(); + return false; + } + return true; } void IESFile::clear() { - intensity.clear(); - v_angles.clear(); - h_angles.clear(); + intensity.clear(); + v_angles.clear(); + h_angles.clear(); } int IESFile::packed_size() { - if(v_angles.size() && h_angles.size() > 0) { - return 2 + h_angles.size() + v_angles.size() + h_angles.size()*v_angles.size(); - } - return 0; + if (v_angles.size() && h_angles.size() > 0) { + return 2 + h_angles.size() + v_angles.size() + h_angles.size() * v_angles.size(); + } + return 0; } void IESFile::pack(float *data) { - if(v_angles.size() && h_angles.size()) { - *(data++) = __int_as_float(h_angles.size()); - *(data++) = __int_as_float(v_angles.size()); - - memcpy(data, &h_angles[0], h_angles.size()*sizeof(float)); - data += h_angles.size(); - memcpy(data, &v_angles[0], v_angles.size()*sizeof(float)); - data += v_angles.size(); - - for(int h = 0; h < intensity.size(); h++) { - memcpy(data, &intensity[h][0], v_angles.size()*sizeof(float)); - data += v_angles.size(); - } - } + if (v_angles.size() && h_angles.size()) { + *(data++) = __int_as_float(h_angles.size()); + *(data++) = __int_as_float(v_angles.size()); + + memcpy(data, &h_angles[0], h_angles.size() * sizeof(float)); + data += h_angles.size(); + memcpy(data, &v_angles[0], v_angles.size() * sizeof(float)); + data += v_angles.size(); + + for (int h = 0; h < intensity.size(); h++) { + memcpy(data, &intensity[h][0], v_angles.size() * sizeof(float)); + data += v_angles.size(); + } + } } class IESTextParser { -public: - vector<char> text; - char *data; - - IESTextParser(ustring str) - : text(str.begin(), str.end()) - { - std::replace(text.begin(), text.end(), ',', ' '); - data = strstr(&text[0], "\nTILT="); - } - - bool eof() { - return (data == NULL) || (data[0] == '\0'); - } - - double get_double() { - if(eof()) { - return 0.0; - } - char *old_data = data; - double val = strtod(data, &data); - if(data == old_data) { - data = NULL; - return 0.0; - } - return val; - } - - long get_long() { - if(eof()) { - return 0; - } - char *old_data = data; - long val = strtol(data, &data, 10); - if(data == old_data) { - data = NULL; - return 0; - } - return val; - } + public: + vector<char> text; + char *data; + + IESTextParser(ustring str) : text(str.begin(), str.end()) + { + std::replace(text.begin(), text.end(), ',', ' '); + data = strstr(&text[0], "\nTILT="); + } + + bool eof() + { + return (data == NULL) || (data[0] == '\0'); + } + + double get_double() + { + if (eof()) { + return 0.0; + } + char *old_data = data; + double val = strtod(data, &data); + if (data == old_data) { + data = NULL; + return 0.0; + } + return val; + } + + long get_long() + { + if (eof()) { + return 0; + } + char *old_data = data; + long val = strtol(data, &data, 10); + if (data == old_data) { + data = NULL; + return 0; + } + return val; + } }; bool IESFile::parse(ustring ies) { - if(ies.empty()) { - return false; - } - - IESTextParser parser(ies); - if(parser.eof()) { - return false; - } - - /* Handle the tilt data block. */ - if(strncmp(parser.data, "\nTILT=INCLUDE", 13) == 0) { - parser.data += 13; - parser.get_double(); /* Lamp to Luminaire geometry */ - int num_tilt = parser.get_long(); /* Amount of tilt angles and factors */ - /* Skip over angles and factors. */ - for(int i = 0; i < 2*num_tilt; i++) { - parser.get_double(); - } - } - else { - /* Skip to next line. */ - parser.data = strstr(parser.data+1, "\n"); - } - - if(parser.eof()) { - return false; - } - parser.data++; - - parser.get_long(); /* Number of lamps */ - parser.get_double(); /* Lumens per lamp */ - double factor = parser.get_double(); /* Candela multiplier */ - int v_angles_num = parser.get_long(); /* Number of vertical angles */ - int h_angles_num = parser.get_long(); /* Number of horizontal angles */ - type = (IESType) parser.get_long(); /* Photometric type */ - - /* TODO(lukas): Test whether the current type B processing can also deal with type A files. - * In theory the only difference should be orientation which we ignore anyways, but with IES you never know... - */ - if(type != TYPE_B && type != TYPE_C) { - return false; - } - - parser.get_long(); /* Unit of the geometry data */ - parser.get_double(); /* Width */ - parser.get_double(); /* Length */ - parser.get_double(); /* Height */ - factor *= parser.get_double(); /* Ballast factor */ - factor *= parser.get_double(); /* Ballast-Lamp Photometric factor */ - parser.get_double(); /* Input Watts */ - - /* Intensity values in IES files are specified in candela (lumen/sr), a photometric quantity. - * Cycles expects radiometric quantities, though, which requires a conversion. - * However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution - * of the light source since lumens take human perception into account. - * Since this spectral distribution is not known from the IES file, a typical one must be assumed. - * The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to convert to Watt/sr. - * A more advanced approach would be to add a Blackbody Temperature input to the node and numerically - * integrate the Luminous efficacy from the resulting spectral distribution. - * Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that Cycles expects - * for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela to Watt factor. - */ - factor *= 0.0706650768394; - - v_angles.reserve(v_angles_num); - for(int i = 0; i < v_angles_num; i++) { - v_angles.push_back((float) parser.get_double()); - } - - h_angles.reserve(h_angles_num); - for(int i = 0; i < h_angles_num; i++) { - h_angles.push_back((float) parser.get_double()); - } - - intensity.resize(h_angles_num); - for(int i = 0; i < h_angles_num; i++) { - intensity[i].reserve(v_angles_num); - for(int j = 0; j < v_angles_num; j++) { - intensity[i].push_back((float) (factor * parser.get_double())); - } - } - - return !parser.eof(); + if (ies.empty()) { + return false; + } + + IESTextParser parser(ies); + if (parser.eof()) { + return false; + } + + /* Handle the tilt data block. */ + if (strncmp(parser.data, "\nTILT=INCLUDE", 13) == 0) { + parser.data += 13; + parser.get_double(); /* Lamp to Luminaire geometry */ + int num_tilt = parser.get_long(); /* Amount of tilt angles and factors */ + /* Skip over angles and factors. */ + for (int i = 0; i < 2 * num_tilt; i++) { + parser.get_double(); + } + } + else { + /* Skip to next line. */ + parser.data = strstr(parser.data + 1, "\n"); + } + + if (parser.eof()) { + return false; + } + parser.data++; + + parser.get_long(); /* Number of lamps */ + parser.get_double(); /* Lumens per lamp */ + double factor = parser.get_double(); /* Candela multiplier */ + int v_angles_num = parser.get_long(); /* Number of vertical angles */ + int h_angles_num = parser.get_long(); /* Number of horizontal angles */ + type = (IESType)parser.get_long(); /* Photometric type */ + + /* TODO(lukas): Test whether the current type B processing can also deal with type A files. + * In theory the only difference should be orientation which we ignore anyways, but with IES you never know... + */ + if (type != TYPE_B && type != TYPE_C) { + return false; + } + + parser.get_long(); /* Unit of the geometry data */ + parser.get_double(); /* Width */ + parser.get_double(); /* Length */ + parser.get_double(); /* Height */ + factor *= parser.get_double(); /* Ballast factor */ + factor *= parser.get_double(); /* Ballast-Lamp Photometric factor */ + parser.get_double(); /* Input Watts */ + + /* Intensity values in IES files are specified in candela (lumen/sr), a photometric quantity. + * Cycles expects radiometric quantities, though, which requires a conversion. + * However, the Luminous efficacy (ratio of lumens per Watt) depends on the spectral distribution + * of the light source since lumens take human perception into account. + * Since this spectral distribution is not known from the IES file, a typical one must be assumed. + * The D65 standard illuminant has a Luminous efficacy of 177.83, which is used here to convert to Watt/sr. + * A more advanced approach would be to add a Blackbody Temperature input to the node and numerically + * integrate the Luminous efficacy from the resulting spectral distribution. + * Also, the Watt/sr value must be multiplied by 4*pi to get the Watt value that Cycles expects + * for lamp strength. Therefore, the conversion here uses 4*pi/177.83 as a Candela to Watt factor. + */ + factor *= 0.0706650768394; + + v_angles.reserve(v_angles_num); + for (int i = 0; i < v_angles_num; i++) { + v_angles.push_back((float)parser.get_double()); + } + + h_angles.reserve(h_angles_num); + for (int i = 0; i < h_angles_num; i++) { + h_angles.push_back((float)parser.get_double()); + } + + intensity.resize(h_angles_num); + for (int i = 0; i < h_angles_num; i++) { + intensity[i].reserve(v_angles_num); + for (int j = 0; j < v_angles_num; j++) { + intensity[i].push_back((float)(factor * parser.get_double())); + } + } + + return !parser.eof(); } bool IESFile::process_type_b() { - vector<vector<float> > newintensity; - newintensity.resize(v_angles.size()); - for(int i = 0; i < v_angles.size(); i++) { - newintensity[i].reserve(h_angles.size()); - for(int j = 0; j < h_angles.size(); j++) { - newintensity[i].push_back(intensity[j][i]); - } - } - intensity.swap(newintensity); - h_angles.swap(v_angles); - - float h_first = h_angles[0], h_last = h_angles[h_angles.size()-1]; - if(h_last != 90.0f) { - return false; - } - - if(h_first == 0.0f) { - /* The range in the file corresponds to 90°-180°, we need to mirror that to get the - * full 180° range. */ - vector<float> new_h_angles; - vector<vector<float> > new_intensity; - int hnum = h_angles.size(); - new_h_angles.reserve(2*hnum-1); - new_intensity.reserve(2*hnum-1); - for(int i = hnum-1; i > 0; i--) { - new_h_angles.push_back(90.0f - h_angles[i]); - new_intensity.push_back(intensity[i]); - } - for(int i = 0; i < hnum; i++) { - new_h_angles.push_back(90.0f + h_angles[i]); - new_intensity.push_back(intensity[i]); - } - h_angles.swap(new_h_angles); - intensity.swap(new_intensity); - } - else if(h_first == -90.0f) { - /* We have full 180° coverage, so just shift to match the angle range convention. */ - for(int i = 0; i < h_angles.size(); i++) { - h_angles[i] += 90.0f; - } - } - /* To get correct results with the cubic interpolation in the kernel, the horizontal range - * has to cover all 360°. Therefore, we copy the 0° entry to 360° to ensure full coverage - * and seamless interpolation. */ - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - - float v_first = v_angles[0], v_last = v_angles[v_angles.size()-1]; - if(v_last != 90.0f) { - return false; - } - - if(v_first == 0.0f) { - /* The range in the file corresponds to 90°-180°, we need to mirror that to get the - * full 180° range. */ - vector<float> new_v_angles; - int hnum = h_angles.size(); - int vnum = v_angles.size(); - new_v_angles.reserve(2*vnum-1); - for(int i = vnum-1; i > 0; i--) { - new_v_angles.push_back(90.0f - v_angles[i]); - } - for(int i = 0; i < vnum; i++) { - new_v_angles.push_back(90.0f + v_angles[i]); - } - for(int i = 0; i < hnum; i++) { - vector<float> new_intensity; - new_intensity.reserve(2*vnum-1); - for(int j = vnum-2; j >= 0; j--) { - new_intensity.push_back(intensity[i][j]); - } - new_intensity.insert(new_intensity.end(), intensity[i].begin(), intensity[i].end()); - intensity[i].swap(new_intensity); - } - v_angles.swap(new_v_angles); - } - else if(v_first == -90.0f) { - /* We have full 180° coverage, so just shift to match the angle range convention. */ - for(int i = 0; i < v_angles.size(); i++) { - v_angles[i] += 90.0f; - } - } - - return true; + vector<vector<float>> newintensity; + newintensity.resize(v_angles.size()); + for (int i = 0; i < v_angles.size(); i++) { + newintensity[i].reserve(h_angles.size()); + for (int j = 0; j < h_angles.size(); j++) { + newintensity[i].push_back(intensity[j][i]); + } + } + intensity.swap(newintensity); + h_angles.swap(v_angles); + + float h_first = h_angles[0], h_last = h_angles[h_angles.size() - 1]; + if (h_last != 90.0f) { + return false; + } + + if (h_first == 0.0f) { + /* The range in the file corresponds to 90°-180°, we need to mirror that to get the + * full 180° range. */ + vector<float> new_h_angles; + vector<vector<float>> new_intensity; + int hnum = h_angles.size(); + new_h_angles.reserve(2 * hnum - 1); + new_intensity.reserve(2 * hnum - 1); + for (int i = hnum - 1; i > 0; i--) { + new_h_angles.push_back(90.0f - h_angles[i]); + new_intensity.push_back(intensity[i]); + } + for (int i = 0; i < hnum; i++) { + new_h_angles.push_back(90.0f + h_angles[i]); + new_intensity.push_back(intensity[i]); + } + h_angles.swap(new_h_angles); + intensity.swap(new_intensity); + } + else if (h_first == -90.0f) { + /* We have full 180° coverage, so just shift to match the angle range convention. */ + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] += 90.0f; + } + } + /* To get correct results with the cubic interpolation in the kernel, the horizontal range + * has to cover all 360°. Therefore, we copy the 0° entry to 360° to ensure full coverage + * and seamless interpolation. */ + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + + float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; + if (v_last != 90.0f) { + return false; + } + + if (v_first == 0.0f) { + /* The range in the file corresponds to 90°-180°, we need to mirror that to get the + * full 180° range. */ + vector<float> new_v_angles; + int hnum = h_angles.size(); + int vnum = v_angles.size(); + new_v_angles.reserve(2 * vnum - 1); + for (int i = vnum - 1; i > 0; i--) { + new_v_angles.push_back(90.0f - v_angles[i]); + } + for (int i = 0; i < vnum; i++) { + new_v_angles.push_back(90.0f + v_angles[i]); + } + for (int i = 0; i < hnum; i++) { + vector<float> new_intensity; + new_intensity.reserve(2 * vnum - 1); + for (int j = vnum - 2; j >= 0; j--) { + new_intensity.push_back(intensity[i][j]); + } + new_intensity.insert(new_intensity.end(), intensity[i].begin(), intensity[i].end()); + intensity[i].swap(new_intensity); + } + v_angles.swap(new_v_angles); + } + else if (v_first == -90.0f) { + /* We have full 180° coverage, so just shift to match the angle range convention. */ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] += 90.0f; + } + } + + return true; } bool IESFile::process_type_c() { - if(h_angles[0] == 90.0f) { - /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range here. */ - for(int i = 0; i < h_angles.size(); i++) { - h_angles[i] -= 90.0f; - } - } - - if(h_angles[0] != 0.0f) { - return false; - } - - if(h_angles.size() == 1) { - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - } - - if(h_angles[h_angles.size()-1] == 90.0f) { - /* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four). - * Since the two->four mirroring step might also be required if we get an input of two quadrants, - * we only do the first mirror here and later do the second mirror in either case. */ - int hnum = h_angles.size(); - for(int i = hnum-2; i >= 0; i--) { - h_angles.push_back(180.0f - h_angles[i]); - intensity.push_back(intensity[i]); - } - } - - if(h_angles[h_angles.size()-1] == 180.0f) { - /* Mirror half to the full range. */ - int hnum = h_angles.size(); - for(int i = hnum-2; i >= 0; i--) { - h_angles.push_back(360.0f - h_angles[i]); - intensity.push_back(intensity[i]); - } - } - - /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to the 0° entry. - * If the file has a discernible order in its spacing, just fix this. */ - if(h_angles[h_angles.size()-1] != 360.0f) { - int hnum = h_angles.size(); - float last_step = h_angles[hnum-1]-h_angles[hnum-2]; - float first_step = h_angles[1]-h_angles[0]; - float difference = 360.0f - h_angles[hnum-1]; - if(last_step == difference || first_step == difference) { - h_angles.push_back(360.0f); - intensity.push_back(intensity[0]); - } - else { - return false; - } - } - - float v_first = v_angles[0], v_last = v_angles[v_angles.size()-1]; - if(v_first == 90.0f) { - if(v_last == 180.0f) { - /* Flip to ensure that vertical angles always start at 0°. */ - for(int i = 0; i < v_angles.size(); i++) { - v_angles[i] = 180.0f - v_angles[i]; - } - } - else { - return false; - } - } - else if(v_first != 0.0f) { - return false; - } - - return true; + if (h_angles[0] == 90.0f) { + /* Some files are stored from 90° to 270°, so we just rotate them to the regular 0°-180° range here. */ + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] -= 90.0f; + } + } + + if (h_angles[0] != 0.0f) { + return false; + } + + if (h_angles.size() == 1) { + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + } + + if (h_angles[h_angles.size() - 1] == 90.0f) { + /* Only one quadrant is defined, so we need to mirror twice (from one to two, then to four). + * Since the two->four mirroring step might also be required if we get an input of two quadrants, + * we only do the first mirror here and later do the second mirror in either case. */ + int hnum = h_angles.size(); + for (int i = hnum - 2; i >= 0; i--) { + h_angles.push_back(180.0f - h_angles[i]); + intensity.push_back(intensity[i]); + } + } + + if (h_angles[h_angles.size() - 1] == 180.0f) { + /* Mirror half to the full range. */ + int hnum = h_angles.size(); + for (int i = hnum - 2; i >= 0; i--) { + h_angles.push_back(360.0f - h_angles[i]); + intensity.push_back(intensity[i]); + } + } + + /* Some files skip the 360° entry (contrary to standard) because it's supposed to be identical to the 0° entry. + * If the file has a discernible order in its spacing, just fix this. */ + if (h_angles[h_angles.size() - 1] != 360.0f) { + int hnum = h_angles.size(); + float last_step = h_angles[hnum - 1] - h_angles[hnum - 2]; + float first_step = h_angles[1] - h_angles[0]; + float difference = 360.0f - h_angles[hnum - 1]; + if (last_step == difference || first_step == difference) { + h_angles.push_back(360.0f); + intensity.push_back(intensity[0]); + } + else { + return false; + } + } + + float v_first = v_angles[0], v_last = v_angles[v_angles.size() - 1]; + if (v_first == 90.0f) { + if (v_last == 180.0f) { + /* Flip to ensure that vertical angles always start at 0°. */ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] = 180.0f - v_angles[i]; + } + } + else { + return false; + } + } + else if (v_first != 0.0f) { + return false; + } + + return true; } bool IESFile::process() { - if(h_angles.size() == 0 || v_angles.size() == 0) { - return false; - } - - if(type == TYPE_B) { - if(!process_type_b()) { - return false; - } - } - else { - assert(type == TYPE_C); - if(!process_type_c()) { - return false; - } - } - - assert(v_angles[0] == 0.0f); - assert(h_angles[0] == 0.0f); - assert(h_angles[h_angles.size()-1] == 360.0f); - - /* Convert from deg to rad. */ - for(int i = 0; i < v_angles.size(); i++) { - v_angles[i] *= M_PI_F / 180.f; - } - for(int i = 0; i < h_angles.size(); i++) { - h_angles[i] *= M_PI_F / 180.f; - } - - return true; + if (h_angles.size() == 0 || v_angles.size() == 0) { + return false; + } + + if (type == TYPE_B) { + if (!process_type_b()) { + return false; + } + } + else { + assert(type == TYPE_C); + if (!process_type_c()) { + return false; + } + } + + assert(v_angles[0] == 0.0f); + assert(h_angles[0] == 0.0f); + assert(h_angles[h_angles.size() - 1] == 360.0f); + + /* Convert from deg to rad. */ + for (int i = 0; i < v_angles.size(); i++) { + v_angles[i] *= M_PI_F / 180.f; + } + for (int i = 0; i < h_angles.size(); i++) { + h_angles[i] *= M_PI_F / 180.f; + } + + return true; } IESFile::~IESFile() { - clear(); + clear(); } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_ies.h b/intern/cycles/util/util_ies.h index 096b1fdf803..ab1b9ea57cf 100644 --- a/intern/cycles/util/util_ies.h +++ b/intern/cycles/util/util_ies.h @@ -23,39 +23,37 @@ CCL_NAMESPACE_BEGIN class IESFile { -public: - IESFile() {} - ~IESFile(); - - int packed_size(); - void pack(float *data); - - bool load(ustring ies); - void clear(); - -protected: - bool parse(ustring ies); - bool process(); - bool process_type_b(); - bool process_type_c(); - - /* The brightness distribution is stored in spherical coordinates. - * The horizontal angles correspond to theta in the regular notation - * and always span the full range from 0° to 360°. - * The vertical angles correspond to phi and always start at 0°. */ - vector<float> v_angles, h_angles; - /* The actual values are stored here, with every entry storing the values - * of one horizontal segment. */ - vector<vector<float> > intensity; - - /* Types of angle representation in IES files. Currently, only B and C are supported. */ - enum IESType { - TYPE_A = 3, - TYPE_B = 2, - TYPE_C = 1 - } type; + public: + IESFile() + { + } + ~IESFile(); + + int packed_size(); + void pack(float *data); + + bool load(ustring ies); + void clear(); + + protected: + bool parse(ustring ies); + bool process(); + bool process_type_b(); + bool process_type_c(); + + /* The brightness distribution is stored in spherical coordinates. + * The horizontal angles correspond to theta in the regular notation + * and always span the full range from 0° to 360°. + * The vertical angles correspond to phi and always start at 0°. */ + vector<float> v_angles, h_angles; + /* The actual values are stored here, with every entry storing the values + * of one horizontal segment. */ + vector<vector<float>> intensity; + + /* Types of angle representation in IES files. Currently, only B and C are supported. */ + enum IESType { TYPE_A = 3, TYPE_B = 2, TYPE_C = 1 } type; }; CCL_NAMESPACE_END -#endif /* __UTIL_IES_H__ */ +#endif /* __UTIL_IES_H__ */ diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h index da5f56271c8..8962c09d098 100644 --- a/intern/cycles/util/util_image.h +++ b/intern/cycles/util/util_image.h @@ -15,20 +15,20 @@ */ #ifndef __UTIL_IMAGE_H__ -#define __UTIL_IMAGE_H__ +# define __UTIL_IMAGE_H__ /* OpenImageIO is used for all image file reading and writing. */ -#include <OpenImageIO/imageio.h> +# include <OpenImageIO/imageio.h> -#include "util/util_vector.h" +# include "util/util_vector.h" CCL_NAMESPACE_BEGIN OIIO_NAMESPACE_USING template<typename T> -void util_image_resize_pixels(const vector<T>& input_pixels, +void util_image_resize_pixels(const vector<T> &input_pixels, const size_t input_width, const size_t input_height, const size_t input_depth, @@ -39,69 +39,59 @@ void util_image_resize_pixels(const vector<T>& input_pixels, size_t *output_depth); /* Cast input pixel from unknown storage to float. */ -template<typename T> -inline float util_image_cast_to_float(T value); +template<typename T> inline float util_image_cast_to_float(T value); -template<> -inline float util_image_cast_to_float(float value) +template<> inline float util_image_cast_to_float(float value) { - return value; + return value; } -template<> -inline float util_image_cast_to_float(uchar value) +template<> inline float util_image_cast_to_float(uchar value) { - return (float)value / 255.0f; + return (float)value / 255.0f; } -template<> -inline float util_image_cast_to_float(uint16_t value) +template<> inline float util_image_cast_to_float(uint16_t value) { - return (float)value / 65535.0f; + return (float)value / 65535.0f; } -template<> -inline float util_image_cast_to_float(half value) +template<> inline float util_image_cast_to_float(half value) { - return half_to_float(value); + return half_to_float(value); } /* Cast float value to output pixel type. */ -template<typename T> -inline T util_image_cast_from_float(float value); +template<typename T> inline T util_image_cast_from_float(float value); -template<> -inline float util_image_cast_from_float(float value) +template<> inline float util_image_cast_from_float(float value) { - return value; + return value; } -template<> -inline uchar util_image_cast_from_float(float value) +template<> inline uchar util_image_cast_from_float(float value) { - if(value < 0.0f) { - return 0; - } - else if(value > (1.0f - 0.5f / 255.0f)) { - return 255; - } - return (uchar)((255.0f * value) + 0.5f); + if (value < 0.0f) { + return 0; + } + else if (value > (1.0f - 0.5f / 255.0f)) { + return 255; + } + return (uchar)((255.0f * value) + 0.5f); } -template<> -inline uint16_t util_image_cast_from_float(float value) +template<> inline uint16_t util_image_cast_from_float(float value) { - if(value < 0.0f) { - return 0; - } - else if(value > (1.0f - 0.5f / 65535.0f)) { - return 65535; - } - return (uint16_t)((65535.0f * value) + 0.5f); + if (value < 0.0f) { + return 0; + } + else if (value > (1.0f - 0.5f / 65535.0f)) { + return 65535; + } + return (uint16_t)((65535.0f * value) + 0.5f); } -template<> -inline half util_image_cast_from_float(float value) +template<> inline half util_image_cast_from_float(float value) { - return float_to_half(value); + return float_to_half(value); } CCL_NAMESPACE_END -#endif /* __UTIL_IMAGE_H__ */ +#endif /* __UTIL_IMAGE_H__ */ #include "util/util_image_impl.h" diff --git a/intern/cycles/util/util_image_impl.h b/intern/cycles/util/util_image_impl.h index 5bc1c727595..3eb30d070ea 100644 --- a/intern/cycles/util/util_image_impl.h +++ b/intern/cycles/util/util_image_impl.h @@ -26,20 +26,21 @@ CCL_NAMESPACE_BEGIN namespace { template<typename T> -const T *util_image_read(const vector<T>& pixels, +const T *util_image_read(const vector<T> &pixels, const size_t width, const size_t height, const size_t /*depth*/, const size_t components, - const size_t x, const size_t y, const size_t z) { - const size_t index = ((size_t)z * (width * height) + - (size_t)y * width + - (size_t)x) * components; - return &pixels[index]; + const size_t x, + const size_t y, + const size_t z) +{ + const size_t index = ((size_t)z * (width * height) + (size_t)y * width + (size_t)x) * components; + return &pixels[index]; } template<typename T> -void util_image_downscale_sample(const vector<T>& pixels, +void util_image_downscale_sample(const vector<T> &pixels, const size_t width, const size_t height, const size_t depth, @@ -50,48 +51,41 @@ void util_image_downscale_sample(const vector<T>& pixels, const float z, T *result) { - assert(components <= 4); - const size_t ix = (size_t)x, - iy = (size_t)y, - iz = (size_t)z; - /* TODO(sergey): Support something smarter than box filer. */ - float accum[4] = {0}; - size_t count = 0; - for(size_t dz = 0; dz < kernel_size; ++dz) { - for(size_t dy = 0; dy < kernel_size; ++dy) { - for(size_t dx = 0; dx < kernel_size; ++dx) { - const size_t nx = ix + dx, - ny = iy + dy, - nz = iz + dz; - if(nx >= width || ny >= height || nz >= depth) { - continue; - } - const T *pixel = util_image_read(pixels, - width, height, depth, - components, - nx, ny, nz); - for(size_t k = 0; k < components; ++k) { - accum[k] += util_image_cast_to_float(pixel[k]); - } - ++count; - } - } - } - if(count != 0) { - const float inv_count = 1.0f / (float)count; - for(size_t k = 0; k < components; ++k) { - result[k] = util_image_cast_from_float<T>(accum[k] * inv_count); - } - } - else { - for(size_t k = 0; k < components; ++k) { - result[k] = T(0.0f); - } - } + assert(components <= 4); + const size_t ix = (size_t)x, iy = (size_t)y, iz = (size_t)z; + /* TODO(sergey): Support something smarter than box filer. */ + float accum[4] = {0}; + size_t count = 0; + for (size_t dz = 0; dz < kernel_size; ++dz) { + for (size_t dy = 0; dy < kernel_size; ++dy) { + for (size_t dx = 0; dx < kernel_size; ++dx) { + const size_t nx = ix + dx, ny = iy + dy, nz = iz + dz; + if (nx >= width || ny >= height || nz >= depth) { + continue; + } + const T *pixel = util_image_read(pixels, width, height, depth, components, nx, ny, nz); + for (size_t k = 0; k < components; ++k) { + accum[k] += util_image_cast_to_float(pixel[k]); + } + ++count; + } + } + } + if (count != 0) { + const float inv_count = 1.0f / (float)count; + for (size_t k = 0; k < components; ++k) { + result[k] = util_image_cast_from_float<T>(accum[k] * inv_count); + } + } + else { + for (size_t k = 0; k < components; ++k) { + result[k] = T(0.0f); + } + } } template<typename T> -void util_image_downscale_pixels(const vector<T>& input_pixels, +void util_image_downscale_pixels(const vector<T> &input_pixels, const size_t input_width, const size_t input_height, const size_t input_depth, @@ -102,31 +96,33 @@ void util_image_downscale_pixels(const vector<T>& input_pixels, const size_t output_depth, vector<T> *output_pixels) { - const size_t kernel_size = (size_t)(inv_scale_factor + 0.5f); - for(size_t z = 0; z < output_depth; ++z) { - for(size_t y = 0; y < output_height; ++y) { - for(size_t x = 0; x < output_width; ++x) { - const float input_x = (float)x * inv_scale_factor, - input_y = (float)y * inv_scale_factor, - input_z = (float)z * inv_scale_factor; - const size_t output_index = - (z * output_width * output_height + - y * output_width + x) * components; - util_image_downscale_sample(input_pixels, - input_width, input_height, input_depth, - components, - kernel_size, - input_x, input_y, input_z, - &output_pixels->at(output_index)); - } - } - } + const size_t kernel_size = (size_t)(inv_scale_factor + 0.5f); + for (size_t z = 0; z < output_depth; ++z) { + for (size_t y = 0; y < output_height; ++y) { + for (size_t x = 0; x < output_width; ++x) { + const float input_x = (float)x * inv_scale_factor, input_y = (float)y * inv_scale_factor, + input_z = (float)z * inv_scale_factor; + const size_t output_index = (z * output_width * output_height + y * output_width + x) * + components; + util_image_downscale_sample(input_pixels, + input_width, + input_height, + input_depth, + components, + kernel_size, + input_x, + input_y, + input_z, + &output_pixels->at(output_index)); + } + } + } } -} /* namespace */ +} /* namespace */ template<typename T> -void util_image_resize_pixels(const vector<T>& input_pixels, +void util_image_resize_pixels(const vector<T> &input_pixels, const size_t input_width, const size_t input_height, const size_t input_depth, @@ -137,39 +133,43 @@ void util_image_resize_pixels(const vector<T>& input_pixels, size_t *output_height, size_t *output_depth) { - /* Early output for case when no scaling is applied. */ - if(scale_factor == 1.0f) { - *output_width = input_width; - *output_height = input_height; - *output_depth = input_depth; - *output_pixels = input_pixels; - return; - } - /* First of all, we calculate output image dimensions. - * We clamp them to be 1 pixel at least so we do not generate degenerate - * image. - */ - *output_width = max((size_t)((float)input_width * scale_factor), (size_t)1); - *output_height = max((size_t)((float)input_height * scale_factor), (size_t)1); - *output_depth = max((size_t)((float)input_depth * scale_factor), (size_t)1); - /* Prepare pixel storage for the result. */ - const size_t num_output_pixels = ((*output_width) * - (*output_height) * - (*output_depth)) * components; - output_pixels->resize(num_output_pixels); - if(scale_factor < 1.0f) { - const float inv_scale_factor = 1.0f / scale_factor; - util_image_downscale_pixels(input_pixels, - input_width, input_height, input_depth, - components, - inv_scale_factor, - *output_width, *output_height, *output_depth, - output_pixels); - } else { - /* TODO(sergey): Needs implementation. */ - } + /* Early output for case when no scaling is applied. */ + if (scale_factor == 1.0f) { + *output_width = input_width; + *output_height = input_height; + *output_depth = input_depth; + *output_pixels = input_pixels; + return; + } + /* First of all, we calculate output image dimensions. + * We clamp them to be 1 pixel at least so we do not generate degenerate + * image. + */ + *output_width = max((size_t)((float)input_width * scale_factor), (size_t)1); + *output_height = max((size_t)((float)input_height * scale_factor), (size_t)1); + *output_depth = max((size_t)((float)input_depth * scale_factor), (size_t)1); + /* Prepare pixel storage for the result. */ + const size_t num_output_pixels = ((*output_width) * (*output_height) * (*output_depth)) * + components; + output_pixels->resize(num_output_pixels); + if (scale_factor < 1.0f) { + const float inv_scale_factor = 1.0f / scale_factor; + util_image_downscale_pixels(input_pixels, + input_width, + input_height, + input_depth, + components, + inv_scale_factor, + *output_width, + *output_height, + *output_depth, + output_pixels); + } + else { + /* TODO(sergey): Needs implementation. */ + } } CCL_NAMESPACE_END -#endif /* __UTIL_IMAGE_IMPL_H__ */ +#endif /* __UTIL_IMAGE_IMPL_H__ */ diff --git a/intern/cycles/util/util_list.h b/intern/cycles/util/util_list.h index fcf8e4f5c74..f555b001186 100644 --- a/intern/cycles/util/util_list.h +++ b/intern/cycles/util/util_list.h @@ -25,4 +25,4 @@ using std::list; CCL_NAMESPACE_END -#endif /* __UTIL_LIST_H__ */ +#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp index b0922db32fb..4a5e7e6a9ea 100644 --- a/intern/cycles/util/util_logging.cpp +++ b/intern/cycles/util/util_logging.cpp @@ -28,63 +28,55 @@ CCL_NAMESPACE_BEGIN void util_logging_init(const char *argv0) { #ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - /* Make it so ERROR messages are always print into console. */ - char severity_fatal[32]; - snprintf(severity_fatal, sizeof(severity_fatal), "%d", - google::GLOG_ERROR); + /* Make it so ERROR messages are always print into console. */ + char severity_fatal[32]; + snprintf(severity_fatal, sizeof(severity_fatal), "%d", google::GLOG_ERROR); - google::InitGoogleLogging(argv0); - SetCommandLineOption("logtostderr", "1"); - SetCommandLineOption("v", "0"); - SetCommandLineOption("stderrthreshold", severity_fatal); - SetCommandLineOption("minloglevel", severity_fatal); + google::InitGoogleLogging(argv0); + SetCommandLineOption("logtostderr", "1"); + SetCommandLineOption("v", "0"); + SetCommandLineOption("stderrthreshold", severity_fatal); + SetCommandLineOption("minloglevel", severity_fatal); #else - (void) argv0; + (void)argv0; #endif } void util_logging_start() { #ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - SetCommandLineOption("logtostderr", "1"); - SetCommandLineOption("v", "2"); - SetCommandLineOption("stderrthreshold", "1"); - SetCommandLineOption("minloglevel", "0"); + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + SetCommandLineOption("logtostderr", "1"); + SetCommandLineOption("v", "2"); + SetCommandLineOption("stderrthreshold", "1"); + SetCommandLineOption("minloglevel", "0"); #endif } void util_logging_verbosity_set(int verbosity) { #ifdef WITH_CYCLES_LOGGING - using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; - char val[10]; - snprintf(val, sizeof(val), "%d", verbosity); - SetCommandLineOption("v", val); + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + char val[10]; + snprintf(val, sizeof(val), "%d", verbosity); + SetCommandLineOption("v", val); #else - (void) verbosity; + (void)verbosity; #endif } -std::ostream& operator <<(std::ostream &os, - const int2 &value) +std::ostream &operator<<(std::ostream &os, const int2 &value) { - os << "(" << value.x - << ", " << value.y - << ")"; - return os; + os << "(" << value.x << ", " << value.y << ")"; + return os; } -std::ostream& operator <<(std::ostream &os, - const float3 &value) +std::ostream &operator<<(std::ostream &os, const float3 &value) { - os << "(" << value.x - << ", " << value.y - << ", " << value.z - << ")"; - return os; + os << "(" << value.x << ", " << value.y << ", " << value.z << ")"; + return os; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h index f66d7c92dcc..1a5e6666b32 100644 --- a/intern/cycles/util/util_logging.h +++ b/intern/cycles/util/util_logging.h @@ -28,25 +28,31 @@ CCL_NAMESPACE_BEGIN #if !defined(WITH_CYCLES_LOGGING) || defined(__KERNEL_GPU__) class StubStream { -public: - template<class T> - StubStream& operator<<(const T&) { - return *this; - } + public: + template<class T> StubStream &operator<<(const T &) + { + return *this; + } }; class LogMessageVoidify { -public: - LogMessageVoidify() { } - void operator&(StubStream&) { } + public: + LogMessageVoidify() + { + } + void operator&(StubStream &) + { + } }; -# define LOG_SUPPRESS() (true) ? ((void) 0) : LogMessageVoidify() & StubStream() +# define LOG_SUPPRESS() (true) ? ((void)0) : LogMessageVoidify() & StubStream() # define LOG(severity) LOG_SUPPRESS() # define VLOG(severity) LOG_SUPPRESS() #endif -#define VLOG_ONCE(level, flag) if(!flag) flag = true, VLOG(level) +#define VLOG_ONCE(level, flag) \ + if (!flag) \ + flag = true, VLOG(level) struct int2; struct float3; @@ -55,11 +61,9 @@ void util_logging_init(const char *argv0); void util_logging_start(); void util_logging_verbosity_set(int verbosity); -std::ostream& operator <<(std::ostream &os, - const int2 &value); -std::ostream& operator <<(std::ostream &os, - const float3 &value); +std::ostream &operator<<(std::ostream &os, const int2 &value); +std::ostream &operator<<(std::ostream &os, const float3 &value); CCL_NAMESPACE_END -#endif /* __UTIL_LOGGING_H__ */ +#endif /* __UTIL_LOGGING_H__ */ diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h index 1952d33ada8..3c9288417cf 100644 --- a/intern/cycles/util/util_map.h +++ b/intern/cycles/util/util_map.h @@ -28,4 +28,4 @@ using std::unordered_map; CCL_NAMESPACE_END -#endif /* __UTIL_MAP_H__ */ +#endif /* __UTIL_MAP_H__ */ diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index 6167119f873..2c7f826db93 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -26,12 +26,11 @@ # include <cmath> #endif - #ifndef __KERNEL_OPENCL__ # include <float.h> # include <math.h> # include <stdio.h> -#endif /* __KERNEL_OPENCL__ */ +#endif /* __KERNEL_OPENCL__ */ #include "util/util_types.h" @@ -41,22 +40,22 @@ CCL_NAMESPACE_BEGIN /* Division */ #ifndef M_PI_F -# define M_PI_F (3.1415926535897932f) /* pi */ +# define M_PI_F (3.1415926535897932f) /* pi */ #endif #ifndef M_PI_2_F -# define M_PI_2_F (1.5707963267948966f) /* pi/2 */ +# define M_PI_2_F (1.5707963267948966f) /* pi/2 */ #endif #ifndef M_PI_4_F -# define M_PI_4_F (0.7853981633974830f) /* pi/4 */ +# define M_PI_4_F (0.7853981633974830f) /* pi/4 */ #endif #ifndef M_1_PI_F -# define M_1_PI_F (0.3183098861837067f) /* 1/pi */ +# define M_1_PI_F (0.3183098861837067f) /* 1/pi */ #endif #ifndef M_2_PI_F -# define M_2_PI_F (0.6366197723675813f) /* 2/pi */ +# define M_2_PI_F (0.6366197723675813f) /* 2/pi */ #endif #ifndef M_1_2PI_F -# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */ +# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */ #endif #ifndef M_SQRT_PI_8_F # define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */ @@ -67,21 +66,21 @@ CCL_NAMESPACE_BEGIN /* Multiplication */ #ifndef M_2PI_F -# define M_2PI_F (6.2831853071795864f) /* 2*pi */ +# define M_2PI_F (6.2831853071795864f) /* 2*pi */ #endif #ifndef M_4PI_F -# define M_4PI_F (12.566370614359172f) /* 4*pi */ +# define M_4PI_F (12.566370614359172f) /* 4*pi */ #endif /* Float sqrt variations */ #ifndef M_SQRT2_F -# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ +# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ #endif #ifndef M_LN2_F -# define M_LN2_F (0.6931471805599453f) /* ln(2) */ +# define M_LN2_F (0.6931471805599453f) /* ln(2) */ #endif #ifndef M_LN10_F -# define M_LN10_F (2.3025850929940457f) /* ln(10) */ +# define M_LN10_F (2.3025850929940457f) /* ln(10) */ #endif /* Scalar */ @@ -90,15 +89,15 @@ CCL_NAMESPACE_BEGIN # ifndef __KERNEL_OPENCL__ ccl_device_inline float fmaxf(float a, float b) { - return (a > b)? a: b; + return (a > b) ? a : b; } ccl_device_inline float fminf(float a, float b) { - return (a < b)? a: b; + return (a < b) ? a : b; } -# endif /* !__KERNEL_OPENCL__ */ -#endif /* _WIN32 */ +# endif /* !__KERNEL_OPENCL__ */ +#endif /* _WIN32 */ #ifndef __KERNEL_GPU__ using std::isfinite; @@ -107,37 +106,37 @@ using std::sqrt; ccl_device_inline int abs(int x) { - return (x > 0)? x: -x; + return (x > 0) ? x : -x; } ccl_device_inline int max(int a, int b) { - return (a > b)? a: b; + return (a > b) ? a : b; } ccl_device_inline int min(int a, int b) { - return (a < b)? a: b; + return (a < b) ? a : b; } ccl_device_inline float max(float a, float b) { - return (a > b)? a: b; + return (a > b) ? a : b; } ccl_device_inline float min(float a, float b) { - return (a < b)? a: b; + return (a < b) ? a : b; } ccl_device_inline double max(double a, double b) { - return (a > b)? a: b; + return (a > b) ? a : b; } ccl_device_inline double min(double a, double b) { - return (a < b)? a: b; + return (a < b) ? a : b; } /* These 2 guys are templated for usage with registers data. @@ -146,27 +145,25 @@ ccl_device_inline double min(double a, double b) * But for other devices we'll need to be careful about this. */ -template<typename T> -ccl_device_inline T min4(const T& a, const T& b, const T& c, const T& d) +template<typename T> ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d) { - return min(min(a,b),min(c,d)); + return min(min(a, b), min(c, d)); } -template<typename T> -ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d) +template<typename T> ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d) { - return max(max(a,b),max(c,d)); + return max(max(a, b), max(c, d)); } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ ccl_device_inline float min4(float a, float b, float c, float d) { - return min(min(a, b), min(c, d)); + return min(min(a, b), min(c, d)); } ccl_device_inline float max4(float a, float b, float c, float d) { - return max(max(a, b), max(c, d)); + return max(max(a, b), max(c, d)); } #ifndef __KERNEL_OPENCL__ @@ -174,189 +171,210 @@ ccl_device_inline float max4(float a, float b, float c, float d) ccl_device_inline int as_int(uint i) { - union { uint ui; int i; } u; - u.ui = i; - return u.i; + union { + uint ui; + int i; + } u; + u.ui = i; + return u.i; } ccl_device_inline uint as_uint(int i) { - union { uint ui; int i; } u; - u.i = i; - return u.ui; + union { + uint ui; + int i; + } u; + u.i = i; + return u.ui; } ccl_device_inline uint as_uint(float f) { - union { uint i; float f; } u; - u.f = f; - return u.i; + union { + uint i; + float f; + } u; + u.f = f; + return u.i; } ccl_device_inline int __float_as_int(float f) { - union { int i; float f; } u; - u.f = f; - return u.i; + union { + int i; + float f; + } u; + u.f = f; + return u.i; } ccl_device_inline float __int_as_float(int i) { - union { int i; float f; } u; - u.i = i; - return u.f; + union { + int i; + float f; + } u; + u.i = i; + return u.f; } ccl_device_inline uint __float_as_uint(float f) { - union { uint i; float f; } u; - u.f = f; - return u.i; + union { + uint i; + float f; + } u; + u.f = f; + return u.i; } ccl_device_inline float __uint_as_float(uint i) { - union { uint i; float f; } u; - u.i = i; - return u.f; + union { + uint i; + float f; + } u; + u.i = i; + return u.f; } ccl_device_inline int4 __float4_as_int4(float4 f) { -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(f.m128)); - #else - return make_int4(__float_as_int(f.x), - __float_as_int(f.y), - __float_as_int(f.z), - __float_as_int(f.w)); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(f.m128)); +# else + return make_int4( + __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); +# endif } ccl_device_inline float4 __int4_as_float4(int4 i) { -#ifdef __KERNEL_SSE__ - return float4(_mm_castsi128_ps(i.m128)); -#else - return make_float4(__int_as_float(i.x), - __int_as_float(i.y), - __int_as_float(i.z), - __int_as_float(i.w)); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_castsi128_ps(i.m128)); +# else + return make_float4( + __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); +# endif } -#endif /* __KERNEL_OPENCL__ */ +#endif /* __KERNEL_OPENCL__ */ /* Versions of functions which are safe for fast math. */ ccl_device_inline bool isnan_safe(float f) { - unsigned int x = __float_as_uint(f); - return (x << 1) > 0xff000000u; + unsigned int x = __float_as_uint(f); + return (x << 1) > 0xff000000u; } ccl_device_inline bool isfinite_safe(float f) { - /* By IEEE 754 rule, 2*Inf equals Inf */ - unsigned int x = __float_as_uint(f); - return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f*f)) && !((x << 1) > 0xff000000u); + /* By IEEE 754 rule, 2*Inf equals Inf */ + unsigned int x = __float_as_uint(f); + return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); } ccl_device_inline float ensure_finite(float v) { - return isfinite_safe(v)? v : 0.0f; + return isfinite_safe(v) ? v : 0.0f; } #ifndef __KERNEL_OPENCL__ ccl_device_inline int clamp(int a, int mn, int mx) { - return min(max(a, mn), mx); + return min(max(a, mn), mx); } ccl_device_inline float clamp(float a, float mn, float mx) { - return min(max(a, mn), mx); + return min(max(a, mn), mx); } ccl_device_inline float mix(float a, float b, float t) { - return a + t*(b - a); + return a + t * (b - a); } -#endif /* __KERNEL_OPENCL__ */ +#endif /* __KERNEL_OPENCL__ */ #ifndef __KERNEL_CUDA__ ccl_device_inline float saturate(float a) { - return clamp(a, 0.0f, 1.0f); + return clamp(a, 0.0f, 1.0f); } -#endif /* __KERNEL_CUDA__ */ +#endif /* __KERNEL_CUDA__ */ ccl_device_inline int float_to_int(float f) { - return (int)f; + return (int)f; } ccl_device_inline int floor_to_int(float f) { - return float_to_int(floorf(f)); + return float_to_int(floorf(f)); } ccl_device_inline int quick_floor_to_int(float x) { - return float_to_int(x) - ((x < 0) ? 1 : 0); + return float_to_int(x) - ((x < 0) ? 1 : 0); } ccl_device_inline int ceil_to_int(float f) { - return float_to_int(ceilf(f)); + return float_to_int(ceilf(f)); } ccl_device_inline float signf(float f) { - return (f < 0.0f)? -1.0f: 1.0f; + return (f < 0.0f) ? -1.0f : 1.0f; } ccl_device_inline float nonzerof(float f, float eps) { - if(fabsf(f) < eps) - return signf(f)*eps; - else - return f; + if (fabsf(f) < eps) + return signf(f) * eps; + else + return f; } ccl_device_inline float smoothstepf(float f) { - float ff = f*f; - return (3.0f*ff - 2.0f*ff*f); + float ff = f * f; + return (3.0f * ff - 2.0f * ff * f); } ccl_device_inline int mod(int x, int m) { - return (x % m + m) % m; + return (x % m + m) % m; } ccl_device_inline float3 float2_to_float3(const float2 a) { - return make_float3(a.x, a.y, 0.0f); + return make_float3(a.x, a.y, 0.0f); } ccl_device_inline float3 float4_to_float3(const float4 a) { - return make_float3(a.x, a.y, a.z); + return make_float3(a.x, a.y, a.z); } ccl_device_inline float4 float3_to_float4(const float3 a) { - return make_float4(a.x, a.y, a.z, 1.0f); + return make_float4(a.x, a.y, a.z, 1.0f); } ccl_device_inline float inverse_lerp(float a, float b, float x) { - return (x - a) / (b - a); + return (x - a) / (b - a); } /* Cubic interpolation between b and c, a and d are the previous and next point. */ ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x) { - return 0.5f*(((d + 3.0f*(b-c) - a)*x + (2.0f*a - 5.0f*b + 4.0f*c - d))*x + (c - a))*x + b; + return 0.5f * + (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x + + (c - a)) * + x + + b; } CCL_NAMESPACE_END @@ -376,26 +394,22 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_OPENCL__ /* Interpolation */ -template<class A, class B> A lerp(const A& a, const A& b, const B& t) +template<class A, class B> A lerp(const A &a, const A &b, const B &t) { - return (A)(a * ((B)1 - t) + b * t); + return (A)(a * ((B)1 - t) + b * t); } -#endif /* __KERNEL_OPENCL__ */ +#endif /* __KERNEL_OPENCL__ */ /* Triangle */ #ifndef __KERNEL_OPENCL__ -ccl_device_inline float triangle_area(const float3& v1, - const float3& v2, - const float3& v3) +ccl_device_inline float triangle_area(const float3 &v1, const float3 &v2, const float3 &v3) #else -ccl_device_inline float triangle_area(const float3 v1, - const float3 v2, - const float3 v3) +ccl_device_inline float triangle_area(const float3 v1, const float3 v2, const float3 v3) #endif { - return len(cross(v3 - v2, v1 - v2))*0.5f; + return len(cross(v3 - v2, v1 - v2)) * 0.5f; } /* Orthonormal vectors */ @@ -403,240 +417,240 @@ ccl_device_inline float triangle_area(const float3 v1, ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b) { #if 0 - if(fabsf(N.y) >= 0.999f) { - *a = make_float3(1, 0, 0); - *b = make_float3(0, 0, 1); - return; - } - if(fabsf(N.z) >= 0.999f) { - *a = make_float3(1, 0, 0); - *b = make_float3(0, 1, 0); - return; - } + if(fabsf(N.y) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 0, 1); + return; + } + if(fabsf(N.z) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 1, 0); + return; + } #endif - if(N.x != N.y || N.x != N.z) - *a = make_float3(N.z-N.y, N.x-N.z, N.y-N.x); //(1,1,1)x N - else - *a = make_float3(N.z-N.y, N.x+N.z, -N.y-N.x); //(-1,1,1)x N + if (N.x != N.y || N.x != N.z) + *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N + else + *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N - *a = normalize(*a); - *b = cross(N, *a); + *a = normalize(*a); + *b = cross(N, *a); } /* Color division */ ccl_device_inline float3 safe_invert_color(float3 a) { - float x, y, z; + float x, y, z; - x = (a.x != 0.0f)? 1.0f/a.x: 0.0f; - y = (a.y != 0.0f)? 1.0f/a.y: 0.0f; - z = (a.z != 0.0f)? 1.0f/a.z: 0.0f; + x = (a.x != 0.0f) ? 1.0f / a.x : 0.0f; + y = (a.y != 0.0f) ? 1.0f / a.y : 0.0f; + z = (a.z != 0.0f) ? 1.0f / a.z : 0.0f; - return make_float3(x, y, z); + return make_float3(x, y, z); } ccl_device_inline float3 safe_divide_color(float3 a, float3 b) { - float x, y, z; + float x, y, z; - x = (b.x != 0.0f)? a.x/b.x: 0.0f; - y = (b.y != 0.0f)? a.y/b.y: 0.0f; - z = (b.z != 0.0f)? a.z/b.z: 0.0f; + x = (b.x != 0.0f) ? a.x / b.x : 0.0f; + y = (b.y != 0.0f) ? a.y / b.y : 0.0f; + z = (b.z != 0.0f) ? a.z / b.z : 0.0f; - return make_float3(x, y, z); + return make_float3(x, y, z); } ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b) { - float x, y, z; - - x = (b.x != 0.0f)? a.x/b.x: 0.0f; - y = (b.y != 0.0f)? a.y/b.y: 0.0f; - z = (b.z != 0.0f)? a.z/b.z: 0.0f; - - /* try to get gray even if b is zero */ - if(b.x == 0.0f) { - if(b.y == 0.0f) { - x = z; - y = z; - } - else if(b.z == 0.0f) { - x = y; - z = y; - } - else - x = 0.5f*(y + z); - } - else if(b.y == 0.0f) { - if(b.z == 0.0f) { - y = x; - z = x; - } - else - y = 0.5f*(x + z); - } - else if(b.z == 0.0f) { - z = 0.5f*(x + y); - } - - return make_float3(x, y, z); + float x, y, z; + + x = (b.x != 0.0f) ? a.x / b.x : 0.0f; + y = (b.y != 0.0f) ? a.y / b.y : 0.0f; + z = (b.z != 0.0f) ? a.z / b.z : 0.0f; + + /* try to get gray even if b is zero */ + if (b.x == 0.0f) { + if (b.y == 0.0f) { + x = z; + y = z; + } + else if (b.z == 0.0f) { + x = y; + z = y; + } + else + x = 0.5f * (y + z); + } + else if (b.y == 0.0f) { + if (b.z == 0.0f) { + y = x; + z = x; + } + else + y = 0.5f * (x + z); + } + else if (b.z == 0.0f) { + z = 0.5f * (x + y); + } + + return make_float3(x, y, z); } /* Rotation of point around axis and angle */ ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) { - float costheta = cosf(angle); - float sintheta = sinf(angle); - float3 r; + float costheta = cosf(angle); + float sintheta = sinf(angle); + float3 r; - r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + - (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + - (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); + r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + + (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + + (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); - r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + - ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + - (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); + r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + + ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + + (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); - r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + - (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + - ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); + r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + + (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + + ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); - return r; + return r; } /* NaN-safe math ops */ ccl_device_inline float safe_sqrtf(float f) { - return sqrtf(max(f, 0.0f)); + return sqrtf(max(f, 0.0f)); } ccl_device float safe_asinf(float a) { - return asinf(clamp(a, -1.0f, 1.0f)); + return asinf(clamp(a, -1.0f, 1.0f)); } ccl_device float safe_acosf(float a) { - return acosf(clamp(a, -1.0f, 1.0f)); + return acosf(clamp(a, -1.0f, 1.0f)); } ccl_device float compatible_powf(float x, float y) { #ifdef __KERNEL_GPU__ - if(y == 0.0f) /* x^0 -> 1, including 0^0 */ - return 1.0f; - - /* GPU pow doesn't accept negative x, do manual checks here */ - if(x < 0.0f) { - if(fmodf(-y, 2.0f) == 0.0f) - return powf(-x, y); - else - return -powf(-x, y); - } - else if(x == 0.0f) - return 0.0f; + if (y == 0.0f) /* x^0 -> 1, including 0^0 */ + return 1.0f; + + /* GPU pow doesn't accept negative x, do manual checks here */ + if (x < 0.0f) { + if (fmodf(-y, 2.0f) == 0.0f) + return powf(-x, y); + else + return -powf(-x, y); + } + else if (x == 0.0f) + return 0.0f; #endif - return powf(x, y); + return powf(x, y); } ccl_device float safe_powf(float a, float b) { - if(UNLIKELY(a < 0.0f && b != float_to_int(b))) - return 0.0f; + if (UNLIKELY(a < 0.0f && b != float_to_int(b))) + return 0.0f; - return compatible_powf(a, b); + return compatible_powf(a, b); } ccl_device float safe_divide(float a, float b) { - return (b != 0.0f)? a/b: 0.0f; + return (b != 0.0f) ? a / b : 0.0f; } ccl_device float safe_logf(float a, float b) { - if(UNLIKELY(a <= 0.0f || b <= 0.0f)) - return 0.0f; + if (UNLIKELY(a <= 0.0f || b <= 0.0f)) + return 0.0f; - return safe_divide(logf(a),logf(b)); + return safe_divide(logf(a), logf(b)); } ccl_device float safe_modulo(float a, float b) { - return (b != 0.0f)? fmodf(a, b): 0.0f; + return (b != 0.0f) ? fmodf(a, b) : 0.0f; } ccl_device_inline float sqr(float a) { - return a * a; + return a * a; } ccl_device_inline float pow20(float a) { - return sqr(sqr(sqr(sqr(a))*a)); + return sqr(sqr(sqr(sqr(a)) * a)); } ccl_device_inline float pow22(float a) { - return sqr(a*sqr(sqr(sqr(a))*a)); + return sqr(a * sqr(sqr(sqr(a)) * a)); } ccl_device_inline float beta(float x, float y) { #ifndef __KERNEL_OPENCL__ - return expf(lgammaf(x) + lgammaf(y) - lgammaf(x+y)); + return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y)); #else - return expf(lgamma(x) + lgamma(y) - lgamma(x+y)); + return expf(lgamma(x) + lgamma(y) - lgamma(x + y)); #endif } ccl_device_inline float xor_signmask(float x, int y) { - return __int_as_float(__float_as_int(x) ^ y); + return __int_as_float(__float_as_int(x) ^ y); } ccl_device float bits_to_01(uint bits) { - return bits * (1.0f/(float)0xFFFFFFFF); + return bits * (1.0f / (float)0xFFFFFFFF); } /* projections */ ccl_device_inline float2 map_to_tube(const float3 co) { - float len, u, v; - len = sqrtf(co.x * co.x + co.y * co.y); - if(len > 0.0f) { - u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; - v = (co.z + 1.0f) * 0.5f; - } - else { - u = v = 0.0f; - } - return make_float2(u, v); + float len, u, v; + len = sqrtf(co.x * co.x + co.y * co.y); + if (len > 0.0f) { + u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; + v = (co.z + 1.0f) * 0.5f; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); } ccl_device_inline float2 map_to_sphere(const float3 co) { - float l = len(co); - float u, v; - if(l > 0.0f) { - if(UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { - u = 0.0f; /* othwise domain error */ - } - else { - u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f; - } - v = 1.0f - safe_acosf(co.z / l) / M_PI_F; - } - else { - u = v = 0.0f; - } - return make_float2(u, v); + float l = len(co); + float u, v; + if (l > 0.0f) { + if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { + u = 0.0f; /* othwise domain error */ + } + else { + u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f; + } + v = 1.0f - safe_acosf(co.z / l) / M_PI_F; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); } CCL_NAMESPACE_END -#endif /* __UTIL_MATH_H__ */ +#endif /* __UTIL_MATH_H__ */ diff --git a/intern/cycles/util/util_math_cdf.cpp b/intern/cycles/util/util_math_cdf.cpp index c14d4793ea1..a58bab188ef 100644 --- a/intern/cycles/util/util_math_cdf.cpp +++ b/intern/cycles/util/util_math_cdf.cpp @@ -27,41 +27,44 @@ void util_cdf_invert(const int resolution, const float to, const vector<float> &cdf, const bool make_symmetric, - vector<float> &inv_cdf) { - const float inv_resolution = 1.0f / (float)resolution; - const float range = to - from; - inv_cdf.resize(resolution); - if(make_symmetric) { - const int half_size = (resolution - 1) / 2; - for(int i = 0; i <= half_size; i++) { - float x = i / (float)half_size; - int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); - float t; - if(index < cdf.size() - 1) { - t = (x - cdf[index])/(cdf[index+1] - cdf[index]); - } else { - t = 0.0f; - index = cdf.size() - 1; - } - float y = ((index + t) / (resolution - 1)) * (2.0f * range); - inv_cdf[half_size+i] = 0.5f*(1.0f + y); - inv_cdf[half_size-i] = 0.5f*(1.0f - y); - } - } - else { - for(int i = 0; i < resolution; i++) { - float x = from + range * (float)i * inv_resolution; - int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); - float t; - if(index < cdf.size() - 1) { - t = (x - cdf[index])/(cdf[index+1] - cdf[index]); - } else { - t = 0.0f; - index = resolution; - } - inv_cdf[i] = (index + t) * inv_resolution; - } - } + vector<float> &inv_cdf) +{ + const float inv_resolution = 1.0f / (float)resolution; + const float range = to - from; + inv_cdf.resize(resolution); + if (make_symmetric) { + const int half_size = (resolution - 1) / 2; + for (int i = 0; i <= half_size; i++) { + float x = i / (float)half_size; + int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); + float t; + if (index < cdf.size() - 1) { + t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); + } + else { + t = 0.0f; + index = cdf.size() - 1; + } + float y = ((index + t) / (resolution - 1)) * (2.0f * range); + inv_cdf[half_size + i] = 0.5f * (1.0f + y); + inv_cdf[half_size - i] = 0.5f * (1.0f - y); + } + } + else { + for (int i = 0; i < resolution; i++) { + float x = from + range * (float)i * inv_resolution; + int index = upper_bound(cdf.begin(), cdf.end(), x) - cdf.begin(); + float t; + if (index < cdf.size() - 1) { + t = (x - cdf[index]) / (cdf[index + 1] - cdf[index]); + } + else { + t = 0.0f; + index = resolution; + } + inv_cdf[i] = (index + t) * inv_resolution; + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_math_cdf.h b/intern/cycles/util/util_math_cdf.h index 983855e3e9b..43995204263 100644 --- a/intern/cycles/util/util_math_cdf.h +++ b/intern/cycles/util/util_math_cdf.h @@ -24,27 +24,24 @@ CCL_NAMESPACE_BEGIN /* Evaluate CDF of a given functor with given range and resolution. */ -template <typename Functor> -void util_cdf_evaluate(const int resolution, - const float from, - const float to, - Functor functor, - vector<float> &cdf) +template<typename Functor> +void util_cdf_evaluate( + const int resolution, const float from, const float to, Functor functor, vector<float> &cdf) { - const int cdf_count = resolution + 1; - const float range = to - from; - cdf.resize(cdf_count); - cdf[0] = 0.0f; - /* Actual CDF evaluation. */ - for(int i = 0; i < resolution; ++i) { - float x = from + range * (float)i / (resolution - 1); - float y = functor(x); - cdf[i + 1] = cdf[i] + fabsf(y); - } - /* Normalize the CDF. */ - for(int i = 0; i <= resolution; i++) { - cdf[i] /= cdf[resolution]; - } + const int cdf_count = resolution + 1; + const float range = to - from; + cdf.resize(cdf_count); + cdf[0] = 0.0f; + /* Actual CDF evaluation. */ + for (int i = 0; i < resolution; ++i) { + float x = from + range * (float)i / (resolution - 1); + float y = functor(x); + cdf[i + 1] = cdf[i] + fabsf(y); + } + /* Normalize the CDF. */ + for (int i = 0; i <= resolution; i++) { + cdf[i] /= cdf[resolution]; + } } /* Invert pre-calculated CDF function. */ @@ -56,7 +53,7 @@ void util_cdf_invert(const int resolution, vector<float> &inv_cdf); /* Evaluate inverted CDF of a given functor with given range and resolution. */ -template <typename Functor> +template<typename Functor> void util_cdf_inverted(const int resolution, const float from, const float to, @@ -64,15 +61,15 @@ void util_cdf_inverted(const int resolution, const bool make_symmetric, vector<float> &inv_cdf) { - vector<float> cdf; - /* There is no much smartness going around lower resolution for the CDF table, - * this just to match the old code from pixel filter so it all stays exactly - * the same and no regression tests are failed. - */ - util_cdf_evaluate(resolution - 1, from, to, functor, cdf); - util_cdf_invert(resolution, from, to, cdf, make_symmetric, inv_cdf); + vector<float> cdf; + /* There is no much smartness going around lower resolution for the CDF table, + * this just to match the old code from pixel filter so it all stays exactly + * the same and no regression tests are failed. + */ + util_cdf_evaluate(resolution - 1, from, to, functor, cdf); + util_cdf_invert(resolution, from, to, cdf, make_symmetric, inv_cdf); } CCL_NAMESPACE_END -#endif /* __UTIL_MATH_H_CDF__ */ +#endif /* __UTIL_MATH_H_CDF__ */ diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h index fe4e02197a4..872271666aa 100644 --- a/intern/cycles/util/util_math_fast.h +++ b/intern/cycles/util/util_math_fast.h @@ -49,18 +49,18 @@ CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c) { - /* NOTE: In the future we may want to explicitly ask for a fused - * multiply-add in a specialized version for float. - * - * NOTE: GCC/ICC will turn this (for float) into a FMA unless - * explicitly asked not to, clang seems to leave the code alone. - */ - return a * b + c; + /* NOTE: In the future we may want to explicitly ask for a fused + * multiply-add in a specialized version for float. + * + * NOTE: GCC/ICC will turn this (for float) into a FMA unless + * explicitly asked not to, clang seems to leave the code alone. + */ + return a * b + c; } ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c) { - return a * b + c; + return a * b + c; } /* @@ -82,116 +82,117 @@ ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c) /* Round to nearest integer, returning as an int. */ ccl_device_inline int fast_rint(float x) { - /* used by sin/cos/tan range reduction. */ + /* used by sin/cos/tan range reduction. */ #ifdef __KERNEL_SSE4__ - /* Single roundps instruction on SSE4.1+ (for gcc/clang at least). */ - return float_to_int(rintf(x)); + /* Single roundps instruction on SSE4.1+ (for gcc/clang at least). */ + return float_to_int(rintf(x)); #else - /* emulate rounding by adding/substracting 0.5. */ - return float_to_int(x + copysignf(0.5f, x)); + /* emulate rounding by adding/substracting 0.5. */ + return float_to_int(x + copysignf(0.5f, x)); #endif } ccl_device float fast_sinf(float x) { - /* Very accurate argument reduction from SLEEF, - * starts failing around x=262000 - * - * Results on: [-2pi,2pi]. - * - * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp, - * 1.19209e-07 max error - */ - int q = fast_rint(x * M_1_PI_F); - float qf = q; - x = madd(qf, -0.78515625f*4, x); - x = madd(qf, -0.00024187564849853515625f*4, x); - x = madd(qf, -3.7747668102383613586e-08f*4, x); - x = madd(qf, -1.2816720341285448015e-12f*4, x); - x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */ - float s = x * x; - if((q & 1) != 0) x = -x; - /* This polynomial approximation has very low error on [-pi/2,+pi/2] - * 1.19209e-07 max error in total over [-2pi,+2pi]. */ - float u = 2.6083159809786593541503e-06f; - u = madd(u, s, -0.0001981069071916863322258f); - u = madd(u, s, +0.00833307858556509017944336f); - u = madd(u, s, -0.166666597127914428710938f); - u = madd(s, u * x, x); - /* For large x, the argument reduction can fail and the polynomial can be - * evaluated with arguments outside the valid internal. Just clamp the bad - * values away (setting to 0.0f means no branches need to be generated). */ - if(fabsf(u) > 1.0f) { - u = 0.0f; - } - return u; + /* Very accurate argument reduction from SLEEF, + * starts failing around x=262000 + * + * Results on: [-2pi,2pi]. + * + * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp, + * 1.19209e-07 max error + */ + int q = fast_rint(x * M_1_PI_F); + float qf = q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */ + float s = x * x; + if ((q & 1) != 0) + x = -x; + /* This polynomial approximation has very low error on [-pi/2,+pi/2] + * 1.19209e-07 max error in total over [-2pi,+2pi]. */ + float u = 2.6083159809786593541503e-06f; + u = madd(u, s, -0.0001981069071916863322258f); + u = madd(u, s, +0.00833307858556509017944336f); + u = madd(u, s, -0.166666597127914428710938f); + u = madd(s, u * x, x); + /* For large x, the argument reduction can fail and the polynomial can be + * evaluated with arguments outside the valid internal. Just clamp the bad + * values away (setting to 0.0f means no branches need to be generated). */ + if (fabsf(u) > 1.0f) { + u = 0.0f; + } + return u; } ccl_device float fast_cosf(float x) { - /* Same argument reduction as fast_sinf(). */ - int q = fast_rint(x * M_1_PI_F); - float qf = q; - x = madd(qf, -0.78515625f*4, x); - x = madd(qf, -0.00024187564849853515625f*4, x); - x = madd(qf, -3.7747668102383613586e-08f*4, x); - x = madd(qf, -1.2816720341285448015e-12f*4, x); - x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. */ - float s = x * x; - /* Polynomial from SLEEF's sincosf, max error is - * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */ - float u = -2.71811842367242206819355e-07f; - u = madd(u, s, +2.47990446951007470488548e-05f); - u = madd(u, s, -0.00138888787478208541870117f); - u = madd(u, s, +0.0416666641831398010253906f); - u = madd(u, s, -0.5f); - u = madd(u, s, +1.0f); - if((q & 1) != 0) { - u = -u; - } - if(fabsf(u) > 1.0f) { - u = 0.0f; - } - return u; + /* Same argument reduction as fast_sinf(). */ + int q = fast_rint(x * M_1_PI_F); + float qf = q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. */ + float s = x * x; + /* Polynomial from SLEEF's sincosf, max error is + * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */ + float u = -2.71811842367242206819355e-07f; + u = madd(u, s, +2.47990446951007470488548e-05f); + u = madd(u, s, -0.00138888787478208541870117f); + u = madd(u, s, +0.0416666641831398010253906f); + u = madd(u, s, -0.5f); + u = madd(u, s, +1.0f); + if ((q & 1) != 0) { + u = -u; + } + if (fabsf(u) > 1.0f) { + u = 0.0f; + } + return u; } -ccl_device void fast_sincosf(float x, float* sine, float* cosine) +ccl_device void fast_sincosf(float x, float *sine, float *cosine) { - /* Same argument reduction as fast_sin. */ - int q = fast_rint(x * M_1_PI_F); - float qf = q; - x = madd(qf, -0.78515625f*4, x); - x = madd(qf, -0.00024187564849853515625f*4, x); - x = madd(qf, -3.7747668102383613586e-08f*4, x); - x = madd(qf, -1.2816720341285448015e-12f*4, x); - x = M_PI_2_F - (M_PI_2_F - x); // crush denormals - float s = x * x; - /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. */ - if((q & 1) != 0) { - x = -x; - } - float su = 2.6083159809786593541503e-06f; - su = madd(su, s, -0.0001981069071916863322258f); - su = madd(su, s, +0.00833307858556509017944336f); - su = madd(su, s, -0.166666597127914428710938f); - su = madd(s, su * x, x); - float cu = -2.71811842367242206819355e-07f; - cu = madd(cu, s, +2.47990446951007470488548e-05f); - cu = madd(cu, s, -0.00138888787478208541870117f); - cu = madd(cu, s, +0.0416666641831398010253906f); - cu = madd(cu, s, -0.5f); - cu = madd(cu, s, +1.0f); - if((q & 1) != 0) { - cu = -cu; - } - if(fabsf(su) > 1.0f) { - su = 0.0f; - } - if(fabsf(cu) > 1.0f) { - cu = 0.0f; - } - *sine = su; - *cosine = cu; + /* Same argument reduction as fast_sin. */ + int q = fast_rint(x * M_1_PI_F); + float qf = q; + x = madd(qf, -0.78515625f * 4, x); + x = madd(qf, -0.00024187564849853515625f * 4, x); + x = madd(qf, -3.7747668102383613586e-08f * 4, x); + x = madd(qf, -1.2816720341285448015e-12f * 4, x); + x = M_PI_2_F - (M_PI_2_F - x); // crush denormals + float s = x * x; + /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. */ + if ((q & 1) != 0) { + x = -x; + } + float su = 2.6083159809786593541503e-06f; + su = madd(su, s, -0.0001981069071916863322258f); + su = madd(su, s, +0.00833307858556509017944336f); + su = madd(su, s, -0.166666597127914428710938f); + su = madd(s, su * x, x); + float cu = -2.71811842367242206819355e-07f; + cu = madd(cu, s, +2.47990446951007470488548e-05f); + cu = madd(cu, s, -0.00138888787478208541870117f); + cu = madd(cu, s, +0.0416666641831398010253906f); + cu = madd(cu, s, -0.5f); + cu = madd(cu, s, +1.0f); + if ((q & 1) != 0) { + cu = -cu; + } + if (fabsf(su) > 1.0f) { + su = 0.0f; + } + if (fabsf(cu) > 1.0f) { + cu = 0.0f; + } + *sine = su; + *cosine = cu; } /* NOTE: this approximation is only valid on [-8192.0,+8192.0], it starts @@ -200,33 +201,33 @@ ccl_device void fast_sincosf(float x, float* sine, float* cosine) */ ccl_device float fast_tanf(float x) { - /* Derived from SLEEF implementation. - * - * Note that we cannot apply the "denormal crush" trick everywhere because - * we sometimes need to take the reciprocal of the polynomial - */ - int q = fast_rint(x * 2.0f * M_1_PI_F); - float qf = q; - x = madd(qf, -0.78515625f*2, x); - x = madd(qf, -0.00024187564849853515625f*2, x); - x = madd(qf, -3.7747668102383613586e-08f*2, x); - x = madd(qf, -1.2816720341285448015e-12f*2, x); - if((q & 1) == 0) { - /* Crush denormals (only if we aren't inverting the result later). */ - x = M_PI_4_F - (M_PI_4_F - x); - } - float s = x * x; - float u = 0.00927245803177356719970703f; - u = madd(u, s, 0.00331984995864331722259521f); - u = madd(u, s, 0.0242998078465461730957031f); - u = madd(u, s, 0.0534495301544666290283203f); - u = madd(u, s, 0.133383005857467651367188f); - u = madd(u, s, 0.333331853151321411132812f); - u = madd(s, u * x, x); - if((q & 1) != 0) { - u = -1.0f / u; - } - return u; + /* Derived from SLEEF implementation. + * + * Note that we cannot apply the "denormal crush" trick everywhere because + * we sometimes need to take the reciprocal of the polynomial + */ + int q = fast_rint(x * 2.0f * M_1_PI_F); + float qf = q; + x = madd(qf, -0.78515625f * 2, x); + x = madd(qf, -0.00024187564849853515625f * 2, x); + x = madd(qf, -3.7747668102383613586e-08f * 2, x); + x = madd(qf, -1.2816720341285448015e-12f * 2, x); + if ((q & 1) == 0) { + /* Crush denormals (only if we aren't inverting the result later). */ + x = M_PI_4_F - (M_PI_4_F - x); + } + float s = x * x; + float u = 0.00927245803177356719970703f; + u = madd(u, s, 0.00331984995864331722259521f); + u = madd(u, s, 0.0242998078465461730957031f); + u = madd(u, s, 0.0534495301544666290283203f); + u = madd(u, s, 0.133383005857467651367188f); + u = madd(u, s, 0.333331853151321411132812f); + u = madd(s, u * x, x); + if ((q & 1) != 0) { + u = -1.0f / u; + } + return u; } /* Fast, approximate sin(x*M_PI) with maximum absolute error of 0.000918954611. @@ -235,122 +236,119 @@ ccl_device float fast_tanf(float x) */ ccl_device float fast_sinpif(float x) { - /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */ - const float z = x - ((x + 25165824.0f) - 25165824.0f); - const float y = z - z * fabsf(z); - const float Q = 3.10396624f; - const float P = 3.584135056f; /* P = 16-4*Q */ - return y * (Q + P * fabsf(y)); - - /* The original article used used inferior constants for Q and P and - * so had max error 1.091e-3. - * - * The optimal value for Q was determined by exhaustive search, minimizing - * the absolute numerical error relative to float(std::sin(double(phi*M_PI))) - * over the interval [0,2] (which is where most of the invocations happen). - * - * The basic idea of this approximation starts with the coarse approximation: - * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x)) - * - * This approximation always _over_ estimates the target. On the other hand, - * the curve: - * sin(pi*x) ~= f(x) * abs(f(x)) / 4 - * - * always lies _under_ the target. Thus we can simply numerically search for - * the optimal constant to LERP these curves into a more precise - * approximation. - * - * After folding the constants together and simplifying the resulting math, - * we end up with the compact implementation above. - * - * NOTE: this function actually computes sin(x * pi) which avoids one or two - * mults in many cases and guarantees exact values at integer periods. - */ + /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */ + const float z = x - ((x + 25165824.0f) - 25165824.0f); + const float y = z - z * fabsf(z); + const float Q = 3.10396624f; + const float P = 3.584135056f; /* P = 16-4*Q */ + return y * (Q + P * fabsf(y)); + + /* The original article used used inferior constants for Q and P and + * so had max error 1.091e-3. + * + * The optimal value for Q was determined by exhaustive search, minimizing + * the absolute numerical error relative to float(std::sin(double(phi*M_PI))) + * over the interval [0,2] (which is where most of the invocations happen). + * + * The basic idea of this approximation starts with the coarse approximation: + * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x)) + * + * This approximation always _over_ estimates the target. On the other hand, + * the curve: + * sin(pi*x) ~= f(x) * abs(f(x)) / 4 + * + * always lies _under_ the target. Thus we can simply numerically search for + * the optimal constant to LERP these curves into a more precise + * approximation. + * + * After folding the constants together and simplifying the resulting math, + * we end up with the compact implementation above. + * + * NOTE: this function actually computes sin(x * pi) which avoids one or two + * mults in many cases and guarantees exact values at integer periods. + */ } /* Fast approximate cos(x*M_PI) with ~0.1% absolute error. */ ccl_device_inline float fast_cospif(float x) { - return fast_sinpif(x+0.5f); + return fast_sinpif(x + 0.5f); } ccl_device float fast_acosf(float x) { - const float f = fabsf(x); - /* clamp and crush denormals. */ - const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; - /* Based on http://www.pouet.net/topic.php?which=9132&page=2 - * 85% accurate (ulp 0) - * Examined 2130706434 values of acos: 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush" - * Examined 2130706434 values of acos: 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush" - */ - const float a = sqrtf(1.0f - m) * - (1.5707963267f + m * (-0.213300989f + m * - (0.077980478f + m * -0.02164095f))); - return x < 0 ? M_PI_F - a : a; + const float f = fabsf(x); + /* clamp and crush denormals. */ + const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; + /* Based on http://www.pouet.net/topic.php?which=9132&page=2 + * 85% accurate (ulp 0) + * Examined 2130706434 values of acos: 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush" + * Examined 2130706434 values of acos: 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush" + */ + const float a = sqrtf(1.0f - m) * + (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); + return x < 0 ? M_PI_F - a : a; } ccl_device float fast_asinf(float x) { - /* Based on acosf approximation above. - * Max error is 4.51133e-05 (ulps are higher because we are consistently off - * by a little amount). - */ - const float f = fabsf(x); - /* Clamp and crush denormals. */ - const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; - const float a = M_PI_2_F - sqrtf(1.0f - m) * - (1.5707963267f + m * (-0.213300989f + m * - (0.077980478f + m * -0.02164095f))); - return copysignf(a, x); + /* Based on acosf approximation above. + * Max error is 4.51133e-05 (ulps are higher because we are consistently off + * by a little amount). + */ + const float f = fabsf(x); + /* Clamp and crush denormals. */ + const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; + const float a = M_PI_2_F - + sqrtf(1.0f - m) * (1.5707963267f + + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f))); + return copysignf(a, x); } ccl_device float fast_atanf(float x) { - const float a = fabsf(x); - const float k = a > 1.0f ? 1 / a : a; - const float s = 1.0f - (1.0f - k); /* Crush denormals. */ - const float t = s * s; - /* http://mathforum.org/library/drmath/view/62672.html - * Examined 4278190080 values of atan: 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals) - * Examined 4278190080 values of atan: 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals) - */ - float r = s * madd(0.43157974f, t, 1.0f) / - madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); - if(a > 1.0f) { - r = M_PI_2_F - r; - } - return copysignf(r, x); + const float a = fabsf(x); + const float k = a > 1.0f ? 1 / a : a; + const float s = 1.0f - (1.0f - k); /* Crush denormals. */ + const float t = s * s; + /* http://mathforum.org/library/drmath/view/62672.html + * Examined 4278190080 values of atan: 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals) + * Examined 4278190080 values of atan: 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals) + */ + float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); + if (a > 1.0f) { + r = M_PI_2_F - r; + } + return copysignf(r, x); } ccl_device float fast_atan2f(float y, float x) { - /* Based on atan approximation above. - * - * The special cases around 0 and infinity were tested explicitly. - * - * The only case not handled correctly is x=NaN,y=0 which returns 0 instead - * of nan. - */ - const float a = fabsf(x); - const float b = fabsf(y); - - const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a)); - const float s = 1.0f - (1.0f - k); /* Crush denormals */ - const float t = s * s; - - float r = s * madd(0.43157974f, t, 1.0f) / - madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); - - if(b > a) { - /* Account for arg reduction. */ - r = M_PI_2_F - r; - } - /* Test sign bit of x. */ - if(__float_as_uint(x) & 0x80000000u) { - r = M_PI_F - r; - } - return copysignf(r, y); + /* Based on atan approximation above. + * + * The special cases around 0 and infinity were tested explicitly. + * + * The only case not handled correctly is x=NaN,y=0 which returns 0 instead + * of nan. + */ + const float a = fabsf(x); + const float b = fabsf(y); + + const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a)); + const float s = 1.0f - (1.0f - k); /* Crush denormals */ + const float t = s * s; + + float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); + + if (b > a) { + /* Account for arg reduction. */ + r = M_PI_2_F - r; + } + /* Test sign bit of x. */ + if (__float_as_uint(x) & 0x80000000u) { + r = M_PI_F - r; + } + return copysignf(r, y); } /* Based on: @@ -359,204 +357,207 @@ ccl_device float fast_atan2f(float y, float x) */ ccl_device float fast_log2f(float x) { - /* NOTE: clamp to avoid special cases and make result "safe" from large - * negative values/nans. */ - x = clamp(x, FLT_MIN, FLT_MAX); - unsigned bits = __float_as_uint(x); - int exponent = (int)(bits >> 23) - 127; - float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f; - /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]: - * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error. - * ulp histogram: - * 0 = 97.46% - * 1 = 2.29% - * 2 = 0.11% - */ - float f2 = f * f; - float f4 = f2 * f2; - float hi = madd(f, -0.00931049621349f, 0.05206469089414f); - float lo = madd(f, 0.47868480909345f, -0.72116591947498f); - hi = madd(f, hi, -0.13753123777116f); - hi = madd(f, hi, 0.24187369696082f); - hi = madd(f, hi, -0.34730547155299f); - lo = madd(f, lo, 1.442689881667200f); - return ((f4 * hi) + (f * lo)) + exponent; + /* NOTE: clamp to avoid special cases and make result "safe" from large + * negative values/nans. */ + x = clamp(x, FLT_MIN, FLT_MAX); + unsigned bits = __float_as_uint(x); + int exponent = (int)(bits >> 23) - 127; + float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f; + /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]: + * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error. + * ulp histogram: + * 0 = 97.46% + * 1 = 2.29% + * 2 = 0.11% + */ + float f2 = f * f; + float f4 = f2 * f2; + float hi = madd(f, -0.00931049621349f, 0.05206469089414f); + float lo = madd(f, 0.47868480909345f, -0.72116591947498f); + hi = madd(f, hi, -0.13753123777116f); + hi = madd(f, hi, 0.24187369696082f); + hi = madd(f, hi, -0.34730547155299f); + lo = madd(f, lo, 1.442689881667200f); + return ((f4 * hi) + (f * lo)) + exponent; } ccl_device_inline float fast_logf(float x) { - /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]: - * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error. - */ - return fast_log2f(x) * M_LN2_F; + /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]: + * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error. + */ + return fast_log2f(x) * M_LN2_F; } ccl_device_inline float fast_log10(float x) { - /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]: - * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error. - */ - return fast_log2f(x) * M_LN2_F / M_LN10_F; + /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]: + * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error. + */ + return fast_log2f(x) * M_LN2_F / M_LN10_F; } ccl_device float fast_logb(float x) { - /* Don't bother with denormals. */ - x = fabsf(x); - x = clamp(x, FLT_MIN, FLT_MAX); - unsigned bits = __float_as_uint(x); - return (int)(bits >> 23) - 127; + /* Don't bother with denormals. */ + x = fabsf(x); + x = clamp(x, FLT_MIN, FLT_MAX); + unsigned bits = __float_as_uint(x); + return (int)(bits >> 23) - 127; } ccl_device float fast_exp2f(float x) { - /* Clamp to safe range for final addition. */ - x = clamp(x, -126.0f, 126.0f); - /* Range reduction. */ - int m = (int)x; x -= m; - x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */ - /* 5th degree polynomial generated with sollya - * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff, - * 232 max ulp. - * - * ulp histogram: - * 0 = 87.81% - * 1 = 4.18% - */ - float r = 1.33336498402e-3f; - r = madd(x, r, 9.810352697968e-3f); - r = madd(x, r, 5.551834031939e-2f); - r = madd(x, r, 0.2401793301105f); - r = madd(x, r, 0.693144857883f); - r = madd(x, r, 1.0f); - /* Multiply by 2 ^ m by adding in the exponent. */ - /* NOTE: left-shift of negative number is undefined behavior. */ - return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23)); + /* Clamp to safe range for final addition. */ + x = clamp(x, -126.0f, 126.0f); + /* Range reduction. */ + int m = (int)x; + x -= m; + x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */ + /* 5th degree polynomial generated with sollya + * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff, + * 232 max ulp. + * + * ulp histogram: + * 0 = 87.81% + * 1 = 4.18% + */ + float r = 1.33336498402e-3f; + r = madd(x, r, 9.810352697968e-3f); + r = madd(x, r, 5.551834031939e-2f); + r = madd(x, r, 0.2401793301105f); + r = madd(x, r, 0.693144857883f); + r = madd(x, r, 1.0f); + /* Multiply by 2 ^ m by adding in the exponent. */ + /* NOTE: left-shift of negative number is undefined behavior. */ + return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23)); } ccl_device_inline float fast_expf(float x) { - /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]: - * 2.6666452 avg ulp diff, 230 max ulp. - */ - return fast_exp2f(x / M_LN2_F); + /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]: + * 2.6666452 avg ulp diff, 230 max ulp. + */ + return fast_exp2f(x / M_LN2_F); } #ifndef __KERNEL_GPU__ ccl_device float4 fast_exp2f4(float4 x) { - const float4 one = make_float4(1.0f); - const float4 limit = make_float4(126.0f); - x = clamp(x, -limit, limit); - int4 m = make_int4(x); - x = one - (one - (x - make_float4(m))); - float4 r = make_float4(1.33336498402e-3f); - r = madd4(x, r, make_float4(9.810352697968e-3f)); - r = madd4(x, r, make_float4(5.551834031939e-2f)); - r = madd4(x, r, make_float4(0.2401793301105f)); - r = madd4(x, r, make_float4(0.693144857883f)); - r = madd4(x, r, make_float4(1.0f)); - return __int4_as_float4(__float4_as_int4(r) + (m << 23)); + const float4 one = make_float4(1.0f); + const float4 limit = make_float4(126.0f); + x = clamp(x, -limit, limit); + int4 m = make_int4(x); + x = one - (one - (x - make_float4(m))); + float4 r = make_float4(1.33336498402e-3f); + r = madd4(x, r, make_float4(9.810352697968e-3f)); + r = madd4(x, r, make_float4(5.551834031939e-2f)); + r = madd4(x, r, make_float4(0.2401793301105f)); + r = madd4(x, r, make_float4(0.693144857883f)); + r = madd4(x, r, make_float4(1.0f)); + return __int4_as_float4(__float4_as_int4(r) + (m << 23)); } ccl_device_inline float4 fast_expf4(float4 x) { - return fast_exp2f4(x / M_LN2_F); + return fast_exp2f4(x / M_LN2_F); } #endif ccl_device_inline float fast_exp10(float x) { - /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]: - * 2.71732409 avg ulp diff, 232 max ulp. - */ - return fast_exp2f(x * M_LN10_F / M_LN2_F); + /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]: + * 2.71732409 avg ulp diff, 232 max ulp. + */ + return fast_exp2f(x * M_LN10_F / M_LN2_F); } ccl_device_inline float fast_expm1f(float x) { - if(fabsf(x) < 1e-5f) { - x = 1.0f - (1.0f - x); /* Crush denormals. */ - return madd(0.5f, x * x, x); - } - else { - return fast_expf(x) - 1.0f; - } + if (fabsf(x) < 1e-5f) { + x = 1.0f - (1.0f - x); /* Crush denormals. */ + return madd(0.5f, x * x, x); + } + else { + return fast_expf(x) - 1.0f; + } } ccl_device float fast_sinhf(float x) { - float a = fabsf(x); - if(a > 1.0f) { - /* Examined 53389559 values of sinh on [1,87.3300018]: - * 33.6886442 avg ulp diff, 178 max ulp. */ - float e = fast_expf(a); - return copysignf(0.5f * e - 0.5f / e, x); - } - else { - a = 1.0f - (1.0f - a); /* Crush denorms. */ - float a2 = a * a; - /* Degree 7 polynomial generated with sollya. */ - /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */ - float r = 2.03945513931e-4f; - r = madd(r, a2, 8.32990277558e-3f); - r = madd(r, a2, 0.1666673421859f); - r = madd(r * a, a2, a); - return copysignf(r, x); - } + float a = fabsf(x); + if (a > 1.0f) { + /* Examined 53389559 values of sinh on [1,87.3300018]: + * 33.6886442 avg ulp diff, 178 max ulp. */ + float e = fast_expf(a); + return copysignf(0.5f * e - 0.5f / e, x); + } + else { + a = 1.0f - (1.0f - a); /* Crush denorms. */ + float a2 = a * a; + /* Degree 7 polynomial generated with sollya. */ + /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */ + float r = 2.03945513931e-4f; + r = madd(r, a2, 8.32990277558e-3f); + r = madd(r, a2, 0.1666673421859f); + r = madd(r * a, a2, a); + return copysignf(r, x); + } } ccl_device_inline float fast_coshf(float x) { - /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]: - * 1.78256726 avg ulp diff, 178 max ulp. - */ - float e = fast_expf(fabsf(x)); - return 0.5f * e + 0.5f / e; + /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]: + * 1.78256726 avg ulp diff, 178 max ulp. + */ + float e = fast_expf(fabsf(x)); + return 0.5f * e + 0.5f / e; } ccl_device_inline float fast_tanhf(float x) { - /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]: - * 3.12924e-06 max error. - */ - /* NOTE: ulp error is high because of sub-optimal handling around the origin. */ - float e = fast_expf(2.0f * fabsf(x)); - return copysignf(1.0f - 2.0f / (1.0f + e), x); + /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]: + * 3.12924e-06 max error. + */ + /* NOTE: ulp error is high because of sub-optimal handling around the origin. */ + float e = fast_expf(2.0f * fabsf(x)); + return copysignf(1.0f - 2.0f / (1.0f + e), x); } ccl_device float fast_safe_powf(float x, float y) { - if(y == 0) return 1.0f; /* x^1=1 */ - if(x == 0) return 0.0f; /* 0^y=0 */ - float sign = 1.0f; - if(x < 0.0f) { - /* if x is negative, only deal with integer powers - * powf returns NaN for non-integers, we will return 0 instead. - */ - int ybits = __float_as_int(y) & 0x7fffffff; - if(ybits >= 0x4b800000) { - // always even int, keep positive - } - else if(ybits >= 0x3f800000) { - /* Bigger than 1, check. */ - int k = (ybits >> 23) - 127; /* Get exponent. */ - int j = ybits >> (23 - k); /* Shift out possible fractional bits. */ - if((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */ - /* +1 for even, -1 for odd. */ - sign = __int_as_float(0x3f800000 | (j << 31)); - } - else { - /* Not an integer. */ - return 0.0f; - } - } - else { - /* Not an integer. */ - return 0.0f; - } - } - return sign * fast_exp2f(y * fast_log2f(fabsf(x))); + if (y == 0) + return 1.0f; /* x^1=1 */ + if (x == 0) + return 0.0f; /* 0^y=0 */ + float sign = 1.0f; + if (x < 0.0f) { + /* if x is negative, only deal with integer powers + * powf returns NaN for non-integers, we will return 0 instead. + */ + int ybits = __float_as_int(y) & 0x7fffffff; + if (ybits >= 0x4b800000) { + // always even int, keep positive + } + else if (ybits >= 0x3f800000) { + /* Bigger than 1, check. */ + int k = (ybits >> 23) - 127; /* Get exponent. */ + int j = ybits >> (23 - k); /* Shift out possible fractional bits. */ + if ((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */ + /* +1 for even, -1 for odd. */ + sign = __int_as_float(0x3f800000 | (j << 31)); + } + else { + /* Not an integer. */ + return 0.0f; + } + } + else { + /* Not an integer. */ + return 0.0f; + } + } + return sign * fast_exp2f(y * fast_log2f(fabsf(x))); } /* TODO(sergey): Check speed with our erf functions implementation from @@ -565,74 +566,75 @@ ccl_device float fast_safe_powf(float x, float y) ccl_device_inline float fast_erff(float x) { - /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */ - /* Abramowitz and Stegun, 7.1.28. */ - const float a1 = 0.0705230784f; - const float a2 = 0.0422820123f; - const float a3 = 0.0092705272f; - const float a4 = 0.0001520143f; - const float a5 = 0.0002765672f; - const float a6 = 0.0000430638f; - const float a = fabsf(x); - if(a >= 12.3f) { - return copysignf(1.0f, x); - } - const float b = 1.0f - (1.0f - a); /* Crush denormals. */ - const float r = madd(madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f); - const float s = r * r; /* ^2 */ - const float t = s * s; /* ^4 */ - const float u = t * t; /* ^8 */ - const float v = u * u; /* ^16 */ - return copysignf(1.0f - 1.0f / v, x); + /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */ + /* Abramowitz and Stegun, 7.1.28. */ + const float a1 = 0.0705230784f; + const float a2 = 0.0422820123f; + const float a3 = 0.0092705272f; + const float a4 = 0.0001520143f; + const float a5 = 0.0002765672f; + const float a6 = 0.0000430638f; + const float a = fabsf(x); + if (a >= 12.3f) { + return copysignf(1.0f, x); + } + const float b = 1.0f - (1.0f - a); /* Crush denormals. */ + const float r = madd( + madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f); + const float s = r * r; /* ^2 */ + const float t = s * s; /* ^4 */ + const float u = t * t; /* ^8 */ + const float v = u * u; /* ^16 */ + return copysignf(1.0f - 1.0f / v, x); } ccl_device_inline float fast_erfcf(float x) { - /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error. - * - * ulp histogram: - * - * 0 = 80.30% - */ - return 1.0f - fast_erff(x); + /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error. + * + * ulp histogram: + * + * 0 = 80.30% + */ + return 1.0f - fast_erff(x); } ccl_device_inline float fast_ierff(float x) { - /* From: Approximating the erfinv function by Mike Giles. */ - /* To avoid trouble at the limit, clamp input to 1-eps. */ - float a = fabsf(x); - if(a > 0.99999994f) { - a = 0.99999994f; - } - float w = -fast_logf((1.0f - a) * (1.0f + a)), p; - if(w < 5.0f) { - w = w - 2.5f; - p = 2.81022636e-08f; - p = madd(p, w, 3.43273939e-07f); - p = madd(p, w, -3.5233877e-06f); - p = madd(p, w, -4.39150654e-06f); - p = madd(p, w, 0.00021858087f); - p = madd(p, w, -0.00125372503f); - p = madd(p, w, -0.00417768164f); - p = madd(p, w, 0.246640727f); - p = madd(p, w, 1.50140941f); - } - else { - w = sqrtf(w) - 3.0f; - p = -0.000200214257f; - p = madd(p, w, 0.000100950558f); - p = madd(p, w, 0.00134934322f); - p = madd(p, w, -0.00367342844f); - p = madd(p, w, 0.00573950773f); - p = madd(p, w, -0.0076224613f); - p = madd(p, w, 0.00943887047f); - p = madd(p, w, 1.00167406f); - p = madd(p, w, 2.83297682f); - } - return p * x; + /* From: Approximating the erfinv function by Mike Giles. */ + /* To avoid trouble at the limit, clamp input to 1-eps. */ + float a = fabsf(x); + if (a > 0.99999994f) { + a = 0.99999994f; + } + float w = -fast_logf((1.0f - a) * (1.0f + a)), p; + if (w < 5.0f) { + w = w - 2.5f; + p = 2.81022636e-08f; + p = madd(p, w, 3.43273939e-07f); + p = madd(p, w, -3.5233877e-06f); + p = madd(p, w, -4.39150654e-06f); + p = madd(p, w, 0.00021858087f); + p = madd(p, w, -0.00125372503f); + p = madd(p, w, -0.00417768164f); + p = madd(p, w, 0.246640727f); + p = madd(p, w, 1.50140941f); + } + else { + w = sqrtf(w) - 3.0f; + p = -0.000200214257f; + p = madd(p, w, 0.000100950558f); + p = madd(p, w, 0.00134934322f); + p = madd(p, w, -0.00367342844f); + p = madd(p, w, 0.00573950773f); + p = madd(p, w, -0.0076224613f); + p = madd(p, w, 0.00943887047f); + p = madd(p, w, 1.00167406f); + p = madd(p, w, 2.83297682f); + } + return p * x; } CCL_NAMESPACE_END -#endif /* __UTIL_FAST_MATH__ */ +#endif /* __UTIL_FAST_MATH__ */ diff --git a/intern/cycles/util/util_math_float2.h b/intern/cycles/util/util_math_float2.h index 6fe8e3b4115..9feaf042d19 100644 --- a/intern/cycles/util/util_math_float2.h +++ b/intern/cycles/util/util_math_float2.h @@ -28,206 +28,206 @@ CCL_NAMESPACE_BEGIN */ #ifndef __KERNEL_OPENCL__ -ccl_device_inline float2 operator-(const float2& a); -ccl_device_inline float2 operator*(const float2& a, const float2& b); -ccl_device_inline float2 operator*(const float2& a, float f); -ccl_device_inline float2 operator*(float f, const float2& a); -ccl_device_inline float2 operator/(float f, const float2& a); -ccl_device_inline float2 operator/(const float2& a, float f); -ccl_device_inline float2 operator/(const float2& a, const float2& b); -ccl_device_inline float2 operator+(const float2& a, const float2& b); -ccl_device_inline float2 operator-(const float2& a, const float2& b); -ccl_device_inline float2 operator+=(float2& a, const float2& b); -ccl_device_inline float2 operator*=(float2& a, const float2& b); -ccl_device_inline float2 operator*=(float2& a, float f); -ccl_device_inline float2 operator/=(float2& a, const float2& b); -ccl_device_inline float2 operator/=(float2& a, float f); - -ccl_device_inline bool operator==(const float2& a, const float2& b); -ccl_device_inline bool operator!=(const float2& a, const float2& b); - -ccl_device_inline bool is_zero(const float2& a); -ccl_device_inline float average(const float2& a); -ccl_device_inline float dot(const float2& a, const float2& b); -ccl_device_inline float cross(const float2& a, const float2& b); -ccl_device_inline float len(const float2& a); -ccl_device_inline float2 normalize(const float2& a); -ccl_device_inline float2 normalize_len(const float2& a, float *t); -ccl_device_inline float2 safe_normalize(const float2& a); -ccl_device_inline float2 min(const float2& a, const float2& b); -ccl_device_inline float2 max(const float2& a, const float2& b); -ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx); -ccl_device_inline float2 fabs(const float2& a); -ccl_device_inline float2 as_float2(const float4& a); -ccl_device_inline float2 interp(const float2& a, const float2& b, float t); -#endif /* !__KERNEL_OPENCL__ */ +ccl_device_inline float2 operator-(const float2 &a); +ccl_device_inline float2 operator*(const float2 &a, const float2 &b); +ccl_device_inline float2 operator*(const float2 &a, float f); +ccl_device_inline float2 operator*(float f, const float2 &a); +ccl_device_inline float2 operator/(float f, const float2 &a); +ccl_device_inline float2 operator/(const float2 &a, float f); +ccl_device_inline float2 operator/(const float2 &a, const float2 &b); +ccl_device_inline float2 operator+(const float2 &a, const float2 &b); +ccl_device_inline float2 operator-(const float2 &a, const float2 &b); +ccl_device_inline float2 operator+=(float2 &a, const float2 &b); +ccl_device_inline float2 operator*=(float2 &a, const float2 &b); +ccl_device_inline float2 operator*=(float2 &a, float f); +ccl_device_inline float2 operator/=(float2 &a, const float2 &b); +ccl_device_inline float2 operator/=(float2 &a, float f); + +ccl_device_inline bool operator==(const float2 &a, const float2 &b); +ccl_device_inline bool operator!=(const float2 &a, const float2 &b); + +ccl_device_inline bool is_zero(const float2 &a); +ccl_device_inline float average(const float2 &a); +ccl_device_inline float dot(const float2 &a, const float2 &b); +ccl_device_inline float cross(const float2 &a, const float2 &b); +ccl_device_inline float len(const float2 &a); +ccl_device_inline float2 normalize(const float2 &a); +ccl_device_inline float2 normalize_len(const float2 &a, float *t); +ccl_device_inline float2 safe_normalize(const float2 &a); +ccl_device_inline float2 min(const float2 &a, const float2 &b); +ccl_device_inline float2 max(const float2 &a, const float2 &b); +ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx); +ccl_device_inline float2 fabs(const float2 &a); +ccl_device_inline float2 as_float2(const float4 &a); +ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t); +#endif /* !__KERNEL_OPENCL__ */ /******************************************************************************* * Definition. */ #ifndef __KERNEL_OPENCL__ -ccl_device_inline float2 operator-(const float2& a) +ccl_device_inline float2 operator-(const float2 &a) { - return make_float2(-a.x, -a.y); + return make_float2(-a.x, -a.y); } -ccl_device_inline float2 operator*(const float2& a, const float2& b) +ccl_device_inline float2 operator*(const float2 &a, const float2 &b) { - return make_float2(a.x*b.x, a.y*b.y); + return make_float2(a.x * b.x, a.y * b.y); } -ccl_device_inline float2 operator*(const float2& a, float f) +ccl_device_inline float2 operator*(const float2 &a, float f) { - return make_float2(a.x*f, a.y*f); + return make_float2(a.x * f, a.y * f); } -ccl_device_inline float2 operator*(float f, const float2& a) +ccl_device_inline float2 operator*(float f, const float2 &a) { - return make_float2(a.x*f, a.y*f); + return make_float2(a.x * f, a.y * f); } -ccl_device_inline float2 operator/(float f, const float2& a) +ccl_device_inline float2 operator/(float f, const float2 &a) { - return make_float2(f/a.x, f/a.y); + return make_float2(f / a.x, f / a.y); } -ccl_device_inline float2 operator/(const float2& a, float f) +ccl_device_inline float2 operator/(const float2 &a, float f) { - float invf = 1.0f/f; - return make_float2(a.x*invf, a.y*invf); + float invf = 1.0f / f; + return make_float2(a.x * invf, a.y * invf); } -ccl_device_inline float2 operator/(const float2& a, const float2& b) +ccl_device_inline float2 operator/(const float2 &a, const float2 &b) { - return make_float2(a.x/b.x, a.y/b.y); + return make_float2(a.x / b.x, a.y / b.y); } -ccl_device_inline float2 operator+(const float2& a, const float2& b) +ccl_device_inline float2 operator+(const float2 &a, const float2 &b) { - return make_float2(a.x+b.x, a.y+b.y); + return make_float2(a.x + b.x, a.y + b.y); } -ccl_device_inline float2 operator-(const float2& a, const float2& b) +ccl_device_inline float2 operator-(const float2 &a, const float2 &b) { - return make_float2(a.x-b.x, a.y-b.y); + return make_float2(a.x - b.x, a.y - b.y); } -ccl_device_inline float2 operator+=(float2& a, const float2& b) +ccl_device_inline float2 operator+=(float2 &a, const float2 &b) { - return a = a + b; + return a = a + b; } -ccl_device_inline float2 operator*=(float2& a, const float2& b) +ccl_device_inline float2 operator*=(float2 &a, const float2 &b) { - return a = a * b; + return a = a * b; } -ccl_device_inline float2 operator*=(float2& a, float f) +ccl_device_inline float2 operator*=(float2 &a, float f) { - return a = a * f; + return a = a * f; } -ccl_device_inline float2 operator/=(float2& a, const float2& b) +ccl_device_inline float2 operator/=(float2 &a, const float2 &b) { - return a = a / b; + return a = a / b; } -ccl_device_inline float2 operator/=(float2& a, float f) +ccl_device_inline float2 operator/=(float2 &a, float f) { - float invf = 1.0f/f; - return a = a * invf; + float invf = 1.0f / f; + return a = a * invf; } -ccl_device_inline bool operator==(const float2& a, const float2& b) +ccl_device_inline bool operator==(const float2 &a, const float2 &b) { - return (a.x == b.x && a.y == b.y); + return (a.x == b.x && a.y == b.y); } -ccl_device_inline bool operator!=(const float2& a, const float2& b) +ccl_device_inline bool operator!=(const float2 &a, const float2 &b) { - return !(a == b); + return !(a == b); } -ccl_device_inline bool is_zero(const float2& a) +ccl_device_inline bool is_zero(const float2 &a) { - return (a.x == 0.0f && a.y == 0.0f); + return (a.x == 0.0f && a.y == 0.0f); } -ccl_device_inline float average(const float2& a) +ccl_device_inline float average(const float2 &a) { - return (a.x + a.y)*(1.0f/2.0f); + return (a.x + a.y) * (1.0f / 2.0f); } -ccl_device_inline float dot(const float2& a, const float2& b) +ccl_device_inline float dot(const float2 &a, const float2 &b) { - return a.x*b.x + a.y*b.y; + return a.x * b.x + a.y * b.y; } -ccl_device_inline float cross(const float2& a, const float2& b) +ccl_device_inline float cross(const float2 &a, const float2 &b) { - return (a.x*b.y - a.y*b.x); + return (a.x * b.y - a.y * b.x); } -ccl_device_inline float len(const float2& a) +ccl_device_inline float len(const float2 &a) { - return sqrtf(dot(a, a)); + return sqrtf(dot(a, a)); } -ccl_device_inline float2 normalize(const float2& a) +ccl_device_inline float2 normalize(const float2 &a) { - return a/len(a); + return a / len(a); } -ccl_device_inline float2 normalize_len(const float2& a, float *t) +ccl_device_inline float2 normalize_len(const float2 &a, float *t) { - *t = len(a); - return a/(*t); + *t = len(a); + return a / (*t); } -ccl_device_inline float2 safe_normalize(const float2& a) +ccl_device_inline float2 safe_normalize(const float2 &a) { - float t = len(a); - return (t != 0.0f)? a/t: a; + float t = len(a); + return (t != 0.0f) ? a / t : a; } -ccl_device_inline float2 min(const float2& a, const float2& b) +ccl_device_inline float2 min(const float2 &a, const float2 &b) { - return make_float2(min(a.x, b.x), min(a.y, b.y)); + return make_float2(min(a.x, b.x), min(a.y, b.y)); } -ccl_device_inline float2 max(const float2& a, const float2& b) +ccl_device_inline float2 max(const float2 &a, const float2 &b) { - return make_float2(max(a.x, b.x), max(a.y, b.y)); + return make_float2(max(a.x, b.x), max(a.y, b.y)); } -ccl_device_inline float2 clamp(const float2& a, const float2& mn, const float2& mx) +ccl_device_inline float2 clamp(const float2 &a, const float2 &mn, const float2 &mx) { - return min(max(a, mn), mx); + return min(max(a, mn), mx); } -ccl_device_inline float2 fabs(const float2& a) +ccl_device_inline float2 fabs(const float2 &a) { - return make_float2(fabsf(a.x), fabsf(a.y)); + return make_float2(fabsf(a.x), fabsf(a.y)); } -ccl_device_inline float2 as_float2(const float4& a) +ccl_device_inline float2 as_float2(const float4 &a) { - return make_float2(a.x, a.y); + return make_float2(a.x, a.y); } -ccl_device_inline float2 interp(const float2& a, const float2& b, float t) +ccl_device_inline float2 interp(const float2 &a, const float2 &b, float t) { - return a + t*(b - a); + return a + t * (b - a); } -ccl_device_inline float2 mix(const float2& a, const float2& b, float t) +ccl_device_inline float2 mix(const float2 &a, const float2 &b, float t) { - return a + t*(b - a); + return a + t * (b - a); } -#endif /* !__KERNEL_OPENCL__ */ +#endif /* !__KERNEL_OPENCL__ */ CCL_NAMESPACE_END -#endif /* __UTIL_MATH_FLOAT2_H__ */ +#endif /* __UTIL_MATH_FLOAT2_H__ */ diff --git a/intern/cycles/util/util_math_float3.h b/intern/cycles/util/util_math_float3.h index a54a3f3087c..85e9b8114ff 100644 --- a/intern/cycles/util/util_math_float3.h +++ b/intern/cycles/util/util_math_float3.h @@ -28,37 +28,37 @@ CCL_NAMESPACE_BEGIN */ #ifndef __KERNEL_OPENCL__ -ccl_device_inline float3 operator-(const float3& a); -ccl_device_inline float3 operator*(const float3& a, const float3& b); -ccl_device_inline float3 operator*(const float3& a, const float f); -ccl_device_inline float3 operator*(const float f, const float3& a); -ccl_device_inline float3 operator/(const float f, const float3& a); -ccl_device_inline float3 operator/(const float3& a, const float f); -ccl_device_inline float3 operator/(const float3& a, const float3& b); -ccl_device_inline float3 operator+(const float3& a, const float3& b); -ccl_device_inline float3 operator-(const float3& a, const float3& b); -ccl_device_inline float3 operator+=(float3& a, const float3& b); -ccl_device_inline float3 operator-=(float3& a, const float3& b); -ccl_device_inline float3 operator*=(float3& a, const float3& b); -ccl_device_inline float3 operator*=(float3& a, float f); -ccl_device_inline float3 operator/=(float3& a, const float3& b); -ccl_device_inline float3 operator/=(float3& a, float f); - -ccl_device_inline bool operator==(const float3& a, const float3& b); -ccl_device_inline bool operator!=(const float3& a, const float3& b); - -ccl_device_inline float dot(const float3& a, const float3& b); -ccl_device_inline float dot_xy(const float3& a, const float3& b); -ccl_device_inline float3 cross(const float3& a, const float3& b); -ccl_device_inline float3 normalize(const float3& a); -ccl_device_inline float3 min(const float3& a, const float3& b); -ccl_device_inline float3 max(const float3& a, const float3& b); -ccl_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx); -ccl_device_inline float3 fabs(const float3& a); -ccl_device_inline float3 mix(const float3& a, const float3& b, float t); -ccl_device_inline float3 rcp(const float3& a); -ccl_device_inline float3 sqrt(const float3& a); -#endif /* !__KERNEL_OPENCL__ */ +ccl_device_inline float3 operator-(const float3 &a); +ccl_device_inline float3 operator*(const float3 &a, const float3 &b); +ccl_device_inline float3 operator*(const float3 &a, const float f); +ccl_device_inline float3 operator*(const float f, const float3 &a); +ccl_device_inline float3 operator/(const float f, const float3 &a); +ccl_device_inline float3 operator/(const float3 &a, const float f); +ccl_device_inline float3 operator/(const float3 &a, const float3 &b); +ccl_device_inline float3 operator+(const float3 &a, const float3 &b); +ccl_device_inline float3 operator-(const float3 &a, const float3 &b); +ccl_device_inline float3 operator+=(float3 &a, const float3 &b); +ccl_device_inline float3 operator-=(float3 &a, const float3 &b); +ccl_device_inline float3 operator*=(float3 &a, const float3 &b); +ccl_device_inline float3 operator*=(float3 &a, float f); +ccl_device_inline float3 operator/=(float3 &a, const float3 &b); +ccl_device_inline float3 operator/=(float3 &a, float f); + +ccl_device_inline bool operator==(const float3 &a, const float3 &b); +ccl_device_inline bool operator!=(const float3 &a, const float3 &b); + +ccl_device_inline float dot(const float3 &a, const float3 &b); +ccl_device_inline float dot_xy(const float3 &a, const float3 &b); +ccl_device_inline float3 cross(const float3 &a, const float3 &b); +ccl_device_inline float3 normalize(const float3 &a); +ccl_device_inline float3 min(const float3 &a, const float3 &b); +ccl_device_inline float3 max(const float3 &a, const float3 &b); +ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx); +ccl_device_inline float3 fabs(const float3 &a); +ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t); +ccl_device_inline float3 rcp(const float3 &a); +ccl_device_inline float3 sqrt(const float3 &a); +#endif /* !__KERNEL_OPENCL__ */ ccl_device_inline float min3(float3 a); ccl_device_inline float max3(float3 a); @@ -81,342 +81,345 @@ ccl_device_inline bool isequal_float3(const float3 a, const float3 b); */ #ifndef __KERNEL_OPENCL__ -ccl_device_inline float3 operator-(const float3& a) +ccl_device_inline float3 operator-(const float3 &a) { -#ifdef __KERNEL_SSE__ - return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); -#else - return make_float3(-a.x, -a.y, -a.z); -#endif +# ifdef __KERNEL_SSE__ + return float3(_mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000)))); +# else + return make_float3(-a.x, -a.y, -a.z); +# endif } -ccl_device_inline float3 operator*(const float3& a, const float3& b) +ccl_device_inline float3 operator*(const float3 &a, const float3 &b) { -#ifdef __KERNEL_SSE__ - return float3(_mm_mul_ps(a.m128,b.m128)); -#else - return make_float3(a.x*b.x, a.y*b.y, a.z*b.z); -#endif +# ifdef __KERNEL_SSE__ + return float3(_mm_mul_ps(a.m128, b.m128)); +# else + return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); +# endif } -ccl_device_inline float3 operator*(const float3& a, const float f) +ccl_device_inline float3 operator*(const float3 &a, const float f) { -#ifdef __KERNEL_SSE__ - return float3(_mm_mul_ps(a.m128,_mm_set1_ps(f))); -#else - return make_float3(a.x*f, a.y*f, a.z*f); -#endif +# ifdef __KERNEL_SSE__ + return float3(_mm_mul_ps(a.m128, _mm_set1_ps(f))); +# else + return make_float3(a.x * f, a.y * f, a.z * f); +# endif } -ccl_device_inline float3 operator*(const float f, const float3& a) +ccl_device_inline float3 operator*(const float f, const float3 &a) { -#if defined(__KERNEL_SSE__) - return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); -#else - return make_float3(a.x*f, a.y*f, a.z*f); -#endif +# if defined(__KERNEL_SSE__) + return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128)); +# else + return make_float3(a.x * f, a.y * f, a.z * f); +# endif } -ccl_device_inline float3 operator/(const float f, const float3& a) +ccl_device_inline float3 operator/(const float f, const float3 &a) { -#if defined(__KERNEL_SSE__) - return float3(_mm_div_ps(_mm_set1_ps(f), a.m128)); -#else - return make_float3(f / a.x, f / a.y, f / a.z); -#endif +# if defined(__KERNEL_SSE__) + return float3(_mm_div_ps(_mm_set1_ps(f), a.m128)); +# else + return make_float3(f / a.x, f / a.y, f / a.z); +# endif } -ccl_device_inline float3 operator/(const float3& a, const float f) +ccl_device_inline float3 operator/(const float3 &a, const float f) { - float invf = 1.0f/f; - return a * invf; + float invf = 1.0f / f; + return a * invf; } -ccl_device_inline float3 operator/(const float3& a, const float3& b) +ccl_device_inline float3 operator/(const float3 &a, const float3 &b) { -#if defined(__KERNEL_SSE__) - return float3(_mm_div_ps(a.m128, b.m128)); -#else - return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); -#endif +# if defined(__KERNEL_SSE__) + return float3(_mm_div_ps(a.m128, b.m128)); +# else + return make_float3(a.x / b.x, a.y / b.y, a.z / b.z); +# endif } -ccl_device_inline float3 operator+(const float3& a, const float3& b) +ccl_device_inline float3 operator+(const float3 &a, const float3 &b) { -#ifdef __KERNEL_SSE__ - return float3(_mm_add_ps(a.m128, b.m128)); -#else - return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); -#endif +# ifdef __KERNEL_SSE__ + return float3(_mm_add_ps(a.m128, b.m128)); +# else + return make_float3(a.x + b.x, a.y + b.y, a.z + b.z); +# endif } -ccl_device_inline float3 operator-(const float3& a, const float3& b) +ccl_device_inline float3 operator-(const float3 &a, const float3 &b) { -#ifdef __KERNEL_SSE__ - return float3(_mm_sub_ps(a.m128, b.m128)); -#else - return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); -#endif +# ifdef __KERNEL_SSE__ + return float3(_mm_sub_ps(a.m128, b.m128)); +# else + return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); +# endif } -ccl_device_inline float3 operator+=(float3& a, const float3& b) +ccl_device_inline float3 operator+=(float3 &a, const float3 &b) { - return a = a + b; + return a = a + b; } -ccl_device_inline float3 operator-=(float3& a, const float3& b) +ccl_device_inline float3 operator-=(float3 &a, const float3 &b) { - return a = a - b; + return a = a - b; } -ccl_device_inline float3 operator*=(float3& a, const float3& b) +ccl_device_inline float3 operator*=(float3 &a, const float3 &b) { - return a = a * b; + return a = a * b; } -ccl_device_inline float3 operator*=(float3& a, float f) +ccl_device_inline float3 operator*=(float3 &a, float f) { - return a = a * f; + return a = a * f; } -ccl_device_inline float3 operator/=(float3& a, const float3& b) +ccl_device_inline float3 operator/=(float3 &a, const float3 &b) { - return a = a / b; + return a = a / b; } -ccl_device_inline float3 operator/=(float3& a, float f) +ccl_device_inline float3 operator/=(float3 &a, float f) { - float invf = 1.0f/f; - return a = a * invf; + float invf = 1.0f / f; + return a = a * invf; } -ccl_device_inline bool operator==(const float3& a, const float3& b) +ccl_device_inline bool operator==(const float3 &a, const float3 &b) { -#ifdef __KERNEL_SSE__ - return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; -#else - return (a.x == b.x && a.y == b.y && a.z == b.z); -#endif +# ifdef __KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 7) == 7; +# else + return (a.x == b.x && a.y == b.y && a.z == b.z); +# endif } -ccl_device_inline bool operator!=(const float3& a, const float3& b) +ccl_device_inline bool operator!=(const float3 &a, const float3 &b) { - return !(a == b); + return !(a == b); } -ccl_device_inline float dot(const float3& a, const float3& b) +ccl_device_inline float dot(const float3 &a, const float3 &b) { -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); -#else - return a.x*b.x + a.y*b.y + a.z*b.z; -#endif +# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F)); +# else + return a.x * b.x + a.y * b.y + a.z * b.z; +# endif } -ccl_device_inline float dot_xy(const float3& a, const float3& b) +ccl_device_inline float dot_xy(const float3 &a, const float3 &b) { -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a,b),b)); -#else - return a.x*b.x + a.y*b.y; -#endif +# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_hadd_ps(_mm_mul_ps(a, b), b)); +# else + return a.x * b.x + a.y * b.y; +# endif } -ccl_device_inline float3 cross(const float3& a, const float3& b) +ccl_device_inline float3 cross(const float3 &a, const float3 &b) { - float3 r = make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x); - return r; + float3 r = make_float3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); + return r; } -ccl_device_inline float3 normalize(const float3& a) +ccl_device_inline float3 normalize(const float3 &a) { -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); - return float3(_mm_div_ps(a.m128, norm)); -#else - return a/len(a); -#endif +# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + __m128 norm = _mm_sqrt_ps(_mm_dp_ps(a.m128, a.m128, 0x7F)); + return float3(_mm_div_ps(a.m128, norm)); +# else + return a / len(a); +# endif } -ccl_device_inline float3 min(const float3& a, const float3& b) +ccl_device_inline float3 min(const float3 &a, const float3 &b) { -#ifdef __KERNEL_SSE__ - return float3(_mm_min_ps(a.m128, b.m128)); -#else - return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); -#endif +# ifdef __KERNEL_SSE__ + return float3(_mm_min_ps(a.m128, b.m128)); +# else + return make_float3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +# endif } -ccl_device_inline float3 max(const float3& a, const float3& b) +ccl_device_inline float3 max(const float3 &a, const float3 &b) { -#ifdef __KERNEL_SSE__ - return float3(_mm_max_ps(a.m128, b.m128)); -#else - return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); -#endif +# ifdef __KERNEL_SSE__ + return float3(_mm_max_ps(a.m128, b.m128)); +# else + return make_float3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +# endif } -ccl_device_inline float3 clamp(const float3& a, const float3& mn, const float3& mx) +ccl_device_inline float3 clamp(const float3 &a, const float3 &mn, const float3 &mx) { - return min(max(a, mn), mx); + return min(max(a, mn), mx); } -ccl_device_inline float3 fabs(const float3& a) +ccl_device_inline float3 fabs(const float3 &a) { -#ifdef __KERNEL_SSE__ - __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - return float3(_mm_and_ps(a.m128, mask)); -#else - return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); -#endif +# ifdef __KERNEL_SSE__ + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); + return float3(_mm_and_ps(a.m128, mask)); +# else + return make_float3(fabsf(a.x), fabsf(a.y), fabsf(a.z)); +# endif } -ccl_device_inline float3 sqrt(const float3& a) +ccl_device_inline float3 sqrt(const float3 &a) { -#ifdef __KERNEL_SSE__ - return float3(_mm_sqrt_ps(a)); -#else - return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z)); -#endif +# ifdef __KERNEL_SSE__ + return float3(_mm_sqrt_ps(a)); +# else + return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z)); +# endif } -ccl_device_inline float3 mix(const float3& a, const float3& b, float t) +ccl_device_inline float3 mix(const float3 &a, const float3 &b, float t) { - return a + t*(b - a); + return a + t * (b - a); } -ccl_device_inline float3 rcp(const float3& a) +ccl_device_inline float3 rcp(const float3 &a) { -#ifdef __KERNEL_SSE__ - /* Don't use _mm_rcp_ps due to poor precision. */ - return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); -#else - return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z); -#endif +# ifdef __KERNEL_SSE__ + /* Don't use _mm_rcp_ps due to poor precision. */ + return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); +# else + return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z); +# endif } -#endif /* !__KERNEL_OPENCL__ */ +#endif /* !__KERNEL_OPENCL__ */ ccl_device_inline float min3(float3 a) { - return min(min(a.x, a.y), a.z); + return min(min(a.x, a.y), a.z); } ccl_device_inline float max3(float3 a) { - return max(max(a.x, a.y), a.z); + return max(max(a.x, a.y), a.z); } ccl_device_inline float len(const float3 a) { #if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); + return _mm_cvtss_f32(_mm_sqrt_ss(_mm_dp_ps(a.m128, a.m128, 0x7F))); #else - return sqrtf(dot(a, a)); + return sqrtf(dot(a, a)); #endif } ccl_device_inline float len_squared(const float3 a) { - return dot(a, a); + return dot(a, a); } ccl_device_inline float3 saturate3(float3 a) { - return make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); + return make_float3(saturate(a.x), saturate(a.y), saturate(a.z)); } ccl_device_inline float3 normalize_len(const float3 a, float *t) { - *t = len(a); - float x = 1.0f / *t; - return a*x; + *t = len(a); + float x = 1.0f / *t; + return a * x; } ccl_device_inline float3 safe_normalize(const float3 a) { - float t = len(a); - return (t != 0.0f)? a * (1.0f/t) : a; + float t = len(a); + return (t != 0.0f) ? a * (1.0f / t) : a; } ccl_device_inline float3 safe_normalize_len(const float3 a, float *t) { - *t = len(a); - return (*t != 0.0f)? a/(*t): a; + *t = len(a); + return (*t != 0.0f) ? a / (*t) : a; } ccl_device_inline float3 interp(float3 a, float3 b, float t) { - return a + t*(b - a); + return a + t * (b - a); } ccl_device_inline bool is_zero(const float3 a) { #ifdef __KERNEL_SSE__ - return a == make_float3(0.0f); + return a == make_float3(0.0f); #else - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f); #endif } ccl_device_inline float reduce_add(const float3 a) { - return (a.x + a.y + a.z); + return (a.x + a.y + a.z); } ccl_device_inline float average(const float3 a) { - return reduce_add(a)*(1.0f/3.0f); + return reduce_add(a) * (1.0f / 3.0f); } ccl_device_inline bool isequal_float3(const float3 a, const float3 b) { #ifdef __KERNEL_OPENCL__ - return all(a == b); + return all(a == b); #else - return a == b; + return a == b; #endif } ccl_device_inline float3 pow3(float3 v, float e) { - return make_float3(powf(v.x, e), powf(v.y, e), powf(v.z, e)); + return make_float3(powf(v.x, e), powf(v.y, e), powf(v.z, e)); } ccl_device_inline float3 exp3(float3 v) { - return make_float3(expf(v.x), expf(v.y), expf(v.z)); + return make_float3(expf(v.x), expf(v.y), expf(v.z)); } ccl_device_inline float3 log3(float3 v) { - return make_float3(logf(v.x), logf(v.y), logf(v.z)); + return make_float3(logf(v.x), logf(v.y), logf(v.z)); } ccl_device_inline int3 quick_floor_to_int3(const float3 a) { #ifdef __KERNEL_SSE__ - int3 b = int3(_mm_cvttps_epi32(a.m128)); - int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f)))); - /* Unsaturated add 0xffffffff is the same as subtract -1. */ - return b + isneg; + int3 b = int3(_mm_cvttps_epi32(a.m128)); + int3 isneg = int3(_mm_castps_si128(_mm_cmplt_ps(a.m128, _mm_set_ps1(0.0f)))); + /* Unsaturated add 0xffffffff is the same as subtract -1. */ + return b + isneg; #else - return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z)); + return make_int3(quick_floor_to_int(a.x), quick_floor_to_int(a.y), quick_floor_to_int(a.z)); #endif } ccl_device_inline bool isfinite3_safe(float3 v) { - return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z); + return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z); } ccl_device_inline float3 ensure_finite3(float3 v) { - if(!isfinite_safe(v.x)) v.x = 0.0f; - if(!isfinite_safe(v.y)) v.y = 0.0f; - if(!isfinite_safe(v.z)) v.z = 0.0f; - return v; + if (!isfinite_safe(v.x)) + v.x = 0.0f; + if (!isfinite_safe(v.y)) + v.y = 0.0f; + if (!isfinite_safe(v.z)) + v.z = 0.0f; + return v; } CCL_NAMESPACE_END -#endif /* __UTIL_MATH_FLOAT3_H__ */ +#endif /* __UTIL_MATH_FLOAT3_H__ */ diff --git a/intern/cycles/util/util_math_float4.h b/intern/cycles/util/util_math_float4.h index 479ccf202ba..1fb886572e3 100644 --- a/intern/cycles/util/util_math_float4.h +++ b/intern/cycles/util/util_math_float4.h @@ -28,433 +28,408 @@ CCL_NAMESPACE_BEGIN */ #ifndef __KERNEL_OPENCL__ -ccl_device_inline float4 operator-(const float4& a); -ccl_device_inline float4 operator*(const float4& a, const float4& b); -ccl_device_inline float4 operator*(const float4& a, float f); -ccl_device_inline float4 operator*(float f, const float4& a); -ccl_device_inline float4 operator/(const float4& a, float f); -ccl_device_inline float4 operator/(const float4& a, const float4& b); -ccl_device_inline float4 operator+(const float4& a, const float4& b); -ccl_device_inline float4 operator-(const float4& a, const float4& b); -ccl_device_inline float4 operator+=(float4& a, const float4& b); -ccl_device_inline float4 operator*=(float4& a, const float4& b); -ccl_device_inline float4 operator*=(float4& a, float f); -ccl_device_inline float4 operator/=(float4& a, float f); - -ccl_device_inline int4 operator<(const float4& a, const float4& b); -ccl_device_inline int4 operator>=(const float4& a, const float4& b); -ccl_device_inline int4 operator<=(const float4& a, const float4& b); -ccl_device_inline bool operator==(const float4& a, const float4& b); - -ccl_device_inline float dot(const float4& a, const float4& b); -ccl_device_inline float len_squared(const float4& a); -ccl_device_inline float4 rcp(const float4& a); -ccl_device_inline float4 sqrt(const float4& a); -ccl_device_inline float4 sqr(const float4& a); -ccl_device_inline float4 cross(const float4& a, const float4& b); -ccl_device_inline bool is_zero(const float4& a); -ccl_device_inline float average(const float4& a); -ccl_device_inline float len(const float4& a); -ccl_device_inline float4 normalize(const float4& a); -ccl_device_inline float4 safe_normalize(const float4& a); -ccl_device_inline float4 min(const float4& a, const float4& b); -ccl_device_inline float4 max(const float4& a, const float4& b); -ccl_device_inline float4 clamp(const float4& a, const float4& mn, const float4& mx); -ccl_device_inline float4 fabs(const float4& a); -#endif /* !__KERNEL_OPENCL__*/ +ccl_device_inline float4 operator-(const float4 &a); +ccl_device_inline float4 operator*(const float4 &a, const float4 &b); +ccl_device_inline float4 operator*(const float4 &a, float f); +ccl_device_inline float4 operator*(float f, const float4 &a); +ccl_device_inline float4 operator/(const float4 &a, float f); +ccl_device_inline float4 operator/(const float4 &a, const float4 &b); +ccl_device_inline float4 operator+(const float4 &a, const float4 &b); +ccl_device_inline float4 operator-(const float4 &a, const float4 &b); +ccl_device_inline float4 operator+=(float4 &a, const float4 &b); +ccl_device_inline float4 operator*=(float4 &a, const float4 &b); +ccl_device_inline float4 operator*=(float4 &a, float f); +ccl_device_inline float4 operator/=(float4 &a, float f); + +ccl_device_inline int4 operator<(const float4 &a, const float4 &b); +ccl_device_inline int4 operator>=(const float4 &a, const float4 &b); +ccl_device_inline int4 operator<=(const float4 &a, const float4 &b); +ccl_device_inline bool operator==(const float4 &a, const float4 &b); + +ccl_device_inline float dot(const float4 &a, const float4 &b); +ccl_device_inline float len_squared(const float4 &a); +ccl_device_inline float4 rcp(const float4 &a); +ccl_device_inline float4 sqrt(const float4 &a); +ccl_device_inline float4 sqr(const float4 &a); +ccl_device_inline float4 cross(const float4 &a, const float4 &b); +ccl_device_inline bool is_zero(const float4 &a); +ccl_device_inline float average(const float4 &a); +ccl_device_inline float len(const float4 &a); +ccl_device_inline float4 normalize(const float4 &a); +ccl_device_inline float4 safe_normalize(const float4 &a); +ccl_device_inline float4 min(const float4 &a, const float4 &b); +ccl_device_inline float4 max(const float4 &a, const float4 &b); +ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx); +ccl_device_inline float4 fabs(const float4 &a); +#endif /* !__KERNEL_OPENCL__*/ #ifdef __KERNEL_SSE__ template<size_t index_0, size_t index_1, size_t index_2, size_t index_3> -__forceinline const float4 shuffle(const float4& b); +__forceinline const float4 shuffle(const float4 &b); template<size_t index_0, size_t index_1, size_t index_2, size_t index_3> -__forceinline const float4 shuffle(const float4& a, const float4& b); +__forceinline const float4 shuffle(const float4 &a, const float4 &b); -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b); +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b); -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& a, const float4& b); -template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4& a, const float4& b); +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b); +template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b); # ifdef __KERNEL_SSE3__ -template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b); -template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b); +template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b); +template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b); # endif -#endif /* __KERNEL_SSE__ */ +#endif /* __KERNEL_SSE__ */ #ifndef __KERNEL_GPU__ -ccl_device_inline float4 select(const int4& mask, - const float4& a, - const float4& b); -ccl_device_inline float4 reduce_min(const float4& a); -ccl_device_inline float4 reduce_max(const float4& a); -ccl_device_inline float4 reduce_add(const float4& a); -#endif /* !__KERNEL_GPU__ */ +ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b); +ccl_device_inline float4 reduce_min(const float4 &a); +ccl_device_inline float4 reduce_max(const float4 &a); +ccl_device_inline float4 reduce_add(const float4 &a); +#endif /* !__KERNEL_GPU__ */ /******************************************************************************* * Definition. */ #ifndef __KERNEL_OPENCL__ -ccl_device_inline float4 operator-(const float4& a) +ccl_device_inline float4 operator-(const float4 &a) { -#ifdef __KERNEL_SSE__ - __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); - return float4(_mm_xor_ps(a.m128, mask)); -#else - return make_float4(-a.x, -a.y, -a.z, -a.w); -#endif +# ifdef __KERNEL_SSE__ + __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + return float4(_mm_xor_ps(a.m128, mask)); +# else + return make_float4(-a.x, -a.y, -a.z, -a.w); +# endif } -ccl_device_inline float4 operator*(const float4& a, const float4& b) +ccl_device_inline float4 operator*(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return float4(_mm_mul_ps(a.m128, b.m128)); -#else - return make_float4(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_mul_ps(a.m128, b.m128)); +# else + return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); +# endif } -ccl_device_inline float4 operator*(const float4& a, float f) +ccl_device_inline float4 operator*(const float4 &a, float f) { -#if defined(__KERNEL_SSE__) - return a * make_float4(f); -#else - return make_float4(a.x*f, a.y*f, a.z*f, a.w*f); -#endif +# if defined(__KERNEL_SSE__) + return a * make_float4(f); +# else + return make_float4(a.x * f, a.y * f, a.z * f, a.w * f); +# endif } -ccl_device_inline float4 operator*(float f, const float4& a) +ccl_device_inline float4 operator*(float f, const float4 &a) { - return a * f; + return a * f; } -ccl_device_inline float4 operator/(const float4& a, float f) +ccl_device_inline float4 operator/(const float4 &a, float f) { - return a * (1.0f/f); + return a * (1.0f / f); } -ccl_device_inline float4 operator/(const float4& a, const float4& b) +ccl_device_inline float4 operator/(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return float4(_mm_div_ps(a.m128, b.m128)); -#else - return make_float4(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); -#endif - +# ifdef __KERNEL_SSE__ + return float4(_mm_div_ps(a.m128, b.m128)); +# else + return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); +# endif } -ccl_device_inline float4 operator+(const float4& a, const float4& b) +ccl_device_inline float4 operator+(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return float4(_mm_add_ps(a.m128, b.m128)); -#else - return make_float4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_add_ps(a.m128, b.m128)); +# else + return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +# endif } -ccl_device_inline float4 operator-(const float4& a, const float4& b) +ccl_device_inline float4 operator-(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return float4(_mm_sub_ps(a.m128, b.m128)); -#else - return make_float4(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_sub_ps(a.m128, b.m128)); +# else + return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); +# endif } -ccl_device_inline float4 operator+=(float4& a, const float4& b) +ccl_device_inline float4 operator+=(float4 &a, const float4 &b) { - return a = a + b; + return a = a + b; } -ccl_device_inline float4 operator*=(float4& a, const float4& b) +ccl_device_inline float4 operator*=(float4 &a, const float4 &b) { - return a = a * b; + return a = a * b; } -ccl_device_inline float4 operator*=(float4& a, float f) +ccl_device_inline float4 operator*=(float4 &a, float f) { - return a = a * f; + return a = a * f; } -ccl_device_inline float4 operator/=(float4& a, float f) +ccl_device_inline float4 operator/=(float4 &a, float f) { - return a = a / f; + return a = a / f; } -ccl_device_inline int4 operator<(const float4& a, const float4& b) +ccl_device_inline int4 operator<(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128))); -#else - return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmplt_ps(a.m128, b.m128))); +# else + return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); +# endif } -ccl_device_inline int4 operator>=(const float4& a, const float4& b) +ccl_device_inline int4 operator>=(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128))); -#else - return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmpge_ps(a.m128, b.m128))); +# else + return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +# endif } -ccl_device_inline int4 operator<=(const float4& a, const float4& b) +ccl_device_inline int4 operator<=(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128))); -#else - return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(_mm_cmple_ps(a.m128, b.m128))); +# else + return make_int4(a.x <= b.x, a.y <= b.y, a.z <= b.z, a.w <= b.w); +# endif } -ccl_device_inline bool operator==(const float4& a, const float4& b) +ccl_device_inline bool operator==(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; -#else - return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); -#endif +# ifdef __KERNEL_SSE__ + return (_mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128)) & 15) == 15; +# else + return (a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w); +# endif } -ccl_device_inline float dot(const float4& a, const float4& b) +ccl_device_inline float dot(const float4 &a, const float4 &b) { -#if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) - return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); -#else - return (a.x*b.x + a.y*b.y) + (a.z*b.z + a.w*b.w); -#endif +# if defined(__KERNEL_SSE41__) && defined(__KERNEL_SSE__) + return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xFF)); +# else + return (a.x * b.x + a.y * b.y) + (a.z * b.z + a.w * b.w); +# endif } -ccl_device_inline float len_squared(const float4& a) +ccl_device_inline float len_squared(const float4 &a) { - return dot(a, a); + return dot(a, a); } -ccl_device_inline float4 rcp(const float4& a) +ccl_device_inline float4 rcp(const float4 &a) { -#ifdef __KERNEL_SSE__ - /* Don't use _mm_rcp_ps due to poor precision. */ - return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); -#else - return make_float4(1.0f/a.x, 1.0f/a.y, 1.0f/a.z, 1.0f/a.w); -#endif +# ifdef __KERNEL_SSE__ + /* Don't use _mm_rcp_ps due to poor precision. */ + return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128)); +# else + return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w); +# endif } -ccl_device_inline float4 sqrt(const float4& a) +ccl_device_inline float4 sqrt(const float4 &a) { -#ifdef __KERNEL_SSE__ - return float4(_mm_sqrt_ps(a.m128)); -#else - return make_float4(sqrtf(a.x), - sqrtf(a.y), - sqrtf(a.z), - sqrtf(a.w)); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_sqrt_ps(a.m128)); +# else + return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); +# endif } -ccl_device_inline float4 sqr(const float4& a) +ccl_device_inline float4 sqr(const float4 &a) { - return a * a; + return a * a; } -ccl_device_inline float4 cross(const float4& a, const float4& b) +ccl_device_inline float4 cross(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return (shuffle<1,2,0,0>(a)*shuffle<2,0,1,0>(b)) - - (shuffle<2,0,1,0>(a)*shuffle<1,2,0,0>(b)); -#else - return make_float4(a.y*b.z - a.z*b.y, - a.z*b.x - a.x*b.z, - a.x*b.y - a.y*b.x, - 0.0f); -#endif +# ifdef __KERNEL_SSE__ + return (shuffle<1, 2, 0, 0>(a) * shuffle<2, 0, 1, 0>(b)) - + (shuffle<2, 0, 1, 0>(a) * shuffle<1, 2, 0, 0>(b)); +# else + return make_float4(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x, 0.0f); +# endif } -ccl_device_inline bool is_zero(const float4& a) +ccl_device_inline bool is_zero(const float4 &a) { -#ifdef __KERNEL_SSE__ - return a == make_float4(0.0f); -#else - return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); -#endif +# ifdef __KERNEL_SSE__ + return a == make_float4(0.0f); +# else + return (a.x == 0.0f && a.y == 0.0f && a.z == 0.0f && a.w == 0.0f); +# endif } -ccl_device_inline float4 reduce_add(const float4& a) +ccl_device_inline float4 reduce_add(const float4 &a) { -#ifdef __KERNEL_SSE__ -# ifdef __KERNEL_SSE3__ - float4 h(_mm_hadd_ps(a.m128, a.m128)); - return float4( _mm_hadd_ps(h.m128, h.m128)); +# ifdef __KERNEL_SSE__ +# ifdef __KERNEL_SSE3__ + float4 h(_mm_hadd_ps(a.m128, a.m128)); + return float4(_mm_hadd_ps(h.m128, h.m128)); +# else + float4 h(shuffle<1, 0, 3, 2>(a) + a); + return shuffle<2, 3, 0, 1>(h) + h; +# endif # else - float4 h(shuffle<1,0,3,2>(a) + a); - return shuffle<2,3,0,1>(h) + h; + float sum = (a.x + a.y) + (a.z + a.w); + return make_float4(sum, sum, sum, sum); # endif -#else - float sum = (a.x + a.y) + (a.z + a.w); - return make_float4(sum, sum, sum, sum); -#endif } -ccl_device_inline float average(const float4& a) +ccl_device_inline float average(const float4 &a) { - return reduce_add(a).x * 0.25f; + return reduce_add(a).x * 0.25f; } -ccl_device_inline float len(const float4& a) +ccl_device_inline float len(const float4 &a) { - return sqrtf(dot(a, a)); + return sqrtf(dot(a, a)); } -ccl_device_inline float4 normalize(const float4& a) +ccl_device_inline float4 normalize(const float4 &a) { - return a/len(a); + return a / len(a); } -ccl_device_inline float4 safe_normalize(const float4& a) +ccl_device_inline float4 safe_normalize(const float4 &a) { - float t = len(a); - return (t != 0.0f)? a/t: a; + float t = len(a); + return (t != 0.0f) ? a / t : a; } -ccl_device_inline float4 min(const float4& a, const float4& b) +ccl_device_inline float4 min(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return float4(_mm_min_ps(a.m128, b.m128)); -#else - return make_float4(min(a.x, b.x), - min(a.y, b.y), - min(a.z, b.z), - min(a.w, b.w)); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_min_ps(a.m128, b.m128)); +# else + return make_float4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +# endif } -ccl_device_inline float4 max(const float4& a, const float4& b) +ccl_device_inline float4 max(const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return float4(_mm_max_ps(a.m128, b.m128)); -#else - return make_float4(max(a.x, b.x), - max(a.y, b.y), - max(a.z, b.z), - max(a.w, b.w)); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_max_ps(a.m128, b.m128)); +# else + return make_float4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +# endif } -ccl_device_inline float4 clamp(const float4& a, const float4& mn, const float4& mx) +ccl_device_inline float4 clamp(const float4 &a, const float4 &mn, const float4 &mx) { - return min(max(a, mn), mx); + return min(max(a, mn), mx); } -ccl_device_inline float4 fabs(const float4& a) +ccl_device_inline float4 fabs(const float4 &a) { -#ifdef __KERNEL_SSE__ - return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); -#else - return make_float4(fabsf(a.x), - fabsf(a.y), - fabsf(a.z), - fabsf(a.w)); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)))); +# else + return make_float4(fabsf(a.x), fabsf(a.y), fabsf(a.z), fabsf(a.w)); +# endif } -#endif /* !__KERNEL_OPENCL__*/ +#endif /* !__KERNEL_OPENCL__*/ #ifdef __KERNEL_SSE__ template<size_t index_0, size_t index_1, size_t index_2, size_t index_3> -__forceinline const float4 shuffle(const float4& b) +__forceinline const float4 shuffle(const float4 &b) { - return float4(_mm_castsi128_ps( - _mm_shuffle_epi32(_mm_castps_si128(b), - _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); + return float4(_mm_castsi128_ps( + _mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(index_3, index_2, index_1, index_0)))); } template<size_t index_0, size_t index_1, size_t index_2, size_t index_3> -__forceinline const float4 shuffle(const float4& a, const float4& b) +__forceinline const float4 shuffle(const float4 &a, const float4 &b) { - return float4(_mm_shuffle_ps(a.m128, b.m128, - _MM_SHUFFLE(index_3, index_2, index_1, index_0))); + return float4(_mm_shuffle_ps(a.m128, b.m128, _MM_SHUFFLE(index_3, index_2, index_1, index_0))); } -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& b) +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &b) { - return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); + return float4(_mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b)))); } -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4& a, const float4& b) +template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 &a, const float4 &b) { - return float4(_mm_movelh_ps(a.m128, b.m128)); + return float4(_mm_movelh_ps(a.m128, b.m128)); } -template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4& a, const float4& b) +template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 &a, const float4 &b) { - return float4(_mm_movehl_ps(b.m128, a.m128)); + return float4(_mm_movehl_ps(b.m128, a.m128)); } # ifdef __KERNEL_SSE3__ -template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4& b) +template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 &b) { - return float4(_mm_moveldup_ps(b)); + return float4(_mm_moveldup_ps(b)); } -template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4& b) +template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 &b) { - return float4(_mm_movehdup_ps(b)); + return float4(_mm_movehdup_ps(b)); } -# endif /* __KERNEL_SSE3__ */ -#endif /* __KERNEL_SSE__ */ +# endif /* __KERNEL_SSE3__ */ +#endif /* __KERNEL_SSE__ */ #ifndef __KERNEL_GPU__ -ccl_device_inline float4 select(const int4& mask, - const float4& a, - const float4& b) +ccl_device_inline float4 select(const int4 &mask, const float4 &a, const float4 &b) { -#ifdef __KERNEL_SSE__ - return float4(_mm_blendv_ps(b.m128, a.m128, _mm_castsi128_ps(mask.m128))); -#else - return make_float4((mask.x)? a.x: b.x, - (mask.y)? a.y: b.y, - (mask.z)? a.z: b.z, - (mask.w)? a.w: b.w); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_blendv_ps(b.m128, a.m128, _mm_castsi128_ps(mask.m128))); +# else + return make_float4( + (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w); +# endif } -ccl_device_inline float4 mask(const int4& mask, - const float4& a) +ccl_device_inline float4 mask(const int4 &mask, const float4 &a) { - /* Replace elements of x with zero where mask isn't set. */ - return select(mask, a, make_float4(0.0f)); + /* Replace elements of x with zero where mask isn't set. */ + return select(mask, a, make_float4(0.0f)); } -ccl_device_inline float4 reduce_min(const float4& a) +ccl_device_inline float4 reduce_min(const float4 &a) { -#ifdef __KERNEL_SSE__ - float4 h = min(shuffle<1,0,3,2>(a), a); - return min(shuffle<2,3,0,1>(h), h); -#else - return make_float4(min(min(a.x, a.y), min(a.z, a.w))); -#endif +# ifdef __KERNEL_SSE__ + float4 h = min(shuffle<1, 0, 3, 2>(a), a); + return min(shuffle<2, 3, 0, 1>(h), h); +# else + return make_float4(min(min(a.x, a.y), min(a.z, a.w))); +# endif } -ccl_device_inline float4 reduce_max(const float4& a) +ccl_device_inline float4 reduce_max(const float4 &a) { -#ifdef __KERNEL_SSE__ - float4 h = max(shuffle<1,0,3,2>(a), a); - return max(shuffle<2,3,0,1>(h), h); -#else - return make_float4(max(max(a.x, a.y), max(a.z, a.w))); -#endif +# ifdef __KERNEL_SSE__ + float4 h = max(shuffle<1, 0, 3, 2>(a), a); + return max(shuffle<2, 3, 0, 1>(h), h); +# else + return make_float4(max(max(a.x, a.y), max(a.z, a.w))); +# endif } ccl_device_inline float4 load_float4(const float *v) { -#ifdef __KERNEL_SSE__ - return float4(_mm_loadu_ps(v)); -#else - return make_float4(v[0], v[1], v[2], v[3]); -#endif +# ifdef __KERNEL_SSE__ + return float4(_mm_loadu_ps(v)); +# else + return make_float4(v[0], v[1], v[2], v[3]); +# endif } -#endif /* !__KERNEL_GPU__ */ +#endif /* !__KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_MATH_FLOAT4_H__ */ +#endif /* __UTIL_MATH_FLOAT4_H__ */ diff --git a/intern/cycles/util/util_math_int2.h b/intern/cycles/util/util_math_int2.h index dd401d9a091..0295cd51f7e 100644 --- a/intern/cycles/util/util_math_int2.h +++ b/intern/cycles/util/util_math_int2.h @@ -34,7 +34,7 @@ ccl_device_inline int2 operator+=(int2 &a, const int2 &b); ccl_device_inline int2 operator-(const int2 &a, const int2 &b); ccl_device_inline int2 operator*(const int2 &a, const int2 &b); ccl_device_inline int2 operator/(const int2 &a, const int2 &b); -#endif /* !__KERNEL_OPENCL__ */ +#endif /* !__KERNEL_OPENCL__ */ /******************************************************************************* * Definition. @@ -43,35 +43,35 @@ ccl_device_inline int2 operator/(const int2 &a, const int2 &b); #ifndef __KERNEL_OPENCL__ ccl_device_inline bool operator==(const int2 a, const int2 b) { - return (a.x == b.x && a.y == b.y); + return (a.x == b.x && a.y == b.y); } ccl_device_inline int2 operator+(const int2 &a, const int2 &b) { - return make_int2(a.x + b.x, a.y + b.y); + return make_int2(a.x + b.x, a.y + b.y); } ccl_device_inline int2 operator+=(int2 &a, const int2 &b) { - return a = a + b; + return a = a + b; } ccl_device_inline int2 operator-(const int2 &a, const int2 &b) { - return make_int2(a.x - b.x, a.y - b.y); + return make_int2(a.x - b.x, a.y - b.y); } ccl_device_inline int2 operator*(const int2 &a, const int2 &b) { - return make_int2(a.x * b.x, a.y * b.y); + return make_int2(a.x * b.x, a.y * b.y); } ccl_device_inline int2 operator/(const int2 &a, const int2 &b) { - return make_int2(a.x / b.x, a.y / b.y); + return make_int2(a.x / b.x, a.y / b.y); } -#endif /* !__KERNEL_OPENCL__ */ +#endif /* !__KERNEL_OPENCL__ */ CCL_NAMESPACE_END -#endif /* __UTIL_MATH_INT2_H__ */ +#endif /* __UTIL_MATH_INT2_H__ */ diff --git a/intern/cycles/util/util_math_int3.h b/intern/cycles/util/util_math_int3.h index 2f4752f90f1..d92ed895dc2 100644 --- a/intern/cycles/util/util_math_int3.h +++ b/intern/cycles/util/util_math_int3.h @@ -30,9 +30,9 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_OPENCL__ ccl_device_inline int3 min(int3 a, int3 b); ccl_device_inline int3 max(int3 a, int3 b); -ccl_device_inline int3 clamp(const int3& a, int mn, int mx); -ccl_device_inline int3 clamp(const int3& a, int3& mn, int mx); -#endif /* !__KERNEL_OPENCL__ */ +ccl_device_inline int3 clamp(const int3 &a, int mn, int mx); +ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx); +#endif /* !__KERNEL_OPENCL__ */ /******************************************************************************* * Definition. @@ -41,76 +41,74 @@ ccl_device_inline int3 clamp(const int3& a, int3& mn, int mx); #ifndef __KERNEL_OPENCL__ ccl_device_inline int3 min(int3 a, int3 b) { -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int3(_mm_min_epi32(a.m128, b.m128)); -#else - return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); -#endif +# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int3(_mm_min_epi32(a.m128, b.m128)); +# else + return make_int3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); +# endif } ccl_device_inline int3 max(int3 a, int3 b) { -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int3(_mm_max_epi32(a.m128, b.m128)); -#else - return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); -#endif +# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int3(_mm_max_epi32(a.m128, b.m128)); +# else + return make_int3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); +# endif } -ccl_device_inline int3 clamp(const int3& a, int mn, int mx) +ccl_device_inline int3 clamp(const int3 &a, int mn, int mx) { -#ifdef __KERNEL_SSE__ - return min(max(a, make_int3(mn)), make_int3(mx)); -#else - return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); -#endif +# ifdef __KERNEL_SSE__ + return min(max(a, make_int3(mn)), make_int3(mx)); +# else + return make_int3(clamp(a.x, mn, mx), clamp(a.y, mn, mx), clamp(a.z, mn, mx)); +# endif } -ccl_device_inline int3 clamp(const int3& a, int3& mn, int mx) +ccl_device_inline int3 clamp(const int3 &a, int3 &mn, int mx) { -#ifdef __KERNEL_SSE__ - return min(max(a, mn), make_int3(mx)); -#else - return make_int3(clamp(a.x, mn.x, mx), - clamp(a.y, mn.y, mx), - clamp(a.z, mn.z, mx)); -#endif +# ifdef __KERNEL_SSE__ + return min(max(a, mn), make_int3(mx)); +# else + return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx)); +# endif } ccl_device_inline bool operator==(const int3 &a, const int3 &b) { - return a.x == b.x && a.y == b.y && a.z == b.z; + return a.x == b.x && a.y == b.y && a.z == b.z; } ccl_device_inline bool operator!=(const int3 &a, const int3 &b) { - return !(a == b); + return !(a == b); } ccl_device_inline bool operator<(const int3 &a, const int3 &b) { - return a.x < b.x && a.y < b.y && a.z < b.z; + return a.x < b.x && a.y < b.y && a.z < b.z; } ccl_device_inline int3 operator+(const int3 &a, const int3 &b) { -#ifdef __KERNEL_SSE__ - return int3(_mm_add_epi32(a.m128, b.m128)); -#else - return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); -#endif +# ifdef __KERNEL_SSE__ + return int3(_mm_add_epi32(a.m128, b.m128)); +# else + return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); +# endif } ccl_device_inline int3 operator-(const int3 &a, const int3 &b) { -#ifdef __KERNEL_SSE__ - return int3(_mm_sub_epi32(a.m128, b.m128)); -#else - return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); -#endif +# ifdef __KERNEL_SSE__ + return int3(_mm_sub_epi32(a.m128, b.m128)); +# else + return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); +# endif } -#endif /* !__KERNEL_OPENCL__ */ +#endif /* !__KERNEL_OPENCL__ */ CCL_NAMESPACE_END -#endif /* __UTIL_MATH_INT3_H__ */ +#endif /* __UTIL_MATH_INT3_H__ */ diff --git a/intern/cycles/util/util_math_int4.h b/intern/cycles/util/util_math_int4.h index 763c42318d5..186cc58489b 100644 --- a/intern/cycles/util/util_math_int4.h +++ b/intern/cycles/util/util_math_int4.h @@ -28,132 +28,129 @@ CCL_NAMESPACE_BEGIN */ #ifndef __KERNEL_GPU__ -ccl_device_inline int4 operator+(const int4& a, const int4& b); -ccl_device_inline int4 operator+=(int4& a, const int4& b); -ccl_device_inline int4 operator>>(const int4& a, int i); -ccl_device_inline int4 operator<<(const int4& a, int i); -ccl_device_inline int4 operator<(const int4& a, const int4& b); -ccl_device_inline int4 operator>=(const int4& a, const int4& b); -ccl_device_inline int4 operator&(const int4& a, const int4& b); +ccl_device_inline int4 operator+(const int4 &a, const int4 &b); +ccl_device_inline int4 operator+=(int4 &a, const int4 &b); +ccl_device_inline int4 operator>>(const int4 &a, int i); +ccl_device_inline int4 operator<<(const int4 &a, int i); +ccl_device_inline int4 operator<(const int4 &a, const int4 &b); +ccl_device_inline int4 operator>=(const int4 &a, const int4 &b); +ccl_device_inline int4 operator&(const int4 &a, const int4 &b); ccl_device_inline int4 min(int4 a, int4 b); ccl_device_inline int4 max(int4 a, int4 b); -ccl_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx); -ccl_device_inline int4 select(const int4& mask, const int4& a, const int4& b); -#endif /* __KERNEL_GPU__ */ +ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx); +ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b); +#endif /* __KERNEL_GPU__ */ /******************************************************************************* * Definition. */ #ifndef __KERNEL_GPU__ -ccl_device_inline int4 operator+(const int4& a, const int4& b) +ccl_device_inline int4 operator+(const int4 &a, const int4 &b) { -#ifdef __KERNEL_SSE__ - return int4(_mm_add_epi32(a.m128, b.m128)); -#else - return make_int4(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_add_epi32(a.m128, b.m128)); +# else + return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +# endif } -ccl_device_inline int4 operator+=(int4& a, const int4& b) +ccl_device_inline int4 operator+=(int4 &a, const int4 &b) { - return a = a + b; + return a = a + b; } -ccl_device_inline int4 operator>>(const int4& a, int i) +ccl_device_inline int4 operator>>(const int4 &a, int i) { -#ifdef __KERNEL_SSE__ - return int4(_mm_srai_epi32(a.m128, i)); -#else - return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_srai_epi32(a.m128, i)); +# else + return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i); +# endif } -ccl_device_inline int4 operator<<(const int4& a, int i) +ccl_device_inline int4 operator<<(const int4 &a, int i) { -#ifdef __KERNEL_SSE__ - return int4(_mm_slli_epi32(a.m128, i)); -#else - return make_int4(a.x << i, a.y << i, a.z << i, a.w << i); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_slli_epi32(a.m128, i)); +# else + return make_int4(a.x << i, a.y << i, a.z << i, a.w << i); +# endif } -ccl_device_inline int4 operator<(const int4& a, const int4& b) +ccl_device_inline int4 operator<(const int4 &a, const int4 &b) { -#ifdef __KERNEL_SSE__ - return int4(_mm_cmplt_epi32(a.m128, b.m128)); -#else - return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_cmplt_epi32(a.m128, b.m128)); +# else + return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w); +# endif } -ccl_device_inline int4 operator>=(const int4& a, const int4& b) +ccl_device_inline int4 operator>=(const int4 &a, const int4 &b) { -#ifdef __KERNEL_SSE__ - return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128))); -#else - return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128))); +# else + return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w); +# endif } -ccl_device_inline int4 operator&(const int4& a, const int4& b) +ccl_device_inline int4 operator&(const int4 &a, const int4 &b) { -#ifdef __KERNEL_SSE__ - return int4(_mm_and_si128(a.m128, b.m128)); -#else - return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_and_si128(a.m128, b.m128)); +# else + return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w); +# endif } ccl_device_inline int4 min(int4 a, int4 b) { -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int4(_mm_min_epi32(a.m128, b.m128)); -#else - return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); -#endif +# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int4(_mm_min_epi32(a.m128, b.m128)); +# else + return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); +# endif } ccl_device_inline int4 max(int4 a, int4 b) { -#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) - return int4(_mm_max_epi32(a.m128, b.m128)); -#else - return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); -#endif +# if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__) + return int4(_mm_max_epi32(a.m128, b.m128)); +# else + return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); +# endif } -ccl_device_inline int4 clamp(const int4& a, const int4& mn, const int4& mx) +ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx) { - return min(max(a, mn), mx); + return min(max(a, mn), mx); } -ccl_device_inline int4 select(const int4& mask, const int4& a, const int4& b) +ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b) { -#ifdef __KERNEL_SSE__ - const __m128 m = _mm_cvtepi32_ps(mask); - /* TODO(sergey): avoid cvt. */ - return int4(_mm_castps_si128( - _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), - _mm_andnot_ps(m, _mm_castsi128_ps(b))))); -#else - return make_int4((mask.x)? a.x: b.x, - (mask.y)? a.y: b.y, - (mask.z)? a.z: b.z, - (mask.w)? a.w: b.w); -#endif +# ifdef __KERNEL_SSE__ + const __m128 m = _mm_cvtepi32_ps(mask); + /* TODO(sergey): avoid cvt. */ + return int4(_mm_castps_si128( + _mm_or_ps(_mm_and_ps(m, _mm_castsi128_ps(a)), _mm_andnot_ps(m, _mm_castsi128_ps(b))))); +# else + return make_int4( + (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w); +# endif } ccl_device_inline int4 load_int4(const int *v) { -#ifdef __KERNEL_SSE__ - return int4(_mm_loadu_si128((__m128i*)v)); -#else - return make_int4(v[0], v[1], v[2], v[3]); -#endif +# ifdef __KERNEL_SSE__ + return int4(_mm_loadu_si128((__m128i *)v)); +# else + return make_int4(v[0], v[1], v[2], v[3]); +# endif } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_MATH_INT4_H__ */ +#endif /* __UTIL_MATH_INT4_H__ */ diff --git a/intern/cycles/util/util_math_intersect.h b/intern/cycles/util/util_math_intersect.h index aa75783d378..95ac231c611 100644 --- a/intern/cycles/util/util_math_intersect.h +++ b/intern/cycles/util/util_math_intersect.h @@ -21,167 +21,175 @@ CCL_NAMESPACE_BEGIN /* Ray Intersection */ -ccl_device bool ray_sphere_intersect( - float3 ray_P, float3 ray_D, float ray_t, - float3 sphere_P, float sphere_radius, - float3 *isect_P, float *isect_t) +ccl_device bool ray_sphere_intersect(float3 ray_P, + float3 ray_D, + float ray_t, + float3 sphere_P, + float sphere_radius, + float3 *isect_P, + float *isect_t) { - const float3 d = sphere_P - ray_P; - const float radiussq = sphere_radius*sphere_radius; - const float tsq = dot(d, d); - - if(tsq > radiussq) { - /* Ray origin outside sphere. */ - const float tp = dot(d, ray_D); - if(tp < 0.0f) { - /* Ray points away from sphere. */ - return false; - } - const float dsq = tsq - tp*tp; /* pythagoras */ - if(dsq > radiussq) { - /* Closest point on ray outside sphere. */ - return false; - } - const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */ - if(t < ray_t) { - *isect_t = t; - *isect_P = ray_P + ray_D*t; - return true; - } - } - return false; + const float3 d = sphere_P - ray_P; + const float radiussq = sphere_radius * sphere_radius; + const float tsq = dot(d, d); + + if (tsq > radiussq) { + /* Ray origin outside sphere. */ + const float tp = dot(d, ray_D); + if (tp < 0.0f) { + /* Ray points away from sphere. */ + return false; + } + const float dsq = tsq - tp * tp; /* pythagoras */ + if (dsq > radiussq) { + /* Closest point on ray outside sphere. */ + return false; + } + const float t = tp - sqrtf(radiussq - dsq); /* pythagoras */ + if (t < ray_t) { + *isect_t = t; + *isect_P = ray_P + ray_D * t; + return true; + } + } + return false; } -ccl_device bool ray_aligned_disk_intersect( - float3 ray_P, float3 ray_D, float ray_t, - float3 disk_P, float disk_radius, - float3 *isect_P, float *isect_t) +ccl_device bool ray_aligned_disk_intersect(float3 ray_P, + float3 ray_D, + float ray_t, + float3 disk_P, + float disk_radius, + float3 *isect_P, + float *isect_t) { - /* Aligned disk normal. */ - float disk_t; - const float3 disk_N = normalize_len(ray_P - disk_P, &disk_t); - const float div = dot(ray_D, disk_N); - if(UNLIKELY(div == 0.0f)) { - return false; - } - /* Compute t to intersection point. */ - const float t = -disk_t/div; - if(t < 0.0f || t > ray_t) { - return false; - } - /* Test if within radius. */ - float3 P = ray_P + ray_D*t; - if(len_squared(P - disk_P) > disk_radius*disk_radius) { - return false; - } - *isect_P = P; - *isect_t = t; - return true; + /* Aligned disk normal. */ + float disk_t; + const float3 disk_N = normalize_len(ray_P - disk_P, &disk_t); + const float div = dot(ray_D, disk_N); + if (UNLIKELY(div == 0.0f)) { + return false; + } + /* Compute t to intersection point. */ + const float t = -disk_t / div; + if (t < 0.0f || t > ray_t) { + return false; + } + /* Test if within radius. */ + float3 P = ray_P + ray_D * t; + if (len_squared(P - disk_P) > disk_radius * disk_radius) { + return false; + } + *isect_P = P; + *isect_t = t; + return true; } -ccl_device_forceinline bool ray_triangle_intersect( - float3 ray_P, float3 ray_dir, float ray_t, +ccl_device_forceinline bool ray_triangle_intersect(float3 ray_P, + float3 ray_dir, + float ray_t, #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - const ssef *ssef_verts, + const ssef *ssef_verts, #else - const float3 tri_a, const float3 tri_b, const float3 tri_c, + const float3 tri_a, + const float3 tri_b, + const float3 tri_c, #endif - float *isect_u, float *isect_v, float *isect_t) + float *isect_u, + float *isect_v, + float *isect_t) { #if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) - typedef ssef float3; - const float3 tri_a(ssef_verts[0]); - const float3 tri_b(ssef_verts[1]); - const float3 tri_c(ssef_verts[2]); - const float3 P(ray_P); - const float3 dir(ray_dir); + typedef ssef float3; + const float3 tri_a(ssef_verts[0]); + const float3 tri_b(ssef_verts[1]); + const float3 tri_c(ssef_verts[2]); + const float3 P(ray_P); + const float3 dir(ray_dir); #else # define dot3(a, b) dot(a, b) - const float3 P = ray_P; - const float3 dir = ray_dir; + const float3 P = ray_P; + const float3 dir = ray_dir; #endif - /* Calculate vertices relative to ray origin. */ - const float3 v0 = tri_c - P; - const float3 v1 = tri_a - P; - const float3 v2 = tri_b - P; - - /* Calculate triangle edges. */ - const float3 e0 = v2 - v0; - const float3 e1 = v0 - v1; - const float3 e2 = v1 - v2; - - /* Perform edge tests. */ -#if defined(__KERNEL_SSE2__) && defined (__KERNEL_SSE__) - const float3 crossU = cross(v2 + v0, e0); - const float3 crossV = cross(v0 + v1, e1); - const float3 crossW = cross(v1 + v2, e2); - - ssef crossX(crossU); - ssef crossY(crossV); - ssef crossZ(crossW); - ssef zero = _mm_setzero_ps(); - _MM_TRANSPOSE4_PS(crossX, crossY, crossZ, zero); - - const ssef dirX(ray_dir.x); - const ssef dirY(ray_dir.y); - const ssef dirZ(ray_dir.z); - - ssef UVWW = madd(crossX, dirX, madd(crossY, dirY, crossZ * dirZ)); + /* Calculate vertices relative to ray origin. */ + const float3 v0 = tri_c - P; + const float3 v1 = tri_a - P; + const float3 v2 = tri_b - P; + + /* Calculate triangle edges. */ + const float3 e0 = v2 - v0; + const float3 e1 = v0 - v1; + const float3 e2 = v1 - v2; + + /* Perform edge tests. */ +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + const float3 crossU = cross(v2 + v0, e0); + const float3 crossV = cross(v0 + v1, e1); + const float3 crossW = cross(v1 + v2, e2); + + ssef crossX(crossU); + ssef crossY(crossV); + ssef crossZ(crossW); + ssef zero = _mm_setzero_ps(); + _MM_TRANSPOSE4_PS(crossX, crossY, crossZ, zero); + + const ssef dirX(ray_dir.x); + const ssef dirY(ray_dir.y); + const ssef dirZ(ray_dir.z); + + ssef UVWW = madd(crossX, dirX, madd(crossY, dirY, crossZ * dirZ)); #else /* __KERNEL_SSE2__ */ - const float U = dot(cross(v2 + v0, e0), ray_dir); - const float V = dot(cross(v0 + v1, e1), ray_dir); - const float W = dot(cross(v1 + v2, e2), ray_dir); -#endif /* __KERNEL_SSE2__ */ - -#if defined(__KERNEL_SSE2__) && defined (__KERNEL_SSE__) - int uvw_sign = movemask(UVWW) & 0x7; - if(uvw_sign != 0) { - if(uvw_sign != 0x7) { - return false; - } - } + const float U = dot(cross(v2 + v0, e0), ray_dir); + const float V = dot(cross(v0 + v1, e1), ray_dir); + const float W = dot(cross(v1 + v2, e2), ray_dir); +#endif /* __KERNEL_SSE2__ */ + +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + int uvw_sign = movemask(UVWW) & 0x7; + if (uvw_sign != 0) { + if (uvw_sign != 0x7) { + return false; + } + } #else - const float minUVW = min(U, min(V, W)); - const float maxUVW = max(U, max(V, W)); + const float minUVW = min(U, min(V, W)); + const float maxUVW = max(U, max(V, W)); - if(minUVW < 0.0f && maxUVW > 0.0f) { - return false; - } + if (minUVW < 0.0f && maxUVW > 0.0f) { + return false; + } #endif - - /* Calculate geometry normal and denominator. */ - const float3 Ng1 = cross(e1, e0); - //const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0); - const float3 Ng = Ng1 + Ng1; - const float den = dot3(Ng, dir); - /* Avoid division by 0. */ - if(UNLIKELY(den == 0.0f)) { - return false; - } - - /* Perform depth test. */ - const float T = dot3(v0, Ng); - const int sign_den = (__float_as_int(den) & 0x80000000); - const float sign_T = xor_signmask(T, sign_den); - if((sign_T < 0.0f) || - (sign_T > ray_t * xor_signmask(den, sign_den))) - { - return false; - } - - const float inv_den = 1.0f / den; -#if defined(__KERNEL_SSE2__) && defined (__KERNEL_SSE__) - UVWW *= inv_den; - _mm_store_ss(isect_u, UVWW); - _mm_store_ss(isect_v, shuffle<1,1,3,3>(UVWW)); + /* Calculate geometry normal and denominator. */ + const float3 Ng1 = cross(e1, e0); + //const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0); + const float3 Ng = Ng1 + Ng1; + const float den = dot3(Ng, dir); + /* Avoid division by 0. */ + if (UNLIKELY(den == 0.0f)) { + return false; + } + + /* Perform depth test. */ + const float T = dot3(v0, Ng); + const int sign_den = (__float_as_int(den) & 0x80000000); + const float sign_T = xor_signmask(T, sign_den); + if ((sign_T < 0.0f) || (sign_T > ray_t * xor_signmask(den, sign_den))) { + return false; + } + + const float inv_den = 1.0f / den; +#if defined(__KERNEL_SSE2__) && defined(__KERNEL_SSE__) + UVWW *= inv_den; + _mm_store_ss(isect_u, UVWW); + _mm_store_ss(isect_v, shuffle<1, 1, 3, 3>(UVWW)); #else - *isect_u = U * inv_den; - *isect_v = V * inv_den; + *isect_u = U * inv_den; + *isect_v = V * inv_den; #endif - *isect_t = T * inv_den; - return true; + *isect_t = T * inv_den; + return true; #undef dot3 } @@ -191,40 +199,51 @@ ccl_device_forceinline bool ray_triangle_intersect( * If ellipse is true, hits outside the ellipse that's enclosed by the * quad are rejected. */ -ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D, - float ray_mint, float ray_maxt, +ccl_device bool ray_quad_intersect(float3 ray_P, + float3 ray_D, + float ray_mint, + float ray_maxt, float3 quad_P, - float3 quad_u, float3 quad_v, float3 quad_n, - float3 *isect_P, float *isect_t, - float *isect_u, float *isect_v, bool ellipse) + float3 quad_u, + float3 quad_v, + float3 quad_n, + float3 *isect_P, + float *isect_t, + float *isect_u, + float *isect_v, + bool ellipse) { - /* Perform intersection test. */ - float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n); - if(t < ray_mint || t > ray_maxt) { - return false; - } - const float3 hit = ray_P + t*ray_D; - const float3 inplane = hit - quad_P; - const float u = dot(inplane, quad_u) / dot(quad_u, quad_u); - if(u < -0.5f || u > 0.5f) { - return false; - } - const float v = dot(inplane, quad_v) / dot(quad_v, quad_v); - if(v < -0.5f || v > 0.5f) { - return false; - } - if(ellipse && (u*u + v*v > 0.25f)) { - return false; - } - /* Store the result. */ - /* TODO(sergey): Check whether we can avoid some checks here. */ - if(isect_P != NULL) *isect_P = hit; - if(isect_t != NULL) *isect_t = t; - if(isect_u != NULL) *isect_u = u + 0.5f; - if(isect_v != NULL) *isect_v = v + 0.5f; - return true; + /* Perform intersection test. */ + float t = -(dot(ray_P, quad_n) - dot(quad_P, quad_n)) / dot(ray_D, quad_n); + if (t < ray_mint || t > ray_maxt) { + return false; + } + const float3 hit = ray_P + t * ray_D; + const float3 inplane = hit - quad_P; + const float u = dot(inplane, quad_u) / dot(quad_u, quad_u); + if (u < -0.5f || u > 0.5f) { + return false; + } + const float v = dot(inplane, quad_v) / dot(quad_v, quad_v); + if (v < -0.5f || v > 0.5f) { + return false; + } + if (ellipse && (u * u + v * v > 0.25f)) { + return false; + } + /* Store the result. */ + /* TODO(sergey): Check whether we can avoid some checks here. */ + if (isect_P != NULL) + *isect_P = hit; + if (isect_t != NULL) + *isect_t = t; + if (isect_u != NULL) + *isect_u = u + 0.5f; + if (isect_v != NULL) + *isect_v = v + 0.5f; + return true; } CCL_NAMESPACE_END -#endif /* __UTIL_MATH_INTERSECT_H__ */ +#endif /* __UTIL_MATH_INTERSECT_H__ */ diff --git a/intern/cycles/util/util_math_matrix.h b/intern/cycles/util/util_math_matrix.h index 9ffcb9659b2..fe80fab6ebd 100644 --- a/intern/cycles/util/util_math_matrix.h +++ b/intern/cycles/util/util_math_matrix.h @@ -19,17 +19,17 @@ CCL_NAMESPACE_BEGIN -#define MAT(A, size, row, col) A[(row)*(size)+(col)] +#define MAT(A, size, row, col) A[(row) * (size) + (col)] /* Variants that use a constant stride on GPUS. */ #ifdef __KERNEL_GPU__ -# define MATS(A, n, r, c, s) A[((r)*(n)+(c))*(s)] +# define MATS(A, n, r, c, s) A[((r) * (n) + (c)) * (s)] /* Element access when only the lower-triangular elements are stored. */ -# define MATHS(A, r, c, s) A[((r)*((r)+1)/2+(c))*(s)] -# define VECS(V, i, s) V[(i)*(s)] +# define MATHS(A, r, c, s) A[((r) * ((r) + 1) / 2 + (c)) * (s)] +# define VECS(V, i, s) V[(i) * (s)] #else # define MATS(A, n, r, c, s) MAT(A, n, r, c) -# define MATHS(A, r, c, s) A[(r)*((r)+1)/2+(c)] +# define MATHS(A, r, c, s) A[(r) * ((r) + 1) / 2 + (c)] # define VECS(V, i, s) V[i] #endif @@ -37,111 +37,115 @@ CCL_NAMESPACE_BEGIN ccl_device_inline void math_vector_zero(float *v, int n) { - for(int i = 0; i < n; i++) { - v[i] = 0.0f; - } + for (int i = 0; i < n; i++) { + v[i] = 0.0f; + } } ccl_device_inline void math_matrix_zero(float *A, int n) { - for(int row = 0; row < n; row++) { - for(int col = 0; col <= row; col++) { - MAT(A, n, row, col) = 0.0f; - } - } + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = 0.0f; + } + } } /* Elementary vector operations. */ ccl_device_inline void math_vector_add(float *a, const float *ccl_restrict b, int n) { - for(int i = 0; i < n; i++) { - a[i] += b[i]; - } + for (int i = 0; i < n; i++) { + a[i] += b[i]; + } } ccl_device_inline void math_vector_mul(float *a, const float *ccl_restrict b, int n) { - for(int i = 0; i < n; i++) { - a[i] *= b[i]; - } + for (int i = 0; i < n; i++) { + a[i] *= b[i]; + } } -ccl_device_inline void math_vector_mul_strided(ccl_global float *a, const float *ccl_restrict b, int astride, int n) +ccl_device_inline void math_vector_mul_strided(ccl_global float *a, + const float *ccl_restrict b, + int astride, + int n) { - for(int i = 0; i < n; i++) { - a[i*astride] *= b[i]; - } + for (int i = 0; i < n; i++) { + a[i * astride] *= b[i]; + } } ccl_device_inline void math_vector_scale(float *a, float b, int n) { - for(int i = 0; i < n; i++) { - a[i] *= b; - } + for (int i = 0; i < n; i++) { + a[i] *= b; + } } ccl_device_inline void math_vector_max(float *a, const float *ccl_restrict b, int n) { - for(int i = 0; i < n; i++) { - a[i] = max(a[i], b[i]); - } + for (int i = 0; i < n; i++) { + a[i] = max(a[i], b[i]); + } } ccl_device_inline void math_vec3_add(float3 *v, int n, float *x, float3 w) { - for(int i = 0; i < n; i++) { - v[i] += w*x[i]; - } + for (int i = 0; i < n; i++) { + v[i] += w * x[i]; + } } -ccl_device_inline void math_vec3_add_strided(ccl_global float3 *v, int n, float *x, float3 w, int stride) +ccl_device_inline void math_vec3_add_strided( + ccl_global float3 *v, int n, float *x, float3 w, int stride) { - for(int i = 0; i < n; i++) { - ccl_global float *elem = (ccl_global float*) (v + i*stride); - atomic_add_and_fetch_float(elem+0, w.x*x[i]); - atomic_add_and_fetch_float(elem+1, w.y*x[i]); - atomic_add_and_fetch_float(elem+2, w.z*x[i]); - } + for (int i = 0; i < n; i++) { + ccl_global float *elem = (ccl_global float *)(v + i * stride); + atomic_add_and_fetch_float(elem + 0, w.x * x[i]); + atomic_add_and_fetch_float(elem + 1, w.y * x[i]); + atomic_add_and_fetch_float(elem + 2, w.z * x[i]); + } } /* Elementary matrix operations. * Note: TriMatrix refers to a square matrix that is symmetric, and therefore its upper-triangular part isn't stored. */ -ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, int n, float val, int stride) +ccl_device_inline void math_trimatrix_add_diagonal(ccl_global float *A, + int n, + float val, + int stride) { - for(int row = 0; row < n; row++) { - MATHS(A, row, row, stride) += val; - } + for (int row = 0; row < n; row++) { + MATHS(A, row, row, stride) += val; + } } /* Add Gramian matrix of v to A. * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ ccl_device_inline void math_matrix_add_gramian(float *A, - int n, - const float *ccl_restrict v, - float weight) + int n, + const float *ccl_restrict v, + float weight) { - for(int row = 0; row < n; row++) { - for(int col = 0; col <= row; col++) { - MAT(A, n, row, col) += v[row]*v[col]*weight; - } - } + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) += v[row] * v[col] * weight; + } + } } /* Add Gramian matrix of v to A. * The Gramian matrix of v is vt*v, so element (i,j) is v[i]*v[j]. */ -ccl_device_inline void math_trimatrix_add_gramian_strided(ccl_global float *A, - int n, - const float *ccl_restrict v, - float weight, - int stride) +ccl_device_inline void math_trimatrix_add_gramian_strided( + ccl_global float *A, int n, const float *ccl_restrict v, float weight, int stride) { - for(int row = 0; row < n; row++) { - for(int col = 0; col <= row; col++) { - atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row]*v[col]*weight); - } - } + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + atomic_add_and_fetch_float(&MATHS(A, row, col, stride), v[row] * v[col] * weight); + } + } } ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, @@ -149,23 +153,23 @@ ccl_device_inline void math_trimatrix_add_gramian(ccl_global float *A, const float *ccl_restrict v, float weight) { - for(int row = 0; row < n; row++) { - for(int col = 0; col <= row; col++) { - MATHS(A, row, col, 1) += v[row]*v[col]*weight; - } - } + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MATHS(A, row, col, 1) += v[row] * v[col] * weight; + } + } } /* Transpose matrix A inplace. */ ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int stride) { - for(int i = 0; i < n; i++) { - for(int j = 0; j < i; j++) { - float temp = MATS(A, n, i, j, stride); - MATS(A, n, i, j, stride) = MATS(A, n, j, i, stride); - MATS(A, n, j, i, stride) = temp; - } - } + for (int i = 0; i < n; i++) { + for (int j = 0; j < i; j++) { + float temp = MATS(A, n, i, j, stride); + MATS(A, n, i, j, stride) = MATS(A, n, j, i, stride); + MATS(A, n, j, i, stride) = temp; + } + } } /* Solvers for matrix problems */ @@ -175,21 +179,21 @@ ccl_device_inline void math_matrix_transpose(ccl_global float *A, int n, int str * Also, only the lower triangular part of A is ever accessed. */ ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride) { - for(int row = 0; row < n; row++) { - for(int col = 0; col <= row; col++) { - float sum_col = MATHS(A, row, col, stride); - for(int k = 0; k < col; k++) { - sum_col -= MATHS(A, row, k, stride) * MATHS(A, col, k, stride); - } - if(row == col) { - sum_col = sqrtf(max(sum_col, 0.0f)); - } - else { - sum_col /= MATHS(A, col, col, stride); - } - MATHS(A, row, col, stride) = sum_col; - } - } + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + float sum_col = MATHS(A, row, col, stride); + for (int k = 0; k < col; k++) { + sum_col -= MATHS(A, row, k, stride) * MATHS(A, col, k, stride); + } + if (row == col) { + sum_col = sqrtf(max(sum_col, 0.0f)); + } + else { + sum_col /= MATHS(A, col, col, stride); + } + MATHS(A, row, col, stride) = sum_col; + } + } } /* Solve A*S=y for S given A and y, where A is symmetrical positive-semidefinite and both inputs are destroyed in the process. @@ -201,29 +205,32 @@ ccl_device void math_trimatrix_cholesky(ccl_global float *A, int n, int stride) * * This is useful for solving the normal equation S=inv(Xt*W*X)*Xt*W*y, since Xt*W*X is * symmetrical positive-semidefinite by construction, so we can just use this function with A=Xt*W*X and y=Xt*W*y. */ -ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, ccl_global float3 *y, int n, int stride) +ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, + ccl_global float3 *y, + int n, + int stride) { - /* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good - * heuristic for the amount of pixels considered (with weighting), therefore the amount of correction - * is scaled based on it. */ - math_trimatrix_add_diagonal(A, n, 3e-7f*A[0], stride); /* Improve the numerical stability. */ - math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */ - - /* Use forward substitution to solve L*b = y, replacing y by b. */ - for(int row = 0; row < n; row++) { - float3 sum = VECS(y, row, stride); - for(int col = 0; col < row; col++) - sum -= MATHS(A, row, col, stride) * VECS(y, col, stride); - VECS(y, row, stride) = sum / MATHS(A, row, row, stride); - } - - /* Use backward substitution to solve Lt*S = b, replacing b by S. */ - for(int row = n-1; row >= 0; row--) { - float3 sum = VECS(y, row, stride); - for(int col = row+1; col < n; col++) - sum -= MATHS(A, col, row, stride) * VECS(y, col, stride); - VECS(y, row, stride) = sum / MATHS(A, row, row, stride); - } + /* Since the first entry of the design row is always 1, the upper-left element of XtWX is a good + * heuristic for the amount of pixels considered (with weighting), therefore the amount of correction + * is scaled based on it. */ + math_trimatrix_add_diagonal(A, n, 3e-7f * A[0], stride); /* Improve the numerical stability. */ + math_trimatrix_cholesky(A, n, stride); /* Replace A with L so that L*Lt = A. */ + + /* Use forward substitution to solve L*b = y, replacing y by b. */ + for (int row = 0; row < n; row++) { + float3 sum = VECS(y, row, stride); + for (int col = 0; col < row; col++) + sum -= MATHS(A, row, col, stride) * VECS(y, col, stride); + VECS(y, row, stride) = sum / MATHS(A, row, row, stride); + } + + /* Use backward substitution to solve Lt*S = b, replacing b by S. */ + for (int row = n - 1; row >= 0; row--) { + float3 sum = VECS(y, row, stride); + for (int col = row + 1; col < n; col++) + sum -= MATHS(A, col, row, stride) * VECS(y, col, stride); + VECS(y, row, stride) = sum / MATHS(A, row, row, stride); + } } /* Perform the Jacobi Eigenvalue Methon on matrix A. @@ -234,181 +241,193 @@ ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, ccl_global * and V will contain the eigenvectors of the original A in its rows (!), * so that A = V^T*D*V. Therefore, the diagonal elements of D are the (sorted) eigenvalues of A. */ -ccl_device void math_matrix_jacobi_eigendecomposition(float *A, ccl_global float *V, int n, int v_stride) +ccl_device void math_matrix_jacobi_eigendecomposition(float *A, + ccl_global float *V, + int n, + int v_stride) { - const float singular_epsilon = 1e-9f; - - for(int row = 0; row < n; row++) { - for(int col = 0; col < n; col++) { - MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f; - } - } - - for(int sweep = 0; sweep < 8; sweep++) { - float off_diagonal = 0.0f; - for(int row = 1; row < n; row++) { - for(int col = 0; col < row; col++) { - off_diagonal += fabsf(MAT(A, n, row, col)); - } - } - if(off_diagonal < 1e-7f) { - /* The matrix has nearly reached diagonal form. - * Since the eigenvalues are only used to determine truncation, their exact values aren't required - a relative error of a few ULPs won't matter at all. */ - break; - } - - /* Set the threshold for the small element rotation skip in the first sweep: - * Skip all elements that are less than a tenth of the average off-diagonal element. */ - float threshold = 0.2f*off_diagonal / (n*n); - - for(int row = 1; row < n; row++) { - for(int col = 0; col < row; col++) { - /* Perform a Jacobi rotation on this element that reduces it to zero. */ - float element = MAT(A, n, row, col); - float abs_element = fabsf(element); - - /* If we're in a later sweep and the element already is very small, just set it to zero and skip the rotation. */ - if(sweep > 3 && abs_element <= singular_epsilon*fabsf(MAT(A, n, row, row)) && abs_element <= singular_epsilon*fabsf(MAT(A, n, col, col))) { - MAT(A, n, row, col) = 0.0f; - continue; - } - - if(element == 0.0f) { - continue; - } - - /* If we're in one of the first sweeps and the element is smaller than the threshold, skip it. */ - if(sweep < 3 && (abs_element < threshold)) { - continue; - } - - /* Determine rotation: The rotation is characterized by its angle phi - or, in the actual implementation, sin(phi) and cos(phi). - * To find those, we first compute their ratio - that might be unstable if the angle approaches 90°, so there's a fallback for that case. - * Then, we compute sin(phi) and cos(phi) themselves. */ - float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col); - float ratio; - if(abs_element > singular_epsilon*fabsf(singular_diff)) { - float cot_2phi = 0.5f*singular_diff / element; - ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi*cot_2phi)); - if(cot_2phi < 0.0f) ratio = -ratio; /* Copy sign. */ - } - else { - ratio = element / singular_diff; - } - - float c = 1.0f / sqrtf(1.0f + ratio*ratio); - float s = ratio*c; - /* To improve numerical stability by avoiding cancellation, the update equations are reformulized to use sin(phi) and tan(phi/2) instead. */ - float tan_phi_2 = s / (1.0f + c); - - /* Update the singular values in the diagonal. */ - float singular_delta = ratio*element; - MAT(A, n, row, row) += singular_delta; - MAT(A, n, col, col) -= singular_delta; - - /* Set the element itself to zero. */ - MAT(A, n, row, col) = 0.0f; - - /* Perform the actual rotations on the matrices. */ -#define ROT(M, r1, c1, r2, c2, stride) \ - { \ - float M1 = MATS(M, n, r1, c1, stride); \ - float M2 = MATS(M, n, r2, c2, stride); \ - MATS(M, n, r1, c1, stride) -= s*(M2 + tan_phi_2*M1); \ - MATS(M, n, r2, c2, stride) += s*(M1 - tan_phi_2*M2); \ - } - - /* Split into three parts to ensure correct accesses since we only store the lower-triangular part of A. */ - for(int i = 0 ; i < col; i++) ROT(A, col, i, row, i, 1); - for(int i = col+1; i < row; i++) ROT(A, i, col, row, i, 1); - for(int i = row+1; i < n ; i++) ROT(A, i, col, i, row, 1); - - for(int i = 0 ; i < n ; i++) ROT(V, col, i, row, i, v_stride); + const float singular_epsilon = 1e-9f; + + for (int row = 0; row < n; row++) { + for (int col = 0; col < n; col++) { + MATS(V, n, row, col, v_stride) = (col == row) ? 1.0f : 0.0f; + } + } + + for (int sweep = 0; sweep < 8; sweep++) { + float off_diagonal = 0.0f; + for (int row = 1; row < n; row++) { + for (int col = 0; col < row; col++) { + off_diagonal += fabsf(MAT(A, n, row, col)); + } + } + if (off_diagonal < 1e-7f) { + /* The matrix has nearly reached diagonal form. + * Since the eigenvalues are only used to determine truncation, their exact values aren't required - a relative error of a few ULPs won't matter at all. */ + break; + } + + /* Set the threshold for the small element rotation skip in the first sweep: + * Skip all elements that are less than a tenth of the average off-diagonal element. */ + float threshold = 0.2f * off_diagonal / (n * n); + + for (int row = 1; row < n; row++) { + for (int col = 0; col < row; col++) { + /* Perform a Jacobi rotation on this element that reduces it to zero. */ + float element = MAT(A, n, row, col); + float abs_element = fabsf(element); + + /* If we're in a later sweep and the element already is very small, just set it to zero and skip the rotation. */ + if (sweep > 3 && abs_element <= singular_epsilon * fabsf(MAT(A, n, row, row)) && + abs_element <= singular_epsilon * fabsf(MAT(A, n, col, col))) { + MAT(A, n, row, col) = 0.0f; + continue; + } + + if (element == 0.0f) { + continue; + } + + /* If we're in one of the first sweeps and the element is smaller than the threshold, skip it. */ + if (sweep < 3 && (abs_element < threshold)) { + continue; + } + + /* Determine rotation: The rotation is characterized by its angle phi - or, in the actual implementation, sin(phi) and cos(phi). + * To find those, we first compute their ratio - that might be unstable if the angle approaches 90°, so there's a fallback for that case. + * Then, we compute sin(phi) and cos(phi) themselves. */ + float singular_diff = MAT(A, n, row, row) - MAT(A, n, col, col); + float ratio; + if (abs_element > singular_epsilon * fabsf(singular_diff)) { + float cot_2phi = 0.5f * singular_diff / element; + ratio = 1.0f / (fabsf(cot_2phi) + sqrtf(1.0f + cot_2phi * cot_2phi)); + if (cot_2phi < 0.0f) + ratio = -ratio; /* Copy sign. */ + } + else { + ratio = element / singular_diff; + } + + float c = 1.0f / sqrtf(1.0f + ratio * ratio); + float s = ratio * c; + /* To improve numerical stability by avoiding cancellation, the update equations are reformulized to use sin(phi) and tan(phi/2) instead. */ + float tan_phi_2 = s / (1.0f + c); + + /* Update the singular values in the diagonal. */ + float singular_delta = ratio * element; + MAT(A, n, row, row) += singular_delta; + MAT(A, n, col, col) -= singular_delta; + + /* Set the element itself to zero. */ + MAT(A, n, row, col) = 0.0f; + + /* Perform the actual rotations on the matrices. */ +#define ROT(M, r1, c1, r2, c2, stride) \ + { \ + float M1 = MATS(M, n, r1, c1, stride); \ + float M2 = MATS(M, n, r2, c2, stride); \ + MATS(M, n, r1, c1, stride) -= s * (M2 + tan_phi_2 * M1); \ + MATS(M, n, r2, c2, stride) += s * (M1 - tan_phi_2 * M2); \ + } + + /* Split into three parts to ensure correct accesses since we only store the lower-triangular part of A. */ + for (int i = 0; i < col; i++) + ROT(A, col, i, row, i, 1); + for (int i = col + 1; i < row; i++) + ROT(A, i, col, row, i, 1); + for (int i = row + 1; i < n; i++) + ROT(A, i, col, i, row, 1); + + for (int i = 0; i < n; i++) + ROT(V, col, i, row, i, v_stride); #undef ROT - } - } - } - - /* Sort eigenvalues and the associated eigenvectors. */ - for(int i = 0; i < n - 1; i++) { - float v = MAT(A, n, i, i); - int k = i; - for(int j = i; j < n; j++) { - if(MAT(A, n, j, j) >= v) { - v = MAT(A, n, j, j); - k = j; - } - } - if(k != i) { - /* Swap eigenvalues. */ - MAT(A, n, k, k) = MAT(A, n, i, i); - MAT(A, n, i, i) = v; - /* Swap eigenvectors. */ - for(int j = 0; j < n; j++) { - float v = MATS(V, n, i, j, v_stride); - MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride); - MATS(V, n, k, j, v_stride) = v; - } - } - } + } + } + } + + /* Sort eigenvalues and the associated eigenvectors. */ + for (int i = 0; i < n - 1; i++) { + float v = MAT(A, n, i, i); + int k = i; + for (int j = i; j < n; j++) { + if (MAT(A, n, j, j) >= v) { + v = MAT(A, n, j, j); + k = j; + } + } + if (k != i) { + /* Swap eigenvalues. */ + MAT(A, n, k, k) = MAT(A, n, i, i); + MAT(A, n, i, i) = v; + /* Swap eigenvectors. */ + for (int j = 0; j < n; j++) { + float v = MATS(V, n, i, j, v_stride); + MATS(V, n, i, j, v_stride) = MATS(V, n, k, j, v_stride); + MATS(V, n, k, j, v_stride) = v; + } + } + } } #ifdef __KERNEL_SSE3__ ccl_device_inline void math_vector_zero_sse(float4 *A, int n) { - for(int i = 0; i < n; i++) { - A[i] = make_float4(0.0f); - } + for (int i = 0; i < n; i++) { + A[i] = make_float4(0.0f); + } } ccl_device_inline void math_matrix_zero_sse(float4 *A, int n) { - for(int row = 0; row < n; row++) { - for(int col = 0; col <= row; col++) { - MAT(A, n, row, col) = make_float4(0.0f); - } - } + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = make_float4(0.0f); + } + } } /* Add Gramian matrix of v to A. * The Gramian matrix of v is v^T*v, so element (i,j) is v[i]*v[j]. */ -ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, int n, const float4 *ccl_restrict v, float4 weight) +ccl_device_inline void math_matrix_add_gramian_sse(float4 *A, + int n, + const float4 *ccl_restrict v, + float4 weight) { - for(int row = 0; row < n; row++) { - for(int col = 0; col <= row; col++) { - MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight; - } - } + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = MAT(A, n, row, col) + v[row] * v[col] * weight; + } + } } ccl_device_inline void math_vector_add_sse(float4 *V, int n, const float4 *ccl_restrict a) { - for(int i = 0; i < n; i++) { - V[i] += a[i]; - } + for (int i = 0; i < n; i++) { + V[i] += a[i]; + } } ccl_device_inline void math_vector_mul_sse(float4 *V, int n, const float4 *ccl_restrict a) { - for(int i = 0; i < n; i++) { - V[i] *= a[i]; - } + for (int i = 0; i < n; i++) { + V[i] *= a[i]; + } } ccl_device_inline void math_vector_max_sse(float4 *a, const float4 *ccl_restrict b, int n) { - for(int i = 0; i < n; i++) { - a[i] = max(a[i], b[i]); - } + for (int i = 0; i < n; i++) { + a[i] = max(a[i], b[i]); + } } ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restrict B) { - for(int row = 0; row < n; row++) { - for(int col = 0; col <= row; col++) { - MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0]; - } - } + for (int row = 0; row < n; row++) { + for (int col = 0; col <= row; col++) { + MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0]; + } + } } #endif @@ -416,4 +435,4 @@ ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restr CCL_NAMESPACE_END -#endif /* __UTIL_MATH_MATRIX_H__ */ +#endif /* __UTIL_MATH_MATRIX_H__ */ diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp index 7cdd28a4793..c11f495f785 100644 --- a/intern/cycles/util/util_md5.cpp +++ b/intern/cycles/util/util_md5.cpp @@ -10,11 +10,11 @@ * freely, subject to the following restrictions: * * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgment in the product documentation would be - * appreciated but is not required. + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. + * misrepresented as being the original software. * 3. This notice may not be removed or altered from any source distribution. * * L. Peter Deutsch @@ -34,239 +34,237 @@ CCL_NAMESPACE_BEGIN #define T_MASK ((uint32_t)~0) #define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) #define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9) -#define T3 0x242070db +#define T3 0x242070db #define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111) #define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050) -#define T6 0x4787c62a +#define T6 0x4787c62a #define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec) #define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe) -#define T9 0x698098d8 +#define T9 0x698098d8 #define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850) #define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e) #define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841) -#define T13 0x6b901122 +#define T13 0x6b901122 #define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c) #define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71) -#define T16 0x49b40821 +#define T16 0x49b40821 #define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d) #define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf) -#define T19 0x265e5a51 +#define T19 0x265e5a51 #define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855) #define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2) -#define T22 0x02441453 +#define T22 0x02441453 #define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e) #define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437) -#define T25 0x21e1cde6 +#define T25 0x21e1cde6 #define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829) #define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278) -#define T28 0x455a14ed +#define T28 0x455a14ed #define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa) #define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07) -#define T31 0x676f02d9 +#define T31 0x676f02d9 #define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375) #define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd) #define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e) -#define T35 0x6d9d6122 +#define T35 0x6d9d6122 #define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3) #define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb) -#define T38 0x4bdecfa9 +#define T38 0x4bdecfa9 #define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f) #define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f) -#define T41 0x289b7ec6 +#define T41 0x289b7ec6 #define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805) #define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a) -#define T44 0x04881d05 +#define T44 0x04881d05 #define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6) #define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a) -#define T47 0x1fa27cf8 +#define T47 0x1fa27cf8 #define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a) #define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb) -#define T50 0x432aff97 +#define T50 0x432aff97 #define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58) #define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6) -#define T53 0x655b59c3 +#define T53 0x655b59c3 #define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d) #define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82) #define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e) -#define T57 0x6fa87e4f +#define T57 0x6fa87e4f #define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f) #define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb) -#define T60 0x4e0811a1 +#define T60 0x4e0811a1 #define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d) #define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca) -#define T63 0x2ad7d2bb +#define T63 0x2ad7d2bb #define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e) void MD5Hash::process(const uint8_t *data /*[64]*/) { - uint32_t - a = abcd[0], b = abcd[1], - c = abcd[2], d = abcd[3]; - uint32_t t; - /* Define storage for little-endian or both types of CPUs. */ - uint32_t xbuf[16]; - const uint32_t *X; - - { - /* - * Determine dynamically whether this is a big-endian or - * little-endian machine, since we can use a more efficient - * algorithm on the latter. - */ - static const int w = 1; - - if(*((const uint8_t *)&w)) /* dynamic little-endian */ - { - /* - * On little-endian machines, we can process properly aligned - * data without copying it. - */ - if(!((data - (const uint8_t *)0) & 3)) { - /* data are properly aligned */ - X = (const uint32_t *)data; - } - else { - /* not aligned */ - memcpy(xbuf, data, 64); - X = xbuf; - } - } - else { /* dynamic big-endian */ - /* - * On big-endian machines, we must arrange the bytes in the - * right order. - */ - const uint8_t *xp = data; - int i; - - X = xbuf; /* (dynamic only) */ - for(i = 0; i < 16; ++i, xp += 4) - xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); - } - } + uint32_t a = abcd[0], b = abcd[1], c = abcd[2], d = abcd[3]; + uint32_t t; + /* Define storage for little-endian or both types of CPUs. */ + uint32_t xbuf[16]; + const uint32_t *X; + + { + /* + * Determine dynamically whether this is a big-endian or + * little-endian machine, since we can use a more efficient + * algorithm on the latter. + */ + static const int w = 1; + + if (*((const uint8_t *)&w)) /* dynamic little-endian */ + { + /* + * On little-endian machines, we can process properly aligned + * data without copying it. + */ + if (!((data - (const uint8_t *)0) & 3)) { + /* data are properly aligned */ + X = (const uint32_t *)data; + } + else { + /* not aligned */ + memcpy(xbuf, data, 64); + X = xbuf; + } + } + else { /* dynamic big-endian */ + /* + * On big-endian machines, we must arrange the bytes in the + * right order. + */ + const uint8_t *xp = data; + int i; + + X = xbuf; /* (dynamic only) */ + for (i = 0; i < 16; ++i, xp += 4) + xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); + } + } #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) - /* Round 1. */ - /* Let [abcd k s i] denote the operation - * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ + /* Round 1. */ + /* Let [abcd k s i] denote the operation + * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ #define F(x, y, z) (((x) & (y)) | (~(x) & (z))) -#define SET(a, b, c, d, k, s, Ti)\ - t = a + F(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 0, 7, T1); - SET(d, a, b, c, 1, 12, T2); - SET(c, d, a, b, 2, 17, T3); - SET(b, c, d, a, 3, 22, T4); - SET(a, b, c, d, 4, 7, T5); - SET(d, a, b, c, 5, 12, T6); - SET(c, d, a, b, 6, 17, T7); - SET(b, c, d, a, 7, 22, T8); - SET(a, b, c, d, 8, 7, T9); - SET(d, a, b, c, 9, 12, T10); - SET(c, d, a, b, 10, 17, T11); - SET(b, c, d, a, 11, 22, T12); - SET(a, b, c, d, 12, 7, T13); - SET(d, a, b, c, 13, 12, T14); - SET(c, d, a, b, 14, 17, T15); - SET(b, c, d, a, 15, 22, T16); +#define SET(a, b, c, d, k, s, Ti) \ + t = a + F(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 0, 7, T1); + SET(d, a, b, c, 1, 12, T2); + SET(c, d, a, b, 2, 17, T3); + SET(b, c, d, a, 3, 22, T4); + SET(a, b, c, d, 4, 7, T5); + SET(d, a, b, c, 5, 12, T6); + SET(c, d, a, b, 6, 17, T7); + SET(b, c, d, a, 7, 22, T8); + SET(a, b, c, d, 8, 7, T9); + SET(d, a, b, c, 9, 12, T10); + SET(c, d, a, b, 10, 17, T11); + SET(b, c, d, a, 11, 22, T12); + SET(a, b, c, d, 12, 7, T13); + SET(d, a, b, c, 13, 12, T14); + SET(c, d, a, b, 14, 17, T15); + SET(b, c, d, a, 15, 22, T16); #undef SET - /* Round 2. */ - /* Let [abcd k s i] denote the operation - * a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ + /* Round 2. */ + /* Let [abcd k s i] denote the operation + * a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ #define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) -#define SET(a, b, c, d, k, s, Ti)\ - t = a + G(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 1, 5, T17); - SET(d, a, b, c, 6, 9, T18); - SET(c, d, a, b, 11, 14, T19); - SET(b, c, d, a, 0, 20, T20); - SET(a, b, c, d, 5, 5, T21); - SET(d, a, b, c, 10, 9, T22); - SET(c, d, a, b, 15, 14, T23); - SET(b, c, d, a, 4, 20, T24); - SET(a, b, c, d, 9, 5, T25); - SET(d, a, b, c, 14, 9, T26); - SET(c, d, a, b, 3, 14, T27); - SET(b, c, d, a, 8, 20, T28); - SET(a, b, c, d, 13, 5, T29); - SET(d, a, b, c, 2, 9, T30); - SET(c, d, a, b, 7, 14, T31); - SET(b, c, d, a, 12, 20, T32); +#define SET(a, b, c, d, k, s, Ti) \ + t = a + G(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 1, 5, T17); + SET(d, a, b, c, 6, 9, T18); + SET(c, d, a, b, 11, 14, T19); + SET(b, c, d, a, 0, 20, T20); + SET(a, b, c, d, 5, 5, T21); + SET(d, a, b, c, 10, 9, T22); + SET(c, d, a, b, 15, 14, T23); + SET(b, c, d, a, 4, 20, T24); + SET(a, b, c, d, 9, 5, T25); + SET(d, a, b, c, 14, 9, T26); + SET(c, d, a, b, 3, 14, T27); + SET(b, c, d, a, 8, 20, T28); + SET(a, b, c, d, 13, 5, T29); + SET(d, a, b, c, 2, 9, T30); + SET(c, d, a, b, 7, 14, T31); + SET(b, c, d, a, 12, 20, T32); #undef SET - /* Round 3. */ - /* Let [abcd k s t] denote the operation - * a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ + /* Round 3. */ + /* Let [abcd k s t] denote the operation + * a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ #define H(x, y, z) ((x) ^ (y) ^ (z)) -#define SET(a, b, c, d, k, s, Ti)\ - t = a + H(b,c,d) + X[k] + Ti;\ +#define SET(a, b, c, d, k, s, Ti) \ + t = a + H(b, c, d) + X[k] + Ti; \ a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 5, 4, T33); - SET(d, a, b, c, 8, 11, T34); - SET(c, d, a, b, 11, 16, T35); - SET(b, c, d, a, 14, 23, T36); - SET(a, b, c, d, 1, 4, T37); - SET(d, a, b, c, 4, 11, T38); - SET(c, d, a, b, 7, 16, T39); - SET(b, c, d, a, 10, 23, T40); - SET(a, b, c, d, 13, 4, T41); - SET(d, a, b, c, 0, 11, T42); - SET(c, d, a, b, 3, 16, T43); - SET(b, c, d, a, 6, 23, T44); - SET(a, b, c, d, 9, 4, T45); - SET(d, a, b, c, 12, 11, T46); - SET(c, d, a, b, 15, 16, T47); - SET(b, c, d, a, 2, 23, T48); + /* Do the following 16 operations. */ + SET(a, b, c, d, 5, 4, T33); + SET(d, a, b, c, 8, 11, T34); + SET(c, d, a, b, 11, 16, T35); + SET(b, c, d, a, 14, 23, T36); + SET(a, b, c, d, 1, 4, T37); + SET(d, a, b, c, 4, 11, T38); + SET(c, d, a, b, 7, 16, T39); + SET(b, c, d, a, 10, 23, T40); + SET(a, b, c, d, 13, 4, T41); + SET(d, a, b, c, 0, 11, T42); + SET(c, d, a, b, 3, 16, T43); + SET(b, c, d, a, 6, 23, T44); + SET(a, b, c, d, 9, 4, T45); + SET(d, a, b, c, 12, 11, T46); + SET(c, d, a, b, 15, 16, T47); + SET(b, c, d, a, 2, 23, T48); #undef SET - /* Round 4. */ - /* Let [abcd k s t] denote the operation - * a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ + /* Round 4. */ + /* Let [abcd k s t] denote the operation + * a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ #define I(x, y, z) ((y) ^ ((x) | ~(z))) -#define SET(a, b, c, d, k, s, Ti)\ - t = a + I(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b - /* Do the following 16 operations. */ - SET(a, b, c, d, 0, 6, T49); - SET(d, a, b, c, 7, 10, T50); - SET(c, d, a, b, 14, 15, T51); - SET(b, c, d, a, 5, 21, T52); - SET(a, b, c, d, 12, 6, T53); - SET(d, a, b, c, 3, 10, T54); - SET(c, d, a, b, 10, 15, T55); - SET(b, c, d, a, 1, 21, T56); - SET(a, b, c, d, 8, 6, T57); - SET(d, a, b, c, 15, 10, T58); - SET(c, d, a, b, 6, 15, T59); - SET(b, c, d, a, 13, 21, T60); - SET(a, b, c, d, 4, 6, T61); - SET(d, a, b, c, 11, 10, T62); - SET(c, d, a, b, 2, 15, T63); - SET(b, c, d, a, 9, 21, T64); +#define SET(a, b, c, d, k, s, Ti) \ + t = a + I(b, c, d) + X[k] + Ti; \ + a = ROTATE_LEFT(t, s) + b + /* Do the following 16 operations. */ + SET(a, b, c, d, 0, 6, T49); + SET(d, a, b, c, 7, 10, T50); + SET(c, d, a, b, 14, 15, T51); + SET(b, c, d, a, 5, 21, T52); + SET(a, b, c, d, 12, 6, T53); + SET(d, a, b, c, 3, 10, T54); + SET(c, d, a, b, 10, 15, T55); + SET(b, c, d, a, 1, 21, T56); + SET(a, b, c, d, 8, 6, T57); + SET(d, a, b, c, 15, 10, T58); + SET(c, d, a, b, 6, 15, T59); + SET(b, c, d, a, 13, 21, T60); + SET(a, b, c, d, 4, 6, T61); + SET(d, a, b, c, 11, 10, T62); + SET(c, d, a, b, 2, 15, T63); + SET(b, c, d, a, 9, 21, T64); #undef SET - /* Then perform the following additions. (That is increment each - * of the four registers by the value it had before this block - * was started.) */ - abcd[0] += a; - abcd[1] += b; - abcd[2] += c; - abcd[3] += d; + /* Then perform the following additions. (That is increment each + * of the four registers by the value it had before this block + * was started.) */ + abcd[0] += a; + abcd[1] += b; + abcd[2] += c; + abcd[3] += d; } MD5Hash::MD5Hash() { - count[0] = count[1] = 0; - abcd[0] = 0x67452301; - abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; - abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; - abcd[3] = 0x10325476; + count[0] = count[1] = 0; + abcd[0] = 0x67452301; + abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; + abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; + abcd[3] = 0x10325476; } MD5Hash::~MD5Hash() @@ -275,116 +273,115 @@ MD5Hash::~MD5Hash() void MD5Hash::append(const uint8_t *data, int nbytes) { - const uint8_t *p = data; - int left = nbytes; - int offset = (count[0] >> 3) & 63; - uint32_t nbits = (uint32_t)(nbytes << 3); - - if(nbytes <= 0) - return; - - /* Update the message length. */ - count[1] += nbytes >> 29; - count[0] += nbits; - if(count[0] < nbits) - count[1]++; - - /* Process an initial partial block. */ - if(offset) { - int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); - - memcpy(buf + offset, p, copy); - if(offset + copy < 64) - return; - p += copy; - left -= copy; - process(buf); - } - - /* Process full blocks. */ - for(; left >= 64; p += 64, left -= 64) - process(p); - - /* Process a final partial block. */ - if(left) - memcpy(buf, p, left); + const uint8_t *p = data; + int left = nbytes; + int offset = (count[0] >> 3) & 63; + uint32_t nbits = (uint32_t)(nbytes << 3); + + if (nbytes <= 0) + return; + + /* Update the message length. */ + count[1] += nbytes >> 29; + count[0] += nbits; + if (count[0] < nbits) + count[1]++; + + /* Process an initial partial block. */ + if (offset) { + int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); + + memcpy(buf + offset, p, copy); + if (offset + copy < 64) + return; + p += copy; + left -= copy; + process(buf); + } + + /* Process full blocks. */ + for (; left >= 64; p += 64, left -= 64) + process(p); + + /* Process a final partial block. */ + if (left) + memcpy(buf, p, left); } -void MD5Hash::append(const string& str) +void MD5Hash::append(const string &str) { - if(str.size()) { - append((const uint8_t*)str.c_str(), str.size()); - } + if (str.size()) { + append((const uint8_t *)str.c_str(), str.size()); + } } -bool MD5Hash::append_file(const string& filepath) +bool MD5Hash::append_file(const string &filepath) { - FILE *f = path_fopen(filepath, "rb"); + FILE *f = path_fopen(filepath, "rb"); - if(!f) { - fprintf(stderr, "MD5: failed to open file %s\n", filepath.c_str()); - return false; - } + if (!f) { + fprintf(stderr, "MD5: failed to open file %s\n", filepath.c_str()); + return false; + } - const size_t buffer_size = 1024; - uint8_t buffer[buffer_size]; - size_t n; + const size_t buffer_size = 1024; + uint8_t buffer[buffer_size]; + size_t n; - do { - n = fread(buffer, 1, buffer_size, f); - append(buffer, n); - } while(n == buffer_size); + do { + n = fread(buffer, 1, buffer_size, f); + append(buffer, n); + } while (n == buffer_size); - bool success = (ferror(f) == 0); + bool success = (ferror(f) == 0); - fclose(f); + fclose(f); - return success; + return success; } void MD5Hash::finish(uint8_t digest[16]) { - static const uint8_t pad[64] = { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - uint8_t data[8]; - int i; - - /* Save the length before padding. */ - for(i = 0; i < 8; ++i) - data[i] = (uint8_t)(count[i >> 2] >> ((i & 3) << 3)); - - /* Pad to 56 bytes mod 64. */ - append(pad, ((55 - (count[0] >> 3)) & 63) + 1); - /* Append the length. */ - append(data, 8); - - for(i = 0; i < 16; ++i) - digest[i] = (uint8_t)(abcd[i >> 2] >> ((i & 3) << 3)); + static const uint8_t pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + uint8_t data[8]; + int i; + + /* Save the length before padding. */ + for (i = 0; i < 8; ++i) + data[i] = (uint8_t)(count[i >> 2] >> ((i & 3) << 3)); + + /* Pad to 56 bytes mod 64. */ + append(pad, ((55 - (count[0] >> 3)) & 63) + 1); + /* Append the length. */ + append(data, 8); + + for (i = 0; i < 16; ++i) + digest[i] = (uint8_t)(abcd[i >> 2] >> ((i & 3) << 3)); } string MD5Hash::get_hex() { - uint8_t digest[16]; - char buf[16*2+1]; + uint8_t digest[16]; + char buf[16 * 2 + 1]; - finish(digest); + finish(digest); - for(int i = 0; i < 16; i++) - sprintf(buf + i*2, "%02X", (unsigned int)digest[i]); - buf[sizeof(buf)-1] = '\0'; + for (int i = 0; i < 16; i++) + sprintf(buf + i * 2, "%02X", (unsigned int)digest[i]); + buf[sizeof(buf) - 1] = '\0'; - return string(buf); + return string(buf); } -string util_md5_string(const string& str) +string util_md5_string(const string &str) { - MD5Hash md5; - md5.append((uint8_t*)str.c_str(), str.size()); - return md5.get_hex(); + MD5Hash md5; + md5.append((uint8_t *)str.c_str(), str.size()); + return md5.get_hex(); } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_md5.h b/intern/cycles/util/util_md5.h index f8c0115d8ce..4622945f9d2 100644 --- a/intern/cycles/util/util_md5.h +++ b/intern/cycles/util/util_md5.h @@ -36,26 +36,26 @@ CCL_NAMESPACE_BEGIN class MD5Hash { -public: - MD5Hash(); - ~MD5Hash(); - - void append(const uint8_t *data, int size); - void append(const string& str); - bool append_file(const string& filepath); - string get_hex(); - -protected: - void process(const uint8_t *data); - void finish(uint8_t digest[16]); - - uint32_t count[2]; /* message length in bits, lsw first */ - uint32_t abcd[4]; /* digest buffer */ - uint8_t buf[64]; /* accumulate block */ + public: + MD5Hash(); + ~MD5Hash(); + + void append(const uint8_t *data, int size); + void append(const string &str); + bool append_file(const string &filepath); + string get_hex(); + + protected: + void process(const uint8_t *data); + void finish(uint8_t digest[16]); + + uint32_t count[2]; /* message length in bits, lsw first */ + uint32_t abcd[4]; /* digest buffer */ + uint8_t buf[64]; /* accumulate block */ }; -string util_md5_string(const string& str); +string util_md5_string(const string &str); CCL_NAMESPACE_END -#endif /* __UTIL_MD5_H__ */ +#endif /* __UTIL_MD5_H__ */ diff --git a/intern/cycles/util/util_murmurhash.cpp b/intern/cycles/util/util_murmurhash.cpp index 68df8fa1a84..5d728769fe9 100644 --- a/intern/cycles/util/util_murmurhash.cpp +++ b/intern/cycles/util/util_murmurhash.cpp @@ -27,15 +27,15 @@ #include "util/util_murmurhash.h" #if defined(_MSC_VER) -# define ROTL32(x,y) _rotl(x,y) -# define ROTL64(x,y) _rotl64(x,y) +# define ROTL32(x, y) _rotl(x, y) +# define ROTL64(x, y) _rotl64(x, y) # define BIG_CONSTANT(x) (x) #else ccl_device_inline uint32_t rotl32(uint32_t x, int8_t r) { - return (x << r) | (x >> (32 - r)); + return (x << r) | (x >> (32 - r)); } -# define ROTL32(x,y) rotl32(x,y) +# define ROTL32(x, y) rotl32(x, y) # define BIG_CONSTANT(x) (x##LLU) #endif @@ -45,82 +45,82 @@ CCL_NAMESPACE_BEGIN * handle aligned reads, do the conversion here. */ ccl_device_inline uint32_t mm_hash_getblock32(const uint32_t *p, int i) { - return p[i]; + return p[i]; } /* Finalization mix - force all bits of a hash block to avalanche */ -ccl_device_inline uint32_t mm_hash_fmix32 ( uint32_t h ) +ccl_device_inline uint32_t mm_hash_fmix32(uint32_t h) { - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; } uint32_t util_murmur_hash3(const void *key, int len, uint32_t seed) { - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 4; - - uint32_t h1 = seed; - - const uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; - - const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); - - for(int i = -nblocks; i; i++) { - uint32_t k1 = mm_hash_getblock32(blocks,i); - - k1 *= c1; - k1 = ROTL32(k1,15); - k1 *= c2; - - h1 ^= k1; - h1 = ROTL32(h1,13); - h1 = h1 * 5 + 0xe6546b64; - } - - const uint8_t *tail = (const uint8_t*)(data + nblocks*4); - - uint32_t k1 = 0; - - switch(len & 3) { - case 3: - k1 ^= tail[2] << 16; - ATTR_FALLTHROUGH; - case 2: - k1 ^= tail[1] << 8; - ATTR_FALLTHROUGH; - case 1: - k1 ^= tail[0]; - k1 *= c1; - k1 = ROTL32(k1,15); - k1 *= c2; - h1 ^= k1; - } - - h1 ^= len; - h1 = mm_hash_fmix32(h1); - return h1; + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = mm_hash_getblock32(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + ATTR_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + ATTR_FALLTHROUGH; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + } + + h1 ^= len; + h1 = mm_hash_fmix32(h1); + return h1; } /* This is taken from the cryptomatte specification 1.0 */ float util_hash_to_float(uint32_t hash) { - uint32_t mantissa = hash & (( 1 << 23) - 1); - uint32_t exponent = (hash >> 23) & ((1 << 8) - 1); - exponent = max(exponent, (uint32_t) 1); - exponent = min(exponent, (uint32_t) 254); - exponent = exponent << 23; - uint32_t sign = (hash >> 31); - sign = sign << 31; - uint32_t float_bits = sign | exponent | mantissa; - float f; - memcpy(&f, &float_bits, sizeof(uint32_t)); - return f; + uint32_t mantissa = hash & ((1 << 23) - 1); + uint32_t exponent = (hash >> 23) & ((1 << 8) - 1); + exponent = max(exponent, (uint32_t)1); + exponent = min(exponent, (uint32_t)254); + exponent = exponent << 23; + uint32_t sign = (hash >> 31); + sign = sign << 31; + uint32_t float_bits = sign | exponent | mantissa; + float f; + memcpy(&f, &float_bits, sizeof(uint32_t)); + return f; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_murmurhash.h b/intern/cycles/util/util_murmurhash.h index 3e7897d3ae6..2ec87efd87a 100644 --- a/intern/cycles/util/util_murmurhash.h +++ b/intern/cycles/util/util_murmurhash.h @@ -14,7 +14,6 @@ * limitations under the License. */ - #ifndef __UTIL_MURMURHASH_H__ #define __UTIL_MURMURHASH_H__ @@ -27,4 +26,4 @@ float util_hash_to_float(uint32_t hash); CCL_NAMESPACE_END -#endif /* __UTIL_MURMURHASH_H__ */ +#endif /* __UTIL_MURMURHASH_H__ */ diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h index 04f0fbaf288..7a8d5eec1f9 100644 --- a/intern/cycles/util/util_opengl.h +++ b/intern/cycles/util/util_opengl.h @@ -22,4 +22,4 @@ #include <GL/glew.h> -#endif /* __UTIL_OPENGL_H__ */ +#endif /* __UTIL_OPENGL_H__ */ diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h index 5267bd9a97a..46dd883282a 100644 --- a/intern/cycles/util/util_optimization.h +++ b/intern/cycles/util/util_optimization.h @@ -23,49 +23,49 @@ * * Compile a regular, SSE2 and SSE3 kernel. */ -#if defined(i386) || defined(_M_IX86) +# if defined(i386) || defined(_M_IX86) /* We require minimum SSE2 support on x86, so auto enable. */ -# define __KERNEL_SSE2__ +# define __KERNEL_SSE2__ -# ifdef WITH_KERNEL_SSE2 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 -# endif +# ifdef WITH_KERNEL_SSE2 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2 +# endif -# ifdef WITH_KERNEL_SSE3 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# endif +# ifdef WITH_KERNEL_SSE3 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# endif -#endif /* defined(i386) || defined(_M_IX86) */ +# endif /* defined(i386) || defined(_M_IX86) */ /* x86-64 * * Compile a regular (includes SSE2), SSE3, SSE 4.1, AVX and AVX2 kernel. */ -#if defined(__x86_64__) || defined(_M_X64) +# if defined(__x86_64__) || defined(_M_X64) /* SSE2 is always available on x86-64 CPUs, so auto enable */ -# define __KERNEL_SSE2__ +# define __KERNEL_SSE2__ /* no SSE2 kernel on x86-64, part of regular kernel */ -# ifdef WITH_KERNEL_SSE3 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 -# endif +# ifdef WITH_KERNEL_SSE3 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3 +# endif -# ifdef WITH_KERNEL_SSE41 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 -# endif +# ifdef WITH_KERNEL_SSE41 +# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41 +# endif -# ifdef WITH_KERNEL_AVX -# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX -# endif +# ifdef WITH_KERNEL_AVX +# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX +# endif -# ifdef WITH_KERNEL_AVX2 -# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 -# endif +# ifdef WITH_KERNEL_AVX2 +# define WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 +# endif -#endif /* defined(__x86_64__) || defined(_M_X64) */ +# endif /* defined(__x86_64__) || defined(_M_X64) */ #endif -#endif /* __UTIL_OPTIMIZATION_H__ */ +#endif /* __UTIL_OPTIMIZATION_H__ */ diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h index 03815486429..cfbe416aba1 100644 --- a/intern/cycles/util/util_param.h +++ b/intern/cycles/util/util_param.h @@ -32,4 +32,4 @@ static constexpr TypeDesc TypeFloat2(TypeDesc::FLOAT, TypeDesc::VEC2); CCL_NAMESPACE_END -#endif /* __UTIL_PARAM_H__ */ +#endif /* __UTIL_PARAM_H__ */ diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 93080a6c80c..77293c45f6b 100644 --- a/intern/cycles/util/util_path.cpp +++ b/intern/cycles/util/util_path.cpp @@ -58,7 +58,7 @@ typedef struct _stati64 path_stat_t; typedef struct _stat path_stat_t; # endif # ifndef S_ISDIR -# define S_ISDIR(x) (((x) & _S_IFDIR) == _S_IFDIR) +# define S_ISDIR(x) (((x)&_S_IFDIR) == _S_IFDIR) # endif #else typedef struct stat path_stat_t; @@ -72,918 +72,870 @@ namespace { #ifdef _WIN32 class directory_iterator { -public: - class path_info { - public: - path_info(const string& path, - const WIN32_FIND_DATAW& find_data) - : path_(path), - find_data_(find_data) - { - } - - string path() { - return path_join(path_, string_from_wstring(find_data_.cFileName)); - } - protected: - const string& path_; - const WIN32_FIND_DATAW& find_data_; - }; - - directory_iterator() - : path_info_("", find_data_), - h_find_(INVALID_HANDLE_VALUE) - { - } - - explicit directory_iterator(const string& path) - : path_(path), - path_info_(path, find_data_) - { - string wildcard = path; - if(wildcard[wildcard.size() - 1] != DIR_SEP) { - wildcard += DIR_SEP; - } - wildcard += "*"; - h_find_ = FindFirstFileW(string_to_wstring(wildcard).c_str(), - &find_data_); - if(h_find_ != INVALID_HANDLE_VALUE) { - skip_dots(); - } - } - - ~directory_iterator() - { - if(h_find_ != INVALID_HANDLE_VALUE) { - FindClose(h_find_); - } - } - - directory_iterator& operator++() - { - step(); - return *this; - } - - path_info* operator-> () - { - return &path_info_; - } - - bool operator!=(const directory_iterator& other) - { - return h_find_ != other.h_find_; - } - -protected: - bool step() - { - if(do_step()) { - return skip_dots(); - } - return false; - } - - bool do_step() - { - if(h_find_ != INVALID_HANDLE_VALUE) { - bool result = FindNextFileW(h_find_, &find_data_) == TRUE; - if(!result) { - FindClose(h_find_); - h_find_ = INVALID_HANDLE_VALUE; - } - return result; - } - return false; - } - - bool skip_dots() - { - while(wcscmp(find_data_.cFileName, L".") == 0 || - wcscmp(find_data_.cFileName, L"..") == 0) - { - if(!do_step()) { - return false; - } - } - return true; - } - - string path_; - path_info path_info_; - WIN32_FIND_DATAW find_data_; - HANDLE h_find_; + public: + class path_info { + public: + path_info(const string &path, const WIN32_FIND_DATAW &find_data) + : path_(path), find_data_(find_data) + { + } + + string path() + { + return path_join(path_, string_from_wstring(find_data_.cFileName)); + } + + protected: + const string &path_; + const WIN32_FIND_DATAW &find_data_; + }; + + directory_iterator() : path_info_("", find_data_), h_find_(INVALID_HANDLE_VALUE) + { + } + + explicit directory_iterator(const string &path) : path_(path), path_info_(path, find_data_) + { + string wildcard = path; + if (wildcard[wildcard.size() - 1] != DIR_SEP) { + wildcard += DIR_SEP; + } + wildcard += "*"; + h_find_ = FindFirstFileW(string_to_wstring(wildcard).c_str(), &find_data_); + if (h_find_ != INVALID_HANDLE_VALUE) { + skip_dots(); + } + } + + ~directory_iterator() + { + if (h_find_ != INVALID_HANDLE_VALUE) { + FindClose(h_find_); + } + } + + directory_iterator &operator++() + { + step(); + return *this; + } + + path_info *operator->() + { + return &path_info_; + } + + bool operator!=(const directory_iterator &other) + { + return h_find_ != other.h_find_; + } + + protected: + bool step() + { + if (do_step()) { + return skip_dots(); + } + return false; + } + + bool do_step() + { + if (h_find_ != INVALID_HANDLE_VALUE) { + bool result = FindNextFileW(h_find_, &find_data_) == TRUE; + if (!result) { + FindClose(h_find_); + h_find_ = INVALID_HANDLE_VALUE; + } + return result; + } + return false; + } + + bool skip_dots() + { + while (wcscmp(find_data_.cFileName, L".") == 0 || wcscmp(find_data_.cFileName, L"..") == 0) { + if (!do_step()) { + return false; + } + } + return true; + } + + string path_; + path_info path_info_; + WIN32_FIND_DATAW find_data_; + HANDLE h_find_; }; -#else /* _WIN32 */ +#else /* _WIN32 */ class directory_iterator { -public: - class path_info { - public: - explicit path_info(const string& path) - : path_(path), - entry_(NULL) - { - } - - string path() { - return path_join(path_, entry_->d_name); - } - - void current_entry_set(const struct dirent *entry) - { - entry_ = entry; - } - protected: - const string& path_; - const struct dirent *entry_; - }; - - directory_iterator() - : path_info_(""), - name_list_(NULL), - num_entries_(-1), - cur_entry_(-1) - { - } - - explicit directory_iterator(const string& path) - : path_(path), - path_info_(path_), - cur_entry_(0) - { - num_entries_ = scandir(path.c_str(), - &name_list_, - NULL, - alphasort); - if(num_entries_ < 0) { - perror("scandir"); - } - else { - skip_dots(); - } - } - - ~directory_iterator() - { - destroy_name_list(); - } - - directory_iterator& operator++() - { - step(); - return *this; - } - - path_info* operator-> () - { - path_info_.current_entry_set(name_list_[cur_entry_]); - return &path_info_; - } - - bool operator!=(const directory_iterator& other) - { - return name_list_ != other.name_list_; - } - -protected: - bool step() - { - if(do_step()) { - return skip_dots(); - } - return false; - } - - bool do_step() - { - ++cur_entry_; - if(cur_entry_ >= num_entries_) { - destroy_name_list(); - return false; - } - return true; - } - - /* Skip . and .. folders. */ - bool skip_dots() - { - while(strcmp(name_list_[cur_entry_]->d_name, ".") == 0 || - strcmp(name_list_[cur_entry_]->d_name, "..") == 0) - { - if(!step()) { - return false; - } - } - return true; - } - - void destroy_name_list() - { - if(name_list_ == NULL) { - return; - } - for(int i = 0; i < num_entries_; ++i) { - free(name_list_[i]); - } - free(name_list_); - name_list_ = NULL; - } - - string path_; - path_info path_info_; - struct dirent **name_list_; - int num_entries_, cur_entry_; + public: + class path_info { + public: + explicit path_info(const string &path) : path_(path), entry_(NULL) + { + } + + string path() + { + return path_join(path_, entry_->d_name); + } + + void current_entry_set(const struct dirent *entry) + { + entry_ = entry; + } + + protected: + const string &path_; + const struct dirent *entry_; + }; + + directory_iterator() : path_info_(""), name_list_(NULL), num_entries_(-1), cur_entry_(-1) + { + } + + explicit directory_iterator(const string &path) : path_(path), path_info_(path_), cur_entry_(0) + { + num_entries_ = scandir(path.c_str(), &name_list_, NULL, alphasort); + if (num_entries_ < 0) { + perror("scandir"); + } + else { + skip_dots(); + } + } + + ~directory_iterator() + { + destroy_name_list(); + } + + directory_iterator &operator++() + { + step(); + return *this; + } + + path_info *operator->() + { + path_info_.current_entry_set(name_list_[cur_entry_]); + return &path_info_; + } + + bool operator!=(const directory_iterator &other) + { + return name_list_ != other.name_list_; + } + + protected: + bool step() + { + if (do_step()) { + return skip_dots(); + } + return false; + } + + bool do_step() + { + ++cur_entry_; + if (cur_entry_ >= num_entries_) { + destroy_name_list(); + return false; + } + return true; + } + + /* Skip . and .. folders. */ + bool skip_dots() + { + while (strcmp(name_list_[cur_entry_]->d_name, ".") == 0 || + strcmp(name_list_[cur_entry_]->d_name, "..") == 0) { + if (!step()) { + return false; + } + } + return true; + } + + void destroy_name_list() + { + if (name_list_ == NULL) { + return; + } + for (int i = 0; i < num_entries_; ++i) { + free(name_list_[i]); + } + free(name_list_); + name_list_ = NULL; + } + + string path_; + path_info path_info_; + struct dirent **name_list_; + int num_entries_, cur_entry_; }; -#endif /* _WIN32 */ +#endif /* _WIN32 */ -size_t find_last_slash(const string& path) +size_t find_last_slash(const string &path) { - for(size_t i = 0; i < path.size(); ++i) { - size_t index = path.size() - 1 - i; + for (size_t i = 0; i < path.size(); ++i) { + size_t index = path.size() - 1 - i; #ifdef _WIN32 - if(path[index] == DIR_SEP || path[index] == DIR_SEP_ALT) + if (path[index] == DIR_SEP || path[index] == DIR_SEP_ALT) #else - if(path[index] == DIR_SEP) + if (path[index] == DIR_SEP) #endif - { - return index; - } - } - return string::npos; -} - -} /* namespace */ - -static char *path_specials(const string& sub) -{ - static bool env_init = false; - static char *env_shader_path; - static char *env_source_path; - if(!env_init) { - env_shader_path = getenv("CYCLES_SHADER_PATH"); - /* NOTE: It is KERNEL in env variable for compatibility reasons. */ - env_source_path = getenv("CYCLES_KERNEL_PATH"); - env_init = true; - } - if(env_shader_path != NULL && sub == "shader") { - return env_shader_path; - } - else if(env_shader_path != NULL && sub == "source") { - return env_source_path; - } - return NULL; + { + return index; + } + } + return string::npos; +} + +} /* namespace */ + +static char *path_specials(const string &sub) +{ + static bool env_init = false; + static char *env_shader_path; + static char *env_source_path; + if (!env_init) { + env_shader_path = getenv("CYCLES_SHADER_PATH"); + /* NOTE: It is KERNEL in env variable for compatibility reasons. */ + env_source_path = getenv("CYCLES_KERNEL_PATH"); + env_init = true; + } + if (env_shader_path != NULL && sub == "shader") { + return env_shader_path; + } + else if (env_shader_path != NULL && sub == "source") { + return env_source_path; + } + return NULL; } #if defined(__linux__) || defined(__APPLE__) static string path_xdg_cache_get() { - const char *home = getenv("XDG_CACHE_HOME"); - if(home) { - return string(home); - } - else { - home = getenv("HOME"); - if(home == NULL) { - home = getpwuid(getuid())->pw_dir; - } - return path_join(string(home), ".cache"); - } + const char *home = getenv("XDG_CACHE_HOME"); + if (home) { + return string(home); + } + else { + home = getenv("HOME"); + if (home == NULL) { + home = getpwuid(getuid())->pw_dir; + } + return path_join(string(home), ".cache"); + } } #endif -void path_init(const string& path, const string& user_path) +void path_init(const string &path, const string &user_path) { - cached_path = path; - cached_user_path = user_path; + cached_path = path; + cached_user_path = user_path; #ifdef _MSC_VER - // workaround for https://svn.boost.org/trac/boost/ticket/6320 - // indirectly init boost codec here since it's not thread safe, and can - // cause crashes when it happens in multithreaded image load - OIIO::Filesystem::exists(path); + // workaround for https://svn.boost.org/trac/boost/ticket/6320 + // indirectly init boost codec here since it's not thread safe, and can + // cause crashes when it happens in multithreaded image load + OIIO::Filesystem::exists(path); #endif } -string path_get(const string& sub) +string path_get(const string &sub) { - char *special = path_specials(sub); - if(special != NULL) - return special; + char *special = path_specials(sub); + if (special != NULL) + return special; - if(cached_path == "") - cached_path = path_dirname(Sysutil::this_program_path()); + if (cached_path == "") + cached_path = path_dirname(Sysutil::this_program_path()); - return path_join(cached_path, sub); + return path_join(cached_path, sub); } -string path_user_get(const string& sub) +string path_user_get(const string &sub) { - if(cached_user_path == "") - cached_user_path = path_dirname(Sysutil::this_program_path()); + if (cached_user_path == "") + cached_user_path = path_dirname(Sysutil::this_program_path()); - return path_join(cached_user_path, sub); + return path_join(cached_user_path, sub); } -string path_cache_get(const string& sub) +string path_cache_get(const string &sub) { #if defined(__linux__) || defined(__APPLE__) - if(cached_xdg_cache_path == "") { - cached_xdg_cache_path = path_xdg_cache_get(); - } - string result = path_join(cached_xdg_cache_path, "cycles"); - return path_join(result, sub); + if (cached_xdg_cache_path == "") { + cached_xdg_cache_path = path_xdg_cache_get(); + } + string result = path_join(cached_xdg_cache_path, "cycles"); + return path_join(result, sub); #else - /* TODO(sergey): What that should be on Windows? */ - return path_user_get(path_join("cache", sub)); + /* TODO(sergey): What that should be on Windows? */ + return path_user_get(path_join("cache", sub)); #endif } #if defined(__linux__) || defined(__APPLE__) -string path_xdg_home_get(const string& sub = ""); +string path_xdg_home_get(const string &sub = ""); #endif -string path_filename(const string& path) +string path_filename(const string &path) { - size_t index = find_last_slash(path); - if(index != string::npos) { - /* Corner cases to match boost behavior. */ + size_t index = find_last_slash(path); + if (index != string::npos) { + /* Corner cases to match boost behavior. */ #ifndef _WIN32 - if(index == 0 && path.size() == 1) { - return path; - } + if (index == 0 && path.size() == 1) { + return path; + } #endif - if(index == path.size() - 1) { + if (index == path.size() - 1) { #ifdef _WIN32 - if(index == 2) { - return string(1, DIR_SEP); - } + if (index == 2) { + return string(1, DIR_SEP); + } #endif - return "."; - } - return path.substr(index + 1, path.size() - index - 1); - } - return path; + return "."; + } + return path.substr(index + 1, path.size() - index - 1); + } + return path; } -string path_dirname(const string& path) +string path_dirname(const string &path) { - size_t index = find_last_slash(path); - if(index != string::npos) { + size_t index = find_last_slash(path); + if (index != string::npos) { #ifndef _WIN32 - if(index == 0 && path.size() > 1) { - return string(1, DIR_SEP); - } + if (index == 0 && path.size() > 1) { + return string(1, DIR_SEP); + } #endif - return path.substr(0, index); - } - return ""; + return path.substr(0, index); + } + return ""; } -string path_join(const string& dir, const string& file) +string path_join(const string &dir, const string &file) { - if(dir.size() == 0) { - return file; - } - if(file.size() == 0) { - return dir; - } - string result = dir; + if (dir.size() == 0) { + return file; + } + if (file.size() == 0) { + return dir; + } + string result = dir; #ifndef _WIN32 - if(result[result.size() - 1] != DIR_SEP && - file[0] != DIR_SEP) + if (result[result.size() - 1] != DIR_SEP && file[0] != DIR_SEP) #else - if(result[result.size() - 1] != DIR_SEP && - result[result.size() - 1] != DIR_SEP_ALT && - file[0] != DIR_SEP && - file[0] != DIR_SEP_ALT) + if (result[result.size() - 1] != DIR_SEP && result[result.size() - 1] != DIR_SEP_ALT && + file[0] != DIR_SEP && file[0] != DIR_SEP_ALT) #endif - { - result += DIR_SEP; - } - result += file; - return result; + { + result += DIR_SEP; + } + result += file; + return result; } -string path_escape(const string& path) +string path_escape(const string &path) { - string result = path; - string_replace(result, " ", "\\ "); - return result; + string result = path; + string_replace(result, " ", "\\ "); + return result; } -bool path_is_relative(const string& path) +bool path_is_relative(const string &path) { #ifdef _WIN32 # ifdef HAVE_SHLWAPI_H - return PathIsRelative(path.c_str()); + return PathIsRelative(path.c_str()); # else /* HAVE_SHLWAPI_H */ - if(path.size() >= 3) { - return !(((path[0] >= 'a' && path[0] <= 'z') || - (path[0] >= 'A' && path[0] <= 'Z')) && - path[1] == ':' && path[2] == DIR_SEP); - } - return true; -# endif /* HAVE_SHLWAPI_H */ -#else /* _WIN32 */ - if(path.size() == 0) { - return 1; - } - return path[0] != DIR_SEP; -#endif /* _WIN32 */ + if (path.size() >= 3) { + return !(((path[0] >= 'a' && path[0] <= 'z') || (path[0] >= 'A' && path[0] <= 'Z')) && + path[1] == ':' && path[2] == DIR_SEP); + } + return true; +# endif /* HAVE_SHLWAPI_H */ +#else /* _WIN32 */ + if (path.size() == 0) { + return 1; + } + return path[0] != DIR_SEP; +#endif /* _WIN32 */ } #ifdef _WIN32 /* Add a slash if the UNC path points to a share. */ -static string path_unc_add_slash_to_share(const string& path) +static string path_unc_add_slash_to_share(const string &path) { - size_t slash_after_server = path.find(DIR_SEP, 2); - if(slash_after_server != string::npos) { - size_t slash_after_share = path.find(DIR_SEP, - slash_after_server + 1); - if(slash_after_share == string::npos) { - return path + DIR_SEP; - } - } - return path; + size_t slash_after_server = path.find(DIR_SEP, 2); + if (slash_after_server != string::npos) { + size_t slash_after_share = path.find(DIR_SEP, slash_after_server + 1); + if (slash_after_share == string::npos) { + return path + DIR_SEP; + } + } + return path; } /* Convert: * \\?\UNC\server\share\folder\... to \\server\share\folder\... * \\?\C:\ to C:\ and \\?\C:\folder\... to C:\folder\... */ -static string path_unc_to_short(const string& path) -{ - size_t len = path.size(); - if((len > 3) && - (path[0] == DIR_SEP) && - (path[1] == DIR_SEP) && - (path[2] == '?') && - ((path[3] == DIR_SEP) || (path[3] == DIR_SEP_ALT))) - { - if((len > 5) && (path[5] == ':')) { - return path.substr(4, len - 4); - } - else if((len > 7) && - (path.substr(4, 3) == "UNC") && - ((path[7] == DIR_SEP) || (path[7] == DIR_SEP_ALT))) - { - return "\\\\" + path.substr(8, len - 8); - } - } - return path; -} - -static string path_cleanup_unc(const string& path) -{ - string result = path_unc_to_short(path); - if(path.size() > 2) { - /* It's possible path is now a non-UNC. */ - if(result[0] == DIR_SEP && result[1] == DIR_SEP) { - return path_unc_add_slash_to_share(result); - } - } - return result; +static string path_unc_to_short(const string &path) +{ + size_t len = path.size(); + if ((len > 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP) && (path[2] == '?') && + ((path[3] == DIR_SEP) || (path[3] == DIR_SEP_ALT))) { + if ((len > 5) && (path[5] == ':')) { + return path.substr(4, len - 4); + } + else if ((len > 7) && (path.substr(4, 3) == "UNC") && + ((path[7] == DIR_SEP) || (path[7] == DIR_SEP_ALT))) { + return "\\\\" + path.substr(8, len - 8); + } + } + return path; +} + +static string path_cleanup_unc(const string &path) +{ + string result = path_unc_to_short(path); + if (path.size() > 2) { + /* It's possible path is now a non-UNC. */ + if (result[0] == DIR_SEP && result[1] == DIR_SEP) { + return path_unc_add_slash_to_share(result); + } + } + return result; } /* Make path compatible for stat() functions. */ -static string path_make_compatible(const string& path) -{ - string result = path; - /* In Windows stat() doesn't recognize dir ending on a slash. */ - if(result.size() > 3 && result[result.size() - 1] == DIR_SEP) { - result.resize(result.size() - 1); - } - /* Clean up UNC path. */ - if((path.size() >= 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP)) { - result = path_cleanup_unc(result); - } - /* Make sure volume-only path ends up wit ha directory separator. */ - if(result.size() == 2 && result[1] == ':') { - result += DIR_SEP; - } - return result; -} - -static int path_wstat(const wstring& path_wc, path_stat_t *st) -{ -#if defined(_MSC_VER) || defined(__MINGW64__) - return _wstat64(path_wc.c_str(), st); -#elif defined(__MINGW32__) - return _wstati64(path_wc.c_str(), st); -#else - return _wstat(path_wc.c_str(), st); -#endif +static string path_make_compatible(const string &path) +{ + string result = path; + /* In Windows stat() doesn't recognize dir ending on a slash. */ + if (result.size() > 3 && result[result.size() - 1] == DIR_SEP) { + result.resize(result.size() - 1); + } + /* Clean up UNC path. */ + if ((path.size() >= 3) && (path[0] == DIR_SEP) && (path[1] == DIR_SEP)) { + result = path_cleanup_unc(result); + } + /* Make sure volume-only path ends up wit ha directory separator. */ + if (result.size() == 2 && result[1] == ':') { + result += DIR_SEP; + } + return result; +} + +static int path_wstat(const wstring &path_wc, path_stat_t *st) +{ +# if defined(_MSC_VER) || defined(__MINGW64__) + return _wstat64(path_wc.c_str(), st); +# elif defined(__MINGW32__) + return _wstati64(path_wc.c_str(), st); +# else + return _wstat(path_wc.c_str(), st); +# endif } -static int path_stat(const string& path, path_stat_t *st) +static int path_stat(const string &path, path_stat_t *st) { - wstring path_wc = string_to_wstring(path); - return path_wstat(path_wc, st); + wstring path_wc = string_to_wstring(path); + return path_wstat(path_wc, st); } #else /* _WIN32 */ -static int path_stat(const string& path, path_stat_t *st) +static int path_stat(const string &path, path_stat_t *st) { - return stat(path.c_str(), st); + return stat(path.c_str(), st); } -#endif /* _WIN32 */ +#endif /* _WIN32 */ -size_t path_file_size(const string& path) +size_t path_file_size(const string &path) { - path_stat_t st; - if(path_stat(path, &st) != 0) { - return -1; - } - return st.st_size; + path_stat_t st; + if (path_stat(path, &st) != 0) { + return -1; + } + return st.st_size; } -bool path_exists(const string& path) +bool path_exists(const string &path) { #ifdef _WIN32 - string fixed_path = path_make_compatible(path); - wstring path_wc = string_to_wstring(fixed_path); - path_stat_t st; - if(path_wstat(path_wc, &st) != 0) { - return false; - } - return st.st_mode != 0; + string fixed_path = path_make_compatible(path); + wstring path_wc = string_to_wstring(fixed_path); + path_stat_t st; + if (path_wstat(path_wc, &st) != 0) { + return false; + } + return st.st_mode != 0; #else /* _WIN32 */ - struct stat st; - if(stat(path.c_str(), &st) != 0) { - return 0; - } - return st.st_mode != 0; -#endif /* _WIN32 */ + struct stat st; + if (stat(path.c_str(), &st) != 0) { + return 0; + } + return st.st_mode != 0; +#endif /* _WIN32 */ } -bool path_is_directory(const string& path) +bool path_is_directory(const string &path) { - path_stat_t st; - if(path_stat(path, &st) != 0) { - return false; - } - return S_ISDIR(st.st_mode); + path_stat_t st; + if (path_stat(path, &st) != 0) { + return false; + } + return S_ISDIR(st.st_mode); } -static void path_files_md5_hash_recursive(MD5Hash& hash, const string& dir) +static void path_files_md5_hash_recursive(MD5Hash &hash, const string &dir) { - if(path_exists(dir)) { - directory_iterator it(dir), it_end; + if (path_exists(dir)) { + directory_iterator it(dir), it_end; - for(; it != it_end; ++it) { - if(path_is_directory(it->path())) { - path_files_md5_hash_recursive(hash, it->path()); - } - else { - string filepath = it->path(); + for (; it != it_end; ++it) { + if (path_is_directory(it->path())) { + path_files_md5_hash_recursive(hash, it->path()); + } + else { + string filepath = it->path(); - hash.append((const uint8_t*)filepath.c_str(), filepath.size()); - hash.append_file(filepath); - } - } - } + hash.append((const uint8_t *)filepath.c_str(), filepath.size()); + hash.append_file(filepath); + } + } + } } -string path_files_md5_hash(const string& dir) +string path_files_md5_hash(const string &dir) { - /* computes md5 hash of all files in the directory */ - MD5Hash hash; + /* computes md5 hash of all files in the directory */ + MD5Hash hash; - path_files_md5_hash_recursive(hash, dir); + path_files_md5_hash_recursive(hash, dir); - return hash.get_hex(); + return hash.get_hex(); } -static bool create_directories_recursivey(const string& path) +static bool create_directories_recursivey(const string &path) { - if(path_is_directory(path)) { - /* Directory already exists, nothing to do. */ - return true; - } - if(path_exists(path)) { - /* File exists and it's not a directory. */ - return false; - } + if (path_is_directory(path)) { + /* Directory already exists, nothing to do. */ + return true; + } + if (path_exists(path)) { + /* File exists and it's not a directory. */ + return false; + } - string parent = path_dirname(path); - if(parent.size() > 0 && parent != path) { - if(!create_directories_recursivey(parent)) { - return false; - } - } + string parent = path_dirname(path); + if (parent.size() > 0 && parent != path) { + if (!create_directories_recursivey(parent)) { + return false; + } + } #ifdef _WIN32 - wstring path_wc = string_to_wstring(path); - return _wmkdir(path_wc.c_str()) == 0; + wstring path_wc = string_to_wstring(path); + return _wmkdir(path_wc.c_str()) == 0; #else - return mkdir(path.c_str(), 0777) == 0; + return mkdir(path.c_str(), 0777) == 0; #endif } -void path_create_directories(const string& filepath) +void path_create_directories(const string &filepath) { - string path = path_dirname(filepath); - create_directories_recursivey(path); + string path = path_dirname(filepath); + create_directories_recursivey(path); } -bool path_write_binary(const string& path, const vector<uint8_t>& binary) +bool path_write_binary(const string &path, const vector<uint8_t> &binary) { - path_create_directories(path); + path_create_directories(path); - /* write binary file from memory */ - FILE *f = path_fopen(path, "wb"); + /* write binary file from memory */ + FILE *f = path_fopen(path, "wb"); - if(!f) - return false; + if (!f) + return false; - if(binary.size() > 0) - fwrite(&binary[0], sizeof(uint8_t), binary.size(), f); + if (binary.size() > 0) + fwrite(&binary[0], sizeof(uint8_t), binary.size(), f); - fclose(f); + fclose(f); - return true; + return true; } -bool path_write_text(const string& path, string& text) +bool path_write_text(const string &path, string &text) { - vector<uint8_t> binary(text.length(), 0); - std::copy(text.begin(), text.end(), binary.begin()); + vector<uint8_t> binary(text.length(), 0); + std::copy(text.begin(), text.end(), binary.begin()); - return path_write_binary(path, binary); + return path_write_binary(path, binary); } -bool path_read_binary(const string& path, vector<uint8_t>& binary) +bool path_read_binary(const string &path, vector<uint8_t> &binary) { - /* read binary file into memory */ - FILE *f = path_fopen(path, "rb"); + /* read binary file into memory */ + FILE *f = path_fopen(path, "rb"); - if(!f) { - binary.resize(0); - return false; - } + if (!f) { + binary.resize(0); + return false; + } - binary.resize(path_file_size(path)); + binary.resize(path_file_size(path)); - if(binary.size() == 0) { - fclose(f); - return false; - } + if (binary.size() == 0) { + fclose(f); + return false; + } - if(fread(&binary[0], sizeof(uint8_t), binary.size(), f) != binary.size()) { - fclose(f); - return false; - } + if (fread(&binary[0], sizeof(uint8_t), binary.size(), f) != binary.size()) { + fclose(f); + return false; + } - fclose(f); + fclose(f); - return true; + return true; } -bool path_read_text(const string& path, string& text) +bool path_read_text(const string &path, string &text) { - vector<uint8_t> binary; + vector<uint8_t> binary; - if(!path_exists(path) || !path_read_binary(path, binary)) - return false; + if (!path_exists(path) || !path_read_binary(path, binary)) + return false; - const char *str = (const char*)&binary[0]; - size_t size = binary.size(); - text = string(str, size); + const char *str = (const char *)&binary[0]; + size_t size = binary.size(); + text = string(str, size); - return true; + return true; } -uint64_t path_modified_time(const string& path) +uint64_t path_modified_time(const string &path) { - path_stat_t st; - if(path_stat(path, &st) != 0) { - return 0; - } - return st.st_mtime; + path_stat_t st; + if (path_stat(path, &st) != 0) { + return 0; + } + return st.st_mtime; } -bool path_remove(const string& path) +bool path_remove(const string &path) { - return remove(path.c_str()) == 0; + return remove(path.c_str()) == 0; } struct SourceReplaceState { - typedef map<string, string> ProcessedMapping; - /* Base director for all relative include headers. */ - string base; - /* Result of processed files. */ - ProcessedMapping processed_files; - /* Set of files which are considered "precompiled" and which are replaced - * with and empty string on a subsequent occurrence in include statement. - */ - set<string> precompiled_headers; + typedef map<string, string> ProcessedMapping; + /* Base director for all relative include headers. */ + string base; + /* Result of processed files. */ + ProcessedMapping processed_files; + /* Set of files which are considered "precompiled" and which are replaced + * with and empty string on a subsequent occurrence in include statement. + */ + set<string> precompiled_headers; }; -static string path_source_replace_includes_recursive( - const string& source, - const string& source_filepath, - SourceReplaceState *state); - -static string line_directive(const SourceReplaceState& state, - const string& path, - const int line) -{ - string unescaped_path = path; - /* First we make path relative. */ - if(string_startswith(unescaped_path, state.base.c_str())) { - const string base_file = path_filename(state.base); - const size_t base_len = state.base.length(); - unescaped_path = base_file + - unescaped_path.substr(base_len, - unescaped_path.length() - base_len); - } - /* Second, we replace all unsafe characters. */ - const size_t length = unescaped_path.length(); - string escaped_path = ""; - for(size_t i = 0; i < length; ++i) { - const char ch = unescaped_path[i]; - if(strchr("\"\'\?\\", ch) != NULL) { - escaped_path += "\\"; - } - escaped_path += ch; - } - /* TODO(sergey): Check whether using std::to_string combined with several - * concatenation operations is any faster. - */ - return string_printf("#line %d \"%s\"", line, escaped_path.c_str()); -} - -static string path_source_handle_preprocessor( - const string& preprocessor_line, - const string& source_filepath, - const size_t line_number, - SourceReplaceState *state) -{ - string result = preprocessor_line; - string token = string_strip( - preprocessor_line.substr(1, preprocessor_line.size() - 1)); - if(string_startswith(token, "include")) { - token = string_strip(token.substr(7, token.size() - 7)); - if(token[0] == '"') { - const size_t n_start = 1; - const size_t n_end = token.find("\"", n_start); - const string filename = token.substr(n_start, n_end - n_start); - const bool is_precompiled = string_endswith(token, "// PRECOMPILED"); - string filepath = path_join(state->base, filename); - if(!path_exists(filepath)) { - filepath = path_join(path_dirname(source_filepath), - filename); - } - if(is_precompiled) { - state->precompiled_headers.insert(filepath); - } - string text; - if(path_read_text(filepath, text)) { - text = path_source_replace_includes_recursive( - text, filepath, state); - /* Use line directives for better error messages. */ - result = line_directive(*state, filepath, 1) + "\n" - + text + "\n" - + line_directive(*state, source_filepath, line_number + 1); - } - } - } - return result; +static string path_source_replace_includes_recursive(const string &source, + const string &source_filepath, + SourceReplaceState *state); + +static string line_directive(const SourceReplaceState &state, const string &path, const int line) +{ + string unescaped_path = path; + /* First we make path relative. */ + if (string_startswith(unescaped_path, state.base.c_str())) { + const string base_file = path_filename(state.base); + const size_t base_len = state.base.length(); + unescaped_path = base_file + + unescaped_path.substr(base_len, unescaped_path.length() - base_len); + } + /* Second, we replace all unsafe characters. */ + const size_t length = unescaped_path.length(); + string escaped_path = ""; + for (size_t i = 0; i < length; ++i) { + const char ch = unescaped_path[i]; + if (strchr("\"\'\?\\", ch) != NULL) { + escaped_path += "\\"; + } + escaped_path += ch; + } + /* TODO(sergey): Check whether using std::to_string combined with several + * concatenation operations is any faster. + */ + return string_printf("#line %d \"%s\"", line, escaped_path.c_str()); +} + +static string path_source_handle_preprocessor(const string &preprocessor_line, + const string &source_filepath, + const size_t line_number, + SourceReplaceState *state) +{ + string result = preprocessor_line; + string token = string_strip(preprocessor_line.substr(1, preprocessor_line.size() - 1)); + if (string_startswith(token, "include")) { + token = string_strip(token.substr(7, token.size() - 7)); + if (token[0] == '"') { + const size_t n_start = 1; + const size_t n_end = token.find("\"", n_start); + const string filename = token.substr(n_start, n_end - n_start); + const bool is_precompiled = string_endswith(token, "// PRECOMPILED"); + string filepath = path_join(state->base, filename); + if (!path_exists(filepath)) { + filepath = path_join(path_dirname(source_filepath), filename); + } + if (is_precompiled) { + state->precompiled_headers.insert(filepath); + } + string text; + if (path_read_text(filepath, text)) { + text = path_source_replace_includes_recursive(text, filepath, state); + /* Use line directives for better error messages. */ + result = line_directive(*state, filepath, 1) + "\n" + text + "\n" + + line_directive(*state, source_filepath, line_number + 1); + } + } + } + return result; } /* Our own little c preprocessor that replaces #includes with the file * contents, to work around issue of OpenCL drivers not supporting * include paths with spaces in them. */ -static string path_source_replace_includes_recursive( - const string& source, - const string& source_filepath, - SourceReplaceState *state) -{ - /* Try to re-use processed file without spending time on replacing all - * include directives again. - */ - SourceReplaceState::ProcessedMapping::iterator replaced_file = - state->processed_files.find(source_filepath); - if(replaced_file != state->processed_files.end()) { - if(state->precompiled_headers.find(source_filepath) != - state->precompiled_headers.end()) { - return ""; - } - return replaced_file->second; - } - /* Perform full file processing. */ - string result = ""; - const size_t source_length = source.length(); - size_t index = 0; - /* Information about where we are in the source. */ - size_t line_number = 0, column_number = 1; - /* Currently gathered non-preprocessor token. - * Store as start/length rather than token itself to avoid overhead of - * memory re-allocations on each character concatenation. - */ - size_t token_start = 0, token_length = 0; - /* Denotes whether we're inside of preprocessor line, together with - * preprocessor line itself. - * - * TODO(sergey): Investigate whether using token start/end position - * gives measurable speedup. - */ - bool inside_preprocessor = false; - string preprocessor_line = ""; - /* Actual loop over the whole source. */ - while(index < source_length) { - const char ch = source[index]; - if(ch == '\n') { - if(inside_preprocessor) { - result += path_source_handle_preprocessor(preprocessor_line, - source_filepath, - line_number, - state); - /* Start gathering net part of the token. */ - token_start = index; - token_length = 0; - } - inside_preprocessor = false; - preprocessor_line = ""; - column_number = 0; - ++line_number; - } - else if(ch == '#' && column_number == 1 && !inside_preprocessor) { - /* Append all possible non-preprocessor token to the result. */ - if(token_length != 0) { - result.append(source, token_start, token_length); - token_start = index; - token_length = 0; - } - inside_preprocessor = true; - } - if(inside_preprocessor) { - preprocessor_line += ch; - } - else { - ++token_length; - } - ++index; - ++column_number; - } - /* Append possible tokens which happened before special events handled - * above. - */ - if(token_length != 0) { - result.append(source, token_start, token_length); - } - if(inside_preprocessor) { - result += path_source_handle_preprocessor(preprocessor_line, - source_filepath, - line_number, - state); - } - /* Store result for further reuse. */ - state->processed_files[source_filepath] = result; - return result; -} - -string path_source_replace_includes(const string& source, - const string& path, - const string& source_filename) -{ - SourceReplaceState state; - state.base = path; - return path_source_replace_includes_recursive( - source, - path_join(path, source_filename), - &state); -} - -FILE *path_fopen(const string& path, const string& mode) +static string path_source_replace_includes_recursive(const string &source, + const string &source_filepath, + SourceReplaceState *state) +{ + /* Try to re-use processed file without spending time on replacing all + * include directives again. + */ + SourceReplaceState::ProcessedMapping::iterator replaced_file = state->processed_files.find( + source_filepath); + if (replaced_file != state->processed_files.end()) { + if (state->precompiled_headers.find(source_filepath) != state->precompiled_headers.end()) { + return ""; + } + return replaced_file->second; + } + /* Perform full file processing. */ + string result = ""; + const size_t source_length = source.length(); + size_t index = 0; + /* Information about where we are in the source. */ + size_t line_number = 0, column_number = 1; + /* Currently gathered non-preprocessor token. + * Store as start/length rather than token itself to avoid overhead of + * memory re-allocations on each character concatenation. + */ + size_t token_start = 0, token_length = 0; + /* Denotes whether we're inside of preprocessor line, together with + * preprocessor line itself. + * + * TODO(sergey): Investigate whether using token start/end position + * gives measurable speedup. + */ + bool inside_preprocessor = false; + string preprocessor_line = ""; + /* Actual loop over the whole source. */ + while (index < source_length) { + const char ch = source[index]; + if (ch == '\n') { + if (inside_preprocessor) { + result += path_source_handle_preprocessor( + preprocessor_line, source_filepath, line_number, state); + /* Start gathering net part of the token. */ + token_start = index; + token_length = 0; + } + inside_preprocessor = false; + preprocessor_line = ""; + column_number = 0; + ++line_number; + } + else if (ch == '#' && column_number == 1 && !inside_preprocessor) { + /* Append all possible non-preprocessor token to the result. */ + if (token_length != 0) { + result.append(source, token_start, token_length); + token_start = index; + token_length = 0; + } + inside_preprocessor = true; + } + if (inside_preprocessor) { + preprocessor_line += ch; + } + else { + ++token_length; + } + ++index; + ++column_number; + } + /* Append possible tokens which happened before special events handled + * above. + */ + if (token_length != 0) { + result.append(source, token_start, token_length); + } + if (inside_preprocessor) { + result += path_source_handle_preprocessor( + preprocessor_line, source_filepath, line_number, state); + } + /* Store result for further reuse. */ + state->processed_files[source_filepath] = result; + return result; +} + +string path_source_replace_includes(const string &source, + const string &path, + const string &source_filename) +{ + SourceReplaceState state; + state.base = path; + return path_source_replace_includes_recursive(source, path_join(path, source_filename), &state); +} + +FILE *path_fopen(const string &path, const string &mode) { #ifdef _WIN32 - wstring path_wc = string_to_wstring(path); - wstring mode_wc = string_to_wstring(mode); - return _wfopen(path_wc.c_str(), mode_wc.c_str()); + wstring path_wc = string_to_wstring(path); + wstring mode_wc = string_to_wstring(mode); + return _wfopen(path_wc.c_str(), mode_wc.c_str()); #else - return fopen(path.c_str(), mode.c_str()); + return fopen(path.c_str(), mode.c_str()); #endif } -void path_cache_clear_except(const string& name, const set<string>& except) +void path_cache_clear_except(const string &name, const set<string> &except) { - string dir = path_user_get("cache"); - - if(path_exists(dir)) { - directory_iterator it(dir), it_end; + string dir = path_user_get("cache"); - for(; it != it_end; ++it) { - string filename = path_filename(it->path()); + if (path_exists(dir)) { + directory_iterator it(dir), it_end; - if(string_startswith(filename, name.c_str())) - if(except.find(filename) == except.end()) - path_remove(it->path()); - } - } + for (; it != it_end; ++it) { + string filename = path_filename(it->path()); + if (string_startswith(filename, name.c_str())) + if (except.find(filename) == except.end()) + path_remove(it->path()); + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h index 738dba94647..7a83c2135a4 100644 --- a/intern/cycles/util/util_path.h +++ b/intern/cycles/util/util_path.h @@ -32,46 +32,46 @@ CCL_NAMESPACE_BEGIN /* program paths */ -void path_init(const string& path = "", const string& user_path = ""); -string path_get(const string& sub = ""); -string path_user_get(const string& sub = ""); -string path_cache_get(const string& sub = ""); +void path_init(const string &path = "", const string &user_path = ""); +string path_get(const string &sub = ""); +string path_user_get(const string &sub = ""); +string path_cache_get(const string &sub = ""); /* path string manipulation */ -string path_filename(const string& path); -string path_dirname(const string& path); -string path_join(const string& dir, const string& file); -string path_escape(const string& path); -bool path_is_relative(const string& path); +string path_filename(const string &path); +string path_dirname(const string &path); +string path_join(const string &dir, const string &file); +string path_escape(const string &path); +bool path_is_relative(const string &path); /* file info */ -size_t path_file_size(const string& path); -bool path_exists(const string& path); -bool path_is_directory(const string& path); -string path_files_md5_hash(const string& dir); -uint64_t path_modified_time(const string& path); +size_t path_file_size(const string &path); +bool path_exists(const string &path); +bool path_is_directory(const string &path); +string path_files_md5_hash(const string &dir); +uint64_t path_modified_time(const string &path); /* directory utility */ -void path_create_directories(const string& path); +void path_create_directories(const string &path); /* file read/write utilities */ -FILE *path_fopen(const string& path, const string& mode); +FILE *path_fopen(const string &path, const string &mode); -bool path_write_binary(const string& path, const vector<uint8_t>& binary); -bool path_write_text(const string& path, string& text); -bool path_read_binary(const string& path, vector<uint8_t>& binary); -bool path_read_text(const string& path, string& text); +bool path_write_binary(const string &path, const vector<uint8_t> &binary); +bool path_write_text(const string &path, string &text); +bool path_read_binary(const string &path, vector<uint8_t> &binary); +bool path_read_text(const string &path, string &text); /* File manipulation. */ -bool path_remove(const string& path); +bool path_remove(const string &path); /* source code utility */ -string path_source_replace_includes(const string& source, - const string& path, - const string& source_filename=""); +string path_source_replace_includes(const string &source, + const string &path, + const string &source_filename = ""); /* cache utility */ -void path_cache_clear_except(const string& name, const set<string>& except); +void path_cache_clear_except(const string &name, const set<string> &except); CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_profiling.cpp b/intern/cycles/util/util_profiling.cpp index 30aaef69310..e3edf219435 100644 --- a/intern/cycles/util/util_profiling.cpp +++ b/intern/cycles/util/util_profiling.cpp @@ -20,159 +20,159 @@ CCL_NAMESPACE_BEGIN -Profiler::Profiler() - : do_stop_worker(true), worker(NULL) +Profiler::Profiler() : do_stop_worker(true), worker(NULL) { } Profiler::~Profiler() { - assert(worker == NULL); + assert(worker == NULL); } void Profiler::run() { - uint64_t updates = 0; - auto start_time = std::chrono::system_clock::now(); - while(!do_stop_worker) { - thread_scoped_lock lock(mutex); - foreach(ProfilingState *state, states) { - uint32_t cur_event = state->event; - int32_t cur_shader = state->shader; - int32_t cur_object = state->object; - - /* The state reads/writes should be atomic, but just to be sure - * check the values for validity anyways. */ - if(cur_event < PROFILING_NUM_EVENTS) { - event_samples[cur_event]++; - } - - if(cur_shader >= 0 && cur_shader < shader_samples.size()) { - /* Only consider the active shader during events whose runtime significantly depends on it. */ - if(((cur_event >= PROFILING_SHADER_EVAL ) && (cur_event <= PROFILING_SUBSURFACE)) || - ((cur_event >= PROFILING_CLOSURE_EVAL) && (cur_event <= PROFILING_CLOSURE_VOLUME_SAMPLE))) { - shader_samples[cur_shader]++; - } - } - - if(cur_object >= 0 && cur_object < object_samples.size()) { - object_samples[cur_object]++; - } - } - lock.unlock(); - - /* Relative waits always overshoot a bit, so just waiting 1ms every - * time would cause the sampling to drift over time. - * By keeping track of the absolute time, the wait times correct themselves - - * if one wait overshoots a lot, the next one will be shorter to compensate. */ - updates++; - std::this_thread::sleep_until(start_time + updates*std::chrono::milliseconds(1)); - } + uint64_t updates = 0; + auto start_time = std::chrono::system_clock::now(); + while (!do_stop_worker) { + thread_scoped_lock lock(mutex); + foreach (ProfilingState *state, states) { + uint32_t cur_event = state->event; + int32_t cur_shader = state->shader; + int32_t cur_object = state->object; + + /* The state reads/writes should be atomic, but just to be sure + * check the values for validity anyways. */ + if (cur_event < PROFILING_NUM_EVENTS) { + event_samples[cur_event]++; + } + + if (cur_shader >= 0 && cur_shader < shader_samples.size()) { + /* Only consider the active shader during events whose runtime significantly depends on it. */ + if (((cur_event >= PROFILING_SHADER_EVAL) && (cur_event <= PROFILING_SUBSURFACE)) || + ((cur_event >= PROFILING_CLOSURE_EVAL) && + (cur_event <= PROFILING_CLOSURE_VOLUME_SAMPLE))) { + shader_samples[cur_shader]++; + } + } + + if (cur_object >= 0 && cur_object < object_samples.size()) { + object_samples[cur_object]++; + } + } + lock.unlock(); + + /* Relative waits always overshoot a bit, so just waiting 1ms every + * time would cause the sampling to drift over time. + * By keeping track of the absolute time, the wait times correct themselves - + * if one wait overshoots a lot, the next one will be shorter to compensate. */ + updates++; + std::this_thread::sleep_until(start_time + updates * std::chrono::milliseconds(1)); + } } void Profiler::reset(int num_shaders, int num_objects) { - bool running = (worker != NULL); - if(running) { - stop(); - } - - /* Resize and clear the accumulation vectors. */ - shader_hits.assign(num_shaders, 0); - object_hits.assign(num_objects, 0); - - event_samples.assign(PROFILING_NUM_EVENTS, 0); - shader_samples.assign(num_shaders, 0); - object_samples.assign(num_objects, 0); - - if(running) { - start(); - } + bool running = (worker != NULL); + if (running) { + stop(); + } + + /* Resize and clear the accumulation vectors. */ + shader_hits.assign(num_shaders, 0); + object_hits.assign(num_objects, 0); + + event_samples.assign(PROFILING_NUM_EVENTS, 0); + shader_samples.assign(num_shaders, 0); + object_samples.assign(num_objects, 0); + + if (running) { + start(); + } } void Profiler::start() { - assert(worker == NULL); - do_stop_worker = false; - worker = new thread(function_bind(&Profiler::run, this)); + assert(worker == NULL); + do_stop_worker = false; + worker = new thread(function_bind(&Profiler::run, this)); } void Profiler::stop() { - if(worker != NULL) { - do_stop_worker = true; + if (worker != NULL) { + do_stop_worker = true; - worker->join(); - delete worker; - worker = NULL; - } + worker->join(); + delete worker; + worker = NULL; + } } void Profiler::add_state(ProfilingState *state) { - thread_scoped_lock lock(mutex); + thread_scoped_lock lock(mutex); - /* Add the ProfilingState from the list of sampled states. */ - assert(std::find(states.begin(), states.end(), state) == states.end()); - states.push_back(state); + /* Add the ProfilingState from the list of sampled states. */ + assert(std::find(states.begin(), states.end(), state) == states.end()); + states.push_back(state); - /* Resize thread-local hit counters. */ - state->shader_hits.assign(shader_hits.size(), 0); - state->object_hits.assign(object_hits.size(), 0); + /* Resize thread-local hit counters. */ + state->shader_hits.assign(shader_hits.size(), 0); + state->object_hits.assign(object_hits.size(), 0); - /* Initialize the state. */ - state->event = PROFILING_UNKNOWN; - state->shader = -1; - state->object = -1; - state->active = true; + /* Initialize the state. */ + state->event = PROFILING_UNKNOWN; + state->shader = -1; + state->object = -1; + state->active = true; } void Profiler::remove_state(ProfilingState *state) { - thread_scoped_lock lock(mutex); - - /* Remove the ProfilingState from the list of sampled states. */ - states.erase(std::remove(states.begin(), states.end(), state), states.end()); - state->active = false; - - /* Merge thread-local hit counters. */ - assert(shader_hits.size() == state->shader_hits.size()); - for(int i = 0; i < shader_hits.size(); i++) { - shader_hits[i] += state->shader_hits[i]; - } - - assert(object_hits.size() == state->object_hits.size()); - for(int i = 0; i < object_hits.size(); i++) { - object_hits[i] += state->object_hits[i]; - } + thread_scoped_lock lock(mutex); + + /* Remove the ProfilingState from the list of sampled states. */ + states.erase(std::remove(states.begin(), states.end(), state), states.end()); + state->active = false; + + /* Merge thread-local hit counters. */ + assert(shader_hits.size() == state->shader_hits.size()); + for (int i = 0; i < shader_hits.size(); i++) { + shader_hits[i] += state->shader_hits[i]; + } + + assert(object_hits.size() == state->object_hits.size()); + for (int i = 0; i < object_hits.size(); i++) { + object_hits[i] += state->object_hits[i]; + } } uint64_t Profiler::get_event(ProfilingEvent event) { - assert(worker == NULL); - return event_samples[event]; + assert(worker == NULL); + return event_samples[event]; } bool Profiler::get_shader(int shader, uint64_t &samples, uint64_t &hits) { - assert(worker == NULL); - if(shader_samples[shader] == 0) { - return false; - } - samples = shader_samples[shader]; - hits = shader_hits[shader]; - return true; + assert(worker == NULL); + if (shader_samples[shader] == 0) { + return false; + } + samples = shader_samples[shader]; + hits = shader_hits[shader]; + return true; } bool Profiler::get_object(int object, uint64_t &samples, uint64_t &hits) { - assert(worker == NULL); - if(object_samples[object] == 0) { - return false; - } - samples = object_samples[object]; - hits = object_hits[object]; - return true; + assert(worker == NULL); + if (object_samples[object] == 0) { + return false; + } + samples = object_samples[object]; + hits = object_hits[object]; + return true; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_profiling.h b/intern/cycles/util/util_profiling.h index 77cdbb0b325..f5f500239f2 100644 --- a/intern/cycles/util/util_profiling.h +++ b/intern/cycles/util/util_profiling.h @@ -27,42 +27,42 @@ CCL_NAMESPACE_BEGIN enum ProfilingEvent : uint32_t { - PROFILING_UNKNOWN, - PROFILING_RAY_SETUP, - PROFILING_PATH_INTEGRATE, - PROFILING_SCENE_INTERSECT, - PROFILING_INDIRECT_EMISSION, - PROFILING_VOLUME, - PROFILING_SHADER_SETUP, - PROFILING_SHADER_EVAL, - PROFILING_SHADER_APPLY, - PROFILING_AO, - PROFILING_SUBSURFACE, - PROFILING_CONNECT_LIGHT, - PROFILING_SURFACE_BOUNCE, - PROFILING_WRITE_RESULT, - - PROFILING_INTERSECT, - PROFILING_INTERSECT_LOCAL, - PROFILING_INTERSECT_SHADOW_ALL, - PROFILING_INTERSECT_VOLUME, - PROFILING_INTERSECT_VOLUME_ALL, - - PROFILING_CLOSURE_EVAL, - PROFILING_CLOSURE_SAMPLE, - PROFILING_CLOSURE_VOLUME_EVAL, - PROFILING_CLOSURE_VOLUME_SAMPLE, - - PROFILING_DENOISING, - PROFILING_DENOISING_CONSTRUCT_TRANSFORM, - PROFILING_DENOISING_RECONSTRUCT, - PROFILING_DENOISING_DIVIDE_SHADOW, - PROFILING_DENOISING_NON_LOCAL_MEANS, - PROFILING_DENOISING_COMBINE_HALVES, - PROFILING_DENOISING_GET_FEATURE, - PROFILING_DENOISING_DETECT_OUTLIERS, - - PROFILING_NUM_EVENTS, + PROFILING_UNKNOWN, + PROFILING_RAY_SETUP, + PROFILING_PATH_INTEGRATE, + PROFILING_SCENE_INTERSECT, + PROFILING_INDIRECT_EMISSION, + PROFILING_VOLUME, + PROFILING_SHADER_SETUP, + PROFILING_SHADER_EVAL, + PROFILING_SHADER_APPLY, + PROFILING_AO, + PROFILING_SUBSURFACE, + PROFILING_CONNECT_LIGHT, + PROFILING_SURFACE_BOUNCE, + PROFILING_WRITE_RESULT, + + PROFILING_INTERSECT, + PROFILING_INTERSECT_LOCAL, + PROFILING_INTERSECT_SHADOW_ALL, + PROFILING_INTERSECT_VOLUME, + PROFILING_INTERSECT_VOLUME_ALL, + + PROFILING_CLOSURE_EVAL, + PROFILING_CLOSURE_SAMPLE, + PROFILING_CLOSURE_VOLUME_EVAL, + PROFILING_CLOSURE_VOLUME_SAMPLE, + + PROFILING_DENOISING, + PROFILING_DENOISING_CONSTRUCT_TRANSFORM, + PROFILING_DENOISING_RECONSTRUCT, + PROFILING_DENOISING_DIVIDE_SHADOW, + PROFILING_DENOISING_NON_LOCAL_MEANS, + PROFILING_DENOISING_COMBINE_HALVES, + PROFILING_DENOISING_GET_FEATURE, + PROFILING_DENOISING_DETECT_OUTLIERS, + + PROFILING_NUM_EVENTS, }; /* Contains the current execution state of a worker thread. @@ -79,97 +79,97 @@ enum ProfilingEvent : uint32_t { * case of reading an intermediate state could at worst result * in a single incorrect sample. */ struct ProfilingState { - volatile uint32_t event = PROFILING_UNKNOWN; - volatile int32_t shader = -1; - volatile int32_t object = -1; - volatile bool active = false; + volatile uint32_t event = PROFILING_UNKNOWN; + volatile int32_t shader = -1; + volatile int32_t object = -1; + volatile bool active = false; - vector<uint64_t> shader_hits; - vector<uint64_t> object_hits; + vector<uint64_t> shader_hits; + vector<uint64_t> object_hits; }; class Profiler { -public: - Profiler(); - ~Profiler(); + public: + Profiler(); + ~Profiler(); - void reset(int num_shaders, int num_objects); + void reset(int num_shaders, int num_objects); - void start(); - void stop(); + void start(); + void stop(); - void add_state(ProfilingState *state); - void remove_state(ProfilingState *state); + void add_state(ProfilingState *state); + void remove_state(ProfilingState *state); - uint64_t get_event(ProfilingEvent event); - bool get_shader(int shader, uint64_t &samples, uint64_t &hits); - bool get_object(int object, uint64_t &samples, uint64_t &hits); + uint64_t get_event(ProfilingEvent event); + bool get_shader(int shader, uint64_t &samples, uint64_t &hits); + bool get_object(int object, uint64_t &samples, uint64_t &hits); -protected: - void run(); + protected: + void run(); - /* Tracks how often the worker was in each ProfilingEvent while sampling, - * so multiplying the values by the sample frequency (currently 1ms) - * gives the approximate time spent in each state. */ - vector<uint64_t> event_samples; - vector<uint64_t> shader_samples; - vector<uint64_t> object_samples; + /* Tracks how often the worker was in each ProfilingEvent while sampling, + * so multiplying the values by the sample frequency (currently 1ms) + * gives the approximate time spent in each state. */ + vector<uint64_t> event_samples; + vector<uint64_t> shader_samples; + vector<uint64_t> object_samples; - /* Tracks the total amounts every object/shader was hit. - * Used to evaluate relative cost, written by the render thread. - * Indexed by the shader and object IDs that the kernel also uses - * to index __object_flag and __shaders. */ - vector<uint64_t> shader_hits; - vector<uint64_t> object_hits; + /* Tracks the total amounts every object/shader was hit. + * Used to evaluate relative cost, written by the render thread. + * Indexed by the shader and object IDs that the kernel also uses + * to index __object_flag and __shaders. */ + vector<uint64_t> shader_hits; + vector<uint64_t> object_hits; - volatile bool do_stop_worker; - thread *worker; + volatile bool do_stop_worker; + thread *worker; - thread_mutex mutex; - vector<ProfilingState*> states; + thread_mutex mutex; + vector<ProfilingState *> states; }; class ProfilingHelper { -public: - ProfilingHelper(ProfilingState *state, ProfilingEvent event) - : state(state) - { - previous_event = state->event; - state->event = event; - } - - inline void set_event(ProfilingEvent event) - { - state->event = event; - } - - inline void set_shader(int shader) - { - state->shader = shader; - if(state->active) { - assert(shader < state->shader_hits.size()); - state->shader_hits[shader]++; - } - } - - inline void set_object(int object) - { - state->object = object; - if(state->active) { - assert(object < state->object_hits.size()); - state->object_hits[object]++; - } - } - - ~ProfilingHelper() - { - state->event = previous_event; - } -private: - ProfilingState *state; - uint32_t previous_event; + public: + ProfilingHelper(ProfilingState *state, ProfilingEvent event) : state(state) + { + previous_event = state->event; + state->event = event; + } + + inline void set_event(ProfilingEvent event) + { + state->event = event; + } + + inline void set_shader(int shader) + { + state->shader = shader; + if (state->active) { + assert(shader < state->shader_hits.size()); + state->shader_hits[shader]++; + } + } + + inline void set_object(int object) + { + state->object = object; + if (state->active) { + assert(object < state->object_hits.size()); + state->object_hits[object]++; + } + } + + ~ProfilingHelper() + { + state->event = previous_event; + } + + private: + ProfilingState *state; + uint32_t previous_event; }; CCL_NAMESPACE_END -#endif /* __UTIL_PROFILING_H__ */ +#endif /* __UTIL_PROFILING_H__ */ diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index 06900d14cdc..f05e5b918f3 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -31,361 +31,359 @@ CCL_NAMESPACE_BEGIN class Progress { -public: - Progress() - { - pixel_samples = 0; - total_pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - start_time = time_dt(); - render_start_time = time_dt(); - end_time = 0.0; - status = "Initializing"; - substatus = ""; - sync_status = ""; - sync_substatus = ""; - kernel_status = ""; - update_cb = function_null; - cancel = false; - cancel_message = ""; - error = false; - error_message = ""; - cancel_cb = function_null; - } - - Progress(Progress& progress) - { - *this = progress; - } - - Progress& operator=(Progress& progress) - { - thread_scoped_lock lock(progress.progress_mutex); - - progress.get_status(status, substatus); - - pixel_samples = progress.pixel_samples; - total_pixel_samples = progress.total_pixel_samples; - current_tile_sample = progress.get_current_sample(); - - return *this; - } - - void reset() - { - pixel_samples = 0; - total_pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - start_time = time_dt(); - render_start_time = time_dt(); - end_time = 0.0; - status = "Initializing"; - substatus = ""; - sync_status = ""; - sync_substatus = ""; - kernel_status = ""; - cancel = false; - cancel_message = ""; - error = false; - error_message = ""; - } - - /* cancel */ - void set_cancel(const string& cancel_message_) - { - thread_scoped_lock lock(progress_mutex); - cancel_message = cancel_message_; - cancel = true; - } - - bool get_cancel() - { - if(!cancel && cancel_cb) - cancel_cb(); - - return cancel; - } - - string get_cancel_message() - { - thread_scoped_lock lock(progress_mutex); - return cancel_message; - } - - void set_cancel_callback(function<void()> function) - { - cancel_cb = function; - } - - /* error */ - void set_error(const string& error_message_) - { - thread_scoped_lock lock(progress_mutex); - error_message = error_message_; - error = true; - /* If error happens we also stop rendering. */ - cancel_message = error_message_; - cancel = true; - } - - bool get_error() - { - return error; - } - - string get_error_message() - { - thread_scoped_lock lock(progress_mutex); - return error_message; - } - - /* tile and timing information */ - - void set_start_time() - { - thread_scoped_lock lock(progress_mutex); - - start_time = time_dt(); - end_time = 0.0; - } - - void set_render_start_time() - { - thread_scoped_lock lock(progress_mutex); - - render_start_time = time_dt(); - } - - void add_skip_time(const scoped_timer &start_timer, bool only_render) - { - double skip_time = time_dt() - start_timer.get_start(); - - render_start_time += skip_time; - if(!only_render) { - start_time += skip_time; - } - } - - void get_time(double& total_time_, double& render_time_) - { - thread_scoped_lock lock(progress_mutex); - - double time = (end_time > 0) ? end_time : time_dt(); - - total_time_ = time - start_time; - render_time_ = time - render_start_time; - } - - void set_end_time() - { - end_time = time_dt(); - } - - void reset_sample() - { - thread_scoped_lock lock(progress_mutex); - - pixel_samples = 0; - current_tile_sample = 0; - rendered_tiles = 0; - denoised_tiles = 0; - } - - void set_total_pixel_samples(uint64_t total_pixel_samples_) - { - thread_scoped_lock lock(progress_mutex); - - total_pixel_samples = total_pixel_samples_; - } - - float get_progress() - { - if(total_pixel_samples > 0) { - return ((float) pixel_samples) / total_pixel_samples; - } - return 0.0f; - } - - void add_samples(uint64_t pixel_samples_, int tile_sample) - { - thread_scoped_lock lock(progress_mutex); - - pixel_samples += pixel_samples_; - current_tile_sample = tile_sample; - } - - void add_samples_update(uint64_t pixel_samples_, int tile_sample) - { - add_samples(pixel_samples_, tile_sample); - set_update(); - } - - void add_finished_tile(bool denoised) - { - thread_scoped_lock lock(progress_mutex); - - if(denoised) { - denoised_tiles++; - } - else { - rendered_tiles++; - } - } - - int get_current_sample() - { - thread_scoped_lock lock(progress_mutex); - /* Note that the value here always belongs to the last tile that updated, - * so it's only useful if there is only one active tile. */ - return current_tile_sample; - } - - int get_rendered_tiles() - { - thread_scoped_lock lock(progress_mutex); - return rendered_tiles; - } - - int get_denoised_tiles() - { - thread_scoped_lock lock(progress_mutex); - return denoised_tiles; - } - - /* status messages */ - - void set_status(const string& status_, const string& substatus_ = "") - { - { - thread_scoped_lock lock(progress_mutex); - status = status_; - substatus = substatus_; - } - - set_update(); - } - - void set_substatus(const string& substatus_) - { - { - thread_scoped_lock lock(progress_mutex); - substatus = substatus_; - } - - set_update(); - } - - void set_sync_status(const string& status_, const string& substatus_ = "") - { - { - thread_scoped_lock lock(progress_mutex); - sync_status = status_; - sync_substatus = substatus_; - } - - set_update(); - - } - - void set_sync_substatus(const string& substatus_) - { - { - thread_scoped_lock lock(progress_mutex); - sync_substatus = substatus_; - } - - set_update(); - } - - void get_status(string& status_, string& substatus_) - { - thread_scoped_lock lock(progress_mutex); - - if(sync_status != "") { - status_ = sync_status; - substatus_ = sync_substatus; - } - else { - status_ = status; - substatus_ = substatus; - } - } - - - /* kernel status */ - - void set_kernel_status(const string &kernel_status_) - { - { - thread_scoped_lock lock(progress_mutex); - kernel_status = kernel_status_; - } - - set_update(); - } - - void get_kernel_status(string &kernel_status_) - { - thread_scoped_lock lock(progress_mutex); - kernel_status_ = kernel_status; - } - - /* callback */ - - void set_update() - { - if(update_cb) { - thread_scoped_lock lock(update_mutex); - update_cb(); - } - } - - void set_update_callback(function<void()> function) - { - update_cb = function; - } - -protected: - thread_mutex progress_mutex; - thread_mutex update_mutex; - function<void()> update_cb; - function<void()> cancel_cb; - - /* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel. - * This makes the progress estimate more accurate when tiles with different sizes are used. - * - * total_pixel_samples is the total amount of pixel samples that will be rendered. */ - uint64_t pixel_samples, total_pixel_samples; - /* Stores the current sample count of the last tile that called the update function. - * It's used to display the sample count if only one tile is active. */ - int current_tile_sample; - /* Stores the number of tiles that's already finished. - * Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */ - int rendered_tiles, denoised_tiles; - - double start_time, render_start_time; - /* End time written when render is done, so it doesn't keep increasing on redraws. */ - double end_time; - - string status; - string substatus; - - string sync_status; - string sync_substatus; - - string kernel_status; - - volatile bool cancel; - string cancel_message; - - volatile bool error; - string error_message; + public: + Progress() + { + pixel_samples = 0; + total_pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + start_time = time_dt(); + render_start_time = time_dt(); + end_time = 0.0; + status = "Initializing"; + substatus = ""; + sync_status = ""; + sync_substatus = ""; + kernel_status = ""; + update_cb = function_null; + cancel = false; + cancel_message = ""; + error = false; + error_message = ""; + cancel_cb = function_null; + } + + Progress(Progress &progress) + { + *this = progress; + } + + Progress &operator=(Progress &progress) + { + thread_scoped_lock lock(progress.progress_mutex); + + progress.get_status(status, substatus); + + pixel_samples = progress.pixel_samples; + total_pixel_samples = progress.total_pixel_samples; + current_tile_sample = progress.get_current_sample(); + + return *this; + } + + void reset() + { + pixel_samples = 0; + total_pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + start_time = time_dt(); + render_start_time = time_dt(); + end_time = 0.0; + status = "Initializing"; + substatus = ""; + sync_status = ""; + sync_substatus = ""; + kernel_status = ""; + cancel = false; + cancel_message = ""; + error = false; + error_message = ""; + } + + /* cancel */ + void set_cancel(const string &cancel_message_) + { + thread_scoped_lock lock(progress_mutex); + cancel_message = cancel_message_; + cancel = true; + } + + bool get_cancel() + { + if (!cancel && cancel_cb) + cancel_cb(); + + return cancel; + } + + string get_cancel_message() + { + thread_scoped_lock lock(progress_mutex); + return cancel_message; + } + + void set_cancel_callback(function<void()> function) + { + cancel_cb = function; + } + + /* error */ + void set_error(const string &error_message_) + { + thread_scoped_lock lock(progress_mutex); + error_message = error_message_; + error = true; + /* If error happens we also stop rendering. */ + cancel_message = error_message_; + cancel = true; + } + + bool get_error() + { + return error; + } + + string get_error_message() + { + thread_scoped_lock lock(progress_mutex); + return error_message; + } + + /* tile and timing information */ + + void set_start_time() + { + thread_scoped_lock lock(progress_mutex); + + start_time = time_dt(); + end_time = 0.0; + } + + void set_render_start_time() + { + thread_scoped_lock lock(progress_mutex); + + render_start_time = time_dt(); + } + + void add_skip_time(const scoped_timer &start_timer, bool only_render) + { + double skip_time = time_dt() - start_timer.get_start(); + + render_start_time += skip_time; + if (!only_render) { + start_time += skip_time; + } + } + + void get_time(double &total_time_, double &render_time_) + { + thread_scoped_lock lock(progress_mutex); + + double time = (end_time > 0) ? end_time : time_dt(); + + total_time_ = time - start_time; + render_time_ = time - render_start_time; + } + + void set_end_time() + { + end_time = time_dt(); + } + + void reset_sample() + { + thread_scoped_lock lock(progress_mutex); + + pixel_samples = 0; + current_tile_sample = 0; + rendered_tiles = 0; + denoised_tiles = 0; + } + + void set_total_pixel_samples(uint64_t total_pixel_samples_) + { + thread_scoped_lock lock(progress_mutex); + + total_pixel_samples = total_pixel_samples_; + } + + float get_progress() + { + if (total_pixel_samples > 0) { + return ((float)pixel_samples) / total_pixel_samples; + } + return 0.0f; + } + + void add_samples(uint64_t pixel_samples_, int tile_sample) + { + thread_scoped_lock lock(progress_mutex); + + pixel_samples += pixel_samples_; + current_tile_sample = tile_sample; + } + + void add_samples_update(uint64_t pixel_samples_, int tile_sample) + { + add_samples(pixel_samples_, tile_sample); + set_update(); + } + + void add_finished_tile(bool denoised) + { + thread_scoped_lock lock(progress_mutex); + + if (denoised) { + denoised_tiles++; + } + else { + rendered_tiles++; + } + } + + int get_current_sample() + { + thread_scoped_lock lock(progress_mutex); + /* Note that the value here always belongs to the last tile that updated, + * so it's only useful if there is only one active tile. */ + return current_tile_sample; + } + + int get_rendered_tiles() + { + thread_scoped_lock lock(progress_mutex); + return rendered_tiles; + } + + int get_denoised_tiles() + { + thread_scoped_lock lock(progress_mutex); + return denoised_tiles; + } + + /* status messages */ + + void set_status(const string &status_, const string &substatus_ = "") + { + { + thread_scoped_lock lock(progress_mutex); + status = status_; + substatus = substatus_; + } + + set_update(); + } + + void set_substatus(const string &substatus_) + { + { + thread_scoped_lock lock(progress_mutex); + substatus = substatus_; + } + + set_update(); + } + + void set_sync_status(const string &status_, const string &substatus_ = "") + { + { + thread_scoped_lock lock(progress_mutex); + sync_status = status_; + sync_substatus = substatus_; + } + + set_update(); + } + + void set_sync_substatus(const string &substatus_) + { + { + thread_scoped_lock lock(progress_mutex); + sync_substatus = substatus_; + } + + set_update(); + } + + void get_status(string &status_, string &substatus_) + { + thread_scoped_lock lock(progress_mutex); + + if (sync_status != "") { + status_ = sync_status; + substatus_ = sync_substatus; + } + else { + status_ = status; + substatus_ = substatus; + } + } + + /* kernel status */ + + void set_kernel_status(const string &kernel_status_) + { + { + thread_scoped_lock lock(progress_mutex); + kernel_status = kernel_status_; + } + + set_update(); + } + + void get_kernel_status(string &kernel_status_) + { + thread_scoped_lock lock(progress_mutex); + kernel_status_ = kernel_status; + } + + /* callback */ + + void set_update() + { + if (update_cb) { + thread_scoped_lock lock(update_mutex); + update_cb(); + } + } + + void set_update_callback(function<void()> function) + { + update_cb = function; + } + + protected: + thread_mutex progress_mutex; + thread_mutex update_mutex; + function<void()> update_cb; + function<void()> cancel_cb; + + /* pixel_samples counts how many samples have been rendered over all pixel, not just per pixel. + * This makes the progress estimate more accurate when tiles with different sizes are used. + * + * total_pixel_samples is the total amount of pixel samples that will be rendered. */ + uint64_t pixel_samples, total_pixel_samples; + /* Stores the current sample count of the last tile that called the update function. + * It's used to display the sample count if only one tile is active. */ + int current_tile_sample; + /* Stores the number of tiles that's already finished. + * Used to determine whether all but the last tile are finished rendering, in which case the current_tile_sample is displayed. */ + int rendered_tiles, denoised_tiles; + + double start_time, render_start_time; + /* End time written when render is done, so it doesn't keep increasing on redraws. */ + double end_time; + + string status; + string substatus; + + string sync_status; + string sync_substatus; + + string kernel_status; + + volatile bool cancel; + string cancel_message; + + volatile bool error; + string error_message; }; CCL_NAMESPACE_END -#endif /* __UTIL_PROGRESS_H__ */ +#endif /* __UTIL_PROGRESS_H__ */ diff --git a/intern/cycles/util/util_projection.h b/intern/cycles/util/util_projection.h index d1af013ae3a..416af18b53e 100644 --- a/intern/cycles/util/util_projection.h +++ b/intern/cycles/util/util_projection.h @@ -24,153 +24,193 @@ CCL_NAMESPACE_BEGIN /* 4x4 projection matrix, perspective or orthographic. */ typedef struct ProjectionTransform { - float4 x, y, z, w; /* rows */ + float4 x, y, z, w; /* rows */ #ifndef __KERNEL_GPU__ - ProjectionTransform() - { - } - - explicit ProjectionTransform(const Transform& tfm) - : x(tfm.x), - y(tfm.y), - z(tfm.z), - w(make_float4(0.0f, 0.0f, 0.0f, 1.0f)) - { - } + ProjectionTransform() + { + } + + explicit ProjectionTransform(const Transform &tfm) + : x(tfm.x), y(tfm.y), z(tfm.z), w(make_float4(0.0f, 0.0f, 0.0f, 1.0f)) + { + } #endif } ProjectionTransform; typedef struct PerspectiveMotionTransform { - ProjectionTransform pre; - ProjectionTransform post; + ProjectionTransform pre; + ProjectionTransform post; } PerspectiveMotionTransform; /* Functions */ ccl_device_inline float3 transform_perspective(const ProjectionTransform *t, const float3 a) { - float4 b = make_float4(a.x, a.y, a.z, 1.0f); - float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b)); - float w = dot(t->w, b); + float4 b = make_float4(a.x, a.y, a.z, 1.0f); + float3 c = make_float3(dot(t->x, b), dot(t->y, b), dot(t->z, b)); + float w = dot(t->w, b); - return (w != 0.0f)? c/w: make_float3(0.0f, 0.0f, 0.0f); + return (w != 0.0f) ? c / w : make_float3(0.0f, 0.0f, 0.0f); } -ccl_device_inline float3 transform_perspective_direction(const ProjectionTransform *t, const float3 a) +ccl_device_inline float3 transform_perspective_direction(const ProjectionTransform *t, + const float3 a) { - float3 c = make_float3( - a.x*t->x.x + a.y*t->x.y + a.z*t->x.z, - a.x*t->y.x + a.y*t->y.y + a.z*t->y.z, - a.x*t->z.x + a.y*t->z.y + a.z*t->z.z); + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); - return c; + return c; } #ifndef __KERNEL_GPU__ -ccl_device_inline Transform projection_to_transform(const ProjectionTransform& a) +ccl_device_inline Transform projection_to_transform(const ProjectionTransform &a) { - Transform tfm = {a.x, a.y, a.z}; - return tfm; + Transform tfm = {a.x, a.y, a.z}; + return tfm; } -ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform& a) +ccl_device_inline ProjectionTransform projection_transpose(const ProjectionTransform &a) { - ProjectionTransform t; - - t.x.x = a.x.x; t.x.y = a.y.x; t.x.z = a.z.x; t.x.w = a.w.x; - t.y.x = a.x.y; t.y.y = a.y.y; t.y.z = a.z.y; t.y.w = a.w.y; - t.z.x = a.x.z; t.z.y = a.y.z; t.z.z = a.z.z; t.z.w = a.w.z; - t.w.x = a.x.w; t.w.y = a.y.w; t.w.z = a.z.w; t.w.w = a.w.w; - - return t; + ProjectionTransform t; + + t.x.x = a.x.x; + t.x.y = a.y.x; + t.x.z = a.z.x; + t.x.w = a.w.x; + t.y.x = a.x.y; + t.y.y = a.y.y; + t.y.z = a.z.y; + t.y.w = a.w.y; + t.z.x = a.x.z; + t.z.y = a.y.z; + t.z.z = a.z.z; + t.z.w = a.w.z; + t.w.x = a.x.w; + t.w.y = a.y.w; + t.w.z = a.z.w; + t.w.w = a.w.w; + + return t; } -ProjectionTransform projection_inverse(const ProjectionTransform& a); - -ccl_device_inline ProjectionTransform make_projection( - float a, float b, float c, float d, - float e, float f, float g, float h, - float i, float j, float k, float l, - float m, float n, float o, float p) +ProjectionTransform projection_inverse(const ProjectionTransform &a); + +ccl_device_inline ProjectionTransform make_projection(float a, + float b, + float c, + float d, + float e, + float f, + float g, + float h, + float i, + float j, + float k, + float l, + float m, + float n, + float o, + float p) { - ProjectionTransform t; - - t.x.x = a; t.x.y = b; t.x.z = c; t.x.w = d; - t.y.x = e; t.y.y = f; t.y.z = g; t.y.w = h; - t.z.x = i; t.z.y = j; t.z.z = k; t.z.w = l; - t.w.x = m; t.w.y = n; t.w.z = o; t.w.w = p; - - return t; + ProjectionTransform t; + + t.x.x = a; + t.x.y = b; + t.x.z = c; + t.x.w = d; + t.y.x = e; + t.y.y = f; + t.y.z = g; + t.y.w = h; + t.z.x = i; + t.z.y = j; + t.z.z = k; + t.z.w = l; + t.w.x = m; + t.w.y = n; + t.w.z = o; + t.w.w = p; + + return t; } ccl_device_inline ProjectionTransform projection_identity() { - return make_projection( - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f); + return make_projection(1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f, + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 1.0f); } -ccl_device_inline ProjectionTransform operator*(const ProjectionTransform& a, const ProjectionTransform& b) +ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, + const ProjectionTransform &b) { - ProjectionTransform c = projection_transpose(b); - ProjectionTransform t; + ProjectionTransform c = projection_transpose(b); + ProjectionTransform t; - t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w)); - t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w)); - t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w)); - t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w)); + t.x = make_float4(dot(a.x, c.x), dot(a.x, c.y), dot(a.x, c.z), dot(a.x, c.w)); + t.y = make_float4(dot(a.y, c.x), dot(a.y, c.y), dot(a.y, c.z), dot(a.y, c.w)); + t.z = make_float4(dot(a.z, c.x), dot(a.z, c.y), dot(a.z, c.z), dot(a.z, c.w)); + t.w = make_float4(dot(a.w, c.x), dot(a.w, c.y), dot(a.w, c.z), dot(a.w, c.w)); - return t; + return t; } -ccl_device_inline ProjectionTransform operator*(const ProjectionTransform& a, const Transform& b) +ccl_device_inline ProjectionTransform operator*(const ProjectionTransform &a, const Transform &b) { - return a * ProjectionTransform(b); + return a * ProjectionTransform(b); } -ccl_device_inline ProjectionTransform operator*(const Transform& a, const ProjectionTransform& b) +ccl_device_inline ProjectionTransform operator*(const Transform &a, const ProjectionTransform &b) { - return ProjectionTransform(a) * b; + return ProjectionTransform(a) * b; } -ccl_device_inline void print_projection(const char *label, const ProjectionTransform& t) +ccl_device_inline void print_projection(const char *label, const ProjectionTransform &t) { - print_float4(label, t.x); - print_float4(label, t.y); - print_float4(label, t.z); - print_float4(label, t.w); - printf("\n"); + print_float4(label, t.x); + print_float4(label, t.y); + print_float4(label, t.z); + print_float4(label, t.w); + printf("\n"); } ccl_device_inline ProjectionTransform projection_perspective(float fov, float n, float f) { - ProjectionTransform persp = make_projection( - 1, 0, 0, 0, - 0, 1, 0, 0, - 0, 0, f / (f - n), -f*n / (f - n), - 0, 0, 1, 0); + ProjectionTransform persp = make_projection( + 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, f / (f - n), -f * n / (f - n), 0, 0, 1, 0); - float inv_angle = 1.0f/tanf(0.5f*fov); + float inv_angle = 1.0f / tanf(0.5f * fov); - Transform scale = transform_scale(inv_angle, inv_angle, 1); + Transform scale = transform_scale(inv_angle, inv_angle, 1); - return scale * persp; + return scale * persp; } ccl_device_inline ProjectionTransform projection_orthographic(float znear, float zfar) { - Transform t = - transform_scale(1.0f, 1.0f, 1.0f / (zfar-znear)) * - transform_translate(0.0f, 0.0f, -znear); + Transform t = transform_scale(1.0f, 1.0f, 1.0f / (zfar - znear)) * + transform_translate(0.0f, 0.0f, -znear); - return ProjectionTransform(t); + return ProjectionTransform(t); } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_PROJECTION_H__ */ +#endif /* __UTIL_PROJECTION_H__ */ diff --git a/intern/cycles/util/util_queue.h b/intern/cycles/util/util_queue.h index 0a2b7718f57..622f4fe3e47 100644 --- a/intern/cycles/util/util_queue.h +++ b/intern/cycles/util/util_queue.h @@ -25,4 +25,4 @@ using std::queue; CCL_NAMESPACE_END -#endif /* __UTIL_LIST_H__ */ +#endif /* __UTIL_LIST_H__ */ diff --git a/intern/cycles/util/util_rect.h b/intern/cycles/util/util_rect.h index 389669acf2e..36f02a01f7b 100644 --- a/intern/cycles/util/util_rect.h +++ b/intern/cycles/util/util_rect.h @@ -26,47 +26,47 @@ CCL_NAMESPACE_BEGIN ccl_device_inline int4 rect_from_shape(int x0, int y0, int w, int h) { - return make_int4(x0, y0, x0 + w, y0 + h); + return make_int4(x0, y0, x0 + w, y0 + h); } ccl_device_inline int4 rect_expand(int4 rect, int d) { - return make_int4(rect.x - d, rect.y - d, rect.z + d, rect.w + d); + return make_int4(rect.x - d, rect.y - d, rect.z + d, rect.w + d); } /* Returns the intersection of two rects. */ ccl_device_inline int4 rect_clip(int4 a, int4 b) { - return make_int4(max(a.x, b.x), max(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); + return make_int4(max(a.x, b.x), max(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); } ccl_device_inline bool rect_is_valid(int4 rect) { - return (rect.z > rect.x) && (rect.w > rect.y); + return (rect.z > rect.x) && (rect.w > rect.y); } /* Returns the local row-major index of the pixel inside the rect. */ ccl_device_inline int coord_to_local_index(int4 rect, int x, int y) { - int w = rect.z - rect.x; - return (y - rect.y) * w + (x - rect.x); + int w = rect.z - rect.x; + return (y - rect.y) * w + (x - rect.x); } /* Finds the coordinates of a pixel given by its row-major index in the rect, * and returns whether the pixel is inside it. */ ccl_device_inline bool local_index_to_coord(int4 rect, int idx, int *x, int *y) { - int w = rect.z - rect.x; - *x = (idx % w) + rect.x; - *y = (idx / w) + rect.y; - return (*y < rect.w); + int w = rect.z - rect.x; + *x = (idx % w) + rect.x; + *y = (idx / w) + rect.y; + return (*y < rect.w); } ccl_device_inline int rect_size(int4 rect) { - return (rect.z - rect.x) * (rect.w - rect.y); + return (rect.z - rect.x) * (rect.w - rect.y); } CCL_NAMESPACE_END -#endif /* __UTIL_RECT_H__ */ +#endif /* __UTIL_RECT_H__ */ diff --git a/intern/cycles/util/util_set.h b/intern/cycles/util/util_set.h index a9c56bb4919..298e1f7729a 100644 --- a/intern/cycles/util/util_set.h +++ b/intern/cycles/util/util_set.h @@ -31,4 +31,4 @@ using std::unordered_set; CCL_NAMESPACE_END -#endif /* __UTIL_SET_H__ */ +#endif /* __UTIL_SET_H__ */ diff --git a/intern/cycles/util/util_simd.cpp b/intern/cycles/util/util_simd.cpp index f90439c188b..861dcf1fe36 100644 --- a/intern/cycles/util/util_simd.cpp +++ b/intern/cycles/util/util_simd.cpp @@ -15,33 +15,29 @@ * limitations under the License. */ -#if (defined(WITH_KERNEL_SSE2)) || \ - (defined(WITH_KERNEL_NATIVE) && defined(__SSE2__)) +#if (defined(WITH_KERNEL_SSE2)) || (defined(WITH_KERNEL_NATIVE) && defined(__SSE2__)) -#define __KERNEL_SSE2__ -#include "util/util_simd.h" +# define __KERNEL_SSE2__ +# include "util/util_simd.h" CCL_NAMESPACE_BEGIN -const __m128 _mm_lookupmask_ps[16] = { - _mm_castsi128_ps(_mm_set_epi32( 0, 0, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32( 0, 0, 0,-1)), - _mm_castsi128_ps(_mm_set_epi32( 0, 0,-1, 0)), - _mm_castsi128_ps(_mm_set_epi32( 0, 0,-1,-1)), - _mm_castsi128_ps(_mm_set_epi32( 0,-1, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32( 0,-1, 0,-1)), - _mm_castsi128_ps(_mm_set_epi32( 0,-1,-1, 0)), - _mm_castsi128_ps(_mm_set_epi32( 0,-1,-1,-1)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0,-1)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0,-1, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1, 0,-1,-1)), - _mm_castsi128_ps(_mm_set_epi32(-1,-1, 0, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1,-1, 0,-1)), - _mm_castsi128_ps(_mm_set_epi32(-1,-1,-1, 0)), - _mm_castsi128_ps(_mm_set_epi32(-1,-1,-1,-1)) -}; - +const __m128 _mm_lookupmask_ps[16] = {_mm_castsi128_ps(_mm_set_epi32(0, 0, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(0, -1, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, 0, -1, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, 0, -1)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, 0)), + _mm_castsi128_ps(_mm_set_epi32(-1, -1, -1, -1))}; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h index c92fc1ae391..8fcaadc5f53 100644 --- a/intern/cycles/util/util_simd.h +++ b/intern/cycles/util/util_simd.h @@ -20,439 +20,550 @@ #ifndef __KERNEL_GPU__ -#include <limits> +# include <limits> -#include "util/util_defines.h" +# include "util/util_defines.h" /* SSE Intrinsics includes * * We assume __KERNEL_SSEX__ flags to have been defined at this point */ /* SSE intrinsics headers */ -#ifndef FREE_WINDOWS64 +# ifndef FREE_WINDOWS64 -#ifdef _MSC_VER -# include <intrin.h> -#elif (defined(__x86_64__) || defined(__i386__)) -# include <x86intrin.h> -#endif +# ifdef _MSC_VER +# include <intrin.h> +# elif (defined(__x86_64__) || defined(__i386__)) +# include <x86intrin.h> +# endif -#else +# else /* MinGW64 has conflicting declarations for these SSE headers in <windows.h>. * Since we can't avoid including <windows.h>, better only include that */ -#include "util/util_windows.h" +# include "util/util_windows.h" -#endif +# endif -#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86) - #define SIMD_SET_FLUSH_TO_ZERO \ - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); -#else - #define SIMD_SET_FLUSH_TO_ZERO -#endif +# if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86) +# define SIMD_SET_FLUSH_TO_ZERO \ + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \ + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +# else +# define SIMD_SET_FLUSH_TO_ZERO +# endif CCL_NAMESPACE_BEGIN -#ifdef __KERNEL_SSE2__ +# ifdef __KERNEL_SSE2__ extern const __m128 _mm_lookupmask_ps[16]; /* Special Types */ static struct TrueTy { -__forceinline operator bool( ) const { return true; } + __forceinline operator bool() const + { + return true; + } } True ccl_maybe_unused; static struct FalseTy { -__forceinline operator bool( ) const { return false; } + __forceinline operator bool() const + { + return false; + } } False ccl_maybe_unused; -static struct NegInfTy -{ -__forceinline operator float ( ) const { return -std::numeric_limits<float>::infinity(); } -__forceinline operator int ( ) const { return std::numeric_limits<int>::min(); } +static struct NegInfTy { + __forceinline operator float() const + { + return -std::numeric_limits<float>::infinity(); + } + __forceinline operator int() const + { + return std::numeric_limits<int>::min(); + } } neg_inf ccl_maybe_unused; -static struct PosInfTy -{ -__forceinline operator float ( ) const { return std::numeric_limits<float>::infinity(); } -__forceinline operator int ( ) const { return std::numeric_limits<int>::max(); } +static struct PosInfTy { + __forceinline operator float() const + { + return std::numeric_limits<float>::infinity(); + } + __forceinline operator int() const + { + return std::numeric_limits<int>::max(); + } } inf ccl_maybe_unused, pos_inf ccl_maybe_unused; /* Intrinsics Functions */ -#if defined(__BMI__) && defined(__GNUC__) -# ifndef _tzcnt_u32 -# define _tzcnt_u32 __tzcnt_u32 -# endif -# ifndef _tzcnt_u64 -# define _tzcnt_u64 __tzcnt_u64 -# endif -#endif +# if defined(__BMI__) && defined(__GNUC__) +# ifndef _tzcnt_u32 +# define _tzcnt_u32 __tzcnt_u32 +# endif +# ifndef _tzcnt_u64 +# define _tzcnt_u64 __tzcnt_u64 +# endif +# endif -#if defined(__LZCNT__) -#define _lzcnt_u32 __lzcnt32 -#define _lzcnt_u64 __lzcnt64 -#endif +# if defined(__LZCNT__) +# define _lzcnt_u32 __lzcnt32 +# define _lzcnt_u64 __lzcnt64 +# endif -#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__) +# if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__) -__forceinline int __popcnt(int in) { +__forceinline int __popcnt(int in) +{ return _mm_popcnt_u32(in); } -#if !defined(_MSC_VER) -__forceinline unsigned int __popcnt(unsigned int in) { +# if !defined(_MSC_VER) +__forceinline unsigned int __popcnt(unsigned int in) +{ return _mm_popcnt_u32(in); } -#endif +# endif -#if defined(__KERNEL_64_BIT__) -__forceinline long long __popcnt(long long in) { +# if defined(__KERNEL_64_BIT__) +__forceinline long long __popcnt(long long in) +{ return _mm_popcnt_u64(in); } -__forceinline size_t __popcnt(size_t in) { +__forceinline size_t __popcnt(size_t in) +{ return _mm_popcnt_u64(in); } -#endif +# endif -__forceinline int __bsf(int v) { -#if defined(__KERNEL_AVX2__) +__forceinline int __bsf(int v) +{ +# if defined(__KERNEL_AVX2__) return _tzcnt_u32(v); -#else - unsigned long r = 0; _BitScanForward(&r,v); return r; -#endif +# else + unsigned long r = 0; + _BitScanForward(&r, v); + return r; +# endif } -__forceinline unsigned int __bsf(unsigned int v) { -#if defined(__KERNEL_AVX2__) +__forceinline unsigned int __bsf(unsigned int v) +{ +# if defined(__KERNEL_AVX2__) return _tzcnt_u32(v); -#else - unsigned long r = 0; _BitScanForward(&r,v); return r; -#endif +# else + unsigned long r = 0; + _BitScanForward(&r, v); + return r; +# endif } -__forceinline int __bsr(int v) { - unsigned long r = 0; _BitScanReverse(&r,v); return r; +__forceinline int __bsr(int v) +{ + unsigned long r = 0; + _BitScanReverse(&r, v); + return r; } -__forceinline int __btc(int v, int i) { - long r = v; _bittestandcomplement(&r,i); return r; +__forceinline int __btc(int v, int i) +{ + long r = v; + _bittestandcomplement(&r, i); + return r; } -__forceinline int __bts(int v, int i) { - long r = v; _bittestandset(&r,i); return r; +__forceinline int __bts(int v, int i) +{ + long r = v; + _bittestandset(&r, i); + return r; } -__forceinline int __btr(int v, int i) { - long r = v; _bittestandreset(&r,i); return r; +__forceinline int __btr(int v, int i) +{ + long r = v; + _bittestandreset(&r, i); + return r; } -__forceinline int bitscan(int v) { -#if defined(__KERNEL_AVX2__) +__forceinline int bitscan(int v) +{ +# if defined(__KERNEL_AVX2__) return _tzcnt_u32(v); -#else +# else return __bsf(v); -#endif +# endif } __forceinline int clz(const int x) { -#if defined(__KERNEL_AVX2__) +# if defined(__KERNEL_AVX2__) return _lzcnt_u32(x); -#else - if(UNLIKELY(x == 0)) return 32; +# else + if (UNLIKELY(x == 0)) + return 32; return 31 - __bsr(x); -#endif +# endif } -__forceinline int __bscf(int& v) +__forceinline int __bscf(int &v) { int i = __bsf(v); - v &= v-1; + v &= v - 1; return i; } -__forceinline unsigned int __bscf(unsigned int& v) +__forceinline unsigned int __bscf(unsigned int &v) { unsigned int i = __bsf(v); - v &= v-1; + v &= v - 1; return i; } -#if defined(__KERNEL_64_BIT__) +# if defined(__KERNEL_64_BIT__) -__forceinline size_t __bsf(size_t v) { -#if defined(__KERNEL_AVX2__) +__forceinline size_t __bsf(size_t v) +{ +# if defined(__KERNEL_AVX2__) return _tzcnt_u64(v); -#else - unsigned long r = 0; _BitScanForward64(&r,v); return r; -#endif +# else + unsigned long r = 0; + _BitScanForward64(&r, v); + return r; +# endif } -__forceinline size_t __bsr(size_t v) { - unsigned long r = 0; _BitScanReverse64(&r,v); return r; +__forceinline size_t __bsr(size_t v) +{ + unsigned long r = 0; + _BitScanReverse64(&r, v); + return r; } -__forceinline size_t __btc(size_t v, size_t i) { - size_t r = v; _bittestandcomplement64((__int64*)&r,i); return r; +__forceinline size_t __btc(size_t v, size_t i) +{ + size_t r = v; + _bittestandcomplement64((__int64 *)&r, i); + return r; } -__forceinline size_t __bts(size_t v, size_t i) { - __int64 r = v; _bittestandset64(&r,i); return r; +__forceinline size_t __bts(size_t v, size_t i) +{ + __int64 r = v; + _bittestandset64(&r, i); + return r; } -__forceinline size_t __btr(size_t v, size_t i) { - __int64 r = v; _bittestandreset64(&r,i); return r; +__forceinline size_t __btr(size_t v, size_t i) +{ + __int64 r = v; + _bittestandreset64(&r, i); + return r; } -__forceinline size_t bitscan(size_t v) { -#if defined(__KERNEL_AVX2__) -#if defined(__KERNEL_64_BIT__) +__forceinline size_t bitscan(size_t v) +{ +# if defined(__KERNEL_AVX2__) +# if defined(__KERNEL_64_BIT__) return _tzcnt_u64(v); -#else +# else return _tzcnt_u32(v); -#endif -#else +# endif +# else return __bsf(v); -#endif +# endif } -__forceinline size_t __bscf(size_t& v) +__forceinline size_t __bscf(size_t &v) { size_t i = __bsf(v); - v &= v-1; + v &= v - 1; return i; } -#endif /* __KERNEL_64_BIT__ */ +# endif /* __KERNEL_64_BIT__ */ -#else /* _WIN32 */ +# else /* _WIN32 */ -__forceinline unsigned int __popcnt(unsigned int in) { - int r = 0; asm ("popcnt %1,%0" : "=r"(r) : "r"(in)); return r; +__forceinline unsigned int __popcnt(unsigned int in) +{ + int r = 0; + asm("popcnt %1,%0" : "=r"(r) : "r"(in)); + return r; } -__forceinline int __bsf(int v) { - int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; +__forceinline int __bsf(int v) +{ + int r = 0; + asm("bsf %1,%0" : "=r"(r) : "r"(v)); + return r; } -__forceinline int __bsr(int v) { - int r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; +__forceinline int __bsr(int v) +{ + int r = 0; + asm("bsr %1,%0" : "=r"(r) : "r"(v)); + return r; } -__forceinline int __btc(int v, int i) { - int r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r; +__forceinline int __btc(int v, int i) +{ + int r = 0; + asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; } -__forceinline int __bts(int v, int i) { - int r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; +__forceinline int __bts(int v, int i) +{ + int r = 0; + asm("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; } -__forceinline int __btr(int v, int i) { - int r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; +__forceinline int __btr(int v, int i) +{ + int r = 0; + asm("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; } -#if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && !(defined(__ILP32__) && defined(__x86_64__)) -__forceinline size_t __bsf(size_t v) { - size_t r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; +# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ + !(defined(__ILP32__) && defined(__x86_64__)) +__forceinline size_t __bsf(size_t v) +{ + size_t r = 0; + asm("bsf %1,%0" : "=r"(r) : "r"(v)); + return r; } -#endif +# endif -__forceinline unsigned int __bsf(unsigned int v) { - unsigned int r = 0; asm ("bsf %1,%0" : "=r"(r) : "r"(v)); return r; +__forceinline unsigned int __bsf(unsigned int v) +{ + unsigned int r = 0; + asm("bsf %1,%0" : "=r"(r) : "r"(v)); + return r; } -__forceinline size_t __bsr(size_t v) { - size_t r = 0; asm ("bsr %1,%0" : "=r"(r) : "r"(v)); return r; +__forceinline size_t __bsr(size_t v) +{ + size_t r = 0; + asm("bsr %1,%0" : "=r"(r) : "r"(v)); + return r; } -__forceinline size_t __btc(size_t v, size_t i) { - size_t r = 0; asm ("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags" ); return r; +__forceinline size_t __btc(size_t v, size_t i) +{ + size_t r = 0; + asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; } -__forceinline size_t __bts(size_t v, size_t i) { - size_t r = 0; asm ("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; +__forceinline size_t __bts(size_t v, size_t i) +{ + size_t r = 0; + asm("bts %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; } -__forceinline size_t __btr(size_t v, size_t i) { - size_t r = 0; asm ("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); return r; +__forceinline size_t __btr(size_t v, size_t i) +{ + size_t r = 0; + asm("btr %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags"); + return r; } -__forceinline int bitscan(int v) { -#if defined(__KERNEL_AVX2__) +__forceinline int bitscan(int v) +{ +# if defined(__KERNEL_AVX2__) return _tzcnt_u32(v); -#else +# else return __bsf(v); -#endif +# endif } -__forceinline unsigned int bitscan(unsigned int v) { -#if defined(__KERNEL_AVX2__) +__forceinline unsigned int bitscan(unsigned int v) +{ +# if defined(__KERNEL_AVX2__) return _tzcnt_u32(v); -#else +# else return __bsf(v); -#endif +# endif } -#if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && !(defined(__ILP32__) && defined(__x86_64__)) -__forceinline size_t bitscan(size_t v) { -#if defined(__KERNEL_AVX2__) -#if defined(__KERNEL_64_BIT__) +# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ + !(defined(__ILP32__) && defined(__x86_64__)) +__forceinline size_t bitscan(size_t v) +{ +# if defined(__KERNEL_AVX2__) +# if defined(__KERNEL_64_BIT__) return _tzcnt_u64(v); -#else +# else return _tzcnt_u32(v); -#endif -#else +# endif +# else return __bsf(v); -#endif +# endif } -#endif +# endif __forceinline int clz(const int x) { -#if defined(__KERNEL_AVX2__) +# if defined(__KERNEL_AVX2__) return _lzcnt_u32(x); -#else - if(UNLIKELY(x == 0)) return 32; +# else + if (UNLIKELY(x == 0)) + return 32; return 31 - __bsr(x); -#endif +# endif } -__forceinline int __bscf(int& v) +__forceinline int __bscf(int &v) { int i = bitscan(v); -#if defined(__KERNEL_AVX2__) - v &= v-1; -#else - v = __btc(v,i); -#endif +# if defined(__KERNEL_AVX2__) + v &= v - 1; +# else + v = __btc(v, i); +# endif return i; } -__forceinline unsigned int __bscf(unsigned int& v) +__forceinline unsigned int __bscf(unsigned int &v) { unsigned int i = bitscan(v); - v &= v-1; + v &= v - 1; return i; } -#if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && !(defined(__ILP32__) && defined(__x86_64__)) -__forceinline size_t __bscf(size_t& v) +# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \ + !(defined(__ILP32__) && defined(__x86_64__)) +__forceinline size_t __bscf(size_t &v) { size_t i = bitscan(v); -#if defined(__KERNEL_AVX2__) - v &= v-1; -#else - v = __btc(v,i); -#endif +# if defined(__KERNEL_AVX2__) + v &= v - 1; +# else + v = __btc(v, i); +# endif return i; } -#endif +# endif -#endif /* _WIN32 */ +# endif /* _WIN32 */ /* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test * __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other * platforms when compiling code outside the kernel. */ -#if !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) +# if !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) /* Emulation of SSE4 functions with SSE2 */ -#define _MM_FROUND_TO_NEAREST_INT 0x00 -#define _MM_FROUND_TO_NEG_INF 0x01 -#define _MM_FROUND_TO_POS_INF 0x02 -#define _MM_FROUND_TO_ZERO 0x03 -#define _MM_FROUND_CUR_DIRECTION 0x04 +# define _MM_FROUND_TO_NEAREST_INT 0x00 +# define _MM_FROUND_TO_NEG_INF 0x01 +# define _MM_FROUND_TO_POS_INF 0x02 +# define _MM_FROUND_TO_ZERO 0x03 +# define _MM_FROUND_CUR_DIRECTION 0x04 -#undef _mm_blendv_ps -#define _mm_blendv_ps _mm_blendv_ps_emu -__forceinline __m128 _mm_blendv_ps_emu( __m128 value, __m128 input, __m128 mask) +# undef _mm_blendv_ps +# define _mm_blendv_ps _mm_blendv_ps_emu +__forceinline __m128 _mm_blendv_ps_emu(__m128 value, __m128 input, __m128 mask) { - __m128i isignmask = _mm_set1_epi32(0x80000000); - __m128 signmask = _mm_castsi128_ps(isignmask); - __m128i iandsign = _mm_castps_si128(_mm_and_ps(mask, signmask)); - __m128i icmpmask = _mm_cmpeq_epi32(iandsign, isignmask); - __m128 cmpmask = _mm_castsi128_ps(icmpmask); - return _mm_or_ps(_mm_and_ps(cmpmask, input), _mm_andnot_ps(cmpmask, value)); + __m128i isignmask = _mm_set1_epi32(0x80000000); + __m128 signmask = _mm_castsi128_ps(isignmask); + __m128i iandsign = _mm_castps_si128(_mm_and_ps(mask, signmask)); + __m128i icmpmask = _mm_cmpeq_epi32(iandsign, isignmask); + __m128 cmpmask = _mm_castsi128_ps(icmpmask); + return _mm_or_ps(_mm_and_ps(cmpmask, input), _mm_andnot_ps(cmpmask, value)); } -#undef _mm_blend_ps -#define _mm_blend_ps _mm_blend_ps_emu -__forceinline __m128 _mm_blend_ps_emu( __m128 value, __m128 input, const int mask) +# undef _mm_blend_ps +# define _mm_blend_ps _mm_blend_ps_emu +__forceinline __m128 _mm_blend_ps_emu(__m128 value, __m128 input, const int mask) { - assert(mask < 0x10); return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]); + assert(mask < 0x10); + return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]); } -#undef _mm_blendv_epi8 -#define _mm_blendv_epi8 _mm_blendv_epi8_emu -__forceinline __m128i _mm_blendv_epi8_emu( __m128i value, __m128i input, __m128i mask) +# undef _mm_blendv_epi8 +# define _mm_blendv_epi8 _mm_blendv_epi8_emu +__forceinline __m128i _mm_blendv_epi8_emu(__m128i value, __m128i input, __m128i mask) { - return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value)); + return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value)); } -#undef _mm_min_epi32 -#define _mm_min_epi32 _mm_min_epi32_emu -__forceinline __m128i _mm_min_epi32_emu( __m128i value, __m128i input) +# undef _mm_min_epi32 +# define _mm_min_epi32 _mm_min_epi32_emu +__forceinline __m128i _mm_min_epi32_emu(__m128i value, __m128i input) { - return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input)); } -#undef _mm_max_epi32 -#define _mm_max_epi32 _mm_max_epi32_emu -__forceinline __m128i _mm_max_epi32_emu( __m128i value, __m128i input) +# undef _mm_max_epi32 +# define _mm_max_epi32 _mm_max_epi32_emu +__forceinline __m128i _mm_max_epi32_emu(__m128i value, __m128i input) { - return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input)); + return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input)); } -#undef _mm_extract_epi32 -#define _mm_extract_epi32 _mm_extract_epi32_emu -__forceinline int _mm_extract_epi32_emu( __m128i input, const int index) +# undef _mm_extract_epi32 +# define _mm_extract_epi32 _mm_extract_epi32_emu +__forceinline int _mm_extract_epi32_emu(__m128i input, const int index) { - switch(index) { - case 0: return _mm_cvtsi128_si32(input); - case 1: return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(1, 1, 1, 1))); - case 2: return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(2, 2, 2, 2))); - case 3: return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(3, 3, 3, 3))); - default: assert(false); return 0; + switch (index) { + case 0: + return _mm_cvtsi128_si32(input); + case 1: + return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(1, 1, 1, 1))); + case 2: + return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(2, 2, 2, 2))); + case 3: + return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(3, 3, 3, 3))); + default: + assert(false); + return 0; } } -#undef _mm_insert_epi32 -#define _mm_insert_epi32 _mm_insert_epi32_emu -__forceinline __m128i _mm_insert_epi32_emu( __m128i value, int input, const int index) +# undef _mm_insert_epi32 +# define _mm_insert_epi32 _mm_insert_epi32_emu +__forceinline __m128i _mm_insert_epi32_emu(__m128i value, int input, const int index) { - assert(index >= 0 && index < 4); ((int*)&value)[index] = input; return value; + assert(index >= 0 && index < 4); + ((int *)&value)[index] = input; + return value; } -#undef _mm_insert_ps -#define _mm_insert_ps _mm_insert_ps_emu -__forceinline __m128 _mm_insert_ps_emu( __m128 value, __m128 input, const int index) +# undef _mm_insert_ps +# define _mm_insert_ps _mm_insert_ps_emu +__forceinline __m128 _mm_insert_ps_emu(__m128 value, __m128 input, const int index) { - assert(index < 0x100); - ((float*)&value)[(index >> 4)&0x3] = ((float*)&input)[index >> 6]; - return _mm_andnot_ps(_mm_lookupmask_ps[index&0xf], value); + assert(index < 0x100); + ((float *)&value)[(index >> 4) & 0x3] = ((float *)&input)[index >> 6]; + return _mm_andnot_ps(_mm_lookupmask_ps[index & 0xf], value); } -#undef _mm_round_ps -#define _mm_round_ps _mm_round_ps_emu -__forceinline __m128 _mm_round_ps_emu( __m128 value, const int flags) +# undef _mm_round_ps +# define _mm_round_ps _mm_round_ps_emu +__forceinline __m128 _mm_round_ps_emu(__m128 value, const int flags) { - switch(flags) - { - case _MM_FROUND_TO_NEAREST_INT: return _mm_cvtepi32_ps(_mm_cvtps_epi32(value)); - case _MM_FROUND_TO_NEG_INF : return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(-0.5f)))); - case _MM_FROUND_TO_POS_INF : return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps( 0.5f)))); - case _MM_FROUND_TO_ZERO : return _mm_cvtepi32_ps(_mm_cvttps_epi32(value)); + switch (flags) { + case _MM_FROUND_TO_NEAREST_INT: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(value)); + case _MM_FROUND_TO_NEG_INF: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(-0.5f)))); + case _MM_FROUND_TO_POS_INF: + return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(0.5f)))); + case _MM_FROUND_TO_ZERO: + return _mm_cvtepi32_ps(_mm_cvttps_epi32(value)); } return value; } -#endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */ +# endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */ -#else /* __KERNEL_SSE2__ */ +# else /* __KERNEL_SSE2__ */ /* This section is for utility functions which operates on non-register data * which might be used from a non-vectorized code. @@ -460,38 +571,34 @@ __forceinline __m128 _mm_round_ps_emu( __m128 value, const int flags) ccl_device_inline int bitscan(int value) { - assert(value != 0); - int bit = 0; - while((value & (1 << bit)) == 0) { - ++bit; - } - return bit; + assert(value != 0); + int bit = 0; + while ((value & (1 << bit)) == 0) { + ++bit; + } + return bit; } ccl_device_inline int __bsr(int value) { - assert(value != 0); - int bit = 0; - while(value >>= 1) { - ++bit; - } - return bit; + assert(value != 0); + int bit = 0; + while (value >>= 1) { + ++bit; + } + return bit; } -#endif /* __KERNEL_SSE2__ */ +# endif /* __KERNEL_SSE2__ */ /* quiet unused define warnings */ -#if defined(__KERNEL_SSE2__) || \ - defined(__KERNEL_SSE3__) || \ - defined(__KERNEL_SSSE3__) || \ - defined(__KERNEL_SSE41__) || \ - defined(__KERNEL_AVX__) || \ - defined(__KERNEL_AVX2__) - /* do nothing */ -#endif +# if defined(__KERNEL_SSE2__) || defined(__KERNEL_SSE3__) || defined(__KERNEL_SSSE3__) || \ + defined(__KERNEL_SSE41__) || defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +/* do nothing */ +# endif CCL_NAMESPACE_END -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ -#endif /* __UTIL_SIMD_TYPES_H__ */ +#endif /* __UTIL_SIMD_TYPES_H__ */ diff --git a/intern/cycles/util/util_sky_model.cpp b/intern/cycles/util/util_sky_model.cpp index 526bce4ff88..4a6a9f32607 100644 --- a/intern/cycles/util/util_sky_model.cpp +++ b/intern/cycles/util/util_sky_model.cpp @@ -111,23 +111,23 @@ CCL_NAMESPACE_BEGIN // replicated to make this a stand-alone module. #ifndef MATH_PI -#define MATH_PI 3.141592653589793 +# define MATH_PI 3.141592653589793 #endif #ifndef MATH_DEG_TO_RAD -#define MATH_DEG_TO_RAD ( MATH_PI / 180.0 ) +# define MATH_DEG_TO_RAD (MATH_PI / 180.0) #endif #ifndef DEGREES -#define DEGREES * MATH_DEG_TO_RAD +# define DEGREES *MATH_DEG_TO_RAD #endif #ifndef TERRESTRIAL_SOLAR_RADIUS -#define TERRESTRIAL_SOLAR_RADIUS ( ( 0.51 DEGREES ) / 2.0 ) +# define TERRESTRIAL_SOLAR_RADIUS ((0.51 DEGREES) / 2.0) #endif #ifndef ALLOC -#define ALLOC(_struct) ((_struct *)malloc(sizeof(_struct))) +# define ALLOC(_struct) ((_struct *)malloc(sizeof(_struct))) #endif // internal definitions @@ -137,233 +137,213 @@ typedef const double *ArHosekSkyModel_Radiance_Dataset; // internal functions -static void ArHosekSkyModel_CookConfiguration( - ArHosekSkyModel_Dataset dataset, - ArHosekSkyModelConfiguration config, - double turbidity, - double albedo, - double solar_elevation) +static void ArHosekSkyModel_CookConfiguration(ArHosekSkyModel_Dataset dataset, + ArHosekSkyModelConfiguration config, + double turbidity, + double albedo, + double solar_elevation) { - const double * elev_matrix; - - int int_turbidity = (int)turbidity; - double turbidity_rem = turbidity - (double)int_turbidity; - - solar_elevation = pow(solar_elevation / (MATH_PI / 2.0), (1.0 / 3.0)); - - // alb 0 low turb - - elev_matrix = dataset + ( 9 * 6 * (int_turbidity-1)); - - for(unsigned int i = 0; i < 9; ++i) { - //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; - config[i] = - (1.0-albedo) * (1.0 - turbidity_rem) - * ( pow(1.0-solar_elevation, 5.0) * elev_matrix[i] + - 5.0 * pow(1.0-solar_elevation, 4.0) * solar_elevation * elev_matrix[i+9] + - 10.0*pow(1.0-solar_elevation, 3.0)*pow(solar_elevation, 2.0) * elev_matrix[i+18] + - 10.0*pow(1.0-solar_elevation, 2.0)*pow(solar_elevation, 3.0) * elev_matrix[i+27] + - 5.0*(1.0-solar_elevation)*pow(solar_elevation, 4.0) * elev_matrix[i+36] + - pow(solar_elevation, 5.0) * elev_matrix[i+45]); - } - - // alb 1 low turb - elev_matrix = dataset + (9*6*10 + 9*6*(int_turbidity-1)); - for(unsigned int i = 0; i < 9; ++i) { - //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; - config[i] += - (albedo) * (1.0 - turbidity_rem) - * ( pow(1.0-solar_elevation, 5.0) * elev_matrix[i] + - 5.0 * pow(1.0-solar_elevation, 4.0) * solar_elevation * elev_matrix[i+9] + - 10.0*pow(1.0-solar_elevation, 3.0)*pow(solar_elevation, 2.0) * elev_matrix[i+18] + - 10.0*pow(1.0-solar_elevation, 2.0)*pow(solar_elevation, 3.0) * elev_matrix[i+27] + - 5.0*(1.0-solar_elevation)*pow(solar_elevation, 4.0) * elev_matrix[i+36] + - pow(solar_elevation, 5.0) * elev_matrix[i+45]); - } - - if(int_turbidity == 10) - return; - - // alb 0 high turb - elev_matrix = dataset + (9*6*(int_turbidity)); - for(unsigned int i = 0; i < 9; ++i) { - //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; - config[i] += - (1.0-albedo) * (turbidity_rem) - * ( pow(1.0-solar_elevation, 5.0) * elev_matrix[i] + - 5.0 * pow(1.0-solar_elevation, 4.0) * solar_elevation * elev_matrix[i+9] + - 10.0*pow(1.0-solar_elevation, 3.0)*pow(solar_elevation, 2.0) * elev_matrix[i+18] + - 10.0*pow(1.0-solar_elevation, 2.0)*pow(solar_elevation, 3.0) * elev_matrix[i+27] + - 5.0*(1.0-solar_elevation)*pow(solar_elevation, 4.0) * elev_matrix[i+36] + - pow(solar_elevation, 5.0) * elev_matrix[i+45]); - } - - // alb 1 high turb - elev_matrix = dataset + (9*6*10 + 9*6*(int_turbidity)); - for(unsigned int i = 0; i < 9; ++i) { - //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; - config[i] += - (albedo) * (turbidity_rem) - * ( pow(1.0-solar_elevation, 5.0) * elev_matrix[i] + - 5.0 * pow(1.0-solar_elevation, 4.0) * solar_elevation * elev_matrix[i+9] + - 10.0*pow(1.0-solar_elevation, 3.0)*pow(solar_elevation, 2.0) * elev_matrix[i+18] + - 10.0*pow(1.0-solar_elevation, 2.0)*pow(solar_elevation, 3.0) * elev_matrix[i+27] + - 5.0*(1.0-solar_elevation)*pow(solar_elevation, 4.0) * elev_matrix[i+36] + - pow(solar_elevation, 5.0) * elev_matrix[i+45]); - } + const double *elev_matrix; + + int int_turbidity = (int)turbidity; + double turbidity_rem = turbidity - (double)int_turbidity; + + solar_elevation = pow(solar_elevation / (MATH_PI / 2.0), (1.0 / 3.0)); + + // alb 0 low turb + + elev_matrix = dataset + (9 * 6 * (int_turbidity - 1)); + + for (unsigned int i = 0; i < 9; ++i) { + //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; + config[i] = + (1.0 - albedo) * (1.0 - turbidity_rem) * + (pow(1.0 - solar_elevation, 5.0) * elev_matrix[i] + + 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[i + 9] + + 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[i + 18] + + 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[i + 27] + + 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[i + 36] + + pow(solar_elevation, 5.0) * elev_matrix[i + 45]); + } + + // alb 1 low turb + elev_matrix = dataset + (9 * 6 * 10 + 9 * 6 * (int_turbidity - 1)); + for (unsigned int i = 0; i < 9; ++i) { + //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; + config[i] += + (albedo) * (1.0 - turbidity_rem) * + (pow(1.0 - solar_elevation, 5.0) * elev_matrix[i] + + 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[i + 9] + + 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[i + 18] + + 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[i + 27] + + 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[i + 36] + + pow(solar_elevation, 5.0) * elev_matrix[i + 45]); + } + + if (int_turbidity == 10) + return; + + // alb 0 high turb + elev_matrix = dataset + (9 * 6 * (int_turbidity)); + for (unsigned int i = 0; i < 9; ++i) { + //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; + config[i] += + (1.0 - albedo) * (turbidity_rem) * + (pow(1.0 - solar_elevation, 5.0) * elev_matrix[i] + + 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[i + 9] + + 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[i + 18] + + 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[i + 27] + + 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[i + 36] + + pow(solar_elevation, 5.0) * elev_matrix[i + 45]); + } + + // alb 1 high turb + elev_matrix = dataset + (9 * 6 * 10 + 9 * 6 * (int_turbidity)); + for (unsigned int i = 0; i < 9; ++i) { + //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; + config[i] += + (albedo) * (turbidity_rem) * + (pow(1.0 - solar_elevation, 5.0) * elev_matrix[i] + + 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[i + 9] + + 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[i + 18] + + 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[i + 27] + + 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[i + 36] + + pow(solar_elevation, 5.0) * elev_matrix[i + 45]); + } } -static double ArHosekSkyModel_CookRadianceConfiguration( - ArHosekSkyModel_Radiance_Dataset dataset, - double turbidity, - double albedo, - double solar_elevation) +static double ArHosekSkyModel_CookRadianceConfiguration(ArHosekSkyModel_Radiance_Dataset dataset, + double turbidity, + double albedo, + double solar_elevation) { - const double* elev_matrix; - - int int_turbidity = (int)turbidity; - double turbidity_rem = turbidity - (double)int_turbidity; - double res; - solar_elevation = pow(solar_elevation / (MATH_PI / 2.0), (1.0 / 3.0)); - - // alb 0 low turb - elev_matrix = dataset + (6*(int_turbidity-1)); - //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; - res = (1.0-albedo) * (1.0 - turbidity_rem) * - ( pow(1.0-solar_elevation, 5.0) * elev_matrix[0] + - 5.0*pow(1.0-solar_elevation, 4.0)*solar_elevation * elev_matrix[1] + - 10.0*pow(1.0-solar_elevation, 3.0)*pow(solar_elevation, 2.0) * elev_matrix[2] + - 10.0*pow(1.0-solar_elevation, 2.0)*pow(solar_elevation, 3.0) * elev_matrix[3] + - 5.0*(1.0-solar_elevation)*pow(solar_elevation, 4.0) * elev_matrix[4] + - pow(solar_elevation, 5.0) * elev_matrix[5]); - - // alb 1 low turb - elev_matrix = dataset + (6*10 + 6*(int_turbidity-1)); - //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; - res += (albedo) * (1.0 - turbidity_rem) * - ( pow(1.0-solar_elevation, 5.0) * elev_matrix[0] + - 5.0*pow(1.0-solar_elevation, 4.0)*solar_elevation * elev_matrix[1] + - 10.0*pow(1.0-solar_elevation, 3.0)*pow(solar_elevation, 2.0) * elev_matrix[2] + - 10.0*pow(1.0-solar_elevation, 2.0)*pow(solar_elevation, 3.0) * elev_matrix[3] + - 5.0*(1.0-solar_elevation)*pow(solar_elevation, 4.0) * elev_matrix[4] + - pow(solar_elevation, 5.0) * elev_matrix[5]); - if(int_turbidity == 10) - return res; - - // alb 0 high turb - elev_matrix = dataset + (6*(int_turbidity)); - //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; - res += (1.0-albedo) * (turbidity_rem) * - ( pow(1.0-solar_elevation, 5.0) * elev_matrix[0] + - 5.0*pow(1.0-solar_elevation, 4.0)*solar_elevation * elev_matrix[1] + - 10.0*pow(1.0-solar_elevation, 3.0)*pow(solar_elevation, 2.0) * elev_matrix[2] + - 10.0*pow(1.0-solar_elevation, 2.0)*pow(solar_elevation, 3.0) * elev_matrix[3] + - 5.0*(1.0-solar_elevation)*pow(solar_elevation, 4.0) * elev_matrix[4] + - pow(solar_elevation, 5.0) * elev_matrix[5]); - - // alb 1 high turb - elev_matrix = dataset + (6*10 + 6*(int_turbidity)); - //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; - res += (albedo) * (turbidity_rem) * - ( pow(1.0-solar_elevation, 5.0) * elev_matrix[0] + - 5.0*pow(1.0-solar_elevation, 4.0)*solar_elevation * elev_matrix[1] + - 10.0*pow(1.0-solar_elevation, 3.0)*pow(solar_elevation, 2.0) * elev_matrix[2] + - 10.0*pow(1.0-solar_elevation, 2.0)*pow(solar_elevation, 3.0) * elev_matrix[3] + - 5.0*(1.0-solar_elevation)*pow(solar_elevation, 4.0) * elev_matrix[4] + - pow(solar_elevation, 5.0) * elev_matrix[5]); - return res; + const double *elev_matrix; + + int int_turbidity = (int)turbidity; + double turbidity_rem = turbidity - (double)int_turbidity; + double res; + solar_elevation = pow(solar_elevation / (MATH_PI / 2.0), (1.0 / 3.0)); + + // alb 0 low turb + elev_matrix = dataset + (6 * (int_turbidity - 1)); + //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; + res = (1.0 - albedo) * (1.0 - turbidity_rem) * + (pow(1.0 - solar_elevation, 5.0) * elev_matrix[0] + + 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[1] + + 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[2] + + 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[3] + + 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[4] + + pow(solar_elevation, 5.0) * elev_matrix[5]); + + // alb 1 low turb + elev_matrix = dataset + (6 * 10 + 6 * (int_turbidity - 1)); + //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; + res += (albedo) * (1.0 - turbidity_rem) * + (pow(1.0 - solar_elevation, 5.0) * elev_matrix[0] + + 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[1] + + 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[2] + + 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[3] + + 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[4] + + pow(solar_elevation, 5.0) * elev_matrix[5]); + if (int_turbidity == 10) + return res; + + // alb 0 high turb + elev_matrix = dataset + (6 * (int_turbidity)); + //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; + res += (1.0 - albedo) * (turbidity_rem) * + (pow(1.0 - solar_elevation, 5.0) * elev_matrix[0] + + 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[1] + + 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[2] + + 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[3] + + 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[4] + + pow(solar_elevation, 5.0) * elev_matrix[5]); + + // alb 1 high turb + elev_matrix = dataset + (6 * 10 + 6 * (int_turbidity)); + //(1-t).^3* A1 + 3*(1-t).^2.*t * A2 + 3*(1-t) .* t .^ 2 * A3 + t.^3 * A4; + res += (albedo) * (turbidity_rem) * + (pow(1.0 - solar_elevation, 5.0) * elev_matrix[0] + + 5.0 * pow(1.0 - solar_elevation, 4.0) * solar_elevation * elev_matrix[1] + + 10.0 * pow(1.0 - solar_elevation, 3.0) * pow(solar_elevation, 2.0) * elev_matrix[2] + + 10.0 * pow(1.0 - solar_elevation, 2.0) * pow(solar_elevation, 3.0) * elev_matrix[3] + + 5.0 * (1.0 - solar_elevation) * pow(solar_elevation, 4.0) * elev_matrix[4] + + pow(solar_elevation, 5.0) * elev_matrix[5]); + return res; } -static double ArHosekSkyModel_GetRadianceInternal( - ArHosekSkyModelConfiguration configuration, - double theta, - double gamma) +static double ArHosekSkyModel_GetRadianceInternal(ArHosekSkyModelConfiguration configuration, + double theta, + double gamma) { - const double expM = exp(configuration[4] * gamma); - const double rayM = cos(gamma)*cos(gamma); - const double mieM = (1.0 + cos(gamma)*cos(gamma)) / pow((1.0 + configuration[8]*configuration[8] - 2.0*configuration[8]*cos(gamma)), 1.5); - const double zenith = sqrt(cos(theta)); - - return (1.0 + configuration[0] * exp(configuration[1] / (cos(theta) + 0.01))) * - (configuration[2] + configuration[3] * expM + configuration[5] * rayM + configuration[6] * mieM + configuration[7] * zenith); + const double expM = exp(configuration[4] * gamma); + const double rayM = cos(gamma) * cos(gamma); + const double mieM = + (1.0 + cos(gamma) * cos(gamma)) / + pow((1.0 + configuration[8] * configuration[8] - 2.0 * configuration[8] * cos(gamma)), 1.5); + const double zenith = sqrt(cos(theta)); + + return (1.0 + configuration[0] * exp(configuration[1] / (cos(theta) + 0.01))) * + (configuration[2] + configuration[3] * expM + configuration[5] * rayM + + configuration[6] * mieM + configuration[7] * zenith); } -void arhosekskymodelstate_free(ArHosekSkyModelState * state) +void arhosekskymodelstate_free(ArHosekSkyModelState *state) { - free(state); + free(state); } -double arhosekskymodel_radiance(ArHosekSkyModelState *state, +double arhosekskymodel_radiance(ArHosekSkyModelState *state, double theta, double gamma, double wavelength) { - int low_wl = (int)((wavelength - 320.0) / 40.0); - - if(low_wl < 0 || low_wl >= 11) - return 0.0; - - double interp = fmod((wavelength - 320.0 ) / 40.0, 1.0); - - double val_low = - ArHosekSkyModel_GetRadianceInternal( - state->configs[low_wl], - theta, - gamma) - * state->radiances[low_wl] - * state->emission_correction_factor_sky[low_wl]; - - if(interp < 1e-6) - return val_low; - - double result = ( 1.0 - interp ) * val_low; - - if(low_wl+1 < 11) { - result += - interp - * ArHosekSkyModel_GetRadianceInternal( - state->configs[low_wl+1], - theta, - gamma) - * state->radiances[low_wl+1] - * state->emission_correction_factor_sky[low_wl+1]; - } - - return result; -} + int low_wl = (int)((wavelength - 320.0) / 40.0); + + if (low_wl < 0 || low_wl >= 11) + return 0.0; + + double interp = fmod((wavelength - 320.0) / 40.0, 1.0); + + double val_low = ArHosekSkyModel_GetRadianceInternal(state->configs[low_wl], theta, gamma) * + state->radiances[low_wl] * state->emission_correction_factor_sky[low_wl]; + + if (interp < 1e-6) + return val_low; + + double result = (1.0 - interp) * val_low; + if (low_wl + 1 < 11) { + result += interp * + ArHosekSkyModel_GetRadianceInternal(state->configs[low_wl + 1], theta, gamma) * + state->radiances[low_wl + 1] * state->emission_correction_factor_sky[low_wl + 1]; + } + + return result; +} // xyz and rgb versions -ArHosekSkyModelState * arhosek_xyz_skymodelstate_alloc_init( - const double turbidity, - const double albedo, - const double elevation) +ArHosekSkyModelState *arhosek_xyz_skymodelstate_alloc_init(const double turbidity, + const double albedo, + const double elevation) { - ArHosekSkyModelState * state = ALLOC(ArHosekSkyModelState); - - state->solar_radius = TERRESTRIAL_SOLAR_RADIUS; - state->turbidity = turbidity; - state->albedo = albedo; - state->elevation = elevation; - - for(unsigned int channel = 0; channel < 3; ++channel) { - ArHosekSkyModel_CookConfiguration( - datasetsXYZ[channel], - state->configs[channel], - turbidity, - albedo, - elevation); - - state->radiances[channel] = - ArHosekSkyModel_CookRadianceConfiguration( - datasetsXYZRad[channel], - turbidity, - albedo, - elevation); - } - - return state; + ArHosekSkyModelState *state = ALLOC(ArHosekSkyModelState); + + state->solar_radius = TERRESTRIAL_SOLAR_RADIUS; + state->turbidity = turbidity; + state->albedo = albedo; + state->elevation = elevation; + + for (unsigned int channel = 0; channel < 3; ++channel) { + ArHosekSkyModel_CookConfiguration( + datasetsXYZ[channel], state->configs[channel], turbidity, albedo, elevation); + + state->radiances[channel] = ArHosekSkyModel_CookRadianceConfiguration( + datasetsXYZRad[channel], turbidity, albedo, elevation); + } + + return state; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_sky_model.h b/intern/cycles/util/util_sky_model.h index 2e593f58c39..84340614b2c 100644 --- a/intern/cycles/util/util_sky_model.h +++ b/intern/cycles/util/util_sky_model.h @@ -28,7 +28,6 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - /* ============================================================================ This file is part of a sample implementation of the analytical skylight and @@ -92,7 +91,6 @@ an updated version of this code has been published! ============================================================================ */ - /* This code is taken from ART, a rendering research system written in a @@ -303,11 +301,10 @@ HINT #1: if you want to model the sky of an earth-like planet that orbits CCL_NAMESPACE_BEGIN #ifndef _SKY_MODEL_H_ -#define _SKY_MODEL_H_ +# define _SKY_MODEL_H_ typedef double ArHosekSkyModelConfiguration[9]; - // Spectral version of the model /* ---------------------------------------------------------------------------- @@ -336,18 +333,16 @@ typedef double ArHosekSkyModelConfiguration[9]; ---------------------------------------------------------------------------- */ -typedef struct ArHosekSkyModelState -{ - ArHosekSkyModelConfiguration configs[11]; - double radiances[11]; - double turbidity; - double solar_radius; - double emission_correction_factor_sky[11]; - double emission_correction_factor_sun[11]; - double albedo; - double elevation; -} -ArHosekSkyModelState; +typedef struct ArHosekSkyModelState { + ArHosekSkyModelConfiguration configs[11]; + double radiances[11]; + double turbidity; + double solar_radius; + double emission_correction_factor_sky[11]; + double emission_correction_factor_sun[11]; + double albedo; + double elevation; +} ArHosekSkyModelState; /* ---------------------------------------------------------------------------- @@ -358,11 +353,9 @@ ArHosekSkyModelState; ---------------------------------------------------------------------------- */ -ArHosekSkyModelState *arhosekskymodelstate_alloc_init( - const double solar_elevation, - const double atmospheric_turbidity, - const double ground_albedo); - +ArHosekSkyModelState *arhosekskymodelstate_alloc_init(const double solar_elevation, + const double atmospheric_turbidity, + const double ground_albedo); /* ---------------------------------------------------------------------------- @@ -393,14 +386,14 @@ ArHosekSkyModelState *arhosekskymodelstate_alloc_init( ---------------------------------------------------------------------------- */ -ArHosekSkyModelState* arhosekskymodelstate_alienworld_alloc_init( - const double solar_elevation, - const double solar_intensity, - const double solar_surface_temperature_kelvin, - const double atmospheric_turbidity, - const double ground_albedo); +ArHosekSkyModelState *arhosekskymodelstate_alienworld_alloc_init( + const double solar_elevation, + const double solar_intensity, + const double solar_surface_temperature_kelvin, + const double atmospheric_turbidity, + const double ground_albedo); -void arhosekskymodelstate_free(ArHosekSkyModelState *state); +void arhosekskymodelstate_free(ArHosekSkyModelState *state); double arhosekskymodel_radiance(ArHosekSkyModelState *state, double theta, @@ -409,20 +402,15 @@ double arhosekskymodel_radiance(ArHosekSkyModelState *state, // CIE XYZ and RGB versions +ArHosekSkyModelState *arhosek_xyz_skymodelstate_alloc_init(const double turbidity, + const double albedo, + const double elevation); -ArHosekSkyModelState * arhosek_xyz_skymodelstate_alloc_init( - const double turbidity, - const double albedo, - const double elevation); +ArHosekSkyModelState *arhosek_rgb_skymodelstate_alloc_init(const double turbidity, + const double albedo, + const double elevation); - -ArHosekSkyModelState * arhosek_rgb_skymodelstate_alloc_init( - const double turbidity, - const double albedo, - const double elevation); - - -double arhosek_tristim_skymodel_radiance(ArHosekSkyModelState* state, +double arhosek_tristim_skymodel_radiance(ArHosekSkyModelState *state, double theta, double gamma, int channel); @@ -431,12 +419,11 @@ double arhosek_tristim_skymodel_radiance(ArHosekSkyModelState* state, // Please read the above description before using this - there are several // caveats! -double arhosekskymodel_solar_radiance(ArHosekSkyModelState* state, +double arhosekskymodel_solar_radiance(ArHosekSkyModelState *state, double theta, double gamma, double wavelength); - #endif // _SKY_MODEL_H_ CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_sky_model_data.h b/intern/cycles/util/util_sky_model_data.h index e6f3f761532..a2a3935eb84 100644 --- a/intern/cycles/util/util_sky_model_data.h +++ b/intern/cycles/util/util_sky_model_data.h @@ -28,7 +28,6 @@ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - /* ============================================================================ This file is part of a sample implementation of the analytical skylight and @@ -104,3760 +103,3745 @@ the model. // Uses Sep 9 pattern / Aug 23 mean dataset static const double datasetXYZ1[] = { - // albedo 0, turbidity 1 - -1.117001e+000, - -1.867262e-001, - -1.113505e+001, - 1.259865e+001, - -3.937339e-002, - 1.167571e+000, - 7.100686e-003, - 3.592678e+000, - 6.083296e-001, - -1.152006e+000, - -1.926669e-001, - 6.152049e+000, - -4.770802e+000, - -8.704701e-002, - 7.483626e-001, - 3.372718e-002, - 4.464592e+000, - 4.036546e-001, - -1.072371e+000, - -2.696632e-001, - 2.816168e-001, - 1.820571e+000, - -3.742666e-001, - 2.080607e+000, - -7.675295e-002, - -2.835366e+000, - 1.129329e+000, - -1.109935e+000, - -1.532764e-001, - 1.198787e+000, - -9.015183e-001, - 5.173015e-003, - 5.749178e-001, - 1.075633e-001, - 4.387949e+000, - 2.650413e-001, - -1.052297e+000, - -2.229452e-001, - 1.952347e+000, - 5.727205e-001, - -4.885070e+000, - 1.984016e+000, - -1.106197e-001, - -4.898361e-001, - 8.907873e-001, - -1.070108e+000, - -1.600465e-001, - 1.593886e+000, - -4.479251e-005, - -3.306541e+000, - 9.390193e-001, - 9.513168e-002, - 2.343583e+000, - 5.335404e-001, - // albedo 0, turbidity 2 - -1.113253e+000, - -1.699600e-001, - -1.038822e+001, - 1.137513e+001, - -4.040911e-002, - 1.037455e+000, - 4.991792e-002, - 4.801919e+000, - 6.302710e-001, - -1.135747e+000, - -1.678594e-001, - 4.970755e+000, - -4.430230e+000, - -6.657408e-002, - 3.636161e-001, - 1.558009e-001, - 6.013370e+000, - 3.959601e-001, - -1.095892e+000, - -2.732595e-001, - 7.666496e-001, - 1.350731e+000, - -4.401401e-001, - 2.470135e+000, - -1.707929e-001, - -3.260793e+000, - 1.170337e+000, - -1.073668e+000, - -2.603929e-002, - -1.944589e-001, - 4.575207e-001, - 6.878164e-001, - -1.390770e-001, - 3.690299e-001, - 7.885781e+000, - 1.877694e-001, - -1.070091e+000, - -2.798957e-001, - 2.338478e+000, - -2.647221e+000, - -7.387808e+000, - 2.329210e+000, - -1.644639e-001, - -2.003710e+000, - 9.874527e-001, - -1.067120e+000, - -1.418866e-001, - 1.254090e+000, - 6.053048e+000, - -2.918892e+000, - 5.322812e-001, - 1.613053e-001, - 3.018161e+000, - 5.274090e-001, - // albedo 0, turbidity 3 - -1.129483e+000, - -1.890619e-001, - -9.065101e+000, - 9.659923e+000, - -3.607819e-002, - 8.314359e-001, - 8.181661e-002, - 4.768868e+000, - 6.339777e-001, - -1.146420e+000, - -1.883579e-001, - 3.309173e+000, - -3.127882e+000, - -6.938176e-002, - 3.987113e-001, - 1.400581e-001, - 6.283042e+000, - 5.267076e-001, - -1.128348e+000, - -2.641305e-001, - 1.223176e+000, - 5.514952e-002, - -3.490649e-001, - 1.997784e+000, - -4.123709e-002, - -2.251251e+000, - 9.483466e-001, - -1.025820e+000, - 1.404690e-002, - -1.187406e+000, - 2.729900e+000, - 5.877588e-001, - -2.761140e-001, - 4.602633e-001, - 8.305125e+000, - 3.945001e-001, - -1.083957e+000, - -2.606679e-001, - 2.207108e+000, - -7.202803e+000, - -5.968103e+000, - 2.129455e+000, - -7.789512e-002, - -1.137688e+000, - 8.871769e-001, - -1.062465e+000, - -1.512189e-001, - 1.042881e+000, - 1.427839e+001, - -4.242214e+000, - 4.038100e-001, - 1.997780e-001, - 2.814449e+000, - 5.803196e-001, - // albedo 0, turbidity 4 - -1.175099e+000, - -2.410789e-001, - -1.108587e+001, - 1.133404e+001, - -1.819300e-002, - 6.772942e-001, - 9.605043e-002, - 4.231166e+000, - 6.239972e-001, - -1.224207e+000, - -2.883527e-001, - 3.002206e+000, - -2.649612e+000, - -4.795418e-002, - 4.984398e-001, - 3.251434e-002, - 4.851611e+000, - 6.551019e-001, - -1.136955e+000, - -2.423048e-001, - 1.058823e+000, - -2.489236e-001, - -2.462179e-001, - 1.933140e+000, - 9.106828e-002, - -1.905869e-001, - 8.171065e-001, - -1.014535e+000, - -8.262500e-003, - -1.448017e+000, - 2.295788e+000, - 3.510334e-001, - -1.477418e+000, - 5.432449e-001, - 5.762796e+000, - 4.908751e-001, - -1.070666e+000, - -2.379780e-001, - 1.844589e+000, - -5.442448e+000, - -4.012768e+000, - 2.945275e+000, - 9.854725e-003, - 8.455959e-002, - 8.145030e-001, - -1.071525e+000, - -1.777132e-001, - 8.076590e-001, - 9.925865e+000, - -3.324623e+000, - -6.367437e-001, - 2.844581e-001, - 2.248384e+000, - 6.544022e-001, - // albedo 0, turbidity 5 - -1.218818e+000, - -2.952382e-001, - -1.345975e+001, - 1.347153e+001, - -6.814585e-003, - 5.079068e-001, - 1.197230e-001, - 3.776949e+000, - 5.836961e-001, - -1.409868e+000, - -5.114330e-001, - 2.776539e+000, - -2.039001e+000, - -2.673769e-002, - 4.145288e-001, - 7.829342e-004, - 2.275883e+000, - 6.629691e-001, - -1.069151e+000, - -9.434247e-002, - 7.293972e-001, - -1.222473e+000, - -1.533461e-001, - 2.160357e+000, - 4.626837e-002, - 3.852415e+000, - 8.593570e-001, - -1.021306e+000, - -1.149551e-001, - -1.108414e+000, - 4.178343e+000, - 4.013665e-001, - -2.222814e+000, - 6.929462e-001, - 1.392652e+000, - 4.401662e-001, - -1.074251e+000, - -2.224002e-001, - 1.372356e+000, - -8.858704e+000, - -3.922660e+000, - 3.020018e+000, - -1.458724e-002, - 1.511186e+000, - 8.288064e-001, - -1.062048e+000, - -1.526582e-001, - 4.921067e-001, - 1.485522e+001, - -3.229936e+000, - -8.426604e-001, - 3.916243e-001, - 2.678994e+000, - 6.689264e-001, - // albedo 0, turbidity 6 - -1.257023e+000, - -3.364700e-001, - -1.527795e+001, - 1.504223e+001, - 2.717715e-003, - 3.029910e-001, - 1.636851e-001, - 3.561663e+000, - 5.283161e-001, - -1.635124e+000, - -7.329993e-001, - 3.523939e+000, - -2.566337e+000, - -1.902543e-002, - 5.505483e-001, - -6.242176e-002, - 1.065992e+000, - 6.654236e-001, - -9.295823e-001, - 4.845834e-002, - -2.992990e-001, - -2.001327e-001, - -8.019339e-002, - 1.807806e+000, - 9.020277e-002, - 5.095372e+000, - 8.639936e-001, - -1.093740e+000, - -2.148608e-001, - -5.216240e-001, - 2.119777e+000, - 9.506454e-002, - -1.831439e+000, - 6.961204e-001, - 1.102084e-001, - 4.384319e-001, - -1.044181e+000, - -1.849257e-001, - 9.071246e-001, - -4.648901e+000, - -2.279385e+000, - 2.356502e+000, - -4.169147e-002, - 1.932557e+000, - 8.296550e-001, - -1.061451e+000, - -1.458745e-001, - 2.952267e-001, - 8.967214e+000, - -3.726228e+000, - -5.022316e-001, - 5.684877e-001, - 3.102347e+000, - 6.658443e-001, - // albedo 0, turbidity 7 - -1.332391e+000, - -4.127769e-001, - -9.328643e+000, - 9.046194e+000, - 3.457775e-003, - 3.377425e-001, - 1.530909e-001, - 3.301209e+000, - 4.997917e-001, - -1.932002e+000, - -9.947777e-001, - -2.042329e+000, - 3.586940e+000, - -5.642182e-002, - 8.130478e-001, - -8.195988e-002, - 1.118294e-001, - 5.617231e-001, - -8.707374e-001, - 1.286999e-001, - 1.820054e+000, - -4.674706e+000, - 3.317471e-003, - 5.919018e-001, - 1.975278e-001, - 6.686519e+000, - 9.631727e-001, - -1.070378e+000, - -3.030579e-001, - -9.041938e-001, - 6.200201e+000, - 1.232207e-001, - -3.650628e-001, - 5.029403e-001, - -2.903162e+000, - 3.811408e-001, - -1.063035e+000, - -1.637545e-001, - 5.853072e-001, - -7.889906e+000, - -1.200641e+000, - 1.035018e+000, - 1.192093e-001, - 3.267054e+000, - 8.416151e-001, - -1.053655e+000, - -1.562286e-001, - 2.423683e-001, - 1.128575e+001, - -4.363262e+000, - -7.314160e-002, - 5.642088e-001, - 2.514023e+000, - 6.670457e-001, - // albedo 0, turbidity 8 - -1.366112e+000, - -4.718287e-001, - -7.876222e+000, - 7.746900e+000, - -9.182309e-003, - 4.716076e-001, - 8.320252e-002, - 3.165603e+000, - 5.392334e-001, - -2.468204e+000, - -1.336340e+000, - -5.386723e+000, - 7.072672e+000, - -8.329266e-002, - 8.636876e-001, - -1.978177e-002, - -1.326218e-001, - 2.979222e-001, - -9.653522e-001, - -2.373416e-002, - 1.810250e+000, - -6.467262e+000, - 1.410706e-001, - -4.753717e-001, - 3.003095e-001, - 6.551163e+000, - 1.151083e+000, - -8.943186e-001, - -2.487152e-001, - -2.308960e-001, - 8.512648e+000, - 1.298402e-001, - 1.034705e+000, - 2.303509e-001, - -3.924095e+000, - 2.982717e-001, - -1.146999e+000, - -2.318784e-001, - 8.992419e-002, - -9.933614e+000, - -8.860920e-001, - -3.071656e-002, - 2.852012e-001, - 3.046199e+000, - 8.599001e-001, - -1.032399e+000, - -1.645145e-001, - 2.683599e-001, - 1.327701e+001, - -4.407670e+000, - 7.709869e-002, - 4.951727e-001, - 1.957277e+000, - 6.630943e-001, - // albedo 0, turbidity 9 - -1.469070e+000, - -6.135092e-001, - -6.506263e+000, - 6.661315e+000, - -3.835383e-002, - 7.150413e-001, - 7.784318e-003, - 2.820577e+000, - 6.756784e-001, - -2.501583e+000, - -1.247404e+000, - -1.523462e+001, - 1.633191e+001, - -1.204803e-002, - 5.896471e-001, - -2.002023e-002, - 1.144647e+000, - 6.177874e-002, - -2.438672e+000, - -1.127291e+000, - 5.731172e+000, - -1.021350e+001, - 6.165610e-002, - -7.752641e-001, - 4.708254e-001, - 4.176847e+000, - 1.200881e+000, - -1.513427e-001, - 9.792731e-002, - -1.612349e+000, - 9.814289e+000, - 5.188921e-002, - 1.716403e+000, - -7.039255e-002, - -2.815115e+000, - 3.291874e-001, - -1.318511e+000, - -3.650554e-001, - 4.221268e-001, - -9.294529e+000, - -4.397520e-002, - -8.100625e-001, - 3.742719e-001, - 1.834166e+000, - 8.223450e-001, - -1.016009e+000, - -1.820264e-001, - 1.278426e-001, - 1.182696e+001, - -4.801528e+000, - 4.947899e-001, - 4.660378e-001, - 1.601254e+000, - 6.702359e-001, - // albedo 0, turbidity 10 - -1.841310e+000, - -9.781779e-001, - -4.610903e+000, - 4.824662e+000, - -5.100806e-002, - 6.463776e-001, - -6.377724e-006, - 2.216875e+000, - 8.618530e-001, - -2.376373e+000, - -1.108657e+000, - -1.489799e+001, - 1.546458e+001, - 4.091025e-002, - 9.761780e-002, - -1.048958e-002, - 2.165834e+000, - -1.609171e-001, - -4.710318e+000, - -2.261963e+000, - 6.947327e+000, - -1.034828e+001, - -1.325542e-001, - 7.508674e-001, - 2.247553e-001, - 2.873142e+000, - 1.297100e+000, - 2.163750e-001, - -1.944345e-001, - -2.437860e+000, - 1.011314e+001, - 4.450500e-001, - 3.111492e-001, - 2.751323e-001, - -1.627906e+000, - 2.531213e-001, - -1.258794e+000, - -3.524641e-001, - 8.425444e-001, - -1.085313e+001, - -1.154381e+000, - -4.638014e-001, - -2.781115e-003, - 4.344498e-001, - 8.507091e-001, - -1.018938e+000, - -1.804153e-001, - -6.354054e-002, - 1.573150e+001, - -4.386999e+000, - 6.211115e-001, - 5.294648e-001, - 1.580749e+000, - 6.586655e-001, - // albedo 1, turbidity 1 - -1.116416e+000, - -1.917524e-001, - -1.068233e+001, - 1.222221e+001, - -3.668978e-002, - 1.054022e+000, - 1.592132e-002, - 3.180583e+000, - 5.627370e-001, - -1.132341e+000, - -1.671286e-001, - 5.976499e+000, - -4.227366e+000, - -9.542489e-002, - 8.664938e-001, - 8.351793e-003, - 4.876068e+000, - 4.492779e-001, - -1.087635e+000, - -3.173679e-001, - 4.314407e-001, - 1.100555e+000, - -4.410057e-001, - 1.677253e+000, - -3.005925e-002, - -4.201249e+000, - 1.070902e+000, - -1.083031e+000, - -8.847705e-002, - 1.291773e+000, - 4.546776e-001, - 3.091894e-001, - 7.261760e-001, - 4.203659e-002, - 5.990615e+000, - 3.704756e-001, - -1.057899e+000, - -2.246706e-001, - 2.329563e+000, - -1.219656e+000, - -5.335260e+000, - 8.545378e-001, - -3.906209e-002, - -9.025499e-001, - 7.797348e-001, - -1.073305e+000, - -1.522553e-001, - 1.767063e+000, - 1.904280e+000, - -3.101673e+000, - 3.995856e-001, - 2.905192e-002, - 2.563977e+000, - 5.753067e-001, - // albedo 1, turbidity 2 - -1.113674e+000, - -1.759694e-001, - -9.754125e+000, - 1.087391e+001, - -3.841093e-002, - 9.524272e-001, - 5.680219e-002, - 4.227034e+000, - 6.029571e-001, - -1.126496e+000, - -1.680281e-001, - 5.332352e+000, - -4.575579e+000, - -6.761755e-002, - 3.295335e-001, - 1.194896e-001, - 5.570901e+000, - 4.536185e-001, - -1.103074e+000, - -2.681801e-001, - 6.571479e-002, - 2.396522e+000, - -4.551280e-001, - 2.466331e+000, - -1.232022e-001, - -3.023201e+000, - 1.086379e+000, - -1.053299e+000, - -2.697173e-002, - 8.379121e-001, - -9.681458e-001, - 5.890692e-001, - -4.872027e-001, - 2.936929e-001, - 7.510139e+000, - 3.079122e-001, - -1.079553e+000, - -2.710448e-001, - 2.462379e+000, - -3.713554e-001, - -8.534512e+000, - 1.828242e+000, - -1.686398e-001, - -1.961340e+000, - 8.941077e-001, - -1.069741e+000, - -1.396394e-001, - 1.657868e+000, - 3.236313e+000, - -2.706344e+000, - -2.948122e-001, - 1.314816e-001, - 2.868457e+000, - 5.413403e-001, - // albedo 1, turbidity 3 - -1.131649e+000, - -1.954455e-001, - -7.751595e+000, - 8.685861e+000, - -4.910871e-002, - 8.992952e-001, - 4.710143e-002, - 4.254818e+000, - 6.821116e-001, - -1.156689e+000, - -1.884324e-001, - 3.163519e+000, - -3.091522e+000, - -6.613927e-002, - -2.575883e-002, - 1.640065e-001, - 6.073643e+000, - 4.453468e-001, - -1.079224e+000, - -2.621389e-001, - 9.446437e-001, - 1.448479e+000, - -3.969384e-001, - 2.626638e+000, - -8.101186e-002, - -3.016355e+000, - 1.076295e+000, - -1.080832e+000, - 1.033057e-002, - -3.500156e-001, - -3.281419e-002, - 5.655512e-001, - -1.156742e+000, - 4.534710e-001, - 8.774122e+000, - 2.772869e-001, - -1.051202e+000, - -2.679975e-001, - 2.719109e+000, - -2.190316e+000, - -6.878798e+000, - 2.250481e+000, - -2.030252e-001, - -2.026527e+000, - 9.701096e-001, - -1.089849e+000, - -1.598589e-001, - 1.564748e+000, - 6.869187e+000, - -3.053670e+000, - -6.110435e-001, - 1.644472e-001, - 2.370452e+000, - 5.511770e-001, - // albedo 1, turbidity 4 - -1.171419e+000, - -2.429746e-001, - -8.991334e+000, - 9.571216e+000, - -2.772861e-002, - 6.688262e-001, - 7.683478e-002, - 3.785611e+000, - 6.347635e-001, - -1.228554e+000, - -2.917562e-001, - 2.753986e+000, - -2.491780e+000, - -4.663434e-002, - 3.118303e-001, - 7.546506e-002, - 4.463096e+000, - 5.955071e-001, - -1.093124e+000, - -2.447767e-001, - 9.097406e-001, - 5.448296e-001, - -2.957824e-001, - 2.024167e+000, - -5.152333e-004, - -1.069081e+000, - 9.369565e-001, - -1.056994e+000, - 1.569507e-002, - -8.217491e-001, - 1.870818e+000, - 7.061930e-001, - -1.483928e+000, - 5.978206e-001, - 6.864902e+000, - 3.673332e-001, - -1.054871e+000, - -2.758129e-001, - 2.712807e+000, - -5.950110e+000, - -6.554039e+000, - 2.447523e+000, - -1.895171e-001, - -1.454292e+000, - 9.131738e-001, - -1.100218e+000, - -1.746241e-001, - 1.438505e+000, - 1.115481e+001, - -3.266076e+000, - -8.837357e-001, - 1.970100e-001, - 1.991595e+000, - 5.907821e-001, - // albedo 1, turbidity 5 - -1.207267e+000, - -2.913610e-001, - -1.103767e+001, - 1.140724e+001, - -1.416800e-002, - 5.564047e-001, - 8.476262e-002, - 3.371255e+000, - 6.221335e-001, - -1.429698e+000, - -5.374218e-001, - 2.837524e+000, - -2.221936e+000, - -2.422337e-002, - 9.313758e-002, - 7.190250e-002, - 1.869022e+000, - 5.609035e-001, - -1.002274e+000, - -6.972810e-002, - 4.031308e-001, - -3.932997e-001, - -1.521923e-001, - 2.390646e+000, - -6.893990e-002, - 2.999661e+000, - 1.017843e+000, - -1.081168e+000, - -1.178666e-001, - -4.968080e-001, - 3.919299e+000, - 6.046866e-001, - -2.440615e+000, - 7.891538e-001, - 2.140835e+000, - 2.740470e-001, - -1.050727e+000, - -2.307688e-001, - 2.276396e+000, - -9.454407e+000, - -5.505176e+000, - 2.992620e+000, - -2.450942e-001, - 6.078372e-001, - 9.606765e-001, - -1.103752e+000, - -1.810202e-001, - 1.375044e+000, - 1.589095e+001, - -3.438954e+000, - -1.265669e+000, - 2.475172e-001, - 1.680768e+000, - 5.978056e-001, - // albedo 1, turbidity 6 - -1.244324e+000, - -3.378542e-001, - -1.111001e+001, - 1.137784e+001, - -7.896794e-003, - 4.808023e-001, - 9.249904e-002, - 3.025816e+000, - 5.880239e-001, - -1.593165e+000, - -7.027621e-001, - 2.220896e+000, - -1.437709e+000, - -1.534738e-002, - 6.286958e-002, - 6.644555e-002, - 1.091727e+000, - 5.470080e-001, - -9.136506e-001, - 1.344874e-002, - 7.772636e-001, - -1.209396e+000, - -1.408978e-001, - 2.433718e+000, - -1.041938e-001, - 3.791244e+000, - 1.037916e+000, - -1.134968e+000, - -1.803315e-001, - -9.267335e-001, - 4.576670e+000, - 6.851928e-001, - -2.805000e+000, - 8.687208e-001, - 1.161483e+000, - 2.571688e-001, - -1.017037e+000, - -2.053943e-001, - 2.361640e+000, - -9.887818e+000, - -5.122889e+000, - 3.287088e+000, - -2.594102e-001, - 8.578927e-001, - 9.592340e-001, - -1.118723e+000, - -1.934942e-001, - 1.226023e+000, - 1.674140e+001, - -3.277335e+000, - -1.629809e+000, - 2.765232e-001, - 1.637713e+000, - 6.113963e-001, - // albedo 1, turbidity 7 - -1.314779e+000, - -4.119915e-001, - -1.241150e+001, - 1.241578e+001, - 2.344284e-003, - 2.980837e-001, - 1.414613e-001, - 2.781731e+000, - 4.998556e-001, - -1.926199e+000, - -1.020038e+000, - 2.569200e+000, - -1.081159e+000, - -2.266833e-002, - 3.588668e-001, - 8.750078e-003, - -2.452171e-001, - 4.796758e-001, - -7.780002e-001, - 1.850647e-001, - 4.445456e-002, - -2.409297e+000, - -7.816346e-002, - 1.546790e+000, - -2.807227e-002, - 5.998176e+000, - 1.132396e+000, - -1.179326e+000, - -3.578330e-001, - -2.392933e-001, - 6.467883e+000, - 5.904596e-001, - -1.869975e+000, - 8.045839e-001, - -2.498121e+000, - 1.610633e-001, - -1.009956e+000, - -1.311896e-001, - 1.726577e+000, - -1.219356e+001, - -3.466239e+000, - 2.343602e+000, - -2.252205e-001, - 2.573681e+000, - 1.027109e+000, - -1.112460e+000, - -2.063093e-001, - 1.233051e+000, - 2.058946e+001, - -4.578074e+000, - -1.145643e+000, - 3.160192e-001, - 1.420159e+000, - 5.860212e-001, - // albedo 1, turbidity 8 - -1.371689e+000, - -4.914196e-001, - -1.076610e+001, - 1.107405e+001, - -1.485077e-002, - 5.936218e-001, - 3.685482e-002, - 2.599968e+000, - 6.002204e-001, - -2.436997e+000, - -1.377939e+000, - 2.130141e-002, - 1.079593e+000, - -1.796232e-002, - -3.933248e-002, - 1.610711e-001, - -6.901181e-001, - 1.206416e-001, - -8.743368e-001, - 7.331370e-002, - 8.734259e-001, - -3.743126e+000, - -3.151167e-002, - 1.297596e+000, - -7.634926e-002, - 6.532873e+000, - 1.435737e+000, - -9.810197e-001, - -3.521634e-001, - -2.855205e-001, - 7.134674e+000, - 6.839748e-001, - -1.394841e+000, - 6.952036e-001, - -4.633104e+000, - -2.173401e-002, - -1.122958e+000, - -1.691536e-001, - 1.382360e+000, - -1.102913e+001, - -2.608171e+000, - 1.865111e+000, - -1.345154e-001, - 3.112342e+000, - 1.094134e+000, - -1.075586e+000, - -2.077415e-001, - 1.171477e+000, - 1.793270e+001, - -4.656858e+000, - -1.036839e+000, - 3.338295e-001, - 1.042793e+000, - 5.739374e-001, - // albedo 1, turbidity 9 - -1.465871e+000, - -6.364486e-001, - -8.833718e+000, - 9.343650e+000, - -3.223600e-002, - 7.552848e-001, - -3.121341e-006, - 2.249164e+000, - 8.094662e-001, - -2.448924e+000, - -1.270878e+000, - -4.823703e+000, - 5.853058e+000, - -2.149127e-002, - 3.581132e-002, - -1.230276e-003, - 4.892553e-001, - -1.597657e-001, - -2.419809e+000, - -1.071337e+000, - 1.575648e+000, - -4.983580e+000, - 9.545185e-003, - 5.032615e-001, - 4.186266e-001, - 4.634147e+000, - 1.433517e+000, - -1.383278e-001, - -2.797095e-002, - -1.943067e-001, - 6.679623e+000, - 4.118280e-001, - -2.744289e-001, - -2.118722e-002, - -4.337025e+000, - 1.505072e-001, - -1.341872e+000, - -2.518572e-001, - 1.027009e+000, - -6.527103e+000, - -1.081271e+000, - 1.015465e+000, - 2.845789e-001, - 2.470371e+000, - 9.278120e-001, - -1.040640e+000, - -2.367454e-001, - 1.100744e+000, - 8.827253e+000, - -4.560794e+000, - -7.287017e-001, - 2.842503e-001, - 6.336593e-001, - 6.327335e-001, - // albedo 1, turbidity 10 - -1.877993e+000, - -1.025135e+000, - -4.311037e+000, - 4.715016e+000, - -4.711631e-002, - 6.335844e-001, - -7.665398e-006, - 1.788017e+000, - 9.001409e-001, - -2.281540e+000, - -1.137668e+000, - -1.036869e+001, - 1.136254e+001, - 1.961739e-002, - -9.836174e-002, - -6.734567e-003, - 1.320918e+000, - -2.400807e-001, - -4.904054e+000, - -2.315781e+000, - 5.735999e+000, - -8.626257e+000, - -1.255643e-001, - 1.545446e+000, - 1.396860e-001, - 2.972897e+000, - 1.429934e+000, - 4.077067e-001, - -1.833688e-001, - -2.450939e+000, - 9.119433e+000, - 4.505361e-001, - -1.340828e+000, - 3.973690e-001, - -1.785370e+000, - 9.628711e-002, - -1.296052e+000, - -3.250526e-001, - 1.813294e+000, - -1.031485e+001, - -1.388690e+000, - 1.239733e+000, - -8.989196e-002, - -3.389637e-001, - 9.639560e-001, - -1.062181e+000, - -2.423444e-001, - 7.577592e-001, - 1.566938e+001, - -4.462264e+000, - -5.742810e-001, - 3.262259e-001, - 9.461672e-001, - 6.232887e-001, + // albedo 0, turbidity 1 + -1.117001e+000, + -1.867262e-001, + -1.113505e+001, + 1.259865e+001, + -3.937339e-002, + 1.167571e+000, + 7.100686e-003, + 3.592678e+000, + 6.083296e-001, + -1.152006e+000, + -1.926669e-001, + 6.152049e+000, + -4.770802e+000, + -8.704701e-002, + 7.483626e-001, + 3.372718e-002, + 4.464592e+000, + 4.036546e-001, + -1.072371e+000, + -2.696632e-001, + 2.816168e-001, + 1.820571e+000, + -3.742666e-001, + 2.080607e+000, + -7.675295e-002, + -2.835366e+000, + 1.129329e+000, + -1.109935e+000, + -1.532764e-001, + 1.198787e+000, + -9.015183e-001, + 5.173015e-003, + 5.749178e-001, + 1.075633e-001, + 4.387949e+000, + 2.650413e-001, + -1.052297e+000, + -2.229452e-001, + 1.952347e+000, + 5.727205e-001, + -4.885070e+000, + 1.984016e+000, + -1.106197e-001, + -4.898361e-001, + 8.907873e-001, + -1.070108e+000, + -1.600465e-001, + 1.593886e+000, + -4.479251e-005, + -3.306541e+000, + 9.390193e-001, + 9.513168e-002, + 2.343583e+000, + 5.335404e-001, + // albedo 0, turbidity 2 + -1.113253e+000, + -1.699600e-001, + -1.038822e+001, + 1.137513e+001, + -4.040911e-002, + 1.037455e+000, + 4.991792e-002, + 4.801919e+000, + 6.302710e-001, + -1.135747e+000, + -1.678594e-001, + 4.970755e+000, + -4.430230e+000, + -6.657408e-002, + 3.636161e-001, + 1.558009e-001, + 6.013370e+000, + 3.959601e-001, + -1.095892e+000, + -2.732595e-001, + 7.666496e-001, + 1.350731e+000, + -4.401401e-001, + 2.470135e+000, + -1.707929e-001, + -3.260793e+000, + 1.170337e+000, + -1.073668e+000, + -2.603929e-002, + -1.944589e-001, + 4.575207e-001, + 6.878164e-001, + -1.390770e-001, + 3.690299e-001, + 7.885781e+000, + 1.877694e-001, + -1.070091e+000, + -2.798957e-001, + 2.338478e+000, + -2.647221e+000, + -7.387808e+000, + 2.329210e+000, + -1.644639e-001, + -2.003710e+000, + 9.874527e-001, + -1.067120e+000, + -1.418866e-001, + 1.254090e+000, + 6.053048e+000, + -2.918892e+000, + 5.322812e-001, + 1.613053e-001, + 3.018161e+000, + 5.274090e-001, + // albedo 0, turbidity 3 + -1.129483e+000, + -1.890619e-001, + -9.065101e+000, + 9.659923e+000, + -3.607819e-002, + 8.314359e-001, + 8.181661e-002, + 4.768868e+000, + 6.339777e-001, + -1.146420e+000, + -1.883579e-001, + 3.309173e+000, + -3.127882e+000, + -6.938176e-002, + 3.987113e-001, + 1.400581e-001, + 6.283042e+000, + 5.267076e-001, + -1.128348e+000, + -2.641305e-001, + 1.223176e+000, + 5.514952e-002, + -3.490649e-001, + 1.997784e+000, + -4.123709e-002, + -2.251251e+000, + 9.483466e-001, + -1.025820e+000, + 1.404690e-002, + -1.187406e+000, + 2.729900e+000, + 5.877588e-001, + -2.761140e-001, + 4.602633e-001, + 8.305125e+000, + 3.945001e-001, + -1.083957e+000, + -2.606679e-001, + 2.207108e+000, + -7.202803e+000, + -5.968103e+000, + 2.129455e+000, + -7.789512e-002, + -1.137688e+000, + 8.871769e-001, + -1.062465e+000, + -1.512189e-001, + 1.042881e+000, + 1.427839e+001, + -4.242214e+000, + 4.038100e-001, + 1.997780e-001, + 2.814449e+000, + 5.803196e-001, + // albedo 0, turbidity 4 + -1.175099e+000, + -2.410789e-001, + -1.108587e+001, + 1.133404e+001, + -1.819300e-002, + 6.772942e-001, + 9.605043e-002, + 4.231166e+000, + 6.239972e-001, + -1.224207e+000, + -2.883527e-001, + 3.002206e+000, + -2.649612e+000, + -4.795418e-002, + 4.984398e-001, + 3.251434e-002, + 4.851611e+000, + 6.551019e-001, + -1.136955e+000, + -2.423048e-001, + 1.058823e+000, + -2.489236e-001, + -2.462179e-001, + 1.933140e+000, + 9.106828e-002, + -1.905869e-001, + 8.171065e-001, + -1.014535e+000, + -8.262500e-003, + -1.448017e+000, + 2.295788e+000, + 3.510334e-001, + -1.477418e+000, + 5.432449e-001, + 5.762796e+000, + 4.908751e-001, + -1.070666e+000, + -2.379780e-001, + 1.844589e+000, + -5.442448e+000, + -4.012768e+000, + 2.945275e+000, + 9.854725e-003, + 8.455959e-002, + 8.145030e-001, + -1.071525e+000, + -1.777132e-001, + 8.076590e-001, + 9.925865e+000, + -3.324623e+000, + -6.367437e-001, + 2.844581e-001, + 2.248384e+000, + 6.544022e-001, + // albedo 0, turbidity 5 + -1.218818e+000, + -2.952382e-001, + -1.345975e+001, + 1.347153e+001, + -6.814585e-003, + 5.079068e-001, + 1.197230e-001, + 3.776949e+000, + 5.836961e-001, + -1.409868e+000, + -5.114330e-001, + 2.776539e+000, + -2.039001e+000, + -2.673769e-002, + 4.145288e-001, + 7.829342e-004, + 2.275883e+000, + 6.629691e-001, + -1.069151e+000, + -9.434247e-002, + 7.293972e-001, + -1.222473e+000, + -1.533461e-001, + 2.160357e+000, + 4.626837e-002, + 3.852415e+000, + 8.593570e-001, + -1.021306e+000, + -1.149551e-001, + -1.108414e+000, + 4.178343e+000, + 4.013665e-001, + -2.222814e+000, + 6.929462e-001, + 1.392652e+000, + 4.401662e-001, + -1.074251e+000, + -2.224002e-001, + 1.372356e+000, + -8.858704e+000, + -3.922660e+000, + 3.020018e+000, + -1.458724e-002, + 1.511186e+000, + 8.288064e-001, + -1.062048e+000, + -1.526582e-001, + 4.921067e-001, + 1.485522e+001, + -3.229936e+000, + -8.426604e-001, + 3.916243e-001, + 2.678994e+000, + 6.689264e-001, + // albedo 0, turbidity 6 + -1.257023e+000, + -3.364700e-001, + -1.527795e+001, + 1.504223e+001, + 2.717715e-003, + 3.029910e-001, + 1.636851e-001, + 3.561663e+000, + 5.283161e-001, + -1.635124e+000, + -7.329993e-001, + 3.523939e+000, + -2.566337e+000, + -1.902543e-002, + 5.505483e-001, + -6.242176e-002, + 1.065992e+000, + 6.654236e-001, + -9.295823e-001, + 4.845834e-002, + -2.992990e-001, + -2.001327e-001, + -8.019339e-002, + 1.807806e+000, + 9.020277e-002, + 5.095372e+000, + 8.639936e-001, + -1.093740e+000, + -2.148608e-001, + -5.216240e-001, + 2.119777e+000, + 9.506454e-002, + -1.831439e+000, + 6.961204e-001, + 1.102084e-001, + 4.384319e-001, + -1.044181e+000, + -1.849257e-001, + 9.071246e-001, + -4.648901e+000, + -2.279385e+000, + 2.356502e+000, + -4.169147e-002, + 1.932557e+000, + 8.296550e-001, + -1.061451e+000, + -1.458745e-001, + 2.952267e-001, + 8.967214e+000, + -3.726228e+000, + -5.022316e-001, + 5.684877e-001, + 3.102347e+000, + 6.658443e-001, + // albedo 0, turbidity 7 + -1.332391e+000, + -4.127769e-001, + -9.328643e+000, + 9.046194e+000, + 3.457775e-003, + 3.377425e-001, + 1.530909e-001, + 3.301209e+000, + 4.997917e-001, + -1.932002e+000, + -9.947777e-001, + -2.042329e+000, + 3.586940e+000, + -5.642182e-002, + 8.130478e-001, + -8.195988e-002, + 1.118294e-001, + 5.617231e-001, + -8.707374e-001, + 1.286999e-001, + 1.820054e+000, + -4.674706e+000, + 3.317471e-003, + 5.919018e-001, + 1.975278e-001, + 6.686519e+000, + 9.631727e-001, + -1.070378e+000, + -3.030579e-001, + -9.041938e-001, + 6.200201e+000, + 1.232207e-001, + -3.650628e-001, + 5.029403e-001, + -2.903162e+000, + 3.811408e-001, + -1.063035e+000, + -1.637545e-001, + 5.853072e-001, + -7.889906e+000, + -1.200641e+000, + 1.035018e+000, + 1.192093e-001, + 3.267054e+000, + 8.416151e-001, + -1.053655e+000, + -1.562286e-001, + 2.423683e-001, + 1.128575e+001, + -4.363262e+000, + -7.314160e-002, + 5.642088e-001, + 2.514023e+000, + 6.670457e-001, + // albedo 0, turbidity 8 + -1.366112e+000, + -4.718287e-001, + -7.876222e+000, + 7.746900e+000, + -9.182309e-003, + 4.716076e-001, + 8.320252e-002, + 3.165603e+000, + 5.392334e-001, + -2.468204e+000, + -1.336340e+000, + -5.386723e+000, + 7.072672e+000, + -8.329266e-002, + 8.636876e-001, + -1.978177e-002, + -1.326218e-001, + 2.979222e-001, + -9.653522e-001, + -2.373416e-002, + 1.810250e+000, + -6.467262e+000, + 1.410706e-001, + -4.753717e-001, + 3.003095e-001, + 6.551163e+000, + 1.151083e+000, + -8.943186e-001, + -2.487152e-001, + -2.308960e-001, + 8.512648e+000, + 1.298402e-001, + 1.034705e+000, + 2.303509e-001, + -3.924095e+000, + 2.982717e-001, + -1.146999e+000, + -2.318784e-001, + 8.992419e-002, + -9.933614e+000, + -8.860920e-001, + -3.071656e-002, + 2.852012e-001, + 3.046199e+000, + 8.599001e-001, + -1.032399e+000, + -1.645145e-001, + 2.683599e-001, + 1.327701e+001, + -4.407670e+000, + 7.709869e-002, + 4.951727e-001, + 1.957277e+000, + 6.630943e-001, + // albedo 0, turbidity 9 + -1.469070e+000, + -6.135092e-001, + -6.506263e+000, + 6.661315e+000, + -3.835383e-002, + 7.150413e-001, + 7.784318e-003, + 2.820577e+000, + 6.756784e-001, + -2.501583e+000, + -1.247404e+000, + -1.523462e+001, + 1.633191e+001, + -1.204803e-002, + 5.896471e-001, + -2.002023e-002, + 1.144647e+000, + 6.177874e-002, + -2.438672e+000, + -1.127291e+000, + 5.731172e+000, + -1.021350e+001, + 6.165610e-002, + -7.752641e-001, + 4.708254e-001, + 4.176847e+000, + 1.200881e+000, + -1.513427e-001, + 9.792731e-002, + -1.612349e+000, + 9.814289e+000, + 5.188921e-002, + 1.716403e+000, + -7.039255e-002, + -2.815115e+000, + 3.291874e-001, + -1.318511e+000, + -3.650554e-001, + 4.221268e-001, + -9.294529e+000, + -4.397520e-002, + -8.100625e-001, + 3.742719e-001, + 1.834166e+000, + 8.223450e-001, + -1.016009e+000, + -1.820264e-001, + 1.278426e-001, + 1.182696e+001, + -4.801528e+000, + 4.947899e-001, + 4.660378e-001, + 1.601254e+000, + 6.702359e-001, + // albedo 0, turbidity 10 + -1.841310e+000, + -9.781779e-001, + -4.610903e+000, + 4.824662e+000, + -5.100806e-002, + 6.463776e-001, + -6.377724e-006, + 2.216875e+000, + 8.618530e-001, + -2.376373e+000, + -1.108657e+000, + -1.489799e+001, + 1.546458e+001, + 4.091025e-002, + 9.761780e-002, + -1.048958e-002, + 2.165834e+000, + -1.609171e-001, + -4.710318e+000, + -2.261963e+000, + 6.947327e+000, + -1.034828e+001, + -1.325542e-001, + 7.508674e-001, + 2.247553e-001, + 2.873142e+000, + 1.297100e+000, + 2.163750e-001, + -1.944345e-001, + -2.437860e+000, + 1.011314e+001, + 4.450500e-001, + 3.111492e-001, + 2.751323e-001, + -1.627906e+000, + 2.531213e-001, + -1.258794e+000, + -3.524641e-001, + 8.425444e-001, + -1.085313e+001, + -1.154381e+000, + -4.638014e-001, + -2.781115e-003, + 4.344498e-001, + 8.507091e-001, + -1.018938e+000, + -1.804153e-001, + -6.354054e-002, + 1.573150e+001, + -4.386999e+000, + 6.211115e-001, + 5.294648e-001, + 1.580749e+000, + 6.586655e-001, + // albedo 1, turbidity 1 + -1.116416e+000, + -1.917524e-001, + -1.068233e+001, + 1.222221e+001, + -3.668978e-002, + 1.054022e+000, + 1.592132e-002, + 3.180583e+000, + 5.627370e-001, + -1.132341e+000, + -1.671286e-001, + 5.976499e+000, + -4.227366e+000, + -9.542489e-002, + 8.664938e-001, + 8.351793e-003, + 4.876068e+000, + 4.492779e-001, + -1.087635e+000, + -3.173679e-001, + 4.314407e-001, + 1.100555e+000, + -4.410057e-001, + 1.677253e+000, + -3.005925e-002, + -4.201249e+000, + 1.070902e+000, + -1.083031e+000, + -8.847705e-002, + 1.291773e+000, + 4.546776e-001, + 3.091894e-001, + 7.261760e-001, + 4.203659e-002, + 5.990615e+000, + 3.704756e-001, + -1.057899e+000, + -2.246706e-001, + 2.329563e+000, + -1.219656e+000, + -5.335260e+000, + 8.545378e-001, + -3.906209e-002, + -9.025499e-001, + 7.797348e-001, + -1.073305e+000, + -1.522553e-001, + 1.767063e+000, + 1.904280e+000, + -3.101673e+000, + 3.995856e-001, + 2.905192e-002, + 2.563977e+000, + 5.753067e-001, + // albedo 1, turbidity 2 + -1.113674e+000, + -1.759694e-001, + -9.754125e+000, + 1.087391e+001, + -3.841093e-002, + 9.524272e-001, + 5.680219e-002, + 4.227034e+000, + 6.029571e-001, + -1.126496e+000, + -1.680281e-001, + 5.332352e+000, + -4.575579e+000, + -6.761755e-002, + 3.295335e-001, + 1.194896e-001, + 5.570901e+000, + 4.536185e-001, + -1.103074e+000, + -2.681801e-001, + 6.571479e-002, + 2.396522e+000, + -4.551280e-001, + 2.466331e+000, + -1.232022e-001, + -3.023201e+000, + 1.086379e+000, + -1.053299e+000, + -2.697173e-002, + 8.379121e-001, + -9.681458e-001, + 5.890692e-001, + -4.872027e-001, + 2.936929e-001, + 7.510139e+000, + 3.079122e-001, + -1.079553e+000, + -2.710448e-001, + 2.462379e+000, + -3.713554e-001, + -8.534512e+000, + 1.828242e+000, + -1.686398e-001, + -1.961340e+000, + 8.941077e-001, + -1.069741e+000, + -1.396394e-001, + 1.657868e+000, + 3.236313e+000, + -2.706344e+000, + -2.948122e-001, + 1.314816e-001, + 2.868457e+000, + 5.413403e-001, + // albedo 1, turbidity 3 + -1.131649e+000, + -1.954455e-001, + -7.751595e+000, + 8.685861e+000, + -4.910871e-002, + 8.992952e-001, + 4.710143e-002, + 4.254818e+000, + 6.821116e-001, + -1.156689e+000, + -1.884324e-001, + 3.163519e+000, + -3.091522e+000, + -6.613927e-002, + -2.575883e-002, + 1.640065e-001, + 6.073643e+000, + 4.453468e-001, + -1.079224e+000, + -2.621389e-001, + 9.446437e-001, + 1.448479e+000, + -3.969384e-001, + 2.626638e+000, + -8.101186e-002, + -3.016355e+000, + 1.076295e+000, + -1.080832e+000, + 1.033057e-002, + -3.500156e-001, + -3.281419e-002, + 5.655512e-001, + -1.156742e+000, + 4.534710e-001, + 8.774122e+000, + 2.772869e-001, + -1.051202e+000, + -2.679975e-001, + 2.719109e+000, + -2.190316e+000, + -6.878798e+000, + 2.250481e+000, + -2.030252e-001, + -2.026527e+000, + 9.701096e-001, + -1.089849e+000, + -1.598589e-001, + 1.564748e+000, + 6.869187e+000, + -3.053670e+000, + -6.110435e-001, + 1.644472e-001, + 2.370452e+000, + 5.511770e-001, + // albedo 1, turbidity 4 + -1.171419e+000, + -2.429746e-001, + -8.991334e+000, + 9.571216e+000, + -2.772861e-002, + 6.688262e-001, + 7.683478e-002, + 3.785611e+000, + 6.347635e-001, + -1.228554e+000, + -2.917562e-001, + 2.753986e+000, + -2.491780e+000, + -4.663434e-002, + 3.118303e-001, + 7.546506e-002, + 4.463096e+000, + 5.955071e-001, + -1.093124e+000, + -2.447767e-001, + 9.097406e-001, + 5.448296e-001, + -2.957824e-001, + 2.024167e+000, + -5.152333e-004, + -1.069081e+000, + 9.369565e-001, + -1.056994e+000, + 1.569507e-002, + -8.217491e-001, + 1.870818e+000, + 7.061930e-001, + -1.483928e+000, + 5.978206e-001, + 6.864902e+000, + 3.673332e-001, + -1.054871e+000, + -2.758129e-001, + 2.712807e+000, + -5.950110e+000, + -6.554039e+000, + 2.447523e+000, + -1.895171e-001, + -1.454292e+000, + 9.131738e-001, + -1.100218e+000, + -1.746241e-001, + 1.438505e+000, + 1.115481e+001, + -3.266076e+000, + -8.837357e-001, + 1.970100e-001, + 1.991595e+000, + 5.907821e-001, + // albedo 1, turbidity 5 + -1.207267e+000, + -2.913610e-001, + -1.103767e+001, + 1.140724e+001, + -1.416800e-002, + 5.564047e-001, + 8.476262e-002, + 3.371255e+000, + 6.221335e-001, + -1.429698e+000, + -5.374218e-001, + 2.837524e+000, + -2.221936e+000, + -2.422337e-002, + 9.313758e-002, + 7.190250e-002, + 1.869022e+000, + 5.609035e-001, + -1.002274e+000, + -6.972810e-002, + 4.031308e-001, + -3.932997e-001, + -1.521923e-001, + 2.390646e+000, + -6.893990e-002, + 2.999661e+000, + 1.017843e+000, + -1.081168e+000, + -1.178666e-001, + -4.968080e-001, + 3.919299e+000, + 6.046866e-001, + -2.440615e+000, + 7.891538e-001, + 2.140835e+000, + 2.740470e-001, + -1.050727e+000, + -2.307688e-001, + 2.276396e+000, + -9.454407e+000, + -5.505176e+000, + 2.992620e+000, + -2.450942e-001, + 6.078372e-001, + 9.606765e-001, + -1.103752e+000, + -1.810202e-001, + 1.375044e+000, + 1.589095e+001, + -3.438954e+000, + -1.265669e+000, + 2.475172e-001, + 1.680768e+000, + 5.978056e-001, + // albedo 1, turbidity 6 + -1.244324e+000, + -3.378542e-001, + -1.111001e+001, + 1.137784e+001, + -7.896794e-003, + 4.808023e-001, + 9.249904e-002, + 3.025816e+000, + 5.880239e-001, + -1.593165e+000, + -7.027621e-001, + 2.220896e+000, + -1.437709e+000, + -1.534738e-002, + 6.286958e-002, + 6.644555e-002, + 1.091727e+000, + 5.470080e-001, + -9.136506e-001, + 1.344874e-002, + 7.772636e-001, + -1.209396e+000, + -1.408978e-001, + 2.433718e+000, + -1.041938e-001, + 3.791244e+000, + 1.037916e+000, + -1.134968e+000, + -1.803315e-001, + -9.267335e-001, + 4.576670e+000, + 6.851928e-001, + -2.805000e+000, + 8.687208e-001, + 1.161483e+000, + 2.571688e-001, + -1.017037e+000, + -2.053943e-001, + 2.361640e+000, + -9.887818e+000, + -5.122889e+000, + 3.287088e+000, + -2.594102e-001, + 8.578927e-001, + 9.592340e-001, + -1.118723e+000, + -1.934942e-001, + 1.226023e+000, + 1.674140e+001, + -3.277335e+000, + -1.629809e+000, + 2.765232e-001, + 1.637713e+000, + 6.113963e-001, + // albedo 1, turbidity 7 + -1.314779e+000, + -4.119915e-001, + -1.241150e+001, + 1.241578e+001, + 2.344284e-003, + 2.980837e-001, + 1.414613e-001, + 2.781731e+000, + 4.998556e-001, + -1.926199e+000, + -1.020038e+000, + 2.569200e+000, + -1.081159e+000, + -2.266833e-002, + 3.588668e-001, + 8.750078e-003, + -2.452171e-001, + 4.796758e-001, + -7.780002e-001, + 1.850647e-001, + 4.445456e-002, + -2.409297e+000, + -7.816346e-002, + 1.546790e+000, + -2.807227e-002, + 5.998176e+000, + 1.132396e+000, + -1.179326e+000, + -3.578330e-001, + -2.392933e-001, + 6.467883e+000, + 5.904596e-001, + -1.869975e+000, + 8.045839e-001, + -2.498121e+000, + 1.610633e-001, + -1.009956e+000, + -1.311896e-001, + 1.726577e+000, + -1.219356e+001, + -3.466239e+000, + 2.343602e+000, + -2.252205e-001, + 2.573681e+000, + 1.027109e+000, + -1.112460e+000, + -2.063093e-001, + 1.233051e+000, + 2.058946e+001, + -4.578074e+000, + -1.145643e+000, + 3.160192e-001, + 1.420159e+000, + 5.860212e-001, + // albedo 1, turbidity 8 + -1.371689e+000, + -4.914196e-001, + -1.076610e+001, + 1.107405e+001, + -1.485077e-002, + 5.936218e-001, + 3.685482e-002, + 2.599968e+000, + 6.002204e-001, + -2.436997e+000, + -1.377939e+000, + 2.130141e-002, + 1.079593e+000, + -1.796232e-002, + -3.933248e-002, + 1.610711e-001, + -6.901181e-001, + 1.206416e-001, + -8.743368e-001, + 7.331370e-002, + 8.734259e-001, + -3.743126e+000, + -3.151167e-002, + 1.297596e+000, + -7.634926e-002, + 6.532873e+000, + 1.435737e+000, + -9.810197e-001, + -3.521634e-001, + -2.855205e-001, + 7.134674e+000, + 6.839748e-001, + -1.394841e+000, + 6.952036e-001, + -4.633104e+000, + -2.173401e-002, + -1.122958e+000, + -1.691536e-001, + 1.382360e+000, + -1.102913e+001, + -2.608171e+000, + 1.865111e+000, + -1.345154e-001, + 3.112342e+000, + 1.094134e+000, + -1.075586e+000, + -2.077415e-001, + 1.171477e+000, + 1.793270e+001, + -4.656858e+000, + -1.036839e+000, + 3.338295e-001, + 1.042793e+000, + 5.739374e-001, + // albedo 1, turbidity 9 + -1.465871e+000, + -6.364486e-001, + -8.833718e+000, + 9.343650e+000, + -3.223600e-002, + 7.552848e-001, + -3.121341e-006, + 2.249164e+000, + 8.094662e-001, + -2.448924e+000, + -1.270878e+000, + -4.823703e+000, + 5.853058e+000, + -2.149127e-002, + 3.581132e-002, + -1.230276e-003, + 4.892553e-001, + -1.597657e-001, + -2.419809e+000, + -1.071337e+000, + 1.575648e+000, + -4.983580e+000, + 9.545185e-003, + 5.032615e-001, + 4.186266e-001, + 4.634147e+000, + 1.433517e+000, + -1.383278e-001, + -2.797095e-002, + -1.943067e-001, + 6.679623e+000, + 4.118280e-001, + -2.744289e-001, + -2.118722e-002, + -4.337025e+000, + 1.505072e-001, + -1.341872e+000, + -2.518572e-001, + 1.027009e+000, + -6.527103e+000, + -1.081271e+000, + 1.015465e+000, + 2.845789e-001, + 2.470371e+000, + 9.278120e-001, + -1.040640e+000, + -2.367454e-001, + 1.100744e+000, + 8.827253e+000, + -4.560794e+000, + -7.287017e-001, + 2.842503e-001, + 6.336593e-001, + 6.327335e-001, + // albedo 1, turbidity 10 + -1.877993e+000, + -1.025135e+000, + -4.311037e+000, + 4.715016e+000, + -4.711631e-002, + 6.335844e-001, + -7.665398e-006, + 1.788017e+000, + 9.001409e-001, + -2.281540e+000, + -1.137668e+000, + -1.036869e+001, + 1.136254e+001, + 1.961739e-002, + -9.836174e-002, + -6.734567e-003, + 1.320918e+000, + -2.400807e-001, + -4.904054e+000, + -2.315781e+000, + 5.735999e+000, + -8.626257e+000, + -1.255643e-001, + 1.545446e+000, + 1.396860e-001, + 2.972897e+000, + 1.429934e+000, + 4.077067e-001, + -1.833688e-001, + -2.450939e+000, + 9.119433e+000, + 4.505361e-001, + -1.340828e+000, + 3.973690e-001, + -1.785370e+000, + 9.628711e-002, + -1.296052e+000, + -3.250526e-001, + 1.813294e+000, + -1.031485e+001, + -1.388690e+000, + 1.239733e+000, + -8.989196e-002, + -3.389637e-001, + 9.639560e-001, + -1.062181e+000, + -2.423444e-001, + 7.577592e-001, + 1.566938e+001, + -4.462264e+000, + -5.742810e-001, + 3.262259e-001, + 9.461672e-001, + 6.232887e-001, }; -static const double datasetXYZRad1[] = -{ - // albedo 0, turbidity 1 - 1.560219e+000, - 1.417388e+000, - 1.206927e+000, - 1.091949e+001, - 5.931416e+000, - 7.304788e+000, - // albedo 0, turbidity 2 - 1.533049e+000, - 1.560532e+000, - 3.685059e-001, - 1.355040e+001, - 5.543711e+000, - 7.792189e+000, - // albedo 0, turbidity 3 - 1.471043e+000, - 1.746088e+000, - -9.299697e-001, - 1.720362e+001, - 5.473384e+000, - 8.336416e+000, - // albedo 0, turbidity 4 - 1.355991e+000, - 2.109348e+000, - -3.295855e+000, - 2.264843e+001, - 5.454607e+000, - 9.304656e+000, - // albedo 0, turbidity 5 - 1.244963e+000, - 2.547533e+000, - -5.841485e+000, - 2.756879e+001, - 5.576104e+000, - 1.043287e+001, - // albedo 0, turbidity 6 - 1.175532e+000, - 2.784634e+000, - -7.212225e+000, - 2.975347e+001, - 6.472980e+000, - 1.092331e+001, - // albedo 0, turbidity 7 - 1.082973e+000, - 3.118094e+000, - -8.934293e+000, - 3.186879e+001, - 8.473885e+000, - 1.174019e+001, - // albedo 0, turbidity 8 - 9.692500e-001, - 3.349574e+000, - -1.003810e+001, - 3.147654e+001, - 1.338931e+001, - 1.272547e+001, - // albedo 0, turbidity 9 - 8.547044e-001, - 3.151538e+000, - -9.095567e+000, - 2.554995e+001, - 2.273219e+001, - 1.410398e+001, - // albedo 0, turbidity 10 - 7.580340e-001, - 2.311153e+000, - -5.170814e+000, - 1.229669e+001, - 3.686529e+001, - 1.598882e+001, - // albedo 1, turbidity 1 - 1.664273e+000, - 1.574468e+000, - 1.422078e+000, - 9.768247e+000, - 1.447338e+001, - 1.644988e+001, - // albedo 1, turbidity 2 - 1.638295e+000, - 1.719586e+000, - 5.786675e-001, - 1.239846e+001, - 1.415419e+001, - 1.728605e+001, - // albedo 1, turbidity 3 - 1.572623e+000, - 1.921559e+000, - -7.714802e-001, - 1.609246e+001, - 1.420954e+001, - 1.825908e+001, - // albedo 1, turbidity 4 - 1.468395e+000, - 2.211970e+000, - -2.845869e+000, - 2.075027e+001, - 1.524822e+001, - 1.937622e+001, - // albedo 1, turbidity 5 - 1.355047e+000, - 2.556469e+000, - -4.960920e+000, - 2.460237e+001, - 1.648360e+001, - 2.065648e+001, - // albedo 1, turbidity 6 - 1.291642e+000, - 2.742036e+000, - -6.061967e+000, - 2.602002e+001, - 1.819144e+001, - 2.116712e+001, - // albedo 1, turbidity 7 - 1.194565e+000, - 2.972120e+000, - -7.295779e+000, - 2.691805e+001, - 2.124880e+001, - 2.201819e+001, - // albedo 1, turbidity 8 - 1.083631e+000, - 3.047021e+000, - -7.766096e+000, - 2.496261e+001, - 2.744264e+001, - 2.291875e+001, - // albedo 1, turbidity 9 - 9.707994e-001, - 2.736459e+000, - -6.308284e+000, - 1.760860e+001, - 3.776291e+001, - 2.392150e+001, - // albedo 1, turbidity 10 - 8.574294e-001, - 1.865155e+000, - -2.364707e+000, - 4.337793e+000, - 5.092831e+001, - 2.523432e+001, +static const double datasetXYZRad1[] = { + // albedo 0, turbidity 1 + 1.560219e+000, + 1.417388e+000, + 1.206927e+000, + 1.091949e+001, + 5.931416e+000, + 7.304788e+000, + // albedo 0, turbidity 2 + 1.533049e+000, + 1.560532e+000, + 3.685059e-001, + 1.355040e+001, + 5.543711e+000, + 7.792189e+000, + // albedo 0, turbidity 3 + 1.471043e+000, + 1.746088e+000, + -9.299697e-001, + 1.720362e+001, + 5.473384e+000, + 8.336416e+000, + // albedo 0, turbidity 4 + 1.355991e+000, + 2.109348e+000, + -3.295855e+000, + 2.264843e+001, + 5.454607e+000, + 9.304656e+000, + // albedo 0, turbidity 5 + 1.244963e+000, + 2.547533e+000, + -5.841485e+000, + 2.756879e+001, + 5.576104e+000, + 1.043287e+001, + // albedo 0, turbidity 6 + 1.175532e+000, + 2.784634e+000, + -7.212225e+000, + 2.975347e+001, + 6.472980e+000, + 1.092331e+001, + // albedo 0, turbidity 7 + 1.082973e+000, + 3.118094e+000, + -8.934293e+000, + 3.186879e+001, + 8.473885e+000, + 1.174019e+001, + // albedo 0, turbidity 8 + 9.692500e-001, + 3.349574e+000, + -1.003810e+001, + 3.147654e+001, + 1.338931e+001, + 1.272547e+001, + // albedo 0, turbidity 9 + 8.547044e-001, + 3.151538e+000, + -9.095567e+000, + 2.554995e+001, + 2.273219e+001, + 1.410398e+001, + // albedo 0, turbidity 10 + 7.580340e-001, + 2.311153e+000, + -5.170814e+000, + 1.229669e+001, + 3.686529e+001, + 1.598882e+001, + // albedo 1, turbidity 1 + 1.664273e+000, + 1.574468e+000, + 1.422078e+000, + 9.768247e+000, + 1.447338e+001, + 1.644988e+001, + // albedo 1, turbidity 2 + 1.638295e+000, + 1.719586e+000, + 5.786675e-001, + 1.239846e+001, + 1.415419e+001, + 1.728605e+001, + // albedo 1, turbidity 3 + 1.572623e+000, + 1.921559e+000, + -7.714802e-001, + 1.609246e+001, + 1.420954e+001, + 1.825908e+001, + // albedo 1, turbidity 4 + 1.468395e+000, + 2.211970e+000, + -2.845869e+000, + 2.075027e+001, + 1.524822e+001, + 1.937622e+001, + // albedo 1, turbidity 5 + 1.355047e+000, + 2.556469e+000, + -4.960920e+000, + 2.460237e+001, + 1.648360e+001, + 2.065648e+001, + // albedo 1, turbidity 6 + 1.291642e+000, + 2.742036e+000, + -6.061967e+000, + 2.602002e+001, + 1.819144e+001, + 2.116712e+001, + // albedo 1, turbidity 7 + 1.194565e+000, + 2.972120e+000, + -7.295779e+000, + 2.691805e+001, + 2.124880e+001, + 2.201819e+001, + // albedo 1, turbidity 8 + 1.083631e+000, + 3.047021e+000, + -7.766096e+000, + 2.496261e+001, + 2.744264e+001, + 2.291875e+001, + // albedo 1, turbidity 9 + 9.707994e-001, + 2.736459e+000, + -6.308284e+000, + 1.760860e+001, + 3.776291e+001, + 2.392150e+001, + // albedo 1, turbidity 10 + 8.574294e-001, + 1.865155e+000, + -2.364707e+000, + 4.337793e+000, + 5.092831e+001, + 2.523432e+001, }; -static const double datasetXYZ2[] = -{ - // albedo 0, turbidity 1 - -1.127942e+000, - -1.905548e-001, - -1.252356e+001, - 1.375799e+001, - -3.624732e-002, - 1.055453e+000, - 1.385036e-002, - 4.176970e+000, - 5.928345e-001, - -1.155260e+000, - -1.778135e-001, - 6.216056e+000, - -5.254116e+000, - -8.787445e-002, - 8.434621e-001, - 4.025734e-002, - 6.195322e+000, - 3.111856e-001, - -1.125624e+000, - -3.217593e-001, - 5.043919e-001, - 1.686284e+000, - -3.536071e-001, - 1.476321e+000, - -7.899019e-002, - -4.522531e+000, - 1.271691e+000, - -1.081801e+000, - -1.033234e-001, - 9.995550e-001, - 7.482946e-003, - -6.776018e-002, - 1.463141e+000, - 9.492021e-002, - 5.612723e+000, - 1.298846e-001, - -1.075320e+000, - -2.402711e-001, - 2.141284e+000, - -1.203359e+000, - -4.945188e+000, - 1.437221e+000, - -8.096750e-002, - -1.028378e+000, - 1.004164e+000, - -1.073337e+000, - -1.516517e-001, - 1.639379e+000, - 2.304669e+000, - -3.214244e+000, - 1.286245e+000, - 5.613957e-002, - 2.480902e+000, - 4.999363e-001, - // albedo 0, turbidity 2 - -1.128399e+000, - -1.857793e-001, - -1.089863e+001, - 1.172984e+001, - -3.768099e-002, - 9.439285e-001, - 4.869335e-002, - 4.845114e+000, - 6.119211e-001, - -1.114002e+000, - -1.399280e-001, - 4.963800e+000, - -4.685500e+000, - -7.780879e-002, - 4.049736e-001, - 1.586297e-001, - 7.770264e+000, - 3.449006e-001, - -1.185472e+000, - -3.403543e-001, - 6.588322e-001, - 1.133713e+000, - -4.118674e-001, - 2.061191e+000, - -1.882768e-001, - -4.372586e+000, - 1.223530e+000, - -1.002272e+000, - 2.000703e-002, - 7.073269e-002, - 1.485075e+000, - 5.005589e-001, - 4.301494e-001, - 3.626541e-001, - 7.921098e+000, - 1.574766e-001, - -1.121006e+000, - -3.007777e-001, - 2.242051e+000, - -4.571561e+000, - -7.761071e+000, - 2.053404e+000, - -1.524018e-001, - -1.886162e+000, - 1.018208e+000, - -1.058864e+000, - -1.358673e-001, - 1.389667e+000, - 8.633409e+000, - -3.437249e+000, - 7.295429e-001, - 1.514700e-001, - 2.842513e+000, - 5.014325e-001, - // albedo 0, turbidity 3 - -1.144464e+000, - -2.043799e-001, - -1.020188e+001, - 1.071247e+001, - -3.256693e-002, - 7.860205e-001, - 6.872719e-002, - 4.824771e+000, - 6.259836e-001, - -1.170104e+000, - -2.118626e-001, - 4.391405e+000, - -4.198900e+000, - -7.111559e-002, - 3.890442e-001, - 1.024831e-001, - 6.282535e+000, - 5.365688e-001, - -1.129171e+000, - -2.552880e-001, - 2.238298e-001, - 7.314295e-001, - -3.562730e-001, - 1.881931e+000, - -3.078716e-002, - -1.039120e+000, - 9.096301e-001, - -1.042294e+000, - 4.450203e-003, - -5.116033e-001, - 2.627589e+000, - 6.098996e-001, - -1.264638e-001, - 4.325281e-001, - 7.080503e+000, - 4.583646e-001, - -1.082293e+000, - -2.723056e-001, - 2.065076e+000, - -8.143133e+000, - -7.892212e+000, - 2.142231e+000, - -7.106240e-002, - -1.122398e+000, - 8.338505e-001, - -1.071715e+000, - -1.426568e-001, - 1.095351e+000, - 1.729783e+001, - -3.851931e+000, - 4.360514e-001, - 2.114440e-001, - 2.970832e+000, - 5.944389e-001, - // albedo 0, turbidity 4 - -1.195909e+000, - -2.590449e-001, - -1.191037e+001, - 1.207947e+001, - -1.589842e-002, - 6.297846e-001, - 9.054772e-002, - 4.285959e+000, - 5.933752e-001, - -1.245763e+000, - -3.316637e-001, - 4.293660e+000, - -3.694011e+000, - -4.699947e-002, - 4.843684e-001, - 2.130425e-002, - 4.097549e+000, - 6.530809e-001, - -1.148742e+000, - -1.902509e-001, - -2.393233e-001, - -2.441254e-001, - -2.610918e-001, - 1.846988e+000, - 3.532866e-002, - 2.660106e+000, - 8.358294e-001, - -1.016080e+000, - -7.444960e-002, - -5.053436e-001, - 4.388855e+000, - 6.054987e-001, - -1.208300e+000, - 5.817215e-001, - 2.543570e+000, - 4.726568e-001, - -1.072027e+000, - -2.101440e-001, - 1.518378e+000, - -1.060119e+001, - -6.016546e+000, - 2.649475e+000, - -5.166992e-002, - 1.571269e+000, - 8.344622e-001, - -1.072365e+000, - -1.511201e-001, - 7.478010e-001, - 1.900732e+001, - -3.950387e+000, - -3.473907e-001, - 3.797211e-001, - 2.782949e+000, - 6.296808e-001, - // albedo 0, turbidity 5 - -1.239423e+000, - -3.136289e-001, - -1.351100e+001, - 1.349468e+001, - -7.070423e-003, - 5.012315e-001, - 1.106008e-001, - 3.803619e+000, - 5.577948e-001, - -1.452524e+000, - -5.676944e-001, - 2.993153e+000, - -2.277288e+000, - -2.168954e-002, - 3.056720e-001, - 1.152338e-002, - 1.852697e+000, - 6.427228e-001, - -1.061421e+000, - -4.590521e-002, - 6.057022e-001, - -1.096835e+000, - -1.504952e-001, - 2.344921e+000, - -5.491832e-002, - 5.268322e+000, - 9.082253e-001, - -1.042373e+000, - -1.769498e-001, - -1.075388e+000, - 3.831712e+000, - 3.154140e-001, - -2.416458e+000, - 7.909032e-001, - -1.492892e-002, - 3.854049e-001, - -1.064159e+000, - -1.892684e-001, - 1.438685e+000, - -8.166362e+000, - -3.616364e+000, - 3.275206e+000, - -1.203825e-001, - 2.039491e+000, - 8.688057e-001, - -1.070120e+000, - -1.569508e-001, - 4.124760e-001, - 1.399683e+001, - -3.547085e+000, - -1.046326e+000, - 4.973825e-001, - 2.791231e+000, - 6.503286e-001, - // albedo 0, turbidity 6 - -1.283579e+000, - -3.609518e-001, - -1.335397e+001, - 1.315248e+001, - -4.431938e-004, - 3.769526e-001, - 1.429824e-001, - 3.573613e+000, - 4.998696e-001, - -1.657952e+000, - -7.627948e-001, - 1.958222e+000, - -7.949816e-001, - -2.882837e-002, - 5.356149e-001, - -5.191946e-002, - 8.869955e-001, - 6.263320e-001, - -9.527600e-001, - 6.494189e-002, - 5.361303e-001, - -2.129590e+000, - -9.258630e-002, - 1.604776e+000, - 5.067770e-002, - 6.376055e+000, - 9.138052e-001, - -1.080827e+000, - -2.523120e-001, - -7.154262e-001, - 4.120085e+000, - 1.878228e-001, - -1.492158e+000, - 6.881655e-001, - -1.446611e+000, - 4.040631e-001, - -1.054075e+000, - -1.665498e-001, - 9.191052e-001, - -6.636943e+000, - -1.894826e+000, - 2.107810e+000, - -3.680499e-002, - 2.655452e+000, - 8.413840e-001, - -1.061127e+000, - -1.448849e-001, - 2.667493e-001, - 1.034103e+001, - -4.285769e+000, - -3.874504e-001, - 5.998752e-001, - 3.132426e+000, - 6.652753e-001, - // albedo 0, turbidity 7 - -1.347345e+000, - -4.287832e-001, - -9.305553e+000, - 9.133813e+000, - -3.173527e-003, - 3.977564e-001, - 1.151420e-001, - 3.320564e+000, - 4.998134e-001, - -1.927296e+000, - -9.901372e-001, - -2.593499e+000, - 4.087421e+000, - -5.833993e-002, - 8.158929e-001, - -4.681279e-002, - 2.423716e-001, - 4.938052e-001, - -9.470092e-001, - 7.325237e-002, - 2.064735e+000, - -5.167540e+000, - -1.313751e-002, - 4.832169e-001, - 1.126295e-001, - 6.970522e+000, - 1.035022e+000, - -1.022557e+000, - -2.762616e-001, - -9.375748e-001, - 6.696739e+000, - 2.200765e-001, - -1.133253e-001, - 5.492505e-001, - -3.109391e+000, - 3.321914e-001, - -1.087444e+000, - -1.836263e-001, - 6.225024e-001, - -8.576765e+000, - -1.107637e+000, - 7.859427e-001, - 9.910909e-002, - 3.112938e+000, - 8.596261e-001, - -1.051544e+000, - -1.546262e-001, - 2.371731e-001, - 1.200502e+001, - -4.527291e+000, - 7.268862e-002, - 5.571478e-001, - 2.532873e+000, - 6.662000e-001, - // albedo 0, turbidity 8 - -1.375576e+000, - -4.840019e-001, - -8.121290e+000, - 8.058140e+000, - -1.445661e-002, - 5.123314e-001, - 5.813321e-002, - 3.203219e+000, - 5.442318e-001, - -2.325221e+000, - -1.241463e+000, - -7.063430e+000, - 8.741369e+000, - -7.829950e-002, - 8.844273e-001, - -3.471106e-002, - 1.740583e-001, - 2.814079e-001, - -1.228700e+000, - -2.013412e-001, - 2.949042e+000, - -7.371945e+000, - 1.071753e-001, - -2.491970e-001, - 2.265223e-001, - 6.391504e+000, - 1.172389e+000, - -7.601786e-001, - -1.680631e-001, - -7.584444e-001, - 8.541356e+000, - 8.222291e-002, - 6.729633e-001, - 3.206615e-001, - -3.700940e+000, - 2.710054e-001, - -1.191166e+000, - -2.672347e-001, - 2.927498e-001, - -9.713613e+000, - -4.783721e-001, - 2.352803e-001, - 2.161949e-001, - 2.691481e+000, - 8.745447e-001, - -1.030135e+000, - -1.653301e-001, - 2.263443e-001, - 1.296157e+001, - -4.650644e+000, - 7.055709e-003, - 5.091975e-001, - 2.000370e+000, - 6.603839e-001, - // albedo 0, turbidity 9 - -1.508018e+000, - -6.460933e-001, - -6.402745e+000, - 6.545995e+000, - -3.750320e-002, - 6.921803e-001, - 3.309819e-003, - 2.797527e+000, - 6.978446e-001, - -2.333308e+000, - -1.167837e+000, - -1.746787e+001, - 1.868630e+001, - -8.948229e-003, - 5.621946e-001, - -3.402626e-002, - 1.217943e+000, - 1.149865e-002, - -2.665953e+000, - -1.226307e+000, - 7.169725e+000, - -1.159434e+001, - 3.583420e-002, - -3.074378e-001, - 3.412248e-001, - 4.422122e+000, - 1.283791e+000, - -9.705116e-002, - 8.312991e-002, - -2.160462e+000, - 1.028235e+001, - 3.543357e-002, - 1.032049e+000, - 1.058310e-001, - -2.972898e+000, - 2.418628e-001, - -1.329617e+000, - -3.699557e-001, - 5.560117e-001, - -9.730113e+000, - 9.938865e-002, - -3.071488e-001, - 2.510691e-001, - 1.777111e+000, - 8.705142e-001, - -1.019387e+000, - -1.893247e-001, - 1.194079e-001, - 1.239436e+001, - -4.799224e+000, - 2.940213e-001, - 4.841268e-001, - 1.529724e+000, - 6.582615e-001, - // albedo 0, turbidity 10 - -1.896737e+000, - -1.005442e+000, - -6.411032e+000, - 6.548220e+000, - -3.227596e-002, - 5.717262e-001, - -8.115192e-006, - 2.296704e+000, - 9.000749e-001, - -2.411116e+000, - -1.225587e+000, - -1.753629e+001, - 1.829393e+001, - 1.247555e-002, - 2.364616e-001, - -5.114637e-003, - 1.603778e+000, - -2.224156e-001, - -4.707121e+000, - -2.074977e+000, - 7.942300e+000, - -1.132407e+001, - -5.415654e-002, - 5.446811e-001, - 1.032493e-001, - 4.010235e+000, - 1.369802e+000, - 1.010482e-001, - -4.013305e-001, - -2.674579e+000, - 9.779409e+000, - 1.782506e-001, - 7.053045e-001, - 4.200002e-001, - -2.400671e+000, - 1.953165e-001, - -1.243526e+000, - -3.391255e-001, - 8.848882e-001, - -9.789025e+000, - -3.997324e-001, - -9.546227e-001, - -1.044017e-001, - 6.010593e-001, - 8.714462e-001, - -1.014633e+000, - -1.730009e-001, - -7.738934e-002, - 1.390903e+001, - -4.847307e+000, - 1.076059e+000, - 5.685743e-001, - 1.572992e+000, - 6.561432e-001, - // albedo 1, turbidity 1 - -1.122998e+000, - -1.881183e-001, - -1.030709e+001, - 1.158932e+001, - -4.079495e-002, - 9.603774e-001, - 3.079436e-002, - 4.009235e+000, - 5.060745e-001, - -1.134790e+000, - -1.539688e-001, - 5.478405e+000, - -4.217270e+000, - -1.043858e-001, - 7.165008e-001, - 1.524765e-002, - 6.473623e+000, - 4.207882e-001, - -1.134957e+000, - -3.513318e-001, - 7.393837e-001, - 1.354415e+000, - -4.764078e-001, - 1.690441e+000, - -5.492640e-002, - -5.563523e+000, - 1.145743e+000, - -1.058344e+000, - -5.758503e-002, - 1.168230e+000, - 3.269824e-001, - 1.795193e-001, - 7.849011e-001, - 7.441853e-002, - 6.904804e+000, - 2.818790e-001, - -1.075194e+000, - -2.355813e-001, - 2.463685e+000, - -1.536505e+000, - -7.505771e+000, - 9.619712e-001, - -6.465851e-002, - -1.355492e+000, - 8.489847e-001, - -1.079030e+000, - -1.465328e-001, - 1.773838e+000, - 2.310131e+000, - -3.136065e+000, - 3.507952e-001, - 4.435014e-002, - 2.819225e+000, - 5.689008e-001, - // albedo 1, turbidity 2 - -1.125833e+000, - -1.870849e-001, - -9.555833e+000, - 1.059713e+001, - -4.225402e-002, - 9.164663e-001, - 4.338796e-002, - 4.400980e+000, - 6.056119e-001, - -1.127440e+000, - -1.551891e-001, - 4.755621e+000, - -4.408806e+000, - -7.851763e-002, - 2.268284e-001, - 1.460070e-001, - 7.048003e+000, - 3.525997e-001, - -1.143788e+000, - -3.170178e-001, - 5.480669e-001, - 2.041830e+000, - -4.532139e-001, - 2.302233e+000, - -1.887419e-001, - -4.489221e+000, - 1.250967e+000, - -1.032849e+000, - 7.376031e-003, - 5.666073e-001, - -2.312203e-001, - 4.862894e-001, - -1.748294e-001, - 3.572870e-001, - 8.380522e+000, - 1.302333e-001, - -1.093728e+000, - -2.786977e-001, - 2.641272e+000, - -1.507494e+000, - -8.731243e+000, - 1.684055e+000, - -2.023377e-001, - -2.176398e+000, - 1.013249e+000, - -1.076578e+000, - -1.456205e-001, - 1.693935e+000, - 2.945003e+000, - -2.822673e+000, - -2.520033e-001, - 1.517034e-001, - 2.649109e+000, - 5.179094e-001, - // albedo 1, turbidity 3 - -1.146417e+000, - -2.119353e-001, - -7.187525e+000, - 8.058599e+000, - -5.256438e-002, - 8.375733e-001, - 3.887093e-002, - 4.222111e+000, - 6.695347e-001, - -1.173674e+000, - -2.067025e-001, - 2.899359e+000, - -2.804918e+000, - -8.473899e-002, - 3.944225e-003, - 1.340641e-001, - 6.160887e+000, - 4.527141e-001, - -1.090098e+000, - -2.599633e-001, - 9.180856e-001, - 1.092710e+000, - -4.215019e-001, - 2.427660e+000, - -9.277667e-002, - -2.123523e+000, - 1.058159e+000, - -1.084460e+000, - 8.056181e-003, - -2.453510e-001, - 6.619567e-001, - 4.668118e-001, - -9.526719e-001, - 4.648454e-001, - 8.001572e+000, - 3.054194e-001, - -1.053728e+000, - -2.765784e-001, - 2.792388e+000, - -3.489517e+000, - -8.150535e+000, - 2.195757e+000, - -2.017234e-001, - -2.128017e+000, - 9.326589e-001, - -1.099348e+000, - -1.593939e-001, - 1.568292e+000, - 7.247853e+000, - -2.933000e+000, - -5.890481e-001, - 1.724440e-001, - 2.433484e+000, - 5.736558e-001, - // albedo 1, turbidity 4 - -1.185983e+000, - -2.581184e-001, - -7.761056e+000, - 8.317053e+000, - -3.351773e-002, - 6.676667e-001, - 5.941733e-002, - 3.820727e+000, - 6.324032e-001, - -1.268591e+000, - -3.398067e-001, - 2.348503e+000, - -2.023779e+000, - -5.368458e-002, - 1.083282e-001, - 8.402858e-002, - 3.910254e+000, - 5.577481e-001, - -1.071353e+000, - -1.992459e-001, - 7.878387e-001, - 1.974702e-001, - -3.033058e-001, - 2.335298e+000, - -8.205259e-002, - 7.954454e-001, - 9.972312e-001, - -1.089513e+000, - -3.104364e-002, - -5.995746e-001, - 2.330281e+000, - 6.581939e-001, - -1.821467e+000, - 6.679973e-001, - 5.090195e+000, - 3.125161e-001, - -1.040214e+000, - -2.570934e-001, - 2.660489e+000, - -6.506045e+000, - -7.053586e+000, - 2.763153e+000, - -2.433632e-001, - -7.648176e-001, - 9.452937e-001, - -1.116052e+000, - -1.831993e-001, - 1.457694e+000, - 1.163608e+001, - -3.216426e+000, - -1.045594e+000, - 2.285002e-001, - 1.817407e+000, - 5.810396e-001, - // albedo 1, turbidity 5 - -1.230134e+000, - -3.136264e-001, - -8.909301e+000, - 9.145006e+000, - -1.055387e-002, - 4.467317e-001, - 1.016826e-001, - 3.342964e+000, - 5.633840e-001, - -1.442907e+000, - -5.593147e-001, - 2.156447e+000, - -1.241657e+000, - -3.512130e-002, - 3.050274e-001, - 1.797175e-002, - 1.742358e+000, - 5.977153e-001, - -1.027627e+000, - -6.481539e-002, - 4.351975e-001, - -1.051677e+000, - -2.030672e-001, - 1.942684e+000, - -3.615993e-002, - 4.050266e+000, - 9.801624e-001, - -1.082110e+000, - -1.578209e-001, - -3.397511e-001, - 4.163851e+000, - 6.650368e-001, - -1.841730e+000, - 7.062544e-001, - 6.789881e-001, - 3.172623e-001, - -1.047447e+000, - -1.977560e-001, - 2.183364e+000, - -8.805249e+000, - -5.483962e+000, - 2.551309e+000, - -1.779640e-001, - 1.519501e+000, - 9.212536e-001, - -1.111853e+000, - -1.935736e-001, - 1.394408e+000, - 1.392405e+001, - -3.465430e+000, - -1.068432e+000, - 2.388671e-001, - 1.455336e+000, - 6.233425e-001, - // albedo 1, turbidity 6 - -1.262238e+000, - -3.546341e-001, - -1.008703e+001, - 1.020084e+001, - -1.852187e-003, - 3.537580e-001, - 1.239199e-001, - 3.056093e+000, - 5.132052e-001, - -1.613810e+000, - -7.355585e-001, - 2.760123e+000, - -1.685253e+000, - -2.517552e-002, - 2.914258e-001, - 4.743448e-003, - 8.689596e-001, - 5.674192e-001, - -9.462336e-001, - 2.950767e-002, - -2.613816e-001, - -7.398653e-001, - -1.315558e-001, - 1.901042e+000, - -6.447844e-002, - 4.969341e+000, - 1.027342e+000, - -1.111481e+000, - -2.194054e-001, - -9.004538e-002, - 3.983442e+000, - 4.871278e-001, - -1.965315e+000, - 7.956121e-001, - -2.363225e-001, - 2.718037e-001, - -1.036397e+000, - -1.827106e-001, - 1.964747e+000, - -8.870759e+000, - -4.208011e+000, - 2.461215e+000, - -2.158905e-001, - 1.561676e+000, - 9.436866e-001, - -1.113769e+000, - -1.947819e-001, - 1.300720e+000, - 1.516476e+001, - -4.088732e+000, - -1.069384e+000, - 2.836434e-001, - 1.671451e+000, - 6.229612e-001, - // albedo 1, turbidity 7 - -1.328069e+000, - -4.244047e-001, - -8.417040e+000, - 8.552244e+000, - -6.813504e-003, - 4.127422e-001, - 9.619897e-002, - 2.854227e+000, - 5.059880e-001, - -1.927552e+000, - -1.025290e+000, - 9.529576e-001, - 4.255950e-001, - -3.738779e-002, - 2.584586e-001, - 4.911004e-002, - -2.640913e-001, - 4.138626e-001, - -8.488094e-001, - 1.435988e-001, - 6.356807e-001, - -2.895732e+000, - -8.473961e-002, - 1.701305e+000, - -1.323908e-001, - 6.499338e+000, - 1.210928e+000, - -1.128313e+000, - -3.397048e-001, - -4.043140e-001, - 6.265097e+000, - 5.482395e-001, - -2.057614e+000, - 8.884087e-001, - -2.943879e+000, - 9.760301e-002, - -1.039764e+000, - -1.494772e-001, - 1.781915e+000, - -1.153012e+001, - -3.379232e+000, - 2.517231e+000, - -2.764393e-001, - 2.588849e+000, - 1.052120e+000, - -1.108447e+000, - -2.012251e-001, - 1.198640e+000, - 1.925331e+001, - -4.423892e+000, - -1.257122e+000, - 3.395690e-001, - 1.481220e+000, - 5.880175e-001, - // albedo 1, turbidity 8 - -1.374185e+000, - -4.967434e-001, - -7.401318e+000, - 7.724021e+000, - -2.345723e-002, - 5.979653e-001, - 2.436346e-002, - 2.658970e+000, - 6.014891e-001, - -2.310933e+000, - -1.290290e+000, - -1.301909e+000, - 2.557806e+000, - -3.744449e-002, - 8.982861e-002, - 1.090613e-001, - -4.398363e-001, - 1.184329e-001, - -1.124730e+000, - -9.921830e-002, - 1.366902e+000, - -4.172489e+000, - -5.078016e-002, - 1.393597e+000, - -9.323843e-002, - 6.452721e+000, - 1.435913e+000, - -8.468477e-001, - -2.744819e-001, - -4.347200e-001, - 6.713362e+000, - 6.127133e-001, - -1.685634e+000, - 7.360941e-001, - -4.535502e+000, - -2.920866e-002, - -1.165242e+000, - -2.008697e-001, - 1.438778e+000, - -1.008936e+001, - -2.214771e+000, - 2.102909e+000, - -1.763085e-001, - 2.859075e+000, - 1.093470e+000, - -1.074614e+000, - -2.066374e-001, - 1.131891e+000, - 1.630063e+001, - -4.801441e+000, - -1.112590e+000, - 3.595785e-001, - 1.122227e+000, - 5.794610e-001, - // albedo 1, turbidity 9 - -1.521515e+000, - -6.835604e-001, - -5.571044e+000, - 6.028774e+000, - -4.253715e-002, - 6.875746e-001, - -5.279456e-006, - 2.180150e+000, - 8.487705e-001, - -2.240415e+000, - -1.171166e+000, - -7.182771e+000, - 8.417068e+000, - -1.932866e-002, - 1.101887e-001, - -1.098862e-002, - 6.242195e-001, - -2.393875e-001, - -2.712354e+000, - -1.198830e+000, - 3.180200e+000, - -6.768130e+000, - -2.563386e-003, - 7.984607e-001, - 2.764376e-001, - 4.695358e+000, - 1.557045e+000, - -3.655172e-002, - -2.142321e-002, - -9.138120e-001, - 7.932786e+000, - 3.516542e-001, - -7.994343e-001, - 1.786761e-001, - -4.208399e+000, - 1.820576e-002, - -1.368610e+000, - -2.656212e-001, - 1.249397e+000, - -8.317818e+000, - -8.962772e-001, - 1.423249e+000, - 1.478381e-001, - 2.191660e+000, - 1.007748e+000, - -1.041753e+000, - -2.453366e-001, - 1.061102e+000, - 1.130172e+001, - -4.739312e+000, - -9.223334e-001, - 2.982776e-001, - 6.162931e-001, - 6.080302e-001, - // albedo 1, turbidity 10 - -1.989159e+000, - -1.095160e+000, - -2.915550e+000, - 3.275339e+000, - -5.735765e-002, - 5.742174e-001, - -7.683288e-006, - 1.763400e+000, - 9.001342e-001, - -2.070020e+000, - -1.086338e+000, - -1.095898e+001, - 1.206960e+001, - 3.780123e-002, - -1.774699e-002, - -5.881348e-004, - 1.333819e+000, - -2.605423e-001, - -5.249653e+000, - -2.383040e+000, - 6.160406e+000, - -9.097138e+000, - -1.955319e-001, - 1.651785e+000, - 6.016463e-004, - 3.021824e+000, - 1.493574e+000, - 4.685432e-001, - -2.358662e-001, - -2.666433e+000, - 9.685763e+000, - 5.804928e-001, - -1.521875e+000, - 5.668989e-001, - -1.548136e+000, - 1.688642e-002, - -1.296891e+000, - -3.449031e-001, - 1.928548e+000, - -1.167560e+001, - -1.627615e+000, - 1.355603e+000, - -1.929074e-001, - -6.568952e-001, - 1.009774e+000, - -1.067288e+000, - -2.410392e-001, - 7.147961e-001, - 1.783840e+001, - -4.374399e+000, - -6.588777e-001, - 3.329831e-001, - 1.012066e+000, - 6.118645e-001, +static const double datasetXYZ2[] = { + // albedo 0, turbidity 1 + -1.127942e+000, + -1.905548e-001, + -1.252356e+001, + 1.375799e+001, + -3.624732e-002, + 1.055453e+000, + 1.385036e-002, + 4.176970e+000, + 5.928345e-001, + -1.155260e+000, + -1.778135e-001, + 6.216056e+000, + -5.254116e+000, + -8.787445e-002, + 8.434621e-001, + 4.025734e-002, + 6.195322e+000, + 3.111856e-001, + -1.125624e+000, + -3.217593e-001, + 5.043919e-001, + 1.686284e+000, + -3.536071e-001, + 1.476321e+000, + -7.899019e-002, + -4.522531e+000, + 1.271691e+000, + -1.081801e+000, + -1.033234e-001, + 9.995550e-001, + 7.482946e-003, + -6.776018e-002, + 1.463141e+000, + 9.492021e-002, + 5.612723e+000, + 1.298846e-001, + -1.075320e+000, + -2.402711e-001, + 2.141284e+000, + -1.203359e+000, + -4.945188e+000, + 1.437221e+000, + -8.096750e-002, + -1.028378e+000, + 1.004164e+000, + -1.073337e+000, + -1.516517e-001, + 1.639379e+000, + 2.304669e+000, + -3.214244e+000, + 1.286245e+000, + 5.613957e-002, + 2.480902e+000, + 4.999363e-001, + // albedo 0, turbidity 2 + -1.128399e+000, + -1.857793e-001, + -1.089863e+001, + 1.172984e+001, + -3.768099e-002, + 9.439285e-001, + 4.869335e-002, + 4.845114e+000, + 6.119211e-001, + -1.114002e+000, + -1.399280e-001, + 4.963800e+000, + -4.685500e+000, + -7.780879e-002, + 4.049736e-001, + 1.586297e-001, + 7.770264e+000, + 3.449006e-001, + -1.185472e+000, + -3.403543e-001, + 6.588322e-001, + 1.133713e+000, + -4.118674e-001, + 2.061191e+000, + -1.882768e-001, + -4.372586e+000, + 1.223530e+000, + -1.002272e+000, + 2.000703e-002, + 7.073269e-002, + 1.485075e+000, + 5.005589e-001, + 4.301494e-001, + 3.626541e-001, + 7.921098e+000, + 1.574766e-001, + -1.121006e+000, + -3.007777e-001, + 2.242051e+000, + -4.571561e+000, + -7.761071e+000, + 2.053404e+000, + -1.524018e-001, + -1.886162e+000, + 1.018208e+000, + -1.058864e+000, + -1.358673e-001, + 1.389667e+000, + 8.633409e+000, + -3.437249e+000, + 7.295429e-001, + 1.514700e-001, + 2.842513e+000, + 5.014325e-001, + // albedo 0, turbidity 3 + -1.144464e+000, + -2.043799e-001, + -1.020188e+001, + 1.071247e+001, + -3.256693e-002, + 7.860205e-001, + 6.872719e-002, + 4.824771e+000, + 6.259836e-001, + -1.170104e+000, + -2.118626e-001, + 4.391405e+000, + -4.198900e+000, + -7.111559e-002, + 3.890442e-001, + 1.024831e-001, + 6.282535e+000, + 5.365688e-001, + -1.129171e+000, + -2.552880e-001, + 2.238298e-001, + 7.314295e-001, + -3.562730e-001, + 1.881931e+000, + -3.078716e-002, + -1.039120e+000, + 9.096301e-001, + -1.042294e+000, + 4.450203e-003, + -5.116033e-001, + 2.627589e+000, + 6.098996e-001, + -1.264638e-001, + 4.325281e-001, + 7.080503e+000, + 4.583646e-001, + -1.082293e+000, + -2.723056e-001, + 2.065076e+000, + -8.143133e+000, + -7.892212e+000, + 2.142231e+000, + -7.106240e-002, + -1.122398e+000, + 8.338505e-001, + -1.071715e+000, + -1.426568e-001, + 1.095351e+000, + 1.729783e+001, + -3.851931e+000, + 4.360514e-001, + 2.114440e-001, + 2.970832e+000, + 5.944389e-001, + // albedo 0, turbidity 4 + -1.195909e+000, + -2.590449e-001, + -1.191037e+001, + 1.207947e+001, + -1.589842e-002, + 6.297846e-001, + 9.054772e-002, + 4.285959e+000, + 5.933752e-001, + -1.245763e+000, + -3.316637e-001, + 4.293660e+000, + -3.694011e+000, + -4.699947e-002, + 4.843684e-001, + 2.130425e-002, + 4.097549e+000, + 6.530809e-001, + -1.148742e+000, + -1.902509e-001, + -2.393233e-001, + -2.441254e-001, + -2.610918e-001, + 1.846988e+000, + 3.532866e-002, + 2.660106e+000, + 8.358294e-001, + -1.016080e+000, + -7.444960e-002, + -5.053436e-001, + 4.388855e+000, + 6.054987e-001, + -1.208300e+000, + 5.817215e-001, + 2.543570e+000, + 4.726568e-001, + -1.072027e+000, + -2.101440e-001, + 1.518378e+000, + -1.060119e+001, + -6.016546e+000, + 2.649475e+000, + -5.166992e-002, + 1.571269e+000, + 8.344622e-001, + -1.072365e+000, + -1.511201e-001, + 7.478010e-001, + 1.900732e+001, + -3.950387e+000, + -3.473907e-001, + 3.797211e-001, + 2.782949e+000, + 6.296808e-001, + // albedo 0, turbidity 5 + -1.239423e+000, + -3.136289e-001, + -1.351100e+001, + 1.349468e+001, + -7.070423e-003, + 5.012315e-001, + 1.106008e-001, + 3.803619e+000, + 5.577948e-001, + -1.452524e+000, + -5.676944e-001, + 2.993153e+000, + -2.277288e+000, + -2.168954e-002, + 3.056720e-001, + 1.152338e-002, + 1.852697e+000, + 6.427228e-001, + -1.061421e+000, + -4.590521e-002, + 6.057022e-001, + -1.096835e+000, + -1.504952e-001, + 2.344921e+000, + -5.491832e-002, + 5.268322e+000, + 9.082253e-001, + -1.042373e+000, + -1.769498e-001, + -1.075388e+000, + 3.831712e+000, + 3.154140e-001, + -2.416458e+000, + 7.909032e-001, + -1.492892e-002, + 3.854049e-001, + -1.064159e+000, + -1.892684e-001, + 1.438685e+000, + -8.166362e+000, + -3.616364e+000, + 3.275206e+000, + -1.203825e-001, + 2.039491e+000, + 8.688057e-001, + -1.070120e+000, + -1.569508e-001, + 4.124760e-001, + 1.399683e+001, + -3.547085e+000, + -1.046326e+000, + 4.973825e-001, + 2.791231e+000, + 6.503286e-001, + // albedo 0, turbidity 6 + -1.283579e+000, + -3.609518e-001, + -1.335397e+001, + 1.315248e+001, + -4.431938e-004, + 3.769526e-001, + 1.429824e-001, + 3.573613e+000, + 4.998696e-001, + -1.657952e+000, + -7.627948e-001, + 1.958222e+000, + -7.949816e-001, + -2.882837e-002, + 5.356149e-001, + -5.191946e-002, + 8.869955e-001, + 6.263320e-001, + -9.527600e-001, + 6.494189e-002, + 5.361303e-001, + -2.129590e+000, + -9.258630e-002, + 1.604776e+000, + 5.067770e-002, + 6.376055e+000, + 9.138052e-001, + -1.080827e+000, + -2.523120e-001, + -7.154262e-001, + 4.120085e+000, + 1.878228e-001, + -1.492158e+000, + 6.881655e-001, + -1.446611e+000, + 4.040631e-001, + -1.054075e+000, + -1.665498e-001, + 9.191052e-001, + -6.636943e+000, + -1.894826e+000, + 2.107810e+000, + -3.680499e-002, + 2.655452e+000, + 8.413840e-001, + -1.061127e+000, + -1.448849e-001, + 2.667493e-001, + 1.034103e+001, + -4.285769e+000, + -3.874504e-001, + 5.998752e-001, + 3.132426e+000, + 6.652753e-001, + // albedo 0, turbidity 7 + -1.347345e+000, + -4.287832e-001, + -9.305553e+000, + 9.133813e+000, + -3.173527e-003, + 3.977564e-001, + 1.151420e-001, + 3.320564e+000, + 4.998134e-001, + -1.927296e+000, + -9.901372e-001, + -2.593499e+000, + 4.087421e+000, + -5.833993e-002, + 8.158929e-001, + -4.681279e-002, + 2.423716e-001, + 4.938052e-001, + -9.470092e-001, + 7.325237e-002, + 2.064735e+000, + -5.167540e+000, + -1.313751e-002, + 4.832169e-001, + 1.126295e-001, + 6.970522e+000, + 1.035022e+000, + -1.022557e+000, + -2.762616e-001, + -9.375748e-001, + 6.696739e+000, + 2.200765e-001, + -1.133253e-001, + 5.492505e-001, + -3.109391e+000, + 3.321914e-001, + -1.087444e+000, + -1.836263e-001, + 6.225024e-001, + -8.576765e+000, + -1.107637e+000, + 7.859427e-001, + 9.910909e-002, + 3.112938e+000, + 8.596261e-001, + -1.051544e+000, + -1.546262e-001, + 2.371731e-001, + 1.200502e+001, + -4.527291e+000, + 7.268862e-002, + 5.571478e-001, + 2.532873e+000, + 6.662000e-001, + // albedo 0, turbidity 8 + -1.375576e+000, + -4.840019e-001, + -8.121290e+000, + 8.058140e+000, + -1.445661e-002, + 5.123314e-001, + 5.813321e-002, + 3.203219e+000, + 5.442318e-001, + -2.325221e+000, + -1.241463e+000, + -7.063430e+000, + 8.741369e+000, + -7.829950e-002, + 8.844273e-001, + -3.471106e-002, + 1.740583e-001, + 2.814079e-001, + -1.228700e+000, + -2.013412e-001, + 2.949042e+000, + -7.371945e+000, + 1.071753e-001, + -2.491970e-001, + 2.265223e-001, + 6.391504e+000, + 1.172389e+000, + -7.601786e-001, + -1.680631e-001, + -7.584444e-001, + 8.541356e+000, + 8.222291e-002, + 6.729633e-001, + 3.206615e-001, + -3.700940e+000, + 2.710054e-001, + -1.191166e+000, + -2.672347e-001, + 2.927498e-001, + -9.713613e+000, + -4.783721e-001, + 2.352803e-001, + 2.161949e-001, + 2.691481e+000, + 8.745447e-001, + -1.030135e+000, + -1.653301e-001, + 2.263443e-001, + 1.296157e+001, + -4.650644e+000, + 7.055709e-003, + 5.091975e-001, + 2.000370e+000, + 6.603839e-001, + // albedo 0, turbidity 9 + -1.508018e+000, + -6.460933e-001, + -6.402745e+000, + 6.545995e+000, + -3.750320e-002, + 6.921803e-001, + 3.309819e-003, + 2.797527e+000, + 6.978446e-001, + -2.333308e+000, + -1.167837e+000, + -1.746787e+001, + 1.868630e+001, + -8.948229e-003, + 5.621946e-001, + -3.402626e-002, + 1.217943e+000, + 1.149865e-002, + -2.665953e+000, + -1.226307e+000, + 7.169725e+000, + -1.159434e+001, + 3.583420e-002, + -3.074378e-001, + 3.412248e-001, + 4.422122e+000, + 1.283791e+000, + -9.705116e-002, + 8.312991e-002, + -2.160462e+000, + 1.028235e+001, + 3.543357e-002, + 1.032049e+000, + 1.058310e-001, + -2.972898e+000, + 2.418628e-001, + -1.329617e+000, + -3.699557e-001, + 5.560117e-001, + -9.730113e+000, + 9.938865e-002, + -3.071488e-001, + 2.510691e-001, + 1.777111e+000, + 8.705142e-001, + -1.019387e+000, + -1.893247e-001, + 1.194079e-001, + 1.239436e+001, + -4.799224e+000, + 2.940213e-001, + 4.841268e-001, + 1.529724e+000, + 6.582615e-001, + // albedo 0, turbidity 10 + -1.896737e+000, + -1.005442e+000, + -6.411032e+000, + 6.548220e+000, + -3.227596e-002, + 5.717262e-001, + -8.115192e-006, + 2.296704e+000, + 9.000749e-001, + -2.411116e+000, + -1.225587e+000, + -1.753629e+001, + 1.829393e+001, + 1.247555e-002, + 2.364616e-001, + -5.114637e-003, + 1.603778e+000, + -2.224156e-001, + -4.707121e+000, + -2.074977e+000, + 7.942300e+000, + -1.132407e+001, + -5.415654e-002, + 5.446811e-001, + 1.032493e-001, + 4.010235e+000, + 1.369802e+000, + 1.010482e-001, + -4.013305e-001, + -2.674579e+000, + 9.779409e+000, + 1.782506e-001, + 7.053045e-001, + 4.200002e-001, + -2.400671e+000, + 1.953165e-001, + -1.243526e+000, + -3.391255e-001, + 8.848882e-001, + -9.789025e+000, + -3.997324e-001, + -9.546227e-001, + -1.044017e-001, + 6.010593e-001, + 8.714462e-001, + -1.014633e+000, + -1.730009e-001, + -7.738934e-002, + 1.390903e+001, + -4.847307e+000, + 1.076059e+000, + 5.685743e-001, + 1.572992e+000, + 6.561432e-001, + // albedo 1, turbidity 1 + -1.122998e+000, + -1.881183e-001, + -1.030709e+001, + 1.158932e+001, + -4.079495e-002, + 9.603774e-001, + 3.079436e-002, + 4.009235e+000, + 5.060745e-001, + -1.134790e+000, + -1.539688e-001, + 5.478405e+000, + -4.217270e+000, + -1.043858e-001, + 7.165008e-001, + 1.524765e-002, + 6.473623e+000, + 4.207882e-001, + -1.134957e+000, + -3.513318e-001, + 7.393837e-001, + 1.354415e+000, + -4.764078e-001, + 1.690441e+000, + -5.492640e-002, + -5.563523e+000, + 1.145743e+000, + -1.058344e+000, + -5.758503e-002, + 1.168230e+000, + 3.269824e-001, + 1.795193e-001, + 7.849011e-001, + 7.441853e-002, + 6.904804e+000, + 2.818790e-001, + -1.075194e+000, + -2.355813e-001, + 2.463685e+000, + -1.536505e+000, + -7.505771e+000, + 9.619712e-001, + -6.465851e-002, + -1.355492e+000, + 8.489847e-001, + -1.079030e+000, + -1.465328e-001, + 1.773838e+000, + 2.310131e+000, + -3.136065e+000, + 3.507952e-001, + 4.435014e-002, + 2.819225e+000, + 5.689008e-001, + // albedo 1, turbidity 2 + -1.125833e+000, + -1.870849e-001, + -9.555833e+000, + 1.059713e+001, + -4.225402e-002, + 9.164663e-001, + 4.338796e-002, + 4.400980e+000, + 6.056119e-001, + -1.127440e+000, + -1.551891e-001, + 4.755621e+000, + -4.408806e+000, + -7.851763e-002, + 2.268284e-001, + 1.460070e-001, + 7.048003e+000, + 3.525997e-001, + -1.143788e+000, + -3.170178e-001, + 5.480669e-001, + 2.041830e+000, + -4.532139e-001, + 2.302233e+000, + -1.887419e-001, + -4.489221e+000, + 1.250967e+000, + -1.032849e+000, + 7.376031e-003, + 5.666073e-001, + -2.312203e-001, + 4.862894e-001, + -1.748294e-001, + 3.572870e-001, + 8.380522e+000, + 1.302333e-001, + -1.093728e+000, + -2.786977e-001, + 2.641272e+000, + -1.507494e+000, + -8.731243e+000, + 1.684055e+000, + -2.023377e-001, + -2.176398e+000, + 1.013249e+000, + -1.076578e+000, + -1.456205e-001, + 1.693935e+000, + 2.945003e+000, + -2.822673e+000, + -2.520033e-001, + 1.517034e-001, + 2.649109e+000, + 5.179094e-001, + // albedo 1, turbidity 3 + -1.146417e+000, + -2.119353e-001, + -7.187525e+000, + 8.058599e+000, + -5.256438e-002, + 8.375733e-001, + 3.887093e-002, + 4.222111e+000, + 6.695347e-001, + -1.173674e+000, + -2.067025e-001, + 2.899359e+000, + -2.804918e+000, + -8.473899e-002, + 3.944225e-003, + 1.340641e-001, + 6.160887e+000, + 4.527141e-001, + -1.090098e+000, + -2.599633e-001, + 9.180856e-001, + 1.092710e+000, + -4.215019e-001, + 2.427660e+000, + -9.277667e-002, + -2.123523e+000, + 1.058159e+000, + -1.084460e+000, + 8.056181e-003, + -2.453510e-001, + 6.619567e-001, + 4.668118e-001, + -9.526719e-001, + 4.648454e-001, + 8.001572e+000, + 3.054194e-001, + -1.053728e+000, + -2.765784e-001, + 2.792388e+000, + -3.489517e+000, + -8.150535e+000, + 2.195757e+000, + -2.017234e-001, + -2.128017e+000, + 9.326589e-001, + -1.099348e+000, + -1.593939e-001, + 1.568292e+000, + 7.247853e+000, + -2.933000e+000, + -5.890481e-001, + 1.724440e-001, + 2.433484e+000, + 5.736558e-001, + // albedo 1, turbidity 4 + -1.185983e+000, + -2.581184e-001, + -7.761056e+000, + 8.317053e+000, + -3.351773e-002, + 6.676667e-001, + 5.941733e-002, + 3.820727e+000, + 6.324032e-001, + -1.268591e+000, + -3.398067e-001, + 2.348503e+000, + -2.023779e+000, + -5.368458e-002, + 1.083282e-001, + 8.402858e-002, + 3.910254e+000, + 5.577481e-001, + -1.071353e+000, + -1.992459e-001, + 7.878387e-001, + 1.974702e-001, + -3.033058e-001, + 2.335298e+000, + -8.205259e-002, + 7.954454e-001, + 9.972312e-001, + -1.089513e+000, + -3.104364e-002, + -5.995746e-001, + 2.330281e+000, + 6.581939e-001, + -1.821467e+000, + 6.679973e-001, + 5.090195e+000, + 3.125161e-001, + -1.040214e+000, + -2.570934e-001, + 2.660489e+000, + -6.506045e+000, + -7.053586e+000, + 2.763153e+000, + -2.433632e-001, + -7.648176e-001, + 9.452937e-001, + -1.116052e+000, + -1.831993e-001, + 1.457694e+000, + 1.163608e+001, + -3.216426e+000, + -1.045594e+000, + 2.285002e-001, + 1.817407e+000, + 5.810396e-001, + // albedo 1, turbidity 5 + -1.230134e+000, + -3.136264e-001, + -8.909301e+000, + 9.145006e+000, + -1.055387e-002, + 4.467317e-001, + 1.016826e-001, + 3.342964e+000, + 5.633840e-001, + -1.442907e+000, + -5.593147e-001, + 2.156447e+000, + -1.241657e+000, + -3.512130e-002, + 3.050274e-001, + 1.797175e-002, + 1.742358e+000, + 5.977153e-001, + -1.027627e+000, + -6.481539e-002, + 4.351975e-001, + -1.051677e+000, + -2.030672e-001, + 1.942684e+000, + -3.615993e-002, + 4.050266e+000, + 9.801624e-001, + -1.082110e+000, + -1.578209e-001, + -3.397511e-001, + 4.163851e+000, + 6.650368e-001, + -1.841730e+000, + 7.062544e-001, + 6.789881e-001, + 3.172623e-001, + -1.047447e+000, + -1.977560e-001, + 2.183364e+000, + -8.805249e+000, + -5.483962e+000, + 2.551309e+000, + -1.779640e-001, + 1.519501e+000, + 9.212536e-001, + -1.111853e+000, + -1.935736e-001, + 1.394408e+000, + 1.392405e+001, + -3.465430e+000, + -1.068432e+000, + 2.388671e-001, + 1.455336e+000, + 6.233425e-001, + // albedo 1, turbidity 6 + -1.262238e+000, + -3.546341e-001, + -1.008703e+001, + 1.020084e+001, + -1.852187e-003, + 3.537580e-001, + 1.239199e-001, + 3.056093e+000, + 5.132052e-001, + -1.613810e+000, + -7.355585e-001, + 2.760123e+000, + -1.685253e+000, + -2.517552e-002, + 2.914258e-001, + 4.743448e-003, + 8.689596e-001, + 5.674192e-001, + -9.462336e-001, + 2.950767e-002, + -2.613816e-001, + -7.398653e-001, + -1.315558e-001, + 1.901042e+000, + -6.447844e-002, + 4.969341e+000, + 1.027342e+000, + -1.111481e+000, + -2.194054e-001, + -9.004538e-002, + 3.983442e+000, + 4.871278e-001, + -1.965315e+000, + 7.956121e-001, + -2.363225e-001, + 2.718037e-001, + -1.036397e+000, + -1.827106e-001, + 1.964747e+000, + -8.870759e+000, + -4.208011e+000, + 2.461215e+000, + -2.158905e-001, + 1.561676e+000, + 9.436866e-001, + -1.113769e+000, + -1.947819e-001, + 1.300720e+000, + 1.516476e+001, + -4.088732e+000, + -1.069384e+000, + 2.836434e-001, + 1.671451e+000, + 6.229612e-001, + // albedo 1, turbidity 7 + -1.328069e+000, + -4.244047e-001, + -8.417040e+000, + 8.552244e+000, + -6.813504e-003, + 4.127422e-001, + 9.619897e-002, + 2.854227e+000, + 5.059880e-001, + -1.927552e+000, + -1.025290e+000, + 9.529576e-001, + 4.255950e-001, + -3.738779e-002, + 2.584586e-001, + 4.911004e-002, + -2.640913e-001, + 4.138626e-001, + -8.488094e-001, + 1.435988e-001, + 6.356807e-001, + -2.895732e+000, + -8.473961e-002, + 1.701305e+000, + -1.323908e-001, + 6.499338e+000, + 1.210928e+000, + -1.128313e+000, + -3.397048e-001, + -4.043140e-001, + 6.265097e+000, + 5.482395e-001, + -2.057614e+000, + 8.884087e-001, + -2.943879e+000, + 9.760301e-002, + -1.039764e+000, + -1.494772e-001, + 1.781915e+000, + -1.153012e+001, + -3.379232e+000, + 2.517231e+000, + -2.764393e-001, + 2.588849e+000, + 1.052120e+000, + -1.108447e+000, + -2.012251e-001, + 1.198640e+000, + 1.925331e+001, + -4.423892e+000, + -1.257122e+000, + 3.395690e-001, + 1.481220e+000, + 5.880175e-001, + // albedo 1, turbidity 8 + -1.374185e+000, + -4.967434e-001, + -7.401318e+000, + 7.724021e+000, + -2.345723e-002, + 5.979653e-001, + 2.436346e-002, + 2.658970e+000, + 6.014891e-001, + -2.310933e+000, + -1.290290e+000, + -1.301909e+000, + 2.557806e+000, + -3.744449e-002, + 8.982861e-002, + 1.090613e-001, + -4.398363e-001, + 1.184329e-001, + -1.124730e+000, + -9.921830e-002, + 1.366902e+000, + -4.172489e+000, + -5.078016e-002, + 1.393597e+000, + -9.323843e-002, + 6.452721e+000, + 1.435913e+000, + -8.468477e-001, + -2.744819e-001, + -4.347200e-001, + 6.713362e+000, + 6.127133e-001, + -1.685634e+000, + 7.360941e-001, + -4.535502e+000, + -2.920866e-002, + -1.165242e+000, + -2.008697e-001, + 1.438778e+000, + -1.008936e+001, + -2.214771e+000, + 2.102909e+000, + -1.763085e-001, + 2.859075e+000, + 1.093470e+000, + -1.074614e+000, + -2.066374e-001, + 1.131891e+000, + 1.630063e+001, + -4.801441e+000, + -1.112590e+000, + 3.595785e-001, + 1.122227e+000, + 5.794610e-001, + // albedo 1, turbidity 9 + -1.521515e+000, + -6.835604e-001, + -5.571044e+000, + 6.028774e+000, + -4.253715e-002, + 6.875746e-001, + -5.279456e-006, + 2.180150e+000, + 8.487705e-001, + -2.240415e+000, + -1.171166e+000, + -7.182771e+000, + 8.417068e+000, + -1.932866e-002, + 1.101887e-001, + -1.098862e-002, + 6.242195e-001, + -2.393875e-001, + -2.712354e+000, + -1.198830e+000, + 3.180200e+000, + -6.768130e+000, + -2.563386e-003, + 7.984607e-001, + 2.764376e-001, + 4.695358e+000, + 1.557045e+000, + -3.655172e-002, + -2.142321e-002, + -9.138120e-001, + 7.932786e+000, + 3.516542e-001, + -7.994343e-001, + 1.786761e-001, + -4.208399e+000, + 1.820576e-002, + -1.368610e+000, + -2.656212e-001, + 1.249397e+000, + -8.317818e+000, + -8.962772e-001, + 1.423249e+000, + 1.478381e-001, + 2.191660e+000, + 1.007748e+000, + -1.041753e+000, + -2.453366e-001, + 1.061102e+000, + 1.130172e+001, + -4.739312e+000, + -9.223334e-001, + 2.982776e-001, + 6.162931e-001, + 6.080302e-001, + // albedo 1, turbidity 10 + -1.989159e+000, + -1.095160e+000, + -2.915550e+000, + 3.275339e+000, + -5.735765e-002, + 5.742174e-001, + -7.683288e-006, + 1.763400e+000, + 9.001342e-001, + -2.070020e+000, + -1.086338e+000, + -1.095898e+001, + 1.206960e+001, + 3.780123e-002, + -1.774699e-002, + -5.881348e-004, + 1.333819e+000, + -2.605423e-001, + -5.249653e+000, + -2.383040e+000, + 6.160406e+000, + -9.097138e+000, + -1.955319e-001, + 1.651785e+000, + 6.016463e-004, + 3.021824e+000, + 1.493574e+000, + 4.685432e-001, + -2.358662e-001, + -2.666433e+000, + 9.685763e+000, + 5.804928e-001, + -1.521875e+000, + 5.668989e-001, + -1.548136e+000, + 1.688642e-002, + -1.296891e+000, + -3.449031e-001, + 1.928548e+000, + -1.167560e+001, + -1.627615e+000, + 1.355603e+000, + -1.929074e-001, + -6.568952e-001, + 1.009774e+000, + -1.067288e+000, + -2.410392e-001, + 7.147961e-001, + 1.783840e+001, + -4.374399e+000, + -6.588777e-001, + 3.329831e-001, + 1.012066e+000, + 6.118645e-001, }; -static const double datasetXYZRad2[] = -{ - // albedo 0, turbidity 1 - 1.632341e+000, - 1.395230e+000, - 1.375634e+000, - 1.238193e+001, - 5.921102e+000, - 7.766508e+000, - // albedo 0, turbidity 2 - 1.597115e+000, - 1.554617e+000, - 3.932382e-001, - 1.505284e+001, - 5.725234e+000, - 8.158155e+000, - // albedo 0, turbidity 3 - 1.522034e+000, - 1.844545e+000, - -1.322862e+000, - 1.918382e+001, - 5.440769e+000, - 8.837119e+000, - // albedo 0, turbidity 4 - 1.403048e+000, - 2.290852e+000, - -4.013792e+000, - 2.485100e+001, - 5.521888e+000, - 9.845547e+000, - // albedo 0, turbidity 5 - 1.286364e+000, - 2.774498e+000, - -6.648221e+000, - 2.964151e+001, - 5.923777e+000, - 1.097075e+001, - // albedo 0, turbidity 6 - 1.213544e+000, - 3.040195e+000, - -8.092676e+000, - 3.186082e+001, - 6.789782e+000, - 1.158899e+001, - // albedo 0, turbidity 7 - 1.122622e+000, - 3.347465e+000, - -9.649016e+000, - 3.343824e+001, - 9.347715e+000, - 1.231374e+001, - // albedo 0, turbidity 8 - 1.007356e+000, - 3.543858e+000, - -1.053520e+001, - 3.239842e+001, - 1.483962e+001, - 1.331718e+001, - // albedo 0, turbidity 9 - 8.956642e-001, - 3.278700e+000, - -9.254933e+000, - 2.557923e+001, - 2.489677e+001, - 1.476166e+001, - // albedo 0, turbidity 10 - 7.985143e-001, - 2.340404e+000, - -4.928274e+000, - 1.141787e+001, - 3.961501e+001, - 1.682448e+001, - // albedo 1, turbidity 1 - 1.745162e+000, - 1.639467e+000, - 1.342721e+000, - 1.166033e+001, - 1.490124e+001, - 1.774031e+001, - // albedo 1, turbidity 2 - 1.708439e+000, - 1.819144e+000, - 2.834399e-001, - 1.448066e+001, - 1.459214e+001, - 1.858679e+001, - // albedo 1, turbidity 3 - 1.631720e+000, - 2.094799e+000, - -1.378825e+000, - 1.843198e+001, - 1.463173e+001, - 1.962881e+001, - // albedo 1, turbidity 4 - 1.516536e+000, - 2.438729e+000, - -3.624121e+000, - 2.298621e+001, - 1.599782e+001, - 2.070027e+001, - // albedo 1, turbidity 5 - 1.405863e+000, - 2.785191e+000, - -5.705236e+000, - 2.645121e+001, - 1.768330e+001, - 2.191903e+001, - // albedo 1, turbidity 6 - 1.344052e+000, - 2.951807e+000, - -6.683851e+000, - 2.744271e+001, - 1.985706e+001, - 2.229452e+001, - // albedo 1, turbidity 7 - 1.245827e+000, - 3.182923e+000, - -7.822960e+000, - 2.791395e+001, - 2.327254e+001, - 2.315910e+001, - // albedo 1, turbidity 8 - 1.132305e+000, - 3.202593e+000, - -8.008429e+000, - 2.521093e+001, - 3.000014e+001, - 2.405306e+001, - // albedo 1, turbidity 9 - 1.020330e+000, - 2.820556e+000, - -6.238704e+000, - 1.709276e+001, - 4.077916e+001, - 2.509949e+001, - // albedo 1, turbidity 10 - 9.031570e-001, - 1.863917e+000, - -1.955738e+000, - 3.032665e+000, - 5.434290e+001, - 2.641780e+001, +static const double datasetXYZRad2[] = { + // albedo 0, turbidity 1 + 1.632341e+000, + 1.395230e+000, + 1.375634e+000, + 1.238193e+001, + 5.921102e+000, + 7.766508e+000, + // albedo 0, turbidity 2 + 1.597115e+000, + 1.554617e+000, + 3.932382e-001, + 1.505284e+001, + 5.725234e+000, + 8.158155e+000, + // albedo 0, turbidity 3 + 1.522034e+000, + 1.844545e+000, + -1.322862e+000, + 1.918382e+001, + 5.440769e+000, + 8.837119e+000, + // albedo 0, turbidity 4 + 1.403048e+000, + 2.290852e+000, + -4.013792e+000, + 2.485100e+001, + 5.521888e+000, + 9.845547e+000, + // albedo 0, turbidity 5 + 1.286364e+000, + 2.774498e+000, + -6.648221e+000, + 2.964151e+001, + 5.923777e+000, + 1.097075e+001, + // albedo 0, turbidity 6 + 1.213544e+000, + 3.040195e+000, + -8.092676e+000, + 3.186082e+001, + 6.789782e+000, + 1.158899e+001, + // albedo 0, turbidity 7 + 1.122622e+000, + 3.347465e+000, + -9.649016e+000, + 3.343824e+001, + 9.347715e+000, + 1.231374e+001, + // albedo 0, turbidity 8 + 1.007356e+000, + 3.543858e+000, + -1.053520e+001, + 3.239842e+001, + 1.483962e+001, + 1.331718e+001, + // albedo 0, turbidity 9 + 8.956642e-001, + 3.278700e+000, + -9.254933e+000, + 2.557923e+001, + 2.489677e+001, + 1.476166e+001, + // albedo 0, turbidity 10 + 7.985143e-001, + 2.340404e+000, + -4.928274e+000, + 1.141787e+001, + 3.961501e+001, + 1.682448e+001, + // albedo 1, turbidity 1 + 1.745162e+000, + 1.639467e+000, + 1.342721e+000, + 1.166033e+001, + 1.490124e+001, + 1.774031e+001, + // albedo 1, turbidity 2 + 1.708439e+000, + 1.819144e+000, + 2.834399e-001, + 1.448066e+001, + 1.459214e+001, + 1.858679e+001, + // albedo 1, turbidity 3 + 1.631720e+000, + 2.094799e+000, + -1.378825e+000, + 1.843198e+001, + 1.463173e+001, + 1.962881e+001, + // albedo 1, turbidity 4 + 1.516536e+000, + 2.438729e+000, + -3.624121e+000, + 2.298621e+001, + 1.599782e+001, + 2.070027e+001, + // albedo 1, turbidity 5 + 1.405863e+000, + 2.785191e+000, + -5.705236e+000, + 2.645121e+001, + 1.768330e+001, + 2.191903e+001, + // albedo 1, turbidity 6 + 1.344052e+000, + 2.951807e+000, + -6.683851e+000, + 2.744271e+001, + 1.985706e+001, + 2.229452e+001, + // albedo 1, turbidity 7 + 1.245827e+000, + 3.182923e+000, + -7.822960e+000, + 2.791395e+001, + 2.327254e+001, + 2.315910e+001, + // albedo 1, turbidity 8 + 1.132305e+000, + 3.202593e+000, + -8.008429e+000, + 2.521093e+001, + 3.000014e+001, + 2.405306e+001, + // albedo 1, turbidity 9 + 1.020330e+000, + 2.820556e+000, + -6.238704e+000, + 1.709276e+001, + 4.077916e+001, + 2.509949e+001, + // albedo 1, turbidity 10 + 9.031570e-001, + 1.863917e+000, + -1.955738e+000, + 3.032665e+000, + 5.434290e+001, + 2.641780e+001, }; -static const double datasetXYZ3[] = -{ - // albedo 0, turbidity 1 - -1.310023e+000, - -4.407658e-001, - -3.640340e+001, - 3.683292e+001, - -8.124762e-003, - 5.297961e-001, - 1.188633e-002, - 3.138320e+000, - 5.134778e-001, - -1.424100e+000, - -5.501606e-001, - -1.753510e+001, - 1.822769e+001, - -1.539272e-002, - 6.366826e-001, - 2.661996e-003, - 2.659915e+000, - 4.071138e-001, - -1.103436e+000, - -1.884105e-001, - 6.425322e+000, - -6.910579e+000, - -2.019861e-002, - 3.553271e-001, - -1.589061e-002, - 5.345985e+000, - 8.790218e-001, - -1.186200e+000, - -4.307514e-001, - -3.957947e+000, - 5.979352e+000, - -5.348869e-002, - 1.736117e+000, - 3.491346e-002, - -2.692261e+000, - 5.610506e-001, - -1.006038e+000, - -1.305995e-001, - 4.473513e+000, - -3.806719e+000, - 1.419407e-001, - -2.148238e-002, - -5.081185e-002, - 3.735362e+000, - 5.358280e-001, - -1.078507e+000, - -1.633754e-001, - -3.812368e+000, - 4.381700e+000, - 2.988122e-002, - 1.754224e+000, - 1.472376e-001, - 3.722798e+000, - 4.999157e-001, - // albedo 0, turbidity 2 - -1.333582e+000, - -4.649908e-001, - -3.359528e+001, - 3.404375e+001, - -9.384242e-003, - 5.587511e-001, - 5.726310e-003, - 3.073145e+000, - 5.425529e-001, - -1.562624e+000, - -7.107068e-001, - -1.478170e+001, - 1.559839e+001, - -1.462375e-002, - 5.050133e-001, - 2.516017e-002, - 1.604696e+000, - 2.902403e-001, - -8.930158e-001, - 4.068077e-002, - 1.373481e+000, - -2.342752e+000, - -2.098058e-002, - 6.248686e-001, - -5.258363e-002, - 7.058214e+000, - 1.150373e+000, - -1.262823e+000, - -4.818353e-001, - 8.892610e-004, - 1.923120e+000, - -4.979718e-002, - 1.040693e+000, - 1.558103e-001, - -2.852480e+000, - 2.420691e-001, - -9.968383e-001, - -1.200648e-001, - 1.324342e+000, - -9.430889e-001, - 1.931098e-001, - 4.436916e-001, - -7.320456e-002, - 4.215931e+000, - 7.898019e-001, - -1.078185e+000, - -1.718192e-001, - -1.720191e+000, - 2.358918e+000, - 2.765637e-002, - 1.260245e+000, - 2.021941e-001, - 3.395483e+000, - 5.173628e-001, - // albedo 0, turbidity 3 - -1.353023e+000, - -4.813523e-001, - -3.104920e+001, - 3.140156e+001, - -9.510741e-003, - 5.542030e-001, - 8.135471e-003, - 3.136646e+000, - 5.215989e-001, - -1.624704e+000, - -7.990201e-001, - -2.167125e+001, - 2.246341e+001, - -1.163533e-002, - 5.415746e-001, - 2.618378e-002, - 1.139214e+000, - 3.444357e-001, - -7.983610e-001, - 1.417476e-001, - 9.914841e+000, - -1.081503e+001, - -1.218845e-002, - 3.411392e-001, - -6.137698e-002, - 7.445848e+000, - 1.180080e+000, - -1.266679e+000, - -4.288977e-001, - -5.818701e+000, - 6.986437e+000, - -8.180711e-002, - 1.397403e+000, - 2.016916e-001, - -1.275731e+000, - 2.592773e-001, - -1.009707e+000, - -1.537754e-001, - 3.496378e+000, - -3.013726e+000, - 2.421150e-001, - -2.831925e-001, - 3.003395e-002, - 3.702862e+000, - 7.746320e-001, - -1.075646e+000, - -1.768747e-001, - -1.347762e+000, - 1.989004e+000, - 1.375836e-002, - 1.764810e+000, - 1.330018e-001, - 3.230864e+000, - 6.626210e-001, - // albedo 0, turbidity 4 - -1.375269e+000, - -5.103569e-001, - -3.442661e+001, - 3.478703e+001, - -8.460009e-003, - 5.408643e-001, - 4.813323e-003, - 3.016078e+000, - 5.062069e-001, - -1.821679e+000, - -9.766461e-001, - -1.926488e+001, - 1.997912e+001, - -9.822567e-003, - 3.649556e-001, - 4.316092e-002, - 8.930190e-001, - 4.166527e-001, - -6.633542e-001, - 1.997841e-001, - 2.395592e+000, - -3.117175e+000, - -1.080884e-002, - 8.983814e-001, - -1.375825e-001, - 6.673463e+000, - 1.115663e+000, - -1.303240e+000, - -3.612712e-001, - 8.292959e-002, - 3.381364e-001, - -6.078648e-002, - 3.229247e-001, - 3.680987e-001, - 7.046755e-001, - 3.144924e-001, - -9.952598e-001, - -2.039076e-001, - 4.026851e-001, - 2.686684e-001, - 1.640712e-001, - 5.186341e-001, - -1.205520e-002, - 2.659613e+000, - 8.030394e-001, - -1.098579e+000, - -2.151992e-001, - 6.558198e-001, - -7.436900e-004, - -1.421817e-003, - 1.073701e+000, - 1.886875e-001, - 2.536857e+000, - 6.673923e-001, - // albedo 0, turbidity 5 - -1.457986e+000, - -5.906842e-001, - -3.812464e+001, - 3.838539e+001, - -6.024357e-003, - 4.741484e-001, - 1.209223e-002, - 2.818432e+000, - 5.012433e-001, - -1.835728e+000, - -1.003405e+000, - -6.848129e+000, - 7.601943e+000, - -1.277375e-002, - 4.785598e-001, - 3.366853e-002, - 1.097701e+000, - 4.636635e-001, - -8.491348e-001, - 9.466365e-003, - -2.685226e+000, - 2.004060e+000, - -1.168708e-002, - 6.752316e-001, - -1.543371e-001, - 5.674759e+000, - 1.039534e+000, - -1.083379e+000, - -1.506790e-001, - 7.328236e-001, - -5.095568e-001, - -8.609153e-002, - 4.448820e-001, - 4.174662e-001, - 1.481556e+000, - 3.942551e-001, - -1.117089e+000, - -3.337605e-001, - 2.502281e-001, - 4.036323e-001, - 2.673899e-001, - 2.829817e-001, - 2.242450e-002, - 2.043207e+000, - 7.706902e-001, - -1.071648e+000, - -2.126200e-001, - 6.069466e-001, - -1.456290e-003, - -5.515960e-001, - 1.046755e+000, - 1.985021e-001, - 2.290245e+000, - 6.876058e-001, - // albedo 0, turbidity 6 - -1.483903e+000, - -6.309647e-001, - -4.380213e+001, - 4.410537e+001, - -5.712161e-003, - 5.195992e-001, - 2.028428e-003, - 2.687114e+000, - 5.098321e-001, - -2.053976e+000, - -1.141473e+000, - 5.109183e-001, - 8.060391e-002, - -1.033983e-002, - 4.066532e-001, - 4.869627e-002, - 1.161722e+000, - 4.039525e-001, - -6.348185e-001, - 7.651292e-002, - -1.031327e+001, - 1.007598e+001, - -2.083688e-002, - 7.359516e-001, - -2.029459e-001, - 5.013257e+000, - 1.077649e+000, - -1.228630e+000, - -1.650496e-001, - 4.077157e-002, - -7.189167e-001, - -5.092220e-002, - 2.959814e-001, - 5.111496e-001, - 2.540433e+000, - 3.615330e-001, - -1.041883e+000, - -3.278413e-001, - -6.691911e-002, - 1.307364e+000, - 2.166663e-001, - 3.000595e-001, - -3.157136e-003, - 1.389208e+000, - 7.999026e-001, - -1.103556e+000, - -2.443602e-001, - 4.705347e-001, - -9.296482e-004, - -5.309920e-001, - 9.654511e-001, - 2.142587e-001, - 2.244723e+000, - 6.839976e-001, - // albedo 0, turbidity 7 - -1.555684e+000, - -6.962113e-001, - -4.647983e+001, - 4.674270e+001, - -5.034895e-003, - 4.755090e-001, - -9.502561e-007, - 2.626569e+000, - 5.056194e-001, - -1.998288e+000, - -1.124720e+000, - -1.629586e+000, - 2.187993e+000, - -8.284384e-003, - 3.845258e-001, - 5.726240e-002, - 1.185644e+000, - 4.255812e-001, - -1.032570e+000, - -2.513850e-001, - -3.721112e+000, - 3.506967e+000, - -2.186561e-002, - 9.436049e-001, - -2.451412e-001, - 4.725724e+000, - 1.039256e+000, - -8.597532e-001, - 9.073332e-002, - -2.553741e+000, - 1.993237e+000, - -4.390891e-002, - -2.046928e-001, - 5.515623e-001, - 1.909127e+000, - 3.948212e-001, - -1.210482e+000, - -4.477622e-001, - -2.267805e-001, - 1.219488e+000, - 1.336186e-001, - 6.866897e-001, - 2.808997e-002, - 1.600403e+000, - 7.816409e-001, - -1.078168e+000, - -2.699261e-001, - 2.537282e-001, - 3.820684e-001, - -4.425103e-001, - 5.298235e-001, - 2.185217e-001, - 1.728679e+000, - 6.882743e-001, - // albedo 0, turbidity 8 - -1.697968e+000, - -8.391488e-001, - -5.790105e+001, - 5.814120e+001, - -3.404760e-003, - 4.265140e-001, - -1.796301e-006, - 2.368442e+000, - 5.324429e-001, - -2.141552e+000, - -1.172230e+000, - 1.677872e+001, - -1.641470e+001, - -5.732425e-003, - 2.002199e-001, - 6.841834e-002, - 1.485338e+000, - 3.215763e-001, - -1.442946e+000, - -7.264245e-001, - -9.503706e+000, - 9.650462e+000, - -2.120995e-002, - 1.419263e+000, - -2.893098e-001, - 3.860731e+000, - 1.120857e+000, - -5.696752e-001, - 3.411279e-001, - -2.931035e-001, - -6.512552e-001, - -1.068437e-001, - -1.085661e+000, - 6.107549e-001, - 1.459503e+000, - 3.210336e-001, - -1.313839e+000, - -5.921371e-001, - -2.332222e-001, - 1.648196e+000, - 2.492787e-001, - 1.381033e+000, - -1.993392e-002, - 9.812560e-001, - 8.316329e-001, - -1.087464e+000, - -3.195534e-001, - 2.902095e-001, - 3.383709e-001, - -8.798482e-001, - 1.494668e-002, - 2.529703e-001, - 1.452644e+000, - 6.693870e-001, - // albedo 0, turbidity 9 - -2.068582e+000, - -1.118605e+000, - -5.081598e+001, - 5.097486e+001, - -3.280669e-003, - 4.067371e-001, - -2.544951e-006, - 2.179497e+000, - 5.778017e-001, - -1.744693e+000, - -8.537207e-001, - 2.234361e+001, - -2.208318e+001, - -5.932616e-003, - 1.035049e-001, - 5.742772e-002, - 1.977880e+000, - 2.124846e-001, - -3.287515e+000, - -2.140268e+000, - -1.249566e+001, - 1.240091e+001, - -2.409349e-002, - 1.397821e+000, - -2.371627e-001, - 2.771192e+000, - 1.170496e+000, - 5.502311e-001, - 1.046630e+000, - 2.193517e+000, - -2.220400e+000, - -1.064394e-001, - -1.017926e+000, - 4.795457e-001, - 1.030644e+000, - 3.177516e-001, - -1.719734e+000, - -9.536198e-001, - -6.586821e-001, - 1.386361e+000, - -2.513065e-002, - 1.187011e+000, - 6.542539e-002, - 5.296055e-001, - 8.082660e-001, - -1.005700e+000, - -3.028096e-001, - 4.470957e-002, - 1.007760e+000, - -8.119016e-001, - 3.153338e-002, - 2.311321e-001, - 1.182208e+000, - 6.824758e-001, - // albedo 0, turbidity 10 - -2.728867e+000, - -1.580388e+000, - -3.079627e+001, - 3.092586e+001, - -4.197673e-003, - 3.154759e-001, - -3.897675e-006, - 1.920567e+000, - 6.664791e-001, - -1.322495e+000, - -7.249275e-001, - 1.477660e+001, - -1.468154e+001, - -9.044857e-003, - 5.624314e-002, - 6.498392e-002, - 2.047389e+000, - 6.367540e-002, - -6.102376e+000, - -3.473018e+000, - -9.926071e+000, - 9.637797e+000, - -1.097909e-002, - 1.103498e+000, - -2.424521e-001, - 2.520748e+000, - 1.240260e+000, - 1.351796e+000, - 1.018588e+000, - 2.009081e+000, - -1.333394e+000, - -1.979125e-001, - -3.318292e-001, - 4.476624e-001, - 9.095235e-001, - 2.955611e-001, - -1.774467e+000, - -1.079880e+000, - -8.084680e-002, - 2.577697e-001, - -1.149295e-001, - 4.975303e-001, - 2.931611e-003, - -3.803171e-001, - 8.002794e-001, - -9.898401e-001, - -2.542513e-001, - -7.530911e-002, - 1.870355e+000, - -1.521918e+000, - 2.405164e-001, - 2.964615e-001, - 1.334800e+000, - 6.789053e-001, - // albedo 1, turbidity 1 - -1.279730e+000, - -4.290674e-001, - -4.277972e+001, - 4.343305e+001, - -6.541826e-003, - 4.945086e-001, - 1.425338e-002, - 2.685244e+000, - 5.011313e-001, - -1.449506e+000, - -5.766374e-001, - -1.688496e+001, - 1.781118e+001, - -1.121649e-002, - 3.545020e-001, - 2.287338e-002, - 1.904281e+000, - 4.936998e-001, - -1.021980e+000, - -1.897574e-001, - 2.482462e+000, - -2.941725e+000, - -1.570448e-002, - 7.532578e-001, - -4.256800e-002, - 5.239660e+000, - 4.983116e-001, - -1.162608e+000, - -3.428049e-001, - 3.974358e+000, - -1.527935e+000, - -3.919201e-002, - 8.758593e-001, - 7.291363e-002, - -3.455257e+000, - 8.007426e-001, - -9.929985e-001, - -8.712006e-002, - -7.397313e-001, - 1.348372e+000, - 9.511685e-002, - 3.233584e-001, - -7.549148e-002, - 5.806452e+000, - 4.990042e-001, - -1.084996e+000, - -1.739767e-001, - 1.580475e-001, - 9.088180e-001, - 6.871433e-002, - 5.933079e-001, - 1.188921e-001, - 3.074079e+000, - 4.999327e-001, - // albedo 1, turbidity 2 - -1.317009e+000, - -4.661946e-001, - -4.255347e+001, - 4.312782e+001, - -5.727235e-003, - 4.285447e-001, - 2.189854e-002, - 2.608310e+000, - 5.190700e-001, - -1.469236e+000, - -6.282139e-001, - -1.241404e+001, - 1.348765e+001, - -1.204770e-002, - 5.070285e-001, - -7.280216e-004, - 1.491533e+000, - 3.635064e-001, - -9.713808e-001, - -8.138038e-002, - 3.709854e-001, - -1.041174e+000, - -1.814075e-002, - 5.060860e-001, - -2.053756e-002, - 6.161431e+000, - 1.093736e+000, - -1.159057e+000, - -3.698074e-001, - 2.711209e+000, - -6.006479e-001, - -4.896926e-002, - 9.273957e-001, - 1.137712e-001, - -3.496828e+000, - 2.867109e-001, - -1.011601e+000, - -8.201890e-002, - 2.105725e-001, - 4.597520e-001, - 1.478925e-001, - 2.138940e-001, - -5.660670e-002, - 6.057755e+000, - 7.859121e-001, - -1.078020e+000, - -1.811580e-001, - 1.646622e-001, - 8.348426e-001, - 1.149064e-001, - 4.985738e-001, - 1.376605e-001, - 2.746607e+000, - 4.999626e-001, - // albedo 1, turbidity 3 - -1.325672e+000, - -4.769313e-001, - -4.111215e+001, - 4.168293e+001, - -6.274997e-003, - 4.649469e-001, - 1.119411e-002, - 2.631267e+000, - 5.234546e-001, - -1.619391e+000, - -8.000253e-001, - -1.534098e+001, - 1.632706e+001, - -1.012023e-002, - 4.242255e-001, - 2.931597e-002, - 8.925807e-001, - 3.314765e-001, - -7.356979e-001, - 1.368406e-001, - 2.972579e+000, - -3.535359e+000, - -1.318948e-002, - 4.607620e-001, - -7.182778e-002, - 6.254100e+000, - 1.236299e+000, - -1.316217e+000, - -4.194427e-001, - 3.489902e-002, - 1.289849e+000, - -4.755960e-002, - 1.138222e+000, - 1.975992e-001, - -8.991542e-001, - 2.290572e-001, - -9.502188e-001, - -1.172703e-001, - 1.405202e+000, - -3.061919e-001, - 1.058772e-001, - -3.760592e-001, - -1.983179e-002, - 3.562353e+000, - 7.895959e-001, - -1.100117e+000, - -1.900567e-001, - 4.925030e-001, - 5.250225e-001, - 1.576804e-001, - 1.042701e+000, - 7.330743e-002, - 2.796064e+000, - 6.749783e-001, - // albedo 1, turbidity 4 - -1.354183e+000, - -5.130625e-001, - -4.219268e+001, - 4.271772e+001, - -5.365373e-003, - 4.136743e-001, - 1.235172e-002, - 2.520122e+000, - 5.187269e-001, - -1.741434e+000, - -9.589761e-001, - -8.230339e+000, - 9.296799e+000, - -9.600162e-003, - 4.994969e-001, - 2.955452e-002, - 3.667099e-001, - 3.526999e-001, - -6.917347e-001, - 2.154887e-001, - -8.760264e-001, - 2.334121e-001, - -1.909621e-002, - 4.748033e-001, - -1.138514e-001, - 6.515360e+000, - 1.225097e+000, - -1.293189e+000, - -4.218700e-001, - 1.620952e+000, - -7.858597e-001, - -3.769410e-002, - 6.636786e-001, - 3.364945e-001, - -5.341017e-001, - 2.128347e-001, - -9.735521e-001, - -1.325495e-001, - 1.007517e+000, - 2.598258e-001, - 6.762169e-002, - 1.421018e-003, - -6.915987e-002, - 3.185897e+000, - 8.641956e-001, - -1.094800e+000, - -1.962062e-001, - 5.755591e-001, - 2.906259e-001, - 2.625748e-001, - 7.644049e-001, - 1.347492e-001, - 2.677126e+000, - 6.465460e-001, - // albedo 1, turbidity 5 - -1.393063e+000, - -5.578338e-001, - -4.185249e+001, - 4.233504e+001, - -5.435640e-003, - 4.743765e-001, - 7.422477e-003, - 2.442801e+000, - 5.211707e-001, - -1.939487e+000, - -1.128509e+000, - -8.974257e+000, - 9.978383e+000, - -7.965597e-003, - 2.948830e-001, - 4.436763e-002, - 2.839868e-001, - 3.440424e-001, - -6.011562e-001, - 2.354877e-001, - -3.079820e+000, - 2.585094e+000, - -2.002701e-002, - 7.793909e-001, - -1.598414e-001, - 5.834678e+000, - 1.202856e+000, - -1.315676e+000, - -3.903446e-001, - 1.701900e+000, - -1.304609e+000, - -1.045121e-002, - 2.747707e-001, - 4.143967e-001, - 3.197102e-001, - 2.637580e-001, - -9.618628e-001, - -1.625841e-001, - 1.187138e+000, - 1.497802e-001, - -5.590954e-006, - 3.178475e-002, - -4.153145e-002, - 2.496096e+000, - 8.195082e-001, - -1.111554e+000, - -2.365546e-001, - 7.831875e-001, - 2.018684e-001, - 2.074369e-001, - 7.395978e-001, - 1.225730e-001, - 1.876478e+000, - 6.821167e-001, - // albedo 1, turbidity 6 - -1.427879e+000, - -5.994879e-001, - -3.531016e+001, - 3.581581e+001, - -6.431497e-003, - 4.554192e-001, - 7.348731e-004, - 2.334619e+000, - 5.233377e-001, - -1.998177e+000, - -1.206633e+000, - -2.146510e+001, - 2.242237e+001, - -5.857596e-003, - 2.755663e-001, - 6.384795e-002, - 1.358244e-001, - 3.328437e-001, - -6.440630e-001, - 2.058571e-001, - 2.155499e+000, - -2.587968e+000, - -1.840023e-002, - 8.826555e-001, - -2.222452e-001, - 5.847073e+000, - 1.228387e+000, - -1.229071e+000, - -3.360441e-001, - -3.429599e-001, - 6.179469e-001, - 2.029610e-003, - 8.899319e-002, - 5.041624e-001, - 1.882964e-001, - 2.252040e-001, - -1.022905e+000, - -2.101621e-001, - 1.915689e+000, - -6.498794e-001, - -3.463651e-002, - 8.954605e-002, - -6.797854e-002, - 2.417705e+000, - 8.568618e-001, - -1.082538e+000, - -2.007723e-001, - 4.731009e-001, - 4.077267e-001, - 1.324289e-001, - 6.514880e-001, - 1.702912e-001, - 2.309383e+000, - 6.600895e-001, - // albedo 1, turbidity 7 - -1.472139e+000, - -6.499815e-001, - -3.428465e+001, - 3.469659e+001, - -5.747023e-003, - 4.174167e-001, - 1.688597e-003, - 2.323046e+000, - 5.395191e-001, - -2.161176e+000, - -1.353089e+000, - -2.226827e+001, - 2.329138e+001, - -5.583808e-003, - 2.364793e-001, - 6.096656e-002, - 1.944666e-003, - 2.861624e-001, - -6.593044e-001, - 1.393558e-001, - 4.698373e+000, - -5.193883e+000, - -1.998390e-002, - 1.095635e+000, - -2.391254e-001, - 5.598103e+000, - 1.236193e+000, - -1.195717e+000, - -2.972715e-001, - 4.648953e-002, - 3.024588e-001, - 5.003313e-003, - -3.754741e-001, - 5.247265e-001, - -1.381312e-001, - 2.493896e-001, - -1.020139e+000, - -2.253524e-001, - 3.548437e-001, - 7.030485e-001, - -2.107076e-002, - 4.581395e-001, - -3.243757e-002, - 2.453259e+000, - 8.323623e-001, - -1.098770e+000, - -2.435780e-001, - 8.761614e-001, - 1.941613e-001, - -1.990692e-001, - 3.761139e-001, - 1.657412e-001, - 1.590503e+000, - 6.741417e-001, - // albedo 1, turbidity 8 - -1.648007e+000, - -8.205121e-001, - -4.435106e+001, - 4.479801e+001, - -4.181353e-003, - 3.854830e-001, - -1.842385e-006, - 2.000281e+000, - 5.518363e-001, - -2.140986e+000, - -1.282239e+000, - -3.979213e+000, - 4.672459e+000, - -5.008582e-003, - 2.421920e-001, - 6.253602e-002, - 6.612713e-001, - 2.555851e-001, - -1.300502e+000, - -5.137898e-001, - 5.179821e-001, - -4.032341e-001, - -2.066785e-002, - 1.087929e+000, - -2.615309e-001, - 4.225887e+000, - 1.229237e+000, - -6.963340e-001, - 9.241060e-002, - 6.936356e-002, - -3.588571e-001, - -5.461843e-002, - -5.616643e-001, - 5.484166e-001, - -4.776267e-002, - 2.414935e-001, - -1.233179e+000, - -4.325498e-001, - 6.479813e-001, - 8.368356e-001, - 2.458875e-001, - 6.464752e-001, - -2.897097e-002, - 1.561773e+000, - 8.518598e-001, - -1.051023e+000, - -2.533690e-001, - 1.004294e+000, - 3.028083e-001, - -1.520108e+000, - 1.607013e-001, - 1.619975e-001, - 1.131094e+000, - 6.706655e-001, - // albedo 1, turbidity 9 - -1.948249e+000, - -1.097383e+000, - -4.453697e+001, - 4.494902e+001, - -3.579939e-003, - 3.491605e-001, - -2.500253e-006, - 1.740442e+000, - 6.188022e-001, - -2.154253e+000, - -1.209559e+000, - 4.144894e+000, - -3.562411e+000, - -5.638843e-003, - 1.067169e-001, - 7.594858e-002, - 1.005280e+000, - 1.072543e-001, - -2.513259e+000, - -1.507208e+000, - -1.602979e+000, - 1.404154e+000, - -5.560750e-003, - 1.240490e+000, - -2.852117e-001, - 3.485252e+000, - 1.349321e+000, - -7.832214e-002, - 3.655626e-001, - 3.856288e-001, - 6.867894e-001, - -1.609523e-001, - -6.704306e-001, - 5.357301e-001, - -6.457935e-001, - 1.479503e-001, - -1.354784e+000, - -5.454375e-001, - 8.797469e-001, - -1.466514e+000, - 7.134420e-001, - 5.934903e-001, - -2.911178e-002, - 8.643737e-001, - 9.030724e-001, - -1.048324e+000, - -2.738736e-001, - 8.783074e-001, - 3.246188e+000, - -4.435369e+000, - 1.251791e-001, - 1.783486e-001, - 1.064657e+000, - 6.522878e-001, - // albedo 1, turbidity 10 - -2.770408e+000, - -1.618911e+000, - -2.504031e+001, - 2.531674e+001, - -4.239279e-003, - 3.241013e-001, - -3.764484e-006, - 1.586843e+000, - 7.035906e-001, - -1.913500e+000, - -1.144014e+000, - -1.080587e+001, - 1.153677e+001, - -1.003197e-002, - 1.577515e-001, - 5.217789e-002, - 1.225278e+000, - 5.172771e-003, - -5.293208e+000, - -2.876463e+000, - 2.087053e+000, - -3.201552e+000, - 3.892964e-003, - 5.323930e-001, - -2.034512e-001, - 2.617760e+000, - 1.273597e+000, - 9.060340e-001, - 3.773409e-001, - -6.399945e-001, - 3.213979e+000, - -9.112172e-002, - 6.494055e-001, - 3.953280e-001, - 5.047796e-001, - 2.998695e-001, - -1.482179e+000, - -6.778310e-001, - 1.161775e+000, - -3.004872e+000, - 4.774797e-001, - -4.969248e-001, - -3.512074e-003, - -1.307190e+000, - 7.927378e-001, - -9.863181e-001, - -1.803364e-001, - 5.810824e-001, - 4.580570e+000, - -3.863454e+000, - 5.328174e-001, - 2.272821e-001, - 1.771114e+000, - 6.791814e-001, +static const double datasetXYZ3[] = { + // albedo 0, turbidity 1 + -1.310023e+000, + -4.407658e-001, + -3.640340e+001, + 3.683292e+001, + -8.124762e-003, + 5.297961e-001, + 1.188633e-002, + 3.138320e+000, + 5.134778e-001, + -1.424100e+000, + -5.501606e-001, + -1.753510e+001, + 1.822769e+001, + -1.539272e-002, + 6.366826e-001, + 2.661996e-003, + 2.659915e+000, + 4.071138e-001, + -1.103436e+000, + -1.884105e-001, + 6.425322e+000, + -6.910579e+000, + -2.019861e-002, + 3.553271e-001, + -1.589061e-002, + 5.345985e+000, + 8.790218e-001, + -1.186200e+000, + -4.307514e-001, + -3.957947e+000, + 5.979352e+000, + -5.348869e-002, + 1.736117e+000, + 3.491346e-002, + -2.692261e+000, + 5.610506e-001, + -1.006038e+000, + -1.305995e-001, + 4.473513e+000, + -3.806719e+000, + 1.419407e-001, + -2.148238e-002, + -5.081185e-002, + 3.735362e+000, + 5.358280e-001, + -1.078507e+000, + -1.633754e-001, + -3.812368e+000, + 4.381700e+000, + 2.988122e-002, + 1.754224e+000, + 1.472376e-001, + 3.722798e+000, + 4.999157e-001, + // albedo 0, turbidity 2 + -1.333582e+000, + -4.649908e-001, + -3.359528e+001, + 3.404375e+001, + -9.384242e-003, + 5.587511e-001, + 5.726310e-003, + 3.073145e+000, + 5.425529e-001, + -1.562624e+000, + -7.107068e-001, + -1.478170e+001, + 1.559839e+001, + -1.462375e-002, + 5.050133e-001, + 2.516017e-002, + 1.604696e+000, + 2.902403e-001, + -8.930158e-001, + 4.068077e-002, + 1.373481e+000, + -2.342752e+000, + -2.098058e-002, + 6.248686e-001, + -5.258363e-002, + 7.058214e+000, + 1.150373e+000, + -1.262823e+000, + -4.818353e-001, + 8.892610e-004, + 1.923120e+000, + -4.979718e-002, + 1.040693e+000, + 1.558103e-001, + -2.852480e+000, + 2.420691e-001, + -9.968383e-001, + -1.200648e-001, + 1.324342e+000, + -9.430889e-001, + 1.931098e-001, + 4.436916e-001, + -7.320456e-002, + 4.215931e+000, + 7.898019e-001, + -1.078185e+000, + -1.718192e-001, + -1.720191e+000, + 2.358918e+000, + 2.765637e-002, + 1.260245e+000, + 2.021941e-001, + 3.395483e+000, + 5.173628e-001, + // albedo 0, turbidity 3 + -1.353023e+000, + -4.813523e-001, + -3.104920e+001, + 3.140156e+001, + -9.510741e-003, + 5.542030e-001, + 8.135471e-003, + 3.136646e+000, + 5.215989e-001, + -1.624704e+000, + -7.990201e-001, + -2.167125e+001, + 2.246341e+001, + -1.163533e-002, + 5.415746e-001, + 2.618378e-002, + 1.139214e+000, + 3.444357e-001, + -7.983610e-001, + 1.417476e-001, + 9.914841e+000, + -1.081503e+001, + -1.218845e-002, + 3.411392e-001, + -6.137698e-002, + 7.445848e+000, + 1.180080e+000, + -1.266679e+000, + -4.288977e-001, + -5.818701e+000, + 6.986437e+000, + -8.180711e-002, + 1.397403e+000, + 2.016916e-001, + -1.275731e+000, + 2.592773e-001, + -1.009707e+000, + -1.537754e-001, + 3.496378e+000, + -3.013726e+000, + 2.421150e-001, + -2.831925e-001, + 3.003395e-002, + 3.702862e+000, + 7.746320e-001, + -1.075646e+000, + -1.768747e-001, + -1.347762e+000, + 1.989004e+000, + 1.375836e-002, + 1.764810e+000, + 1.330018e-001, + 3.230864e+000, + 6.626210e-001, + // albedo 0, turbidity 4 + -1.375269e+000, + -5.103569e-001, + -3.442661e+001, + 3.478703e+001, + -8.460009e-003, + 5.408643e-001, + 4.813323e-003, + 3.016078e+000, + 5.062069e-001, + -1.821679e+000, + -9.766461e-001, + -1.926488e+001, + 1.997912e+001, + -9.822567e-003, + 3.649556e-001, + 4.316092e-002, + 8.930190e-001, + 4.166527e-001, + -6.633542e-001, + 1.997841e-001, + 2.395592e+000, + -3.117175e+000, + -1.080884e-002, + 8.983814e-001, + -1.375825e-001, + 6.673463e+000, + 1.115663e+000, + -1.303240e+000, + -3.612712e-001, + 8.292959e-002, + 3.381364e-001, + -6.078648e-002, + 3.229247e-001, + 3.680987e-001, + 7.046755e-001, + 3.144924e-001, + -9.952598e-001, + -2.039076e-001, + 4.026851e-001, + 2.686684e-001, + 1.640712e-001, + 5.186341e-001, + -1.205520e-002, + 2.659613e+000, + 8.030394e-001, + -1.098579e+000, + -2.151992e-001, + 6.558198e-001, + -7.436900e-004, + -1.421817e-003, + 1.073701e+000, + 1.886875e-001, + 2.536857e+000, + 6.673923e-001, + // albedo 0, turbidity 5 + -1.457986e+000, + -5.906842e-001, + -3.812464e+001, + 3.838539e+001, + -6.024357e-003, + 4.741484e-001, + 1.209223e-002, + 2.818432e+000, + 5.012433e-001, + -1.835728e+000, + -1.003405e+000, + -6.848129e+000, + 7.601943e+000, + -1.277375e-002, + 4.785598e-001, + 3.366853e-002, + 1.097701e+000, + 4.636635e-001, + -8.491348e-001, + 9.466365e-003, + -2.685226e+000, + 2.004060e+000, + -1.168708e-002, + 6.752316e-001, + -1.543371e-001, + 5.674759e+000, + 1.039534e+000, + -1.083379e+000, + -1.506790e-001, + 7.328236e-001, + -5.095568e-001, + -8.609153e-002, + 4.448820e-001, + 4.174662e-001, + 1.481556e+000, + 3.942551e-001, + -1.117089e+000, + -3.337605e-001, + 2.502281e-001, + 4.036323e-001, + 2.673899e-001, + 2.829817e-001, + 2.242450e-002, + 2.043207e+000, + 7.706902e-001, + -1.071648e+000, + -2.126200e-001, + 6.069466e-001, + -1.456290e-003, + -5.515960e-001, + 1.046755e+000, + 1.985021e-001, + 2.290245e+000, + 6.876058e-001, + // albedo 0, turbidity 6 + -1.483903e+000, + -6.309647e-001, + -4.380213e+001, + 4.410537e+001, + -5.712161e-003, + 5.195992e-001, + 2.028428e-003, + 2.687114e+000, + 5.098321e-001, + -2.053976e+000, + -1.141473e+000, + 5.109183e-001, + 8.060391e-002, + -1.033983e-002, + 4.066532e-001, + 4.869627e-002, + 1.161722e+000, + 4.039525e-001, + -6.348185e-001, + 7.651292e-002, + -1.031327e+001, + 1.007598e+001, + -2.083688e-002, + 7.359516e-001, + -2.029459e-001, + 5.013257e+000, + 1.077649e+000, + -1.228630e+000, + -1.650496e-001, + 4.077157e-002, + -7.189167e-001, + -5.092220e-002, + 2.959814e-001, + 5.111496e-001, + 2.540433e+000, + 3.615330e-001, + -1.041883e+000, + -3.278413e-001, + -6.691911e-002, + 1.307364e+000, + 2.166663e-001, + 3.000595e-001, + -3.157136e-003, + 1.389208e+000, + 7.999026e-001, + -1.103556e+000, + -2.443602e-001, + 4.705347e-001, + -9.296482e-004, + -5.309920e-001, + 9.654511e-001, + 2.142587e-001, + 2.244723e+000, + 6.839976e-001, + // albedo 0, turbidity 7 + -1.555684e+000, + -6.962113e-001, + -4.647983e+001, + 4.674270e+001, + -5.034895e-003, + 4.755090e-001, + -9.502561e-007, + 2.626569e+000, + 5.056194e-001, + -1.998288e+000, + -1.124720e+000, + -1.629586e+000, + 2.187993e+000, + -8.284384e-003, + 3.845258e-001, + 5.726240e-002, + 1.185644e+000, + 4.255812e-001, + -1.032570e+000, + -2.513850e-001, + -3.721112e+000, + 3.506967e+000, + -2.186561e-002, + 9.436049e-001, + -2.451412e-001, + 4.725724e+000, + 1.039256e+000, + -8.597532e-001, + 9.073332e-002, + -2.553741e+000, + 1.993237e+000, + -4.390891e-002, + -2.046928e-001, + 5.515623e-001, + 1.909127e+000, + 3.948212e-001, + -1.210482e+000, + -4.477622e-001, + -2.267805e-001, + 1.219488e+000, + 1.336186e-001, + 6.866897e-001, + 2.808997e-002, + 1.600403e+000, + 7.816409e-001, + -1.078168e+000, + -2.699261e-001, + 2.537282e-001, + 3.820684e-001, + -4.425103e-001, + 5.298235e-001, + 2.185217e-001, + 1.728679e+000, + 6.882743e-001, + // albedo 0, turbidity 8 + -1.697968e+000, + -8.391488e-001, + -5.790105e+001, + 5.814120e+001, + -3.404760e-003, + 4.265140e-001, + -1.796301e-006, + 2.368442e+000, + 5.324429e-001, + -2.141552e+000, + -1.172230e+000, + 1.677872e+001, + -1.641470e+001, + -5.732425e-003, + 2.002199e-001, + 6.841834e-002, + 1.485338e+000, + 3.215763e-001, + -1.442946e+000, + -7.264245e-001, + -9.503706e+000, + 9.650462e+000, + -2.120995e-002, + 1.419263e+000, + -2.893098e-001, + 3.860731e+000, + 1.120857e+000, + -5.696752e-001, + 3.411279e-001, + -2.931035e-001, + -6.512552e-001, + -1.068437e-001, + -1.085661e+000, + 6.107549e-001, + 1.459503e+000, + 3.210336e-001, + -1.313839e+000, + -5.921371e-001, + -2.332222e-001, + 1.648196e+000, + 2.492787e-001, + 1.381033e+000, + -1.993392e-002, + 9.812560e-001, + 8.316329e-001, + -1.087464e+000, + -3.195534e-001, + 2.902095e-001, + 3.383709e-001, + -8.798482e-001, + 1.494668e-002, + 2.529703e-001, + 1.452644e+000, + 6.693870e-001, + // albedo 0, turbidity 9 + -2.068582e+000, + -1.118605e+000, + -5.081598e+001, + 5.097486e+001, + -3.280669e-003, + 4.067371e-001, + -2.544951e-006, + 2.179497e+000, + 5.778017e-001, + -1.744693e+000, + -8.537207e-001, + 2.234361e+001, + -2.208318e+001, + -5.932616e-003, + 1.035049e-001, + 5.742772e-002, + 1.977880e+000, + 2.124846e-001, + -3.287515e+000, + -2.140268e+000, + -1.249566e+001, + 1.240091e+001, + -2.409349e-002, + 1.397821e+000, + -2.371627e-001, + 2.771192e+000, + 1.170496e+000, + 5.502311e-001, + 1.046630e+000, + 2.193517e+000, + -2.220400e+000, + -1.064394e-001, + -1.017926e+000, + 4.795457e-001, + 1.030644e+000, + 3.177516e-001, + -1.719734e+000, + -9.536198e-001, + -6.586821e-001, + 1.386361e+000, + -2.513065e-002, + 1.187011e+000, + 6.542539e-002, + 5.296055e-001, + 8.082660e-001, + -1.005700e+000, + -3.028096e-001, + 4.470957e-002, + 1.007760e+000, + -8.119016e-001, + 3.153338e-002, + 2.311321e-001, + 1.182208e+000, + 6.824758e-001, + // albedo 0, turbidity 10 + -2.728867e+000, + -1.580388e+000, + -3.079627e+001, + 3.092586e+001, + -4.197673e-003, + 3.154759e-001, + -3.897675e-006, + 1.920567e+000, + 6.664791e-001, + -1.322495e+000, + -7.249275e-001, + 1.477660e+001, + -1.468154e+001, + -9.044857e-003, + 5.624314e-002, + 6.498392e-002, + 2.047389e+000, + 6.367540e-002, + -6.102376e+000, + -3.473018e+000, + -9.926071e+000, + 9.637797e+000, + -1.097909e-002, + 1.103498e+000, + -2.424521e-001, + 2.520748e+000, + 1.240260e+000, + 1.351796e+000, + 1.018588e+000, + 2.009081e+000, + -1.333394e+000, + -1.979125e-001, + -3.318292e-001, + 4.476624e-001, + 9.095235e-001, + 2.955611e-001, + -1.774467e+000, + -1.079880e+000, + -8.084680e-002, + 2.577697e-001, + -1.149295e-001, + 4.975303e-001, + 2.931611e-003, + -3.803171e-001, + 8.002794e-001, + -9.898401e-001, + -2.542513e-001, + -7.530911e-002, + 1.870355e+000, + -1.521918e+000, + 2.405164e-001, + 2.964615e-001, + 1.334800e+000, + 6.789053e-001, + // albedo 1, turbidity 1 + -1.279730e+000, + -4.290674e-001, + -4.277972e+001, + 4.343305e+001, + -6.541826e-003, + 4.945086e-001, + 1.425338e-002, + 2.685244e+000, + 5.011313e-001, + -1.449506e+000, + -5.766374e-001, + -1.688496e+001, + 1.781118e+001, + -1.121649e-002, + 3.545020e-001, + 2.287338e-002, + 1.904281e+000, + 4.936998e-001, + -1.021980e+000, + -1.897574e-001, + 2.482462e+000, + -2.941725e+000, + -1.570448e-002, + 7.532578e-001, + -4.256800e-002, + 5.239660e+000, + 4.983116e-001, + -1.162608e+000, + -3.428049e-001, + 3.974358e+000, + -1.527935e+000, + -3.919201e-002, + 8.758593e-001, + 7.291363e-002, + -3.455257e+000, + 8.007426e-001, + -9.929985e-001, + -8.712006e-002, + -7.397313e-001, + 1.348372e+000, + 9.511685e-002, + 3.233584e-001, + -7.549148e-002, + 5.806452e+000, + 4.990042e-001, + -1.084996e+000, + -1.739767e-001, + 1.580475e-001, + 9.088180e-001, + 6.871433e-002, + 5.933079e-001, + 1.188921e-001, + 3.074079e+000, + 4.999327e-001, + // albedo 1, turbidity 2 + -1.317009e+000, + -4.661946e-001, + -4.255347e+001, + 4.312782e+001, + -5.727235e-003, + 4.285447e-001, + 2.189854e-002, + 2.608310e+000, + 5.190700e-001, + -1.469236e+000, + -6.282139e-001, + -1.241404e+001, + 1.348765e+001, + -1.204770e-002, + 5.070285e-001, + -7.280216e-004, + 1.491533e+000, + 3.635064e-001, + -9.713808e-001, + -8.138038e-002, + 3.709854e-001, + -1.041174e+000, + -1.814075e-002, + 5.060860e-001, + -2.053756e-002, + 6.161431e+000, + 1.093736e+000, + -1.159057e+000, + -3.698074e-001, + 2.711209e+000, + -6.006479e-001, + -4.896926e-002, + 9.273957e-001, + 1.137712e-001, + -3.496828e+000, + 2.867109e-001, + -1.011601e+000, + -8.201890e-002, + 2.105725e-001, + 4.597520e-001, + 1.478925e-001, + 2.138940e-001, + -5.660670e-002, + 6.057755e+000, + 7.859121e-001, + -1.078020e+000, + -1.811580e-001, + 1.646622e-001, + 8.348426e-001, + 1.149064e-001, + 4.985738e-001, + 1.376605e-001, + 2.746607e+000, + 4.999626e-001, + // albedo 1, turbidity 3 + -1.325672e+000, + -4.769313e-001, + -4.111215e+001, + 4.168293e+001, + -6.274997e-003, + 4.649469e-001, + 1.119411e-002, + 2.631267e+000, + 5.234546e-001, + -1.619391e+000, + -8.000253e-001, + -1.534098e+001, + 1.632706e+001, + -1.012023e-002, + 4.242255e-001, + 2.931597e-002, + 8.925807e-001, + 3.314765e-001, + -7.356979e-001, + 1.368406e-001, + 2.972579e+000, + -3.535359e+000, + -1.318948e-002, + 4.607620e-001, + -7.182778e-002, + 6.254100e+000, + 1.236299e+000, + -1.316217e+000, + -4.194427e-001, + 3.489902e-002, + 1.289849e+000, + -4.755960e-002, + 1.138222e+000, + 1.975992e-001, + -8.991542e-001, + 2.290572e-001, + -9.502188e-001, + -1.172703e-001, + 1.405202e+000, + -3.061919e-001, + 1.058772e-001, + -3.760592e-001, + -1.983179e-002, + 3.562353e+000, + 7.895959e-001, + -1.100117e+000, + -1.900567e-001, + 4.925030e-001, + 5.250225e-001, + 1.576804e-001, + 1.042701e+000, + 7.330743e-002, + 2.796064e+000, + 6.749783e-001, + // albedo 1, turbidity 4 + -1.354183e+000, + -5.130625e-001, + -4.219268e+001, + 4.271772e+001, + -5.365373e-003, + 4.136743e-001, + 1.235172e-002, + 2.520122e+000, + 5.187269e-001, + -1.741434e+000, + -9.589761e-001, + -8.230339e+000, + 9.296799e+000, + -9.600162e-003, + 4.994969e-001, + 2.955452e-002, + 3.667099e-001, + 3.526999e-001, + -6.917347e-001, + 2.154887e-001, + -8.760264e-001, + 2.334121e-001, + -1.909621e-002, + 4.748033e-001, + -1.138514e-001, + 6.515360e+000, + 1.225097e+000, + -1.293189e+000, + -4.218700e-001, + 1.620952e+000, + -7.858597e-001, + -3.769410e-002, + 6.636786e-001, + 3.364945e-001, + -5.341017e-001, + 2.128347e-001, + -9.735521e-001, + -1.325495e-001, + 1.007517e+000, + 2.598258e-001, + 6.762169e-002, + 1.421018e-003, + -6.915987e-002, + 3.185897e+000, + 8.641956e-001, + -1.094800e+000, + -1.962062e-001, + 5.755591e-001, + 2.906259e-001, + 2.625748e-001, + 7.644049e-001, + 1.347492e-001, + 2.677126e+000, + 6.465460e-001, + // albedo 1, turbidity 5 + -1.393063e+000, + -5.578338e-001, + -4.185249e+001, + 4.233504e+001, + -5.435640e-003, + 4.743765e-001, + 7.422477e-003, + 2.442801e+000, + 5.211707e-001, + -1.939487e+000, + -1.128509e+000, + -8.974257e+000, + 9.978383e+000, + -7.965597e-003, + 2.948830e-001, + 4.436763e-002, + 2.839868e-001, + 3.440424e-001, + -6.011562e-001, + 2.354877e-001, + -3.079820e+000, + 2.585094e+000, + -2.002701e-002, + 7.793909e-001, + -1.598414e-001, + 5.834678e+000, + 1.202856e+000, + -1.315676e+000, + -3.903446e-001, + 1.701900e+000, + -1.304609e+000, + -1.045121e-002, + 2.747707e-001, + 4.143967e-001, + 3.197102e-001, + 2.637580e-001, + -9.618628e-001, + -1.625841e-001, + 1.187138e+000, + 1.497802e-001, + -5.590954e-006, + 3.178475e-002, + -4.153145e-002, + 2.496096e+000, + 8.195082e-001, + -1.111554e+000, + -2.365546e-001, + 7.831875e-001, + 2.018684e-001, + 2.074369e-001, + 7.395978e-001, + 1.225730e-001, + 1.876478e+000, + 6.821167e-001, + // albedo 1, turbidity 6 + -1.427879e+000, + -5.994879e-001, + -3.531016e+001, + 3.581581e+001, + -6.431497e-003, + 4.554192e-001, + 7.348731e-004, + 2.334619e+000, + 5.233377e-001, + -1.998177e+000, + -1.206633e+000, + -2.146510e+001, + 2.242237e+001, + -5.857596e-003, + 2.755663e-001, + 6.384795e-002, + 1.358244e-001, + 3.328437e-001, + -6.440630e-001, + 2.058571e-001, + 2.155499e+000, + -2.587968e+000, + -1.840023e-002, + 8.826555e-001, + -2.222452e-001, + 5.847073e+000, + 1.228387e+000, + -1.229071e+000, + -3.360441e-001, + -3.429599e-001, + 6.179469e-001, + 2.029610e-003, + 8.899319e-002, + 5.041624e-001, + 1.882964e-001, + 2.252040e-001, + -1.022905e+000, + -2.101621e-001, + 1.915689e+000, + -6.498794e-001, + -3.463651e-002, + 8.954605e-002, + -6.797854e-002, + 2.417705e+000, + 8.568618e-001, + -1.082538e+000, + -2.007723e-001, + 4.731009e-001, + 4.077267e-001, + 1.324289e-001, + 6.514880e-001, + 1.702912e-001, + 2.309383e+000, + 6.600895e-001, + // albedo 1, turbidity 7 + -1.472139e+000, + -6.499815e-001, + -3.428465e+001, + 3.469659e+001, + -5.747023e-003, + 4.174167e-001, + 1.688597e-003, + 2.323046e+000, + 5.395191e-001, + -2.161176e+000, + -1.353089e+000, + -2.226827e+001, + 2.329138e+001, + -5.583808e-003, + 2.364793e-001, + 6.096656e-002, + 1.944666e-003, + 2.861624e-001, + -6.593044e-001, + 1.393558e-001, + 4.698373e+000, + -5.193883e+000, + -1.998390e-002, + 1.095635e+000, + -2.391254e-001, + 5.598103e+000, + 1.236193e+000, + -1.195717e+000, + -2.972715e-001, + 4.648953e-002, + 3.024588e-001, + 5.003313e-003, + -3.754741e-001, + 5.247265e-001, + -1.381312e-001, + 2.493896e-001, + -1.020139e+000, + -2.253524e-001, + 3.548437e-001, + 7.030485e-001, + -2.107076e-002, + 4.581395e-001, + -3.243757e-002, + 2.453259e+000, + 8.323623e-001, + -1.098770e+000, + -2.435780e-001, + 8.761614e-001, + 1.941613e-001, + -1.990692e-001, + 3.761139e-001, + 1.657412e-001, + 1.590503e+000, + 6.741417e-001, + // albedo 1, turbidity 8 + -1.648007e+000, + -8.205121e-001, + -4.435106e+001, + 4.479801e+001, + -4.181353e-003, + 3.854830e-001, + -1.842385e-006, + 2.000281e+000, + 5.518363e-001, + -2.140986e+000, + -1.282239e+000, + -3.979213e+000, + 4.672459e+000, + -5.008582e-003, + 2.421920e-001, + 6.253602e-002, + 6.612713e-001, + 2.555851e-001, + -1.300502e+000, + -5.137898e-001, + 5.179821e-001, + -4.032341e-001, + -2.066785e-002, + 1.087929e+000, + -2.615309e-001, + 4.225887e+000, + 1.229237e+000, + -6.963340e-001, + 9.241060e-002, + 6.936356e-002, + -3.588571e-001, + -5.461843e-002, + -5.616643e-001, + 5.484166e-001, + -4.776267e-002, + 2.414935e-001, + -1.233179e+000, + -4.325498e-001, + 6.479813e-001, + 8.368356e-001, + 2.458875e-001, + 6.464752e-001, + -2.897097e-002, + 1.561773e+000, + 8.518598e-001, + -1.051023e+000, + -2.533690e-001, + 1.004294e+000, + 3.028083e-001, + -1.520108e+000, + 1.607013e-001, + 1.619975e-001, + 1.131094e+000, + 6.706655e-001, + // albedo 1, turbidity 9 + -1.948249e+000, + -1.097383e+000, + -4.453697e+001, + 4.494902e+001, + -3.579939e-003, + 3.491605e-001, + -2.500253e-006, + 1.740442e+000, + 6.188022e-001, + -2.154253e+000, + -1.209559e+000, + 4.144894e+000, + -3.562411e+000, + -5.638843e-003, + 1.067169e-001, + 7.594858e-002, + 1.005280e+000, + 1.072543e-001, + -2.513259e+000, + -1.507208e+000, + -1.602979e+000, + 1.404154e+000, + -5.560750e-003, + 1.240490e+000, + -2.852117e-001, + 3.485252e+000, + 1.349321e+000, + -7.832214e-002, + 3.655626e-001, + 3.856288e-001, + 6.867894e-001, + -1.609523e-001, + -6.704306e-001, + 5.357301e-001, + -6.457935e-001, + 1.479503e-001, + -1.354784e+000, + -5.454375e-001, + 8.797469e-001, + -1.466514e+000, + 7.134420e-001, + 5.934903e-001, + -2.911178e-002, + 8.643737e-001, + 9.030724e-001, + -1.048324e+000, + -2.738736e-001, + 8.783074e-001, + 3.246188e+000, + -4.435369e+000, + 1.251791e-001, + 1.783486e-001, + 1.064657e+000, + 6.522878e-001, + // albedo 1, turbidity 10 + -2.770408e+000, + -1.618911e+000, + -2.504031e+001, + 2.531674e+001, + -4.239279e-003, + 3.241013e-001, + -3.764484e-006, + 1.586843e+000, + 7.035906e-001, + -1.913500e+000, + -1.144014e+000, + -1.080587e+001, + 1.153677e+001, + -1.003197e-002, + 1.577515e-001, + 5.217789e-002, + 1.225278e+000, + 5.172771e-003, + -5.293208e+000, + -2.876463e+000, + 2.087053e+000, + -3.201552e+000, + 3.892964e-003, + 5.323930e-001, + -2.034512e-001, + 2.617760e+000, + 1.273597e+000, + 9.060340e-001, + 3.773409e-001, + -6.399945e-001, + 3.213979e+000, + -9.112172e-002, + 6.494055e-001, + 3.953280e-001, + 5.047796e-001, + 2.998695e-001, + -1.482179e+000, + -6.778310e-001, + 1.161775e+000, + -3.004872e+000, + 4.774797e-001, + -4.969248e-001, + -3.512074e-003, + -1.307190e+000, + 7.927378e-001, + -9.863181e-001, + -1.803364e-001, + 5.810824e-001, + 4.580570e+000, + -3.863454e+000, + 5.328174e-001, + 2.272821e-001, + 1.771114e+000, + 6.791814e-001, }; -static const double datasetXYZRad3[] = -{ - // albedo 0, turbidity 1 - 1.168084e+000, - 2.156455e+000, - -3.980314e+000, - 1.989302e+001, - 1.328335e+001, - 1.435621e+001, - // albedo 0, turbidity 2 - 1.135488e+000, - 2.294701e+000, - -4.585886e+000, - 2.090208e+001, - 1.347840e+001, - 1.467658e+001, - // albedo 0, turbidity 3 - 1.107408e+000, - 2.382765e+000, - -5.112357e+000, - 2.147823e+001, - 1.493128e+001, - 1.460882e+001, - // albedo 0, turbidity 4 - 1.054193e+000, - 2.592891e+000, - -6.115000e+000, - 2.268967e+001, - 1.635672e+001, - 1.518999e+001, - // albedo 0, turbidity 5 - 1.006946e+000, - 2.705420e+000, - -6.698930e+000, - 2.291830e+001, - 1.834324e+001, - 1.570651e+001, - // albedo 0, turbidity 6 - 9.794044e-001, - 2.742440e+000, - -6.805283e+000, - 2.225271e+001, - 2.050797e+001, - 1.563130e+001, - // albedo 0, turbidity 7 - 9.413577e-001, - 2.722009e+000, - -6.760707e+000, - 2.098242e+001, - 2.342588e+001, - 1.605011e+001, - // albedo 0, turbidity 8 - 8.917923e-001, - 2.592780e+000, - -6.152635e+000, - 1.774141e+001, - 2.858324e+001, - 1.657910e+001, - // albedo 0, turbidity 9 - 8.288391e-001, - 2.153434e+000, - -4.118327e+000, - 1.078118e+001, - 3.681710e+001, - 1.738139e+001, - // albedo 0, turbidity 10 - 7.623528e-001, - 1.418187e+000, - -8.845235e-001, - 7.590129e-001, - 4.629859e+001, - 1.921657e+001, - // albedo 1, turbidity 1 - 1.352858e+000, - 2.048862e+000, - -2.053393e+000, - 1.405874e+001, - 3.045344e+001, - 3.044430e+001, - // albedo 1, turbidity 2 - 1.330497e+000, - 2.126497e+000, - -2.466296e+000, - 1.467559e+001, - 3.090738e+001, - 3.069707e+001, - // albedo 1, turbidity 3 - 1.286344e+000, - 2.200436e+000, - -2.877228e+000, - 1.492701e+001, - 3.236288e+001, - 3.077223e+001, - // albedo 1, turbidity 4 - 1.234428e+000, - 2.289628e+000, - -3.404699e+000, - 1.499436e+001, - 3.468390e+001, - 3.084842e+001, - // albedo 1, turbidity 5 - 1.178660e+000, - 2.306071e+000, - -3.549159e+000, - 1.411006e+001, - 3.754188e+001, - 3.079730e+001, - // albedo 1, turbidity 6 - 1.151366e+000, - 2.333005e+000, - -3.728627e+000, - 1.363374e+001, - 3.905894e+001, - 3.092599e+001, - // albedo 1, turbidity 7 - 1.101593e+000, - 2.299422e+000, - -3.565787e+000, - 1.196745e+001, - 4.188472e+001, - 3.102755e+001, - // albedo 1, turbidity 8 - 1.038322e+000, - 2.083539e+000, - -2.649585e+000, - 8.037389e+000, - 4.700869e+001, - 3.065948e+001, - // albedo 1, turbidity 9 - 9.596146e-001, - 1.671470e+000, - -8.751538e-001, - 1.679772e+000, - 5.345784e+001, - 3.054520e+001, - // albedo 1, turbidity 10 - 8.640731e-001, - 9.858301e-001, - 1.854956e+000, - -6.798097e+000, - 5.936468e+001, - 3.110255e+001, +static const double datasetXYZRad3[] = { + // albedo 0, turbidity 1 + 1.168084e+000, + 2.156455e+000, + -3.980314e+000, + 1.989302e+001, + 1.328335e+001, + 1.435621e+001, + // albedo 0, turbidity 2 + 1.135488e+000, + 2.294701e+000, + -4.585886e+000, + 2.090208e+001, + 1.347840e+001, + 1.467658e+001, + // albedo 0, turbidity 3 + 1.107408e+000, + 2.382765e+000, + -5.112357e+000, + 2.147823e+001, + 1.493128e+001, + 1.460882e+001, + // albedo 0, turbidity 4 + 1.054193e+000, + 2.592891e+000, + -6.115000e+000, + 2.268967e+001, + 1.635672e+001, + 1.518999e+001, + // albedo 0, turbidity 5 + 1.006946e+000, + 2.705420e+000, + -6.698930e+000, + 2.291830e+001, + 1.834324e+001, + 1.570651e+001, + // albedo 0, turbidity 6 + 9.794044e-001, + 2.742440e+000, + -6.805283e+000, + 2.225271e+001, + 2.050797e+001, + 1.563130e+001, + // albedo 0, turbidity 7 + 9.413577e-001, + 2.722009e+000, + -6.760707e+000, + 2.098242e+001, + 2.342588e+001, + 1.605011e+001, + // albedo 0, turbidity 8 + 8.917923e-001, + 2.592780e+000, + -6.152635e+000, + 1.774141e+001, + 2.858324e+001, + 1.657910e+001, + // albedo 0, turbidity 9 + 8.288391e-001, + 2.153434e+000, + -4.118327e+000, + 1.078118e+001, + 3.681710e+001, + 1.738139e+001, + // albedo 0, turbidity 10 + 7.623528e-001, + 1.418187e+000, + -8.845235e-001, + 7.590129e-001, + 4.629859e+001, + 1.921657e+001, + // albedo 1, turbidity 1 + 1.352858e+000, + 2.048862e+000, + -2.053393e+000, + 1.405874e+001, + 3.045344e+001, + 3.044430e+001, + // albedo 1, turbidity 2 + 1.330497e+000, + 2.126497e+000, + -2.466296e+000, + 1.467559e+001, + 3.090738e+001, + 3.069707e+001, + // albedo 1, turbidity 3 + 1.286344e+000, + 2.200436e+000, + -2.877228e+000, + 1.492701e+001, + 3.236288e+001, + 3.077223e+001, + // albedo 1, turbidity 4 + 1.234428e+000, + 2.289628e+000, + -3.404699e+000, + 1.499436e+001, + 3.468390e+001, + 3.084842e+001, + // albedo 1, turbidity 5 + 1.178660e+000, + 2.306071e+000, + -3.549159e+000, + 1.411006e+001, + 3.754188e+001, + 3.079730e+001, + // albedo 1, turbidity 6 + 1.151366e+000, + 2.333005e+000, + -3.728627e+000, + 1.363374e+001, + 3.905894e+001, + 3.092599e+001, + // albedo 1, turbidity 7 + 1.101593e+000, + 2.299422e+000, + -3.565787e+000, + 1.196745e+001, + 4.188472e+001, + 3.102755e+001, + // albedo 1, turbidity 8 + 1.038322e+000, + 2.083539e+000, + -2.649585e+000, + 8.037389e+000, + 4.700869e+001, + 3.065948e+001, + // albedo 1, turbidity 9 + 9.596146e-001, + 1.671470e+000, + -8.751538e-001, + 1.679772e+000, + 5.345784e+001, + 3.054520e+001, + // albedo 1, turbidity 10 + 8.640731e-001, + 9.858301e-001, + 1.854956e+000, + -6.798097e+000, + 5.936468e+001, + 3.110255e+001, }; +static const double *datasetsXYZ[] = {datasetXYZ1, datasetXYZ2, datasetXYZ3}; - -static const double* datasetsXYZ[] = { - datasetXYZ1, - datasetXYZ2, - datasetXYZ3 -}; - -static const double* datasetsXYZRad[] = { - datasetXYZRad1, - datasetXYZRad2, - datasetXYZRad3 -}; +static const double *datasetsXYZRad[] = {datasetXYZRad1, datasetXYZRad2, datasetXYZRad3}; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h index f6810505126..56f8f676ba1 100644 --- a/intern/cycles/util/util_sseb.h +++ b/intern/cycles/util/util_sseb.h @@ -26,150 +26,274 @@ struct ssei; struct ssef; /*! 4-wide SSE bool type. */ -struct sseb -{ - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { __m128 m128; int32_t v[4]; }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline sseb ( ) {} - __forceinline sseb ( const sseb& other ) { m128 = other.m128; } - __forceinline sseb& operator=( const sseb& other ) { m128 = other.m128; return *this; } - - __forceinline sseb( const __m128 input ) : m128(input) {} - __forceinline operator const __m128&( void ) const { return m128; } - __forceinline operator const __m128i( void ) const { return _mm_castps_si128(m128); } - __forceinline operator const __m128d( void ) const { return _mm_castps_pd(m128); } - - __forceinline sseb ( bool a ) - : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {} - __forceinline sseb ( bool a, bool b) - : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) {} - __forceinline sseb ( bool a, bool b, bool c, bool d) - : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {} - __forceinline sseb(int mask) { - assert(mask >= 0 && mask < 16); - m128 = _mm_lookupmask_ps[mask]; - } - - //////////////////////////////////////////////////////////////////////////////// - /// Constants - //////////////////////////////////////////////////////////////////////////////// - - __forceinline sseb( FalseTy ) : m128(_mm_setzero_ps()) {} - __forceinline sseb( TrueTy ) : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) {} - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline bool operator []( const size_t i ) const { assert(i < 4); return (_mm_movemask_ps(m128) >> i) & 1; } - __forceinline int32_t& operator []( const size_t i ) { assert(i < 4); return v[i]; } +struct sseb { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128 m128; + int32_t v[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline sseb() + { + } + __forceinline sseb(const sseb &other) + { + m128 = other.m128; + } + __forceinline sseb &operator=(const sseb &other) + { + m128 = other.m128; + return *this; + } + + __forceinline sseb(const __m128 input) : m128(input) + { + } + __forceinline operator const __m128 &(void)const + { + return m128; + } + __forceinline operator const __m128i(void) const + { + return _mm_castps_si128(m128); + } + __forceinline operator const __m128d(void) const + { + return _mm_castps_pd(m128); + } + + __forceinline sseb(bool a) + : m128(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) + { + } + __forceinline sseb(bool a, bool b) + : m128(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) + { + } + __forceinline sseb(bool a, bool b, bool c, bool d) + : m128(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) + { + } + __forceinline sseb(int mask) + { + assert(mask >= 0 && mask < 16); + m128 = _mm_lookupmask_ps[mask]; + } + + //////////////////////////////////////////////////////////////////////////////// + /// Constants + //////////////////////////////////////////////////////////////////////////////// + + __forceinline sseb(FalseTy) : m128(_mm_setzero_ps()) + { + } + __forceinline sseb(TrueTy) + : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()))) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline bool operator[](const size_t i) const + { + assert(i < 4); + return (_mm_movemask_ps(m128) >> i) & 1; + } + __forceinline int32_t &operator[](const size_t i) + { + assert(i < 4); + return v[i]; + } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const sseb operator !( const sseb& a ) { return _mm_xor_ps(a, sseb(True)); } +__forceinline const sseb operator!(const sseb &a) +{ + return _mm_xor_ps(a, sseb(True)); +} //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const sseb operator &( const sseb& a, const sseb& b ) { return _mm_and_ps(a, b); } -__forceinline const sseb operator |( const sseb& a, const sseb& b ) { return _mm_or_ps (a, b); } -__forceinline const sseb operator ^( const sseb& a, const sseb& b ) { return _mm_xor_ps(a, b); } +__forceinline const sseb operator&(const sseb &a, const sseb &b) +{ + return _mm_and_ps(a, b); +} +__forceinline const sseb operator|(const sseb &a, const sseb &b) +{ + return _mm_or_ps(a, b); +} +__forceinline const sseb operator^(const sseb &a, const sseb &b) +{ + return _mm_xor_ps(a, b); +} //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const sseb operator &=( sseb& a, const sseb& b ) { return a = a & b; } -__forceinline const sseb operator |=( sseb& a, const sseb& b ) { return a = a | b; } -__forceinline const sseb operator ^=( sseb& a, const sseb& b ) { return a = a ^ b; } +__forceinline const sseb operator&=(sseb &a, const sseb &b) +{ + return a = a & b; +} +__forceinline const sseb operator|=(sseb &a, const sseb &b) +{ + return a = a | b; +} +__forceinline const sseb operator^=(sseb &a, const sseb &b) +{ + return a = a ^ b; +} //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select //////////////////////////////////////////////////////////////////////////////// -__forceinline const sseb operator !=( const sseb& a, const sseb& b ) { return _mm_xor_ps(a, b); } -__forceinline const sseb operator ==( const sseb& a, const sseb& b ) { return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); } +__forceinline const sseb operator!=(const sseb &a, const sseb &b) +{ + return _mm_xor_ps(a, b); +} +__forceinline const sseb operator==(const sseb &a, const sseb &b) +{ + return _mm_castsi128_ps(_mm_cmpeq_epi32(a, b)); +} -__forceinline const sseb select( const sseb& m, const sseb& t, const sseb& f ) { -#if defined(__KERNEL_SSE41__) - return _mm_blendv_ps(f, t, m); -#else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -#endif +__forceinline const sseb select(const sseb &m, const sseb &t, const sseb &f) +{ +# if defined(__KERNEL_SSE41__) + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif } //////////////////////////////////////////////////////////////////////////////// /// Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// -__forceinline const sseb unpacklo( const sseb& a, const sseb& b ) { return _mm_unpacklo_ps(a, b); } -__forceinline const sseb unpackhi( const sseb& a, const sseb& b ) { return _mm_unpackhi_ps(a, b); } +__forceinline const sseb unpacklo(const sseb &a, const sseb &b) +{ + return _mm_unpacklo_ps(a, b); +} +__forceinline const sseb unpackhi(const sseb &a, const sseb &b) +{ + return _mm_unpackhi_ps(a, b); +} -template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a ) { - return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0))); +template<size_t i0, size_t i1, size_t i2, size_t i3> +__forceinline const sseb shuffle(const sseb &a) +{ + return _mm_castsi128_ps(_mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0))); } -template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a ) { - return _mm_movelh_ps(a, a); +template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a) +{ + return _mm_movelh_ps(a, a); } -template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a ) { - return _mm_movehl_ps(a, a); +template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a) +{ + return _mm_movehl_ps(a, a); } -template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a, const sseb& b ) { - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +template<size_t i0, size_t i1, size_t i2, size_t i3> +__forceinline const sseb shuffle(const sseb &a, const sseb &b) +{ + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } -template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a, const sseb& b ) { - return _mm_movelh_ps(a, b); +template<> __forceinline const sseb shuffle<0, 1, 0, 1>(const sseb &a, const sseb &b) +{ + return _mm_movelh_ps(a, b); } -template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a, const sseb& b ) { - return _mm_movehl_ps(b, a); +template<> __forceinline const sseb shuffle<2, 3, 2, 3>(const sseb &a, const sseb &b) +{ + return _mm_movehl_ps(b, a); } -#if defined(__KERNEL_SSE3__) -template<> __forceinline const sseb shuffle<0, 0, 2, 2>( const sseb& a ) { return _mm_moveldup_ps(a); } -template<> __forceinline const sseb shuffle<1, 1, 3, 3>( const sseb& a ) { return _mm_movehdup_ps(a); } -#endif +# if defined(__KERNEL_SSE3__) +template<> __forceinline const sseb shuffle<0, 0, 2, 2>(const sseb &a) +{ + return _mm_moveldup_ps(a); +} +template<> __forceinline const sseb shuffle<1, 1, 3, 3>(const sseb &a) +{ + return _mm_movehdup_ps(a); +} +# endif -#if defined(__KERNEL_SSE41__) -template<size_t dst, size_t src, size_t clr> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); } -template<size_t dst, size_t src> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return insert<dst, src, 0>(a, b); } -template<size_t dst> __forceinline const sseb insert( const sseb& a, const bool b ) { return insert<dst,0>(a, sseb(b)); } -#endif +# if defined(__KERNEL_SSE41__) +template<size_t dst, size_t src, size_t clr> +__forceinline const sseb insert(const sseb &a, const sseb &b) +{ + return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); +} +template<size_t dst, size_t src> __forceinline const sseb insert(const sseb &a, const sseb &b) +{ + return insert<dst, src, 0>(a, b); +} +template<size_t dst> __forceinline const sseb insert(const sseb &a, const bool b) +{ + return insert<dst, 0>(a, sseb(b)); +} +# endif //////////////////////////////////////////////////////////////////////////////// /// Reduction Operations //////////////////////////////////////////////////////////////////////////////// -#if defined(__KERNEL_SSE41__) -__forceinline size_t popcnt( const sseb& a ) { return __popcnt(_mm_movemask_ps(a)); } -#else -__forceinline size_t popcnt( const sseb& a ) { return bool(a[0])+bool(a[1])+bool(a[2])+bool(a[3]); } -#endif +# if defined(__KERNEL_SSE41__) +__forceinline size_t popcnt(const sseb &a) +{ + return __popcnt(_mm_movemask_ps(a)); +} +# else +__forceinline size_t popcnt(const sseb &a) +{ + return bool(a[0]) + bool(a[1]) + bool(a[2]) + bool(a[3]); +} +# endif -__forceinline bool reduce_and( const sseb& a ) { return _mm_movemask_ps(a) == 0xf; } -__forceinline bool reduce_or ( const sseb& a ) { return _mm_movemask_ps(a) != 0x0; } -__forceinline bool all ( const sseb& b ) { return _mm_movemask_ps(b) == 0xf; } -__forceinline bool any ( const sseb& b ) { return _mm_movemask_ps(b) != 0x0; } -__forceinline bool none ( const sseb& b ) { return _mm_movemask_ps(b) == 0x0; } +__forceinline bool reduce_and(const sseb &a) +{ + return _mm_movemask_ps(a) == 0xf; +} +__forceinline bool reduce_or(const sseb &a) +{ + return _mm_movemask_ps(a) != 0x0; +} +__forceinline bool all(const sseb &b) +{ + return _mm_movemask_ps(b) == 0xf; +} +__forceinline bool any(const sseb &b) +{ + return _mm_movemask_ps(b) != 0x0; +} +__forceinline bool none(const sseb &b) +{ + return _mm_movemask_ps(b) == 0x0; +} -__forceinline size_t movemask( const sseb& a ) { return _mm_movemask_ps(a); } +__forceinline size_t movemask(const sseb &a) +{ + return _mm_movemask_ps(a); +} //////////////////////////////////////////////////////////////////////////////// /// Debug Functions @@ -177,8 +301,7 @@ __forceinline size_t movemask( const sseb& a ) { return _mm_movemask_ps(a); } ccl_device_inline void print_sseb(const char *label, const sseb &a) { - printf("%s: %d %d %d %d\n", - label, a[0], a[1], a[2], a[3]); + printf("%s: %d %d %d %d\n", label, a[0], a[1], a[2], a[3]); } #endif diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h index 66670c9a779..e6610dbb197 100644 --- a/intern/cycles/util/util_ssef.h +++ b/intern/cycles/util/util_ssef.h @@ -26,587 +26,971 @@ struct sseb; struct ssef; /*! 4-wide SSE float type. */ -struct ssef -{ - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { __m128 m128; float f[4]; int i[4]; }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline ssef () {} - __forceinline ssef (const ssef& other) { m128 = other.m128; } - __forceinline ssef& operator=(const ssef& other) { m128 = other.m128; return *this; } - - __forceinline ssef(const __m128 a) : m128(a) {} - __forceinline operator const __m128&() const { return m128; } - __forceinline operator __m128&() { return m128; } - - __forceinline ssef (float a) : m128(_mm_set1_ps(a)) {} - __forceinline ssef (float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d)) {} - - __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a)) {} - - //////////////////////////////////////////////////////////////////////////////// - /// Loads and Stores - //////////////////////////////////////////////////////////////////////////////// - -#if defined(__KERNEL_AVX__) - static __forceinline ssef broadcast(const void* const a) { return _mm_broadcast_ss((float*)a); } -#else - static __forceinline ssef broadcast(const void* const a) { return _mm_set1_ps(*(float*)a); } -#endif - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const float& operator [](const size_t i) const { assert(i < 4); return f[i]; } - __forceinline float& operator [](const size_t i) { assert(i < 4); return f[i]; } +struct ssef { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128 m128; + float f[4]; + int i[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline ssef() + { + } + __forceinline ssef(const ssef &other) + { + m128 = other.m128; + } + __forceinline ssef &operator=(const ssef &other) + { + m128 = other.m128; + return *this; + } + + __forceinline ssef(const __m128 a) : m128(a) + { + } + __forceinline operator const __m128 &() const + { + return m128; + } + __forceinline operator __m128 &() + { + return m128; + } + + __forceinline ssef(float a) : m128(_mm_set1_ps(a)) + { + } + __forceinline ssef(float a, float b, float c, float d) : m128(_mm_setr_ps(a, b, c, d)) + { + } + + __forceinline explicit ssef(const __m128i a) : m128(_mm_cvtepi32_ps(a)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Loads and Stores + //////////////////////////////////////////////////////////////////////////////// + +# if defined(__KERNEL_AVX__) + static __forceinline ssef broadcast(const void *const a) + { + return _mm_broadcast_ss((float *)a); + } +# else + static __forceinline ssef broadcast(const void *const a) + { + return _mm_set1_ps(*(float *)a); + } +# endif + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const float &operator[](const size_t i) const + { + assert(i < 4); + return f[i]; + } + __forceinline float &operator[](const size_t i) + { + assert(i < 4); + return f[i]; + } }; - //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const ssef cast (const __m128i& a) { return _mm_castsi128_ps(a); } -__forceinline const ssef operator +(const ssef& a) { return a; } -__forceinline const ssef operator -(const ssef& a) { return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); } -__forceinline const ssef abs (const ssef& a) { return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); } -#if defined(__KERNEL_SSE41__) -__forceinline const ssef sign (const ssef& a) { return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a,ssef(0.0f))); } -#endif -__forceinline const ssef signmsk (const ssef& a) { return _mm_and_ps(a.m128,_mm_castsi128_ps(_mm_set1_epi32(0x80000000))); } +__forceinline const ssef cast(const __m128i &a) +{ + return _mm_castsi128_ps(a); +} +__forceinline const ssef operator+(const ssef &a) +{ + return a; +} +__forceinline const ssef operator-(const ssef &a) +{ + return _mm_xor_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); +} +__forceinline const ssef abs(const ssef &a) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff))); +} +# if defined(__KERNEL_SSE41__) +__forceinline const ssef sign(const ssef &a) +{ + return _mm_blendv_ps(ssef(1.0f), -ssef(1.0f), _mm_cmplt_ps(a, ssef(0.0f))); +} +# endif +__forceinline const ssef signmsk(const ssef &a) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); +} -__forceinline const ssef rcp (const ssef& a) { - const ssef r = _mm_rcp_ps(a.m128); - return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); +__forceinline const ssef rcp(const ssef &a) +{ + const ssef r = _mm_rcp_ps(a.m128); + return _mm_sub_ps(_mm_add_ps(r, r), _mm_mul_ps(_mm_mul_ps(r, r), a)); +} +__forceinline const ssef sqr(const ssef &a) +{ + return _mm_mul_ps(a, a); } -__forceinline const ssef sqr (const ssef& a) { return _mm_mul_ps(a,a); } -__forceinline const ssef mm_sqrt(const ssef& a) { return _mm_sqrt_ps(a.m128); } -__forceinline const ssef rsqrt(const ssef& a) { - const ssef r = _mm_rsqrt_ps(a.m128); - return _mm_add_ps(_mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r), - _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r), _mm_mul_ps(r, r))); +__forceinline const ssef mm_sqrt(const ssef &a) +{ + return _mm_sqrt_ps(a.m128); +} +__forceinline const ssef rsqrt(const ssef &a) +{ + const ssef r = _mm_rsqrt_ps(a.m128); + return _mm_add_ps( + _mm_mul_ps(_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f), r), + _mm_mul_ps(_mm_mul_ps(_mm_mul_ps(a, _mm_set_ps(-0.5f, -0.5f, -0.5f, -0.5f)), r), + _mm_mul_ps(r, r))); } //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const ssef operator +(const ssef& a, const ssef& b) { return _mm_add_ps(a.m128, b.m128); } -__forceinline const ssef operator +(const ssef& a, const float& b) { return a + ssef(b); } -__forceinline const ssef operator +(const float& a, const ssef& b) { return ssef(a) + b; } +__forceinline const ssef operator+(const ssef &a, const ssef &b) +{ + return _mm_add_ps(a.m128, b.m128); +} +__forceinline const ssef operator+(const ssef &a, const float &b) +{ + return a + ssef(b); +} +__forceinline const ssef operator+(const float &a, const ssef &b) +{ + return ssef(a) + b; +} -__forceinline const ssef operator -(const ssef& a, const ssef& b) { return _mm_sub_ps(a.m128, b.m128); } -__forceinline const ssef operator -(const ssef& a, const float& b) { return a - ssef(b); } -__forceinline const ssef operator -(const float& a, const ssef& b) { return ssef(a) - b; } +__forceinline const ssef operator-(const ssef &a, const ssef &b) +{ + return _mm_sub_ps(a.m128, b.m128); +} +__forceinline const ssef operator-(const ssef &a, const float &b) +{ + return a - ssef(b); +} +__forceinline const ssef operator-(const float &a, const ssef &b) +{ + return ssef(a) - b; +} -__forceinline const ssef operator *(const ssef& a, const ssef& b) { return _mm_mul_ps(a.m128, b.m128); } -__forceinline const ssef operator *(const ssef& a, const float& b) { return a * ssef(b); } -__forceinline const ssef operator *(const float& a, const ssef& b) { return ssef(a) * b; } +__forceinline const ssef operator*(const ssef &a, const ssef &b) +{ + return _mm_mul_ps(a.m128, b.m128); +} +__forceinline const ssef operator*(const ssef &a, const float &b) +{ + return a * ssef(b); +} +__forceinline const ssef operator*(const float &a, const ssef &b) +{ + return ssef(a) * b; +} -__forceinline const ssef operator /(const ssef& a, const ssef& b) { return _mm_div_ps(a.m128,b.m128); } -__forceinline const ssef operator /(const ssef& a, const float& b) { return a/ssef(b); } -__forceinline const ssef operator /(const float& a, const ssef& b) { return ssef(a)/b; } +__forceinline const ssef operator/(const ssef &a, const ssef &b) +{ + return _mm_div_ps(a.m128, b.m128); +} +__forceinline const ssef operator/(const ssef &a, const float &b) +{ + return a / ssef(b); +} +__forceinline const ssef operator/(const float &a, const ssef &b) +{ + return ssef(a) / b; +} -__forceinline const ssef operator^(const ssef& a, const ssef& b) { return _mm_xor_ps(a.m128,b.m128); } -__forceinline const ssef operator^(const ssef& a, const ssei& b) { return _mm_xor_ps(a.m128,_mm_castsi128_ps(b.m128)); } +__forceinline const ssef operator^(const ssef &a, const ssef &b) +{ + return _mm_xor_ps(a.m128, b.m128); +} +__forceinline const ssef operator^(const ssef &a, const ssei &b) +{ + return _mm_xor_ps(a.m128, _mm_castsi128_ps(b.m128)); +} -__forceinline const ssef operator&(const ssef& a, const ssef& b) { return _mm_and_ps(a.m128,b.m128); } -__forceinline const ssef operator&(const ssef& a, const ssei& b) { return _mm_and_ps(a.m128,_mm_castsi128_ps(b.m128)); } +__forceinline const ssef operator&(const ssef &a, const ssef &b) +{ + return _mm_and_ps(a.m128, b.m128); +} +__forceinline const ssef operator&(const ssef &a, const ssei &b) +{ + return _mm_and_ps(a.m128, _mm_castsi128_ps(b.m128)); +} -__forceinline const ssef operator|(const ssef& a, const ssef& b) { return _mm_or_ps(a.m128,b.m128); } -__forceinline const ssef operator|(const ssef& a, const ssei& b) { return _mm_or_ps(a.m128,_mm_castsi128_ps(b.m128)); } +__forceinline const ssef operator|(const ssef &a, const ssef &b) +{ + return _mm_or_ps(a.m128, b.m128); +} +__forceinline const ssef operator|(const ssef &a, const ssei &b) +{ + return _mm_or_ps(a.m128, _mm_castsi128_ps(b.m128)); +} -__forceinline const ssef andnot(const ssef& a, const ssef& b) { return _mm_andnot_ps(a.m128,b.m128); } +__forceinline const ssef andnot(const ssef &a, const ssef &b) +{ + return _mm_andnot_ps(a.m128, b.m128); +} -__forceinline const ssef min(const ssef& a, const ssef& b) { return _mm_min_ps(a.m128,b.m128); } -__forceinline const ssef min(const ssef& a, const float& b) { return _mm_min_ps(a.m128,ssef(b)); } -__forceinline const ssef min(const float& a, const ssef& b) { return _mm_min_ps(ssef(a),b.m128); } +__forceinline const ssef min(const ssef &a, const ssef &b) +{ + return _mm_min_ps(a.m128, b.m128); +} +__forceinline const ssef min(const ssef &a, const float &b) +{ + return _mm_min_ps(a.m128, ssef(b)); +} +__forceinline const ssef min(const float &a, const ssef &b) +{ + return _mm_min_ps(ssef(a), b.m128); +} -__forceinline const ssef max(const ssef& a, const ssef& b) { return _mm_max_ps(a.m128,b.m128); } -__forceinline const ssef max(const ssef& a, const float& b) { return _mm_max_ps(a.m128,ssef(b)); } -__forceinline const ssef max(const float& a, const ssef& b) { return _mm_max_ps(ssef(a),b.m128); } +__forceinline const ssef max(const ssef &a, const ssef &b) +{ + return _mm_max_ps(a.m128, b.m128); +} +__forceinline const ssef max(const ssef &a, const float &b) +{ + return _mm_max_ps(a.m128, ssef(b)); +} +__forceinline const ssef max(const float &a, const ssef &b) +{ + return _mm_max_ps(ssef(a), b.m128); +} -#if defined(__KERNEL_SSE41__) -__forceinline ssef mini(const ssef& a, const ssef& b) { - const ssei ai = _mm_castps_si128(a); - const ssei bi = _mm_castps_si128(b); - const ssei ci = _mm_min_epi32(ai,bi); - return _mm_castsi128_ps(ci); +# if defined(__KERNEL_SSE41__) +__forceinline ssef mini(const ssef &a, const ssef &b) +{ + const ssei ai = _mm_castps_si128(a); + const ssei bi = _mm_castps_si128(b); + const ssei ci = _mm_min_epi32(ai, bi); + return _mm_castsi128_ps(ci); } -#endif +# endif -#if defined(__KERNEL_SSE41__) -__forceinline ssef maxi(const ssef& a, const ssef& b) { - const ssei ai = _mm_castps_si128(a); - const ssei bi = _mm_castps_si128(b); - const ssei ci = _mm_max_epi32(ai,bi); - return _mm_castsi128_ps(ci); +# if defined(__KERNEL_SSE41__) +__forceinline ssef maxi(const ssef &a, const ssef &b) +{ + const ssei ai = _mm_castps_si128(a); + const ssei bi = _mm_castps_si128(b); + const ssei ci = _mm_max_epi32(ai, bi); + return _mm_castsi128_ps(ci); } -#endif +# endif //////////////////////////////////////////////////////////////////////////////// /// Ternary Operators //////////////////////////////////////////////////////////////////////////////// -#if defined(__KERNEL_AVX2__) -__forceinline const ssef madd (const ssef& a, const ssef& b, const ssef& c) { return _mm_fmadd_ps(a,b,c); } -__forceinline const ssef msub (const ssef& a, const ssef& b, const ssef& c) { return _mm_fmsub_ps(a,b,c); } -__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return _mm_fnmadd_ps(a,b,c); } -__forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { return _mm_fnmsub_ps(a,b,c); } -#else -__forceinline const ssef madd (const ssef& a, const ssef& b, const ssef& c) { return a*b+c; } -__forceinline const ssef msub (const ssef& a, const ssef& b, const ssef& c) { return a*b-c; } -__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return c-a*b;} -__forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { return -a*b-c; } -#endif +# if defined(__KERNEL_AVX2__) +__forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c) +{ + return _mm_fmadd_ps(a, b, c); +} +__forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c) +{ + return _mm_fmsub_ps(a, b, c); +} +__forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c) +{ + return _mm_fnmadd_ps(a, b, c); +} +__forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c) +{ + return _mm_fnmsub_ps(a, b, c); +} +# else +__forceinline const ssef madd(const ssef &a, const ssef &b, const ssef &c) +{ + return a * b + c; +} +__forceinline const ssef msub(const ssef &a, const ssef &b, const ssef &c) +{ + return a * b - c; +} +__forceinline const ssef nmadd(const ssef &a, const ssef &b, const ssef &c) +{ + return c - a * b; +} +__forceinline const ssef nmsub(const ssef &a, const ssef &b, const ssef &c) +{ + return -a * b - c; +} +# endif //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline ssef& operator +=(ssef& a, const ssef& b) { return a = a + b; } -__forceinline ssef& operator +=(ssef& a, const float& b) { return a = a + b; } +__forceinline ssef &operator+=(ssef &a, const ssef &b) +{ + return a = a + b; +} +__forceinline ssef &operator+=(ssef &a, const float &b) +{ + return a = a + b; +} -__forceinline ssef& operator -=(ssef& a, const ssef& b) { return a = a - b; } -__forceinline ssef& operator -=(ssef& a, const float& b) { return a = a - b; } +__forceinline ssef &operator-=(ssef &a, const ssef &b) +{ + return a = a - b; +} +__forceinline ssef &operator-=(ssef &a, const float &b) +{ + return a = a - b; +} -__forceinline ssef& operator *=(ssef& a, const ssef& b) { return a = a * b; } -__forceinline ssef& operator *=(ssef& a, const float& b) { return a = a * b; } +__forceinline ssef &operator*=(ssef &a, const ssef &b) +{ + return a = a * b; +} +__forceinline ssef &operator*=(ssef &a, const float &b) +{ + return a = a * b; +} -__forceinline ssef& operator /=(ssef& a, const ssef& b) { return a = a / b; } -__forceinline ssef& operator /=(ssef& a, const float& b) { return a = a / b; } +__forceinline ssef &operator/=(ssef &a, const ssef &b) +{ + return a = a / b; +} +__forceinline ssef &operator/=(ssef &a, const float &b) +{ + return a = a / b; +} //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select //////////////////////////////////////////////////////////////////////////////// -__forceinline const sseb operator ==(const ssef& a, const ssef& b) { return _mm_cmpeq_ps(a.m128, b.m128); } -__forceinline const sseb operator ==(const ssef& a, const float& b) { return a == ssef(b); } -__forceinline const sseb operator ==(const float& a, const ssef& b) { return ssef(a) == b; } +__forceinline const sseb operator==(const ssef &a, const ssef &b) +{ + return _mm_cmpeq_ps(a.m128, b.m128); +} +__forceinline const sseb operator==(const ssef &a, const float &b) +{ + return a == ssef(b); +} +__forceinline const sseb operator==(const float &a, const ssef &b) +{ + return ssef(a) == b; +} -__forceinline const sseb operator !=(const ssef& a, const ssef& b) { return _mm_cmpneq_ps(a.m128, b.m128); } -__forceinline const sseb operator !=(const ssef& a, const float& b) { return a != ssef(b); } -__forceinline const sseb operator !=(const float& a, const ssef& b) { return ssef(a) != b; } +__forceinline const sseb operator!=(const ssef &a, const ssef &b) +{ + return _mm_cmpneq_ps(a.m128, b.m128); +} +__forceinline const sseb operator!=(const ssef &a, const float &b) +{ + return a != ssef(b); +} +__forceinline const sseb operator!=(const float &a, const ssef &b) +{ + return ssef(a) != b; +} -__forceinline const sseb operator <(const ssef& a, const ssef& b) { return _mm_cmplt_ps(a.m128, b.m128); } -__forceinline const sseb operator <(const ssef& a, const float& b) { return a < ssef(b); } -__forceinline const sseb operator <(const float& a, const ssef& b) { return ssef(a) < b; } +__forceinline const sseb operator<(const ssef &a, const ssef &b) +{ + return _mm_cmplt_ps(a.m128, b.m128); +} +__forceinline const sseb operator<(const ssef &a, const float &b) +{ + return a < ssef(b); +} +__forceinline const sseb operator<(const float &a, const ssef &b) +{ + return ssef(a) < b; +} -__forceinline const sseb operator >=(const ssef& a, const ssef& b) { return _mm_cmpnlt_ps(a.m128, b.m128); } -__forceinline const sseb operator >=(const ssef& a, const float& b) { return a >= ssef(b); } -__forceinline const sseb operator >=(const float& a, const ssef& b) { return ssef(a) >= b; } +__forceinline const sseb operator>=(const ssef &a, const ssef &b) +{ + return _mm_cmpnlt_ps(a.m128, b.m128); +} +__forceinline const sseb operator>=(const ssef &a, const float &b) +{ + return a >= ssef(b); +} +__forceinline const sseb operator>=(const float &a, const ssef &b) +{ + return ssef(a) >= b; +} -__forceinline const sseb operator >(const ssef& a, const ssef& b) { return _mm_cmpnle_ps(a.m128, b.m128); } -__forceinline const sseb operator >(const ssef& a, const float& b) { return a > ssef(b); } -__forceinline const sseb operator >(const float& a, const ssef& b) { return ssef(a) > b; } +__forceinline const sseb operator>(const ssef &a, const ssef &b) +{ + return _mm_cmpnle_ps(a.m128, b.m128); +} +__forceinline const sseb operator>(const ssef &a, const float &b) +{ + return a > ssef(b); +} +__forceinline const sseb operator>(const float &a, const ssef &b) +{ + return ssef(a) > b; +} -__forceinline const sseb operator <=(const ssef& a, const ssef& b) { return _mm_cmple_ps(a.m128, b.m128); } -__forceinline const sseb operator <=(const ssef& a, const float& b) { return a <= ssef(b); } -__forceinline const sseb operator <=(const float& a, const ssef& b) { return ssef(a) <= b; } +__forceinline const sseb operator<=(const ssef &a, const ssef &b) +{ + return _mm_cmple_ps(a.m128, b.m128); +} +__forceinline const sseb operator<=(const ssef &a, const float &b) +{ + return a <= ssef(b); +} +__forceinline const sseb operator<=(const float &a, const ssef &b) +{ + return ssef(a) <= b; +} -__forceinline const ssef select(const sseb& m, const ssef& t, const ssef& f) { -#ifdef __KERNEL_SSE41__ - return _mm_blendv_ps(f, t, m); -#else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -#endif +__forceinline const ssef select(const sseb &m, const ssef &t, const ssef &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif } -__forceinline const ssef select(const ssef& m, const ssef& t, const ssef& f) { -#ifdef __KERNEL_SSE41__ - return _mm_blendv_ps(f, t, m); -#else - return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); -#endif +__forceinline const ssef select(const ssef &m, const ssef &t, const ssef &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_blendv_ps(f, t, m); +# else + return _mm_or_ps(_mm_and_ps(m, t), _mm_andnot_ps(m, f)); +# endif } -__forceinline const ssef select(const int mask, const ssef& t, const ssef& f) { -#if defined(__KERNEL_SSE41__) && ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) - return _mm_blend_ps(f, t, mask); -#else - return select(sseb(mask),t,f); -#endif +__forceinline const ssef select(const int mask, const ssef &t, const ssef &f) +{ +# if defined(__KERNEL_SSE41__) && \ + ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) + return _mm_blend_ps(f, t, mask); +# else + return select(sseb(mask), t, f); +# endif } //////////////////////////////////////////////////////////////////////////////// /// Rounding Functions //////////////////////////////////////////////////////////////////////////////// -#if defined(__KERNEL_SSE41__) -__forceinline const ssef round_even(const ssef& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); } -__forceinline const ssef round_down(const ssef& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); } -__forceinline const ssef round_up (const ssef& a) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); } -__forceinline const ssef round_zero(const ssef& a) { return _mm_round_ps(a, _MM_FROUND_TO_ZERO ); } -__forceinline const ssef floor (const ssef& a) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); } -__forceinline const ssef ceil (const ssef& a) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); } -#endif +# if defined(__KERNEL_SSE41__) +__forceinline const ssef round_even(const ssef &a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); +} +__forceinline const ssef round_down(const ssef &a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); +} +__forceinline const ssef round_up(const ssef &a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); +} +__forceinline const ssef round_zero(const ssef &a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_ZERO); +} +__forceinline const ssef floor(const ssef &a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF); +} +__forceinline const ssef ceil(const ssef &a) +{ + return _mm_round_ps(a, _MM_FROUND_TO_POS_INF); +} +# endif -__forceinline ssei truncatei(const ssef& a) { - return _mm_cvttps_epi32(a.m128); +__forceinline ssei truncatei(const ssef &a) +{ + return _mm_cvttps_epi32(a.m128); } -__forceinline ssei floori(const ssef& a) { -#if defined(__KERNEL_SSE41__) - return ssei(floor(a)); -#else - return ssei(a-ssef(0.5f)); -#endif +__forceinline ssei floori(const ssef &a) +{ +# if defined(__KERNEL_SSE41__) + return ssei(floor(a)); +# else + return ssei(a - ssef(0.5f)); +# endif } //////////////////////////////////////////////////////////////////////////////// /// Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// -__forceinline ssef unpacklo(const ssef& a, const ssef& b) { return _mm_unpacklo_ps(a.m128, b.m128); } -__forceinline ssef unpackhi(const ssef& a, const ssef& b) { return _mm_unpackhi_ps(a.m128, b.m128); } +__forceinline ssef unpacklo(const ssef &a, const ssef &b) +{ + return _mm_unpacklo_ps(a.m128, b.m128); +} +__forceinline ssef unpackhi(const ssef &a, const ssef &b) +{ + return _mm_unpackhi_ps(a.m128, b.m128); +} -template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssef shuffle(const ssef& b) { - return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); +template<size_t i0, size_t i1, size_t i2, size_t i3> +__forceinline const ssef shuffle(const ssef &b) +{ + return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); } -template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef& a) { - return _mm_movelh_ps(a, a); +template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a) +{ + return _mm_movelh_ps(a, a); } -template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef& a) { - return _mm_movehl_ps(a, a); +template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a) +{ + return _mm_movehl_ps(a, a); } -template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssef shuffle(const ssef& a, const ssef& b) { - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); +template<size_t i0, size_t i1, size_t i2, size_t i3> +__forceinline const ssef shuffle(const ssef &a, const ssef &b) +{ + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } -template<size_t i0> __forceinline const ssef shuffle(const ssef& a, const ssef& b) { - return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)); +template<size_t i0> __forceinline const ssef shuffle(const ssef &a, const ssef &b) +{ + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)); } -template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef& a, const ssef& b) { - return _mm_movelh_ps(a, b); +template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef &a, const ssef &b) +{ + return _mm_movelh_ps(a, b); } -template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef& a, const ssef& b) { - return _mm_movehl_ps(b, a); +template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef &a, const ssef &b) +{ + return _mm_movehl_ps(b, a); } -#if defined(__KERNEL_SSSE3__) -__forceinline const ssef shuffle8(const ssef& a, const ssei& shuf) { - return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); +# if defined(__KERNEL_SSSE3__) +__forceinline const ssef shuffle8(const ssef &a, const ssei &shuf) +{ + return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); } -#endif +# endif -#if defined(__KERNEL_SSE3__) -template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef& b) { return _mm_moveldup_ps(b); } -template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef& b) { return _mm_movehdup_ps(b); } -#endif +# if defined(__KERNEL_SSE3__) +template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef &b) +{ + return _mm_moveldup_ps(b); +} +template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef &b) +{ + return _mm_movehdup_ps(b); +} +# endif -template<size_t i0> __forceinline const ssef shuffle(const ssef& b) { - return shuffle<i0,i0,i0,i0>(b); +template<size_t i0> __forceinline const ssef shuffle(const ssef &b) +{ + return shuffle<i0, i0, i0, i0>(b); } -#if defined(__KERNEL_AVX__) -__forceinline const ssef shuffle(const ssef& a, const ssei& shuf) { - return _mm_permutevar_ps(a, shuf); +# if defined(__KERNEL_AVX__) +__forceinline const ssef shuffle(const ssef &a, const ssei &shuf) +{ + return _mm_permutevar_ps(a, shuf); } -#endif +# endif -template<size_t i> __forceinline float extract (const ssef& a) { return _mm_cvtss_f32(shuffle<i,i,i,i>(a)); } -template<> __forceinline float extract<0>(const ssef& a) { return _mm_cvtss_f32(a); } +template<size_t i> __forceinline float extract(const ssef &a) +{ + return _mm_cvtss_f32(shuffle<i, i, i, i>(a)); +} +template<> __forceinline float extract<0>(const ssef &a) +{ + return _mm_cvtss_f32(a); +} -#if defined(__KERNEL_SSE41__) -template<size_t dst, size_t src, size_t clr> __forceinline const ssef insert(const ssef& a, const ssef& b) { return _mm_insert_ps(a, b,(dst << 4) |(src << 6) | clr); } -template<size_t dst, size_t src> __forceinline const ssef insert(const ssef& a, const ssef& b) { return insert<dst, src, 0>(a, b); } -template<size_t dst> __forceinline const ssef insert(const ssef& a, const float b) { return insert<dst, 0>(a, _mm_set_ss(b)); } -#else -template<size_t dst> __forceinline const ssef insert(const ssef& a, const float b) { ssef c = a; c[dst] = b; return c; } -#endif +# if defined(__KERNEL_SSE41__) +template<size_t dst, size_t src, size_t clr> +__forceinline const ssef insert(const ssef &a, const ssef &b) +{ + return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); +} +template<size_t dst, size_t src> __forceinline const ssef insert(const ssef &a, const ssef &b) +{ + return insert<dst, src, 0>(a, b); +} +template<size_t dst> __forceinline const ssef insert(const ssef &a, const float b) +{ + return insert<dst, 0>(a, _mm_set_ss(b)); +} +# else +template<size_t dst> __forceinline const ssef insert(const ssef &a, const float b) +{ + ssef c = a; + c[dst] = b; + return c; +} +# endif //////////////////////////////////////////////////////////////////////////////// /// Transpose //////////////////////////////////////////////////////////////////////////////// -__forceinline void transpose(const ssef& r0, const ssef& r1, const ssef& r2, const ssef& r3, ssef& c0, ssef& c1, ssef& c2, ssef& c3) +__forceinline void transpose(const ssef &r0, + const ssef &r1, + const ssef &r2, + const ssef &r3, + ssef &c0, + ssef &c1, + ssef &c2, + ssef &c3) { - ssef l02 = unpacklo(r0,r2); - ssef h02 = unpackhi(r0,r2); - ssef l13 = unpacklo(r1,r3); - ssef h13 = unpackhi(r1,r3); - c0 = unpacklo(l02,l13); - c1 = unpackhi(l02,l13); - c2 = unpacklo(h02,h13); - c3 = unpackhi(h02,h13); -} - -__forceinline void transpose(const ssef& r0, const ssef& r1, const ssef& r2, const ssef& r3, ssef& c0, ssef& c1, ssef& c2) + ssef l02 = unpacklo(r0, r2); + ssef h02 = unpackhi(r0, r2); + ssef l13 = unpacklo(r1, r3); + ssef h13 = unpackhi(r1, r3); + c0 = unpacklo(l02, l13); + c1 = unpackhi(l02, l13); + c2 = unpacklo(h02, h13); + c3 = unpackhi(h02, h13); +} + +__forceinline void transpose( + const ssef &r0, const ssef &r1, const ssef &r2, const ssef &r3, ssef &c0, ssef &c1, ssef &c2) { - ssef l02 = unpacklo(r0,r2); - ssef h02 = unpackhi(r0,r2); - ssef l13 = unpacklo(r1,r3); - ssef h13 = unpackhi(r1,r3); - c0 = unpacklo(l02,l13); - c1 = unpackhi(l02,l13); - c2 = unpacklo(h02,h13); + ssef l02 = unpacklo(r0, r2); + ssef h02 = unpackhi(r0, r2); + ssef l13 = unpacklo(r1, r3); + ssef h13 = unpackhi(r1, r3); + c0 = unpacklo(l02, l13); + c1 = unpackhi(l02, l13); + c2 = unpacklo(h02, h13); } //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// -__forceinline const ssef vreduce_min(const ssef& v) { ssef h = min(shuffle<1,0,3,2>(v),v); return min(shuffle<2,3,0,1>(h),h); } -__forceinline const ssef vreduce_max(const ssef& v) { ssef h = max(shuffle<1,0,3,2>(v),v); return max(shuffle<2,3,0,1>(h),h); } -__forceinline const ssef vreduce_add(const ssef& v) { ssef h = shuffle<1,0,3,2>(v) + v ; return shuffle<2,3,0,1>(h) + h ; } +__forceinline const ssef vreduce_min(const ssef &v) +{ + ssef h = min(shuffle<1, 0, 3, 2>(v), v); + return min(shuffle<2, 3, 0, 1>(h), h); +} +__forceinline const ssef vreduce_max(const ssef &v) +{ + ssef h = max(shuffle<1, 0, 3, 2>(v), v); + return max(shuffle<2, 3, 0, 1>(h), h); +} +__forceinline const ssef vreduce_add(const ssef &v) +{ + ssef h = shuffle<1, 0, 3, 2>(v) + v; + return shuffle<2, 3, 0, 1>(h) + h; +} -__forceinline float reduce_min(const ssef& v) { return _mm_cvtss_f32(vreduce_min(v)); } -__forceinline float reduce_max(const ssef& v) { return _mm_cvtss_f32(vreduce_max(v)); } -__forceinline float reduce_add(const ssef& v) { return _mm_cvtss_f32(vreduce_add(v)); } +__forceinline float reduce_min(const ssef &v) +{ + return _mm_cvtss_f32(vreduce_min(v)); +} +__forceinline float reduce_max(const ssef &v) +{ + return _mm_cvtss_f32(vreduce_max(v)); +} +__forceinline float reduce_add(const ssef &v) +{ + return _mm_cvtss_f32(vreduce_add(v)); +} -__forceinline size_t select_min(const ssef& v) { return __bsf(movemask(v == vreduce_min(v))); } -__forceinline size_t select_max(const ssef& v) { return __bsf(movemask(v == vreduce_max(v))); } +__forceinline size_t select_min(const ssef &v) +{ + return __bsf(movemask(v == vreduce_min(v))); +} +__forceinline size_t select_max(const ssef &v) +{ + return __bsf(movemask(v == vreduce_max(v))); +} -__forceinline size_t select_min(const sseb& valid, const ssef& v) { const ssef a = select(valid,v,ssef(pos_inf)); return __bsf(movemask(valid &(a == vreduce_min(a)))); } -__forceinline size_t select_max(const sseb& valid, const ssef& v) { const ssef a = select(valid,v,ssef(neg_inf)); return __bsf(movemask(valid &(a == vreduce_max(a)))); } +__forceinline size_t select_min(const sseb &valid, const ssef &v) +{ + const ssef a = select(valid, v, ssef(pos_inf)); + return __bsf(movemask(valid & (a == vreduce_min(a)))); +} +__forceinline size_t select_max(const sseb &valid, const ssef &v) +{ + const ssef a = select(valid, v, ssef(neg_inf)); + return __bsf(movemask(valid & (a == vreduce_max(a)))); +} -__forceinline size_t movemask( const ssef& a ) { return _mm_movemask_ps(a); } +__forceinline size_t movemask(const ssef &a) +{ + return _mm_movemask_ps(a); +} //////////////////////////////////////////////////////////////////////////////// /// Memory load and store operations //////////////////////////////////////////////////////////////////////////////// -__forceinline ssef load4f(const float4& a) { -#ifdef __KERNEL_WITH_SSE_ALIGN__ - return _mm_load_ps(&a.x); -#else - return _mm_loadu_ps(&a.x); -#endif +__forceinline ssef load4f(const float4 &a) +{ +# ifdef __KERNEL_WITH_SSE_ALIGN__ + return _mm_load_ps(&a.x); +# else + return _mm_loadu_ps(&a.x); +# endif } -__forceinline ssef load4f(const float3& a) { -#ifdef __KERNEL_WITH_SSE_ALIGN__ - return _mm_load_ps(&a.x); -#else - return _mm_loadu_ps(&a.x); -#endif +__forceinline ssef load4f(const float3 &a) +{ +# ifdef __KERNEL_WITH_SSE_ALIGN__ + return _mm_load_ps(&a.x); +# else + return _mm_loadu_ps(&a.x); +# endif } -__forceinline ssef load4f(const void* const a) { - return _mm_load_ps((float*)a); +__forceinline ssef load4f(const void *const a) +{ + return _mm_load_ps((float *)a); } -__forceinline ssef load1f_first(const float a) { - return _mm_set_ss(a); +__forceinline ssef load1f_first(const float a) +{ + return _mm_set_ss(a); } -__forceinline void store4f(void* ptr, const ssef& v) { - _mm_store_ps((float*)ptr,v); +__forceinline void store4f(void *ptr, const ssef &v) +{ + _mm_store_ps((float *)ptr, v); } -__forceinline ssef loadu4f(const void* const a) { - return _mm_loadu_ps((float*)a); +__forceinline ssef loadu4f(const void *const a) +{ + return _mm_loadu_ps((float *)a); } -__forceinline void storeu4f(void* ptr, const ssef& v) { - _mm_storeu_ps((float*)ptr,v); +__forceinline void storeu4f(void *ptr, const ssef &v) +{ + _mm_storeu_ps((float *)ptr, v); } -__forceinline void store4f(const sseb& mask, void* ptr, const ssef& f) { -#if defined(__KERNEL_AVX__) - _mm_maskstore_ps((float*)ptr,(__m128i)mask,f); -#else - *(ssef*)ptr = select(mask,f,*(ssef*)ptr); -#endif +__forceinline void store4f(const sseb &mask, void *ptr, const ssef &f) +{ +# if defined(__KERNEL_AVX__) + _mm_maskstore_ps((float *)ptr, (__m128i)mask, f); +# else + *(ssef *)ptr = select(mask, f, *(ssef *)ptr); +# endif } -__forceinline ssef load4f_nt(void* ptr) { -#if defined(__KERNEL_SSE41__) - return _mm_castsi128_ps(_mm_stream_load_si128((__m128i*)ptr)); -#else - return _mm_load_ps((float*)ptr); -#endif +__forceinline ssef load4f_nt(void *ptr) +{ +# if defined(__KERNEL_SSE41__) + return _mm_castsi128_ps(_mm_stream_load_si128((__m128i *)ptr)); +# else + return _mm_load_ps((float *)ptr); +# endif } -__forceinline void store4f_nt(void* ptr, const ssef& v) { -#if defined(__KERNEL_SSE41__) - _mm_stream_ps((float*)ptr,v); -#else - _mm_store_ps((float*)ptr,v); -#endif +__forceinline void store4f_nt(void *ptr, const ssef &v) +{ +# if defined(__KERNEL_SSE41__) + _mm_stream_ps((float *)ptr, v); +# else + _mm_store_ps((float *)ptr, v); +# endif } //////////////////////////////////////////////////////////////////////////////// /// Euclidian Space Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline float dot(const ssef& a, const ssef& b) { - return reduce_add(a*b); +__forceinline float dot(const ssef &a, const ssef &b) +{ + return reduce_add(a * b); } /* calculate shuffled cross product, useful when order of components does not matter */ -__forceinline ssef cross_zxy(const ssef& a, const ssef& b) +__forceinline ssef cross_zxy(const ssef &a, const ssef &b) { - const ssef a0 = a; - const ssef b0 = shuffle<1,2,0,3>(b); - const ssef a1 = shuffle<1,2,0,3>(a); - const ssef b1 = b; - return msub(a0,b0,a1*b1); + const ssef a0 = a; + const ssef b0 = shuffle<1, 2, 0, 3>(b); + const ssef a1 = shuffle<1, 2, 0, 3>(a); + const ssef b1 = b; + return msub(a0, b0, a1 * b1); } -__forceinline ssef cross(const ssef& a, const ssef& b) +__forceinline ssef cross(const ssef &a, const ssef &b) { - return shuffle<1,2,0,3>(cross_zxy(a, b)); + return shuffle<1, 2, 0, 3>(cross_zxy(a, b)); } -ccl_device_inline const ssef dot3_splat(const ssef& a, const ssef& b) +ccl_device_inline const ssef dot3_splat(const ssef &a, const ssef &b) { -#ifdef __KERNEL_SSE41__ - return _mm_dp_ps(a.m128, b.m128, 0x7f); -#else - ssef t = a * b; - return ssef(((float*)&t)[0] + ((float*)&t)[1] + ((float*)&t)[2]); -#endif +# ifdef __KERNEL_SSE41__ + return _mm_dp_ps(a.m128, b.m128, 0x7f); +# else + ssef t = a * b; + return ssef(((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]); +# endif } /* squared length taking only specified axes into account */ -template<size_t X, size_t Y, size_t Z, size_t W> -ccl_device_inline float len_squared(const ssef& a) -{ -#ifndef __KERNEL_SSE41__ - float4& t = (float4 &)a; - return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) + (W ? t.w * t.w : 0.0f); -#else - return extract<0>(ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf))); -#endif +template<size_t X, size_t Y, size_t Z, size_t W> ccl_device_inline float len_squared(const ssef &a) +{ +# ifndef __KERNEL_SSE41__ + float4 &t = (float4 &)a; + return (X ? t.x * t.x : 0.0f) + (Y ? t.y * t.y : 0.0f) + (Z ? t.z * t.z : 0.0f) + + (W ? t.w * t.w : 0.0f); +# else + return extract<0>( + ssef(_mm_dp_ps(a.m128, a.m128, (X << 4) | (Y << 5) | (Z << 6) | (W << 7) | 0xf))); +# endif } -ccl_device_inline float dot3(const ssef& a, const ssef& b) +ccl_device_inline float dot3(const ssef &a, const ssef &b) { -#ifdef __KERNEL_SSE41__ - return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f))); -#else - ssef t = a * b; - return ((float*)&t)[0] + ((float*)&t)[1] + ((float*)&t)[2]; -#endif +# ifdef __KERNEL_SSE41__ + return extract<0>(ssef(_mm_dp_ps(a.m128, b.m128, 0x7f))); +# else + ssef t = a * b; + return ((float *)&t)[0] + ((float *)&t)[1] + ((float *)&t)[2]; +# endif } -ccl_device_inline const ssef len3_squared_splat(const ssef& a) +ccl_device_inline const ssef len3_squared_splat(const ssef &a) { - return dot3_splat(a, a); + return dot3_splat(a, a); } -ccl_device_inline float len3_squared(const ssef& a) +ccl_device_inline float len3_squared(const ssef &a) { - return dot3(a, a); + return dot3(a, a); } -ccl_device_inline float len3(const ssef& a) +ccl_device_inline float len3(const ssef &a) { - return extract<0>(mm_sqrt(dot3_splat(a, a))); + return extract<0>(mm_sqrt(dot3_splat(a, a))); } /* SSE shuffle utility functions */ -#ifdef __KERNEL_SSSE3__ +# ifdef __KERNEL_SSSE3__ /* faster version for SSSE3 */ typedef ssei shuffle_swap_t; ccl_device_inline shuffle_swap_t shuffle_swap_identity() { - return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); } ccl_device_inline shuffle_swap_t shuffle_swap_swap() { - return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + return _mm_set_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } -ccl_device_inline const ssef shuffle_swap(const ssef& a, const shuffle_swap_t& shuf) +ccl_device_inline const ssef shuffle_swap(const ssef &a, const shuffle_swap_t &shuf) { - return cast(_mm_shuffle_epi8(cast(a), shuf)); + return cast(_mm_shuffle_epi8(cast(a), shuf)); } -#else +# else /* somewhat slower version for SSE2 */ typedef int shuffle_swap_t; ccl_device_inline shuffle_swap_t shuffle_swap_identity() { - return 0; + return 0; } ccl_device_inline shuffle_swap_t shuffle_swap_swap() { - return 1; + return 1; } -ccl_device_inline const ssef shuffle_swap(const ssef& a, shuffle_swap_t shuf) +ccl_device_inline const ssef shuffle_swap(const ssef &a, shuffle_swap_t shuf) { - /* shuffle value must be a constant, so we need to branch */ - if(shuf) - return ssef(_mm_shuffle_ps(a.m128, a.m128, _MM_SHUFFLE(1, 0, 3, 2))); - else - return ssef(_mm_shuffle_ps(a.m128, a.m128, _MM_SHUFFLE(3, 2, 1, 0))); + /* shuffle value must be a constant, so we need to branch */ + if (shuf) + return ssef(_mm_shuffle_ps(a.m128, a.m128, _MM_SHUFFLE(1, 0, 3, 2))); + else + return ssef(_mm_shuffle_ps(a.m128, a.m128, _MM_SHUFFLE(3, 2, 1, 0))); } -#endif +# endif -#ifdef __KERNEL_SSE41__ +# ifdef __KERNEL_SSE41__ -ccl_device_inline void gen_idirsplat_swap(const ssef &pn, const shuffle_swap_t &shuf_identity, const shuffle_swap_t &shuf_swap, - const float3& idir, ssef idirsplat[3], shuffle_swap_t shufflexyz[3]) +ccl_device_inline void gen_idirsplat_swap(const ssef &pn, + const shuffle_swap_t &shuf_identity, + const shuffle_swap_t &shuf_swap, + const float3 &idir, + ssef idirsplat[3], + shuffle_swap_t shufflexyz[3]) { - const __m128 idirsplat_raw[] = { _mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z) }; - idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn); - idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn); - idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn); - - const ssef signmask = cast(ssei(0x80000000)); - const ssef shuf_identity_f = cast(shuf_identity); - const ssef shuf_swap_f = cast(shuf_swap); - - shufflexyz[0] = _mm_castps_si128(_mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask))); - shufflexyz[1] = _mm_castps_si128(_mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask))); - shufflexyz[2] = _mm_castps_si128(_mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask))); -} - -#else - -ccl_device_inline void gen_idirsplat_swap(const ssef &pn, const shuffle_swap_t &shuf_identity, const shuffle_swap_t &shuf_swap, - const float3& idir, ssef idirsplat[3], shuffle_swap_t shufflexyz[3]) + const __m128 idirsplat_raw[] = {_mm_set_ps1(idir.x), _mm_set_ps1(idir.y), _mm_set_ps1(idir.z)}; + idirsplat[0] = _mm_xor_ps(idirsplat_raw[0], pn); + idirsplat[1] = _mm_xor_ps(idirsplat_raw[1], pn); + idirsplat[2] = _mm_xor_ps(idirsplat_raw[2], pn); + + const ssef signmask = cast(ssei(0x80000000)); + const ssef shuf_identity_f = cast(shuf_identity); + const ssef shuf_swap_f = cast(shuf_swap); + + shufflexyz[0] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[0], signmask))); + shufflexyz[1] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[1], signmask))); + shufflexyz[2] = _mm_castps_si128( + _mm_blendv_ps(shuf_identity_f, shuf_swap_f, _mm_and_ps(idirsplat_raw[2], signmask))); +} + +# else + +ccl_device_inline void gen_idirsplat_swap(const ssef &pn, + const shuffle_swap_t &shuf_identity, + const shuffle_swap_t &shuf_swap, + const float3 &idir, + ssef idirsplat[3], + shuffle_swap_t shufflexyz[3]) { - idirsplat[0] = ssef(idir.x) ^ pn; - idirsplat[1] = ssef(idir.y) ^ pn; - idirsplat[2] = ssef(idir.z) ^ pn; + idirsplat[0] = ssef(idir.x) ^ pn; + idirsplat[1] = ssef(idir.y) ^ pn; + idirsplat[2] = ssef(idir.z) ^ pn; - shufflexyz[0] = (idir.x >= 0)? shuf_identity: shuf_swap; - shufflexyz[1] = (idir.y >= 0)? shuf_identity: shuf_swap; - shufflexyz[2] = (idir.z >= 0)? shuf_identity: shuf_swap; + shufflexyz[0] = (idir.x >= 0) ? shuf_identity : shuf_swap; + shufflexyz[1] = (idir.y >= 0) ? shuf_identity : shuf_swap; + shufflexyz[2] = (idir.z >= 0) ? shuf_identity : shuf_swap; } -#endif +# endif ccl_device_inline const ssef uint32_to_float(const ssei &in) { - ssei a = _mm_srli_epi32(in, 16); - ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff)); - ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000)); - ssef d = _mm_cvtepi32_ps(b); - ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000))); - return _mm_add_ps(e, d); + ssei a = _mm_srli_epi32(in, 16); + ssei b = _mm_and_si128(in, _mm_set1_epi32(0x0000ffff)); + ssei c = _mm_or_si128(a, _mm_set1_epi32(0x53000000)); + ssef d = _mm_cvtepi32_ps(b); + ssef e = _mm_sub_ps(_mm_castsi128_ps(c), _mm_castsi128_ps(_mm_set1_epi32(0x53000000))); + return _mm_add_ps(e, d); } template<size_t S1, size_t S2, size_t S3, size_t S4> ccl_device_inline const ssef set_sign_bit(const ssef &a) { - return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31)); + return cast(cast(a) ^ ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31)); } //////////////////////////////////////////////////////////////////////////////// @@ -615,12 +999,8 @@ ccl_device_inline const ssef set_sign_bit(const ssef &a) ccl_device_inline void print_ssef(const char *label, const ssef &a) { - printf("%s: %.8f %.8f %.8f %.8f\n", - label, - (double)a[0], - (double)a[1], - (double)a[2], - (double)a[3]); + printf( + "%s: %.8f %.8f %.8f %.8f\n", label, (double)a[0], (double)a[1], (double)a[2], (double)a[3]); } #endif diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h index ba0389cd114..86429260a0e 100644 --- a/intern/cycles/util/util_ssei.h +++ b/intern/cycles/util/util_ssei.h @@ -26,263 +26,576 @@ struct sseb; struct ssef; /*! 4-wide SSE integer type. */ -struct ssei -{ - typedef sseb Mask; // mask type - typedef ssei Int; // int type - typedef ssef Float; // float type - - enum { size = 4 }; // number of SIMD elements - union { __m128i m128; int32_t i[4]; }; // data - - //////////////////////////////////////////////////////////////////////////////// - /// Constructors, Assignment & Cast Operators - //////////////////////////////////////////////////////////////////////////////// - - __forceinline ssei ( ) {} - __forceinline ssei ( const ssei& a ) { m128 = a.m128; } - __forceinline ssei& operator=( const ssei& a ) { m128 = a.m128; return *this; } - - __forceinline ssei( const __m128i a ) : m128(a) {} - __forceinline operator const __m128i&( void ) const { return m128; } - __forceinline operator __m128i&( void ) { return m128; } - - __forceinline ssei ( const int a ) : m128(_mm_set1_epi32(a)) {} - __forceinline ssei ( int a, int b, int c, int d ) : m128(_mm_setr_epi32(a, b, c, d)) {} - - __forceinline explicit ssei( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {} - - //////////////////////////////////////////////////////////////////////////////// - /// Array Access - //////////////////////////////////////////////////////////////////////////////// - - __forceinline const int32_t& operator []( const size_t index ) const { assert(index < 4); return i[index]; } - __forceinline int32_t& operator []( const size_t index ) { assert(index < 4); return i[index]; } +struct ssei { + typedef sseb Mask; // mask type + typedef ssei Int; // int type + typedef ssef Float; // float type + + enum { size = 4 }; // number of SIMD elements + union { + __m128i m128; + int32_t i[4]; + }; // data + + //////////////////////////////////////////////////////////////////////////////// + /// Constructors, Assignment & Cast Operators + //////////////////////////////////////////////////////////////////////////////// + + __forceinline ssei() + { + } + __forceinline ssei(const ssei &a) + { + m128 = a.m128; + } + __forceinline ssei &operator=(const ssei &a) + { + m128 = a.m128; + return *this; + } + + __forceinline ssei(const __m128i a) : m128(a) + { + } + __forceinline operator const __m128i &(void)const + { + return m128; + } + __forceinline operator __m128i &(void) + { + return m128; + } + + __forceinline ssei(const int a) : m128(_mm_set1_epi32(a)) + { + } + __forceinline ssei(int a, int b, int c, int d) : m128(_mm_setr_epi32(a, b, c, d)) + { + } + + __forceinline explicit ssei(const __m128 a) : m128(_mm_cvtps_epi32(a)) + { + } + + //////////////////////////////////////////////////////////////////////////////// + /// Array Access + //////////////////////////////////////////////////////////////////////////////// + + __forceinline const int32_t &operator[](const size_t index) const + { + assert(index < 4); + return i[index]; + } + __forceinline int32_t &operator[](const size_t index) + { + assert(index < 4); + return i[index]; + } }; //////////////////////////////////////////////////////////////////////////////// /// Unary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const ssei cast ( const __m128& a ) { return _mm_castps_si128(a); } -__forceinline const ssei operator +( const ssei& a ) { return a; } -__forceinline const ssei operator -( const ssei& a ) { return _mm_sub_epi32(_mm_setzero_si128(), a.m128); } -#if defined(__KERNEL_SSSE3__) -__forceinline const ssei abs ( const ssei& a ) { return _mm_abs_epi32(a.m128); } -#endif +__forceinline const ssei cast(const __m128 &a) +{ + return _mm_castps_si128(a); +} +__forceinline const ssei operator+(const ssei &a) +{ + return a; +} +__forceinline const ssei operator-(const ssei &a) +{ + return _mm_sub_epi32(_mm_setzero_si128(), a.m128); +} +# if defined(__KERNEL_SSSE3__) +__forceinline const ssei abs(const ssei &a) +{ + return _mm_abs_epi32(a.m128); +} +# endif //////////////////////////////////////////////////////////////////////////////// /// Binary Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline const ssei operator +( const ssei& a, const ssei& b ) { return _mm_add_epi32(a.m128, b.m128); } -__forceinline const ssei operator +( const ssei& a, const int32_t& b ) { return a + ssei(b); } -__forceinline const ssei operator +( const int32_t& a, const ssei& b ) { return ssei(a) + b; } +__forceinline const ssei operator+(const ssei &a, const ssei &b) +{ + return _mm_add_epi32(a.m128, b.m128); +} +__forceinline const ssei operator+(const ssei &a, const int32_t &b) +{ + return a + ssei(b); +} +__forceinline const ssei operator+(const int32_t &a, const ssei &b) +{ + return ssei(a) + b; +} -__forceinline const ssei operator -( const ssei& a, const ssei& b ) { return _mm_sub_epi32(a.m128, b.m128); } -__forceinline const ssei operator -( const ssei& a, const int32_t& b ) { return a - ssei(b); } -__forceinline const ssei operator -( const int32_t& a, const ssei& b ) { return ssei(a) - b; } +__forceinline const ssei operator-(const ssei &a, const ssei &b) +{ + return _mm_sub_epi32(a.m128, b.m128); +} +__forceinline const ssei operator-(const ssei &a, const int32_t &b) +{ + return a - ssei(b); +} +__forceinline const ssei operator-(const int32_t &a, const ssei &b) +{ + return ssei(a) - b; +} -#if defined(__KERNEL_SSE41__) -__forceinline const ssei operator *( const ssei& a, const ssei& b ) { return _mm_mullo_epi32(a.m128, b.m128); } -__forceinline const ssei operator *( const ssei& a, const int32_t& b ) { return a * ssei(b); } -__forceinline const ssei operator *( const int32_t& a, const ssei& b ) { return ssei(a) * b; } -#endif +# if defined(__KERNEL_SSE41__) +__forceinline const ssei operator*(const ssei &a, const ssei &b) +{ + return _mm_mullo_epi32(a.m128, b.m128); +} +__forceinline const ssei operator*(const ssei &a, const int32_t &b) +{ + return a * ssei(b); +} +__forceinline const ssei operator*(const int32_t &a, const ssei &b) +{ + return ssei(a) * b; +} +# endif -__forceinline const ssei operator &( const ssei& a, const ssei& b ) { return _mm_and_si128(a.m128, b.m128); } -__forceinline const ssei operator &( const ssei& a, const int32_t& b ) { return a & ssei(b); } -__forceinline const ssei operator &( const int32_t& a, const ssei& b ) { return ssei(a) & b; } +__forceinline const ssei operator&(const ssei &a, const ssei &b) +{ + return _mm_and_si128(a.m128, b.m128); +} +__forceinline const ssei operator&(const ssei &a, const int32_t &b) +{ + return a & ssei(b); +} +__forceinline const ssei operator&(const int32_t &a, const ssei &b) +{ + return ssei(a) & b; +} -__forceinline const ssei operator |( const ssei& a, const ssei& b ) { return _mm_or_si128(a.m128, b.m128); } -__forceinline const ssei operator |( const ssei& a, const int32_t& b ) { return a | ssei(b); } -__forceinline const ssei operator |( const int32_t& a, const ssei& b ) { return ssei(a) | b; } +__forceinline const ssei operator|(const ssei &a, const ssei &b) +{ + return _mm_or_si128(a.m128, b.m128); +} +__forceinline const ssei operator|(const ssei &a, const int32_t &b) +{ + return a | ssei(b); +} +__forceinline const ssei operator|(const int32_t &a, const ssei &b) +{ + return ssei(a) | b; +} -__forceinline const ssei operator ^( const ssei& a, const ssei& b ) { return _mm_xor_si128(a.m128, b.m128); } -__forceinline const ssei operator ^( const ssei& a, const int32_t& b ) { return a ^ ssei(b); } -__forceinline const ssei operator ^( const int32_t& a, const ssei& b ) { return ssei(a) ^ b; } +__forceinline const ssei operator^(const ssei &a, const ssei &b) +{ + return _mm_xor_si128(a.m128, b.m128); +} +__forceinline const ssei operator^(const ssei &a, const int32_t &b) +{ + return a ^ ssei(b); +} +__forceinline const ssei operator^(const int32_t &a, const ssei &b) +{ + return ssei(a) ^ b; +} -__forceinline const ssei operator <<( const ssei& a, const int32_t& n ) { return _mm_slli_epi32(a.m128, n); } -__forceinline const ssei operator >>( const ssei& a, const int32_t& n ) { return _mm_srai_epi32(a.m128, n); } +__forceinline const ssei operator<<(const ssei &a, const int32_t &n) +{ + return _mm_slli_epi32(a.m128, n); +} +__forceinline const ssei operator>>(const ssei &a, const int32_t &n) +{ + return _mm_srai_epi32(a.m128, n); +} -__forceinline const ssei andnot(const ssei& a, const ssei& b) { return _mm_andnot_si128(a.m128,b.m128); } -__forceinline const ssei andnot(const sseb& a, const ssei& b) { return _mm_andnot_si128(cast(a.m128),b.m128); } -__forceinline const ssei andnot(const ssei& a, const sseb& b) { return _mm_andnot_si128(a.m128,cast(b.m128)); } +__forceinline const ssei andnot(const ssei &a, const ssei &b) +{ + return _mm_andnot_si128(a.m128, b.m128); +} +__forceinline const ssei andnot(const sseb &a, const ssei &b) +{ + return _mm_andnot_si128(cast(a.m128), b.m128); +} +__forceinline const ssei andnot(const ssei &a, const sseb &b) +{ + return _mm_andnot_si128(a.m128, cast(b.m128)); +} -__forceinline const ssei sra ( const ssei& a, const int32_t& b ) { return _mm_srai_epi32(a.m128, b); } -__forceinline const ssei srl ( const ssei& a, const int32_t& b ) { return _mm_srli_epi32(a.m128, b); } +__forceinline const ssei sra(const ssei &a, const int32_t &b) +{ + return _mm_srai_epi32(a.m128, b); +} +__forceinline const ssei srl(const ssei &a, const int32_t &b) +{ + return _mm_srli_epi32(a.m128, b); +} -#if defined(__KERNEL_SSE41__) -__forceinline const ssei min( const ssei& a, const ssei& b ) { return _mm_min_epi32(a.m128, b.m128); } -__forceinline const ssei min( const ssei& a, const int32_t& b ) { return min(a,ssei(b)); } -__forceinline const ssei min( const int32_t& a, const ssei& b ) { return min(ssei(a),b); } +# if defined(__KERNEL_SSE41__) +__forceinline const ssei min(const ssei &a, const ssei &b) +{ + return _mm_min_epi32(a.m128, b.m128); +} +__forceinline const ssei min(const ssei &a, const int32_t &b) +{ + return min(a, ssei(b)); +} +__forceinline const ssei min(const int32_t &a, const ssei &b) +{ + return min(ssei(a), b); +} -__forceinline const ssei max( const ssei& a, const ssei& b ) { return _mm_max_epi32(a.m128, b.m128); } -__forceinline const ssei max( const ssei& a, const int32_t& b ) { return max(a,ssei(b)); } -__forceinline const ssei max( const int32_t& a, const ssei& b ) { return max(ssei(a),b); } -#endif +__forceinline const ssei max(const ssei &a, const ssei &b) +{ + return _mm_max_epi32(a.m128, b.m128); +} +__forceinline const ssei max(const ssei &a, const int32_t &b) +{ + return max(a, ssei(b)); +} +__forceinline const ssei max(const int32_t &a, const ssei &b) +{ + return max(ssei(a), b); +} +# endif //////////////////////////////////////////////////////////////////////////////// /// Assignment Operators //////////////////////////////////////////////////////////////////////////////// -__forceinline ssei& operator +=( ssei& a, const ssei& b ) { return a = a + b; } -__forceinline ssei& operator +=( ssei& a, const int32_t& b ) { return a = a + b; } +__forceinline ssei &operator+=(ssei &a, const ssei &b) +{ + return a = a + b; +} +__forceinline ssei &operator+=(ssei &a, const int32_t &b) +{ + return a = a + b; +} -__forceinline ssei& operator -=( ssei& a, const ssei& b ) { return a = a - b; } -__forceinline ssei& operator -=( ssei& a, const int32_t& b ) { return a = a - b; } +__forceinline ssei &operator-=(ssei &a, const ssei &b) +{ + return a = a - b; +} +__forceinline ssei &operator-=(ssei &a, const int32_t &b) +{ + return a = a - b; +} -#if defined(__KERNEL_SSE41__) -__forceinline ssei& operator *=( ssei& a, const ssei& b ) { return a = a * b; } -__forceinline ssei& operator *=( ssei& a, const int32_t& b ) { return a = a * b; } -#endif +# if defined(__KERNEL_SSE41__) +__forceinline ssei &operator*=(ssei &a, const ssei &b) +{ + return a = a * b; +} +__forceinline ssei &operator*=(ssei &a, const int32_t &b) +{ + return a = a * b; +} +# endif -__forceinline ssei& operator &=( ssei& a, const ssei& b ) { return a = a & b; } -__forceinline ssei& operator &=( ssei& a, const int32_t& b ) { return a = a & b; } +__forceinline ssei &operator&=(ssei &a, const ssei &b) +{ + return a = a & b; +} +__forceinline ssei &operator&=(ssei &a, const int32_t &b) +{ + return a = a & b; +} -__forceinline ssei& operator |=( ssei& a, const ssei& b ) { return a = a | b; } -__forceinline ssei& operator |=( ssei& a, const int32_t& b ) { return a = a | b; } +__forceinline ssei &operator|=(ssei &a, const ssei &b) +{ + return a = a | b; +} +__forceinline ssei &operator|=(ssei &a, const int32_t &b) +{ + return a = a | b; +} -__forceinline ssei& operator <<=( ssei& a, const int32_t& b ) { return a = a << b; } -__forceinline ssei& operator >>=( ssei& a, const int32_t& b ) { return a = a >> b; } +__forceinline ssei &operator<<=(ssei &a, const int32_t &b) +{ + return a = a << b; +} +__forceinline ssei &operator>>=(ssei &a, const int32_t &b) +{ + return a = a >> b; +} //////////////////////////////////////////////////////////////////////////////// /// Comparison Operators + Select //////////////////////////////////////////////////////////////////////////////// -__forceinline const sseb operator ==( const ssei& a, const ssei& b ) { return _mm_castsi128_ps(_mm_cmpeq_epi32 (a.m128, b.m128)); } -__forceinline const sseb operator ==( const ssei& a, const int32_t& b ) { return a == ssei(b); } -__forceinline const sseb operator ==( const int32_t& a, const ssei& b ) { return ssei(a) == b; } +__forceinline const sseb operator==(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmpeq_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator==(const ssei &a, const int32_t &b) +{ + return a == ssei(b); +} +__forceinline const sseb operator==(const int32_t &a, const ssei &b) +{ + return ssei(a) == b; +} -__forceinline const sseb operator !=( const ssei& a, const ssei& b ) { return !(a == b); } -__forceinline const sseb operator !=( const ssei& a, const int32_t& b ) { return a != ssei(b); } -__forceinline const sseb operator !=( const int32_t& a, const ssei& b ) { return ssei(a) != b; } +__forceinline const sseb operator!=(const ssei &a, const ssei &b) +{ + return !(a == b); +} +__forceinline const sseb operator!=(const ssei &a, const int32_t &b) +{ + return a != ssei(b); +} +__forceinline const sseb operator!=(const int32_t &a, const ssei &b) +{ + return ssei(a) != b; +} -__forceinline const sseb operator < ( const ssei& a, const ssei& b ) { return _mm_castsi128_ps(_mm_cmplt_epi32 (a.m128, b.m128)); } -__forceinline const sseb operator < ( const ssei& a, const int32_t& b ) { return a < ssei(b); } -__forceinline const sseb operator < ( const int32_t& a, const ssei& b ) { return ssei(a) < b; } +__forceinline const sseb operator<(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmplt_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator<(const ssei &a, const int32_t &b) +{ + return a < ssei(b); +} +__forceinline const sseb operator<(const int32_t &a, const ssei &b) +{ + return ssei(a) < b; +} -__forceinline const sseb operator >=( const ssei& a, const ssei& b ) { return !(a < b); } -__forceinline const sseb operator >=( const ssei& a, const int32_t& b ) { return a >= ssei(b); } -__forceinline const sseb operator >=( const int32_t& a, const ssei& b ) { return ssei(a) >= b; } +__forceinline const sseb operator>=(const ssei &a, const ssei &b) +{ + return !(a < b); +} +__forceinline const sseb operator>=(const ssei &a, const int32_t &b) +{ + return a >= ssei(b); +} +__forceinline const sseb operator>=(const int32_t &a, const ssei &b) +{ + return ssei(a) >= b; +} -__forceinline const sseb operator > ( const ssei& a, const ssei& b ) { return _mm_castsi128_ps(_mm_cmpgt_epi32 (a.m128, b.m128)); } -__forceinline const sseb operator > ( const ssei& a, const int32_t& b ) { return a > ssei(b); } -__forceinline const sseb operator > ( const int32_t& a, const ssei& b ) { return ssei(a) > b; } +__forceinline const sseb operator>(const ssei &a, const ssei &b) +{ + return _mm_castsi128_ps(_mm_cmpgt_epi32(a.m128, b.m128)); +} +__forceinline const sseb operator>(const ssei &a, const int32_t &b) +{ + return a > ssei(b); +} +__forceinline const sseb operator>(const int32_t &a, const ssei &b) +{ + return ssei(a) > b; +} -__forceinline const sseb operator <=( const ssei& a, const ssei& b ) { return !(a > b); } -__forceinline const sseb operator <=( const ssei& a, const int32_t& b ) { return a <= ssei(b); } -__forceinline const sseb operator <=( const int32_t& a, const ssei& b ) { return ssei(a) <= b; } +__forceinline const sseb operator<=(const ssei &a, const ssei &b) +{ + return !(a > b); +} +__forceinline const sseb operator<=(const ssei &a, const int32_t &b) +{ + return a <= ssei(b); +} +__forceinline const sseb operator<=(const int32_t &a, const ssei &b) +{ + return ssei(a) <= b; +} -__forceinline const ssei select( const sseb& m, const ssei& t, const ssei& f ) { -#ifdef __KERNEL_SSE41__ - return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); -#else - return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); -#endif +__forceinline const ssei select(const sseb &m, const ssei &t, const ssei &f) +{ +# ifdef __KERNEL_SSE41__ + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), m)); +# else + return _mm_or_si128(_mm_and_si128(m, t), _mm_andnot_si128(m, f)); +# endif } -__forceinline const ssei select( const int mask, const ssei& t, const ssei& f ) { -#if defined(__KERNEL_SSE41__) && ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) - return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); -#else - return select(sseb(mask),t,f); -#endif +__forceinline const ssei select(const int mask, const ssei &t, const ssei &f) +{ +# if defined(__KERNEL_SSE41__) && \ + ((!defined(__clang__) && !defined(_MSC_VER)) || defined(__INTEL_COMPILER)) + return _mm_castps_si128(_mm_blend_ps(_mm_castsi128_ps(f), _mm_castsi128_ps(t), mask)); +# else + return select(sseb(mask), t, f); +# endif } //////////////////////////////////////////////////////////////////////////////// // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// -__forceinline ssei unpacklo( const ssei& a, const ssei& b ) { return _mm_unpacklo_epi32(a, b); } -__forceinline ssei unpackhi( const ssei& a, const ssei& b ) { return _mm_unpackhi_epi32(a, b); } +__forceinline ssei unpacklo(const ssei &a, const ssei &b) +{ + return _mm_unpacklo_epi32(a, b); +} +__forceinline ssei unpackhi(const ssei &a, const ssei &b) +{ + return _mm_unpackhi_epi32(a, b); +} -template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei shuffle( const ssei& a ) { - return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); +template<size_t i0, size_t i1, size_t i2, size_t i3> +__forceinline const ssei shuffle(const ssei &a) +{ + return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); } -template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei shuffle( const ssei& a, const ssei& b ) { - return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); +template<size_t i0, size_t i1, size_t i2, size_t i3> +__forceinline const ssei shuffle(const ssei &a, const ssei &b) +{ + return _mm_castps_si128( + _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); } -template<size_t i0> __forceinline const ssei shuffle( const ssei& b ) { - return shuffle<i0,i0,i0,i0>(b); +template<size_t i0> __forceinline const ssei shuffle(const ssei &b) +{ + return shuffle<i0, i0, i0, i0>(b); } -#if defined(__KERNEL_SSE41__) -template<size_t src> __forceinline int extract( const ssei& b ) { return _mm_extract_epi32(b, src); } -template<size_t dst> __forceinline const ssei insert( const ssei& a, const int32_t b ) { return _mm_insert_epi32(a, b, dst); } -#else -template<size_t src> __forceinline int extract( const ssei& b ) { return b[src]; } -template<size_t dst> __forceinline const ssei insert( const ssei& a, const int32_t b ) { ssei c = a; c[dst] = b; return c; } -#endif +# if defined(__KERNEL_SSE41__) +template<size_t src> __forceinline int extract(const ssei &b) +{ + return _mm_extract_epi32(b, src); +} +template<size_t dst> __forceinline const ssei insert(const ssei &a, const int32_t b) +{ + return _mm_insert_epi32(a, b, dst); +} +# else +template<size_t src> __forceinline int extract(const ssei &b) +{ + return b[src]; +} +template<size_t dst> __forceinline const ssei insert(const ssei &a, const int32_t b) +{ + ssei c = a; + c[dst] = b; + return c; +} +# endif //////////////////////////////////////////////////////////////////////////////// /// Reductions //////////////////////////////////////////////////////////////////////////////// -#if defined(__KERNEL_SSE41__) -__forceinline const ssei vreduce_min(const ssei& v) { ssei h = min(shuffle<1,0,3,2>(v),v); return min(shuffle<2,3,0,1>(h),h); } -__forceinline const ssei vreduce_max(const ssei& v) { ssei h = max(shuffle<1,0,3,2>(v),v); return max(shuffle<2,3,0,1>(h),h); } -__forceinline const ssei vreduce_add(const ssei& v) { ssei h = shuffle<1,0,3,2>(v) + v ; return shuffle<2,3,0,1>(h) + h ; } +# if defined(__KERNEL_SSE41__) +__forceinline const ssei vreduce_min(const ssei &v) +{ + ssei h = min(shuffle<1, 0, 3, 2>(v), v); + return min(shuffle<2, 3, 0, 1>(h), h); +} +__forceinline const ssei vreduce_max(const ssei &v) +{ + ssei h = max(shuffle<1, 0, 3, 2>(v), v); + return max(shuffle<2, 3, 0, 1>(h), h); +} +__forceinline const ssei vreduce_add(const ssei &v) +{ + ssei h = shuffle<1, 0, 3, 2>(v) + v; + return shuffle<2, 3, 0, 1>(h) + h; +} -__forceinline int reduce_min(const ssei& v) { return extract<0>(vreduce_min(v)); } -__forceinline int reduce_max(const ssei& v) { return extract<0>(vreduce_max(v)); } -__forceinline int reduce_add(const ssei& v) { return extract<0>(vreduce_add(v)); } +__forceinline int reduce_min(const ssei &v) +{ + return extract<0>(vreduce_min(v)); +} +__forceinline int reduce_max(const ssei &v) +{ + return extract<0>(vreduce_max(v)); +} +__forceinline int reduce_add(const ssei &v) +{ + return extract<0>(vreduce_add(v)); +} -__forceinline size_t select_min(const ssei& v) { return __bsf(movemask(v == vreduce_min(v))); } -__forceinline size_t select_max(const ssei& v) { return __bsf(movemask(v == vreduce_max(v))); } +__forceinline size_t select_min(const ssei &v) +{ + return __bsf(movemask(v == vreduce_min(v))); +} +__forceinline size_t select_max(const ssei &v) +{ + return __bsf(movemask(v == vreduce_max(v))); +} -__forceinline size_t select_min(const sseb& valid, const ssei& v) { const ssei a = select(valid,v,ssei((int)pos_inf)); return __bsf(movemask(valid & (a == vreduce_min(a)))); } -__forceinline size_t select_max(const sseb& valid, const ssei& v) { const ssei a = select(valid,v,ssei((int)neg_inf)); return __bsf(movemask(valid & (a == vreduce_max(a)))); } +__forceinline size_t select_min(const sseb &valid, const ssei &v) +{ + const ssei a = select(valid, v, ssei((int)pos_inf)); + return __bsf(movemask(valid & (a == vreduce_min(a)))); +} +__forceinline size_t select_max(const sseb &valid, const ssei &v) +{ + const ssei a = select(valid, v, ssei((int)neg_inf)); + return __bsf(movemask(valid & (a == vreduce_max(a)))); +} -#else +# else -__forceinline int ssei_min(int a, int b) { return (a < b)? a: b; } -__forceinline int ssei_max(int a, int b) { return (a > b)? a: b; } -__forceinline int reduce_min(const ssei& v) { return ssei_min(ssei_min(v[0],v[1]),ssei_min(v[2],v[3])); } -__forceinline int reduce_max(const ssei& v) { return ssei_max(ssei_max(v[0],v[1]),ssei_max(v[2],v[3])); } -__forceinline int reduce_add(const ssei& v) { return v[0]+v[1]+v[2]+v[3]; } +__forceinline int ssei_min(int a, int b) +{ + return (a < b) ? a : b; +} +__forceinline int ssei_max(int a, int b) +{ + return (a > b) ? a : b; +} +__forceinline int reduce_min(const ssei &v) +{ + return ssei_min(ssei_min(v[0], v[1]), ssei_min(v[2], v[3])); +} +__forceinline int reduce_max(const ssei &v) +{ + return ssei_max(ssei_max(v[0], v[1]), ssei_max(v[2], v[3])); +} +__forceinline int reduce_add(const ssei &v) +{ + return v[0] + v[1] + v[2] + v[3]; +} -#endif +# endif //////////////////////////////////////////////////////////////////////////////// /// Memory load and store operations //////////////////////////////////////////////////////////////////////////////// -__forceinline ssei load4i( const void* const a ) { - return _mm_load_si128((__m128i*)a); +__forceinline ssei load4i(const void *const a) +{ + return _mm_load_si128((__m128i *)a); } -__forceinline void store4i(void* ptr, const ssei& v) { - _mm_store_si128((__m128i*)ptr,v); +__forceinline void store4i(void *ptr, const ssei &v) +{ + _mm_store_si128((__m128i *)ptr, v); } -__forceinline void storeu4i(void* ptr, const ssei& v) { - _mm_storeu_si128((__m128i*)ptr,v); +__forceinline void storeu4i(void *ptr, const ssei &v) +{ + _mm_storeu_si128((__m128i *)ptr, v); } -__forceinline void store4i( const sseb& mask, void* ptr, const ssei& i ) { -#if defined (__KERNEL_AVX__) - _mm_maskstore_ps((float*)ptr,(__m128i)mask,_mm_castsi128_ps(i)); -#else - *(ssei*)ptr = select(mask,i,*(ssei*)ptr); -#endif +__forceinline void store4i(const sseb &mask, void *ptr, const ssei &i) +{ +# if defined(__KERNEL_AVX__) + _mm_maskstore_ps((float *)ptr, (__m128i)mask, _mm_castsi128_ps(i)); +# else + *(ssei *)ptr = select(mask, i, *(ssei *)ptr); +# endif } -__forceinline ssei load4i_nt (void* ptr) { -#if defined(__KERNEL_SSE41__) - return _mm_stream_load_si128((__m128i*)ptr); -#else - return _mm_load_si128((__m128i*)ptr); -#endif +__forceinline ssei load4i_nt(void *ptr) +{ +# if defined(__KERNEL_SSE41__) + return _mm_stream_load_si128((__m128i *)ptr); +# else + return _mm_load_si128((__m128i *)ptr); +# endif } -__forceinline void store4i_nt(void* ptr, const ssei& v) { -#if defined(__KERNEL_SSE41__) - _mm_stream_ps((float*)ptr,_mm_castsi128_ps(v)); -#else - _mm_store_si128((__m128i*)ptr,v); -#endif +__forceinline void store4i_nt(void *ptr, const ssei &v) +{ +# if defined(__KERNEL_SSE41__) + _mm_stream_ps((float *)ptr, _mm_castsi128_ps(v)); +# else + _mm_store_si128((__m128i *)ptr, v); +# endif } //////////////////////////////////////////////////////////////////////////////// @@ -291,8 +604,7 @@ __forceinline void store4i_nt(void* ptr, const ssei& v) { ccl_device_inline void print_ssei(const char *label, const ssei &a) { - printf("%s: %df %df %df %d\n", - label, a[0], a[1], a[2], a[3]); + printf("%s: %df %df %df %d\n", label, a[0], a[1], a[2], a[3]); } #endif diff --git a/intern/cycles/util/util_stack_allocator.h b/intern/cycles/util/util_stack_allocator.h index 4e978e18bee..36db655e5eb 100644 --- a/intern/cycles/util/util_stack_allocator.h +++ b/intern/cycles/util/util_stack_allocator.h @@ -23,145 +23,143 @@ CCL_NAMESPACE_BEGIN /* Stack allocator for the use with STL. */ -template <int SIZE, typename T> -class ccl_try_align(16) StackAllocator { -public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef T *pointer; - typedef const T *const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef T value_type; - - /* Allocator construction/destruction. */ - - StackAllocator() - : pointer_(0), - use_stack_(true) {} - - StackAllocator(const StackAllocator&) - : pointer_(0), - use_stack_(true) {} - - template <class U> - StackAllocator(const StackAllocator<SIZE, U>&) - : pointer_(0), - use_stack_(false) {} - - /* Memory allocation/deallocation. */ - - T *allocate(size_t n, const void *hint = 0) - { - (void) hint; - if(n == 0) { - return NULL; - } - if(pointer_ + n >= SIZE || use_stack_ == false) { - size_t size = n * sizeof(T); - util_guarded_mem_alloc(size); - T *mem; +template<int SIZE, typename T> class ccl_try_align(16) StackAllocator +{ + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef T value_type; + + /* Allocator construction/destruction. */ + + StackAllocator() : pointer_(0), use_stack_(true) + { + } + + StackAllocator(const StackAllocator &) : pointer_(0), use_stack_(true) + { + } + + template<class U> + StackAllocator(const StackAllocator<SIZE, U> &) : pointer_(0), use_stack_(false) + { + } + + /* Memory allocation/deallocation. */ + + T *allocate(size_t n, const void *hint = 0) + { + (void)hint; + if (n == 0) { + return NULL; + } + if (pointer_ + n >= SIZE || use_stack_ == false) { + size_t size = n * sizeof(T); + util_guarded_mem_alloc(size); + T *mem; #ifdef WITH_BLENDER_GUARDEDALLOC - mem = (T*)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); + mem = (T *)MEM_mallocN_aligned(size, 16, "Cycles Alloc"); #else - mem = (T*)malloc(size); + mem = (T *)malloc(size); #endif - if(mem == NULL) { - throw std::bad_alloc(); - } - return mem; - } - T *mem = &data_[pointer_]; - pointer_ += n; - return mem; - } - - void deallocate(T *p, size_t n) - { - if(p == NULL) { - return; - } - if(p < data_ || p >= data_ + SIZE) { - util_guarded_mem_free(n * sizeof(T)); + if (mem == NULL) { + throw std::bad_alloc(); + } + return mem; + } + T *mem = &data_[pointer_]; + pointer_ += n; + return mem; + } + + void deallocate(T * p, size_t n) + { + if (p == NULL) { + return; + } + if (p < data_ || p >= data_ + SIZE) { + util_guarded_mem_free(n * sizeof(T)); #ifdef WITH_BLENDER_GUARDEDALLOC - MEM_freeN(p); + MEM_freeN(p); #else - free(p); + free(p); #endif - return; - } - /* We don't support memory free for the stack allocator. */ - } - - /* Address of an reference. */ - - T *address(T& x) const - { - return &x; - } - - const T *address(const T& x) const - { - return &x; - } - - /* Object construction/destruction. */ - - void construct(T *p, const T& val) - { - if(p != NULL) { - new ((T *)p) T(val); - } - } - - void destroy(T *p) - { - p->~T(); - } - - /* Maximum allocation size. */ - - size_t max_size() const - { - return size_t(-1); - } - - /* Rebind to other ype of allocator. */ - - template <class U> - struct rebind { - typedef StackAllocator<SIZE, U> other; - }; - - /* Operators */ - - template <class U> - inline StackAllocator& operator=(const StackAllocator<SIZE, U>&) - { - return *this; - } - - StackAllocator<SIZE, T>& operator=(const StackAllocator&) - { - return *this; - } - - inline bool operator==(StackAllocator const& /*other*/) const - { - return true; - } - - inline bool operator!=(StackAllocator const& other) const - { - return !operator==(other); - } - -private: - int pointer_; - bool use_stack_; - T data_[SIZE]; + return; + } + /* We don't support memory free for the stack allocator. */ + } + + /* Address of an reference. */ + + T *address(T & x) const + { + return &x; + } + + const T *address(const T &x) const + { + return &x; + } + + /* Object construction/destruction. */ + + void construct(T * p, const T &val) + { + if (p != NULL) { + new ((T *)p) T(val); + } + } + + void destroy(T * p) + { + p->~T(); + } + + /* Maximum allocation size. */ + + size_t max_size() const + { + return size_t(-1); + } + + /* Rebind to other ype of allocator. */ + + template<class U> struct rebind { + typedef StackAllocator<SIZE, U> other; + }; + + /* Operators */ + + template<class U> inline StackAllocator &operator=(const StackAllocator<SIZE, U> &) + { + return *this; + } + + StackAllocator<SIZE, T> &operator=(const StackAllocator &) + { + return *this; + } + + inline bool operator==(StackAllocator const & /*other*/) const + { + return true; + } + + inline bool operator!=(StackAllocator const &other) const + { + return !operator==(other); + } + + private: + int pointer_; + bool use_stack_; + T data_[SIZE]; }; CCL_NAMESPACE_END -#endif /* __UTIL_STACK_ALLOCATOR_H__ */ +#endif /* __UTIL_STACK_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/util_static_assert.h b/intern/cycles/util/util_static_assert.h index b1c6c374693..b4b972a4036 100644 --- a/intern/cycles/util/util_static_assert.h +++ b/intern/cycles/util/util_static_assert.h @@ -15,18 +15,18 @@ */ #ifndef __UTIL_STATIC_ASSERT_H__ -#define __UTIL_STATIC_ASSERT_H__ +# define __UTIL_STATIC_ASSERT_H__ CCL_NAMESPACE_BEGIN /* TODO(sergey): In theory CUDA might work with own static assert * implementation since it's just pure C++. */ -#ifdef __KERNEL_GPU__ -# ifndef static_assert -# define static_assert(statement, message) -# endif -#endif /* __KERNEL_GPU__ */ +# ifdef __KERNEL_GPU__ +# ifndef static_assert +# define static_assert(statement, message) +# endif +# endif /* __KERNEL_GPU__ */ /* TODO(sergey): For until C++11 is a bare minimum for us, * we do a bit of a trickery to show meaningful message so @@ -43,8 +43,8 @@ CCL_NAMESPACE_BEGIN * name to the error message, */ # define static_assert_align(st, align) \ - static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT + static_assert((sizeof(st) % (align) == 0), "Structure must be strictly aligned") // NOLINT CCL_NAMESPACE_END -#endif /* __UTIL_STATIC_ASSERT_H__ */ +#endif /* __UTIL_STATIC_ASSERT_H__ */ diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h index f1f2df94e33..15cf836de3c 100644 --- a/intern/cycles/util/util_stats.h +++ b/intern/cycles/util/util_stats.h @@ -23,26 +23,32 @@ CCL_NAMESPACE_BEGIN class Stats { -public: - enum static_init_t { static_init = 0 }; - - Stats() : mem_used(0), mem_peak(0) {} - explicit Stats(static_init_t) {} - - void mem_alloc(size_t size) { - atomic_add_and_fetch_z(&mem_used, size); - atomic_fetch_and_update_max_z(&mem_peak, mem_used); - } - - void mem_free(size_t size) { - assert(mem_used >= size); - atomic_sub_and_fetch_z(&mem_used, size); - } - - size_t mem_used; - size_t mem_peak; + public: + enum static_init_t { static_init = 0 }; + + Stats() : mem_used(0), mem_peak(0) + { + } + explicit Stats(static_init_t) + { + } + + void mem_alloc(size_t size) + { + atomic_add_and_fetch_z(&mem_used, size); + atomic_fetch_and_update_max_z(&mem_peak, mem_used); + } + + void mem_free(size_t size) + { + assert(mem_used >= size); + atomic_sub_and_fetch_z(&mem_used, size); + } + + size_t mem_used; + size_t mem_peak; }; CCL_NAMESPACE_END -#endif /* __UTIL_STATS_H__ */ +#endif /* __UTIL_STATS_H__ */ diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp index 47119e90a45..afcca7e0411 100644 --- a/intern/cycles/util/util_string.cpp +++ b/intern/cycles/util/util_string.cpp @@ -25,276 +25,232 @@ # ifndef vsnprintf # define vsnprintf _vsnprintf # endif -#endif /* _WIN32 */ +#endif /* _WIN32 */ CCL_NAMESPACE_BEGIN string string_printf(const char *format, ...) { - vector<char> str(128, 0); - - while(1) { - va_list args; - int result; - - va_start(args, format); - result = vsnprintf(&str[0], str.size(), format, args); - va_end(args); - - if(result == -1) { - /* not enough space or formatting error */ - if(str.size() > 65536) { - assert(0); - return string(""); - } - - str.resize(str.size()*2, 0); - continue; - } - else if(result >= (int)str.size()) { - /* not enough space */ - str.resize(result + 1, 0); - continue; - } - - return string(&str[0]); - } + vector<char> str(128, 0); + + while (1) { + va_list args; + int result; + + va_start(args, format); + result = vsnprintf(&str[0], str.size(), format, args); + va_end(args); + + if (result == -1) { + /* not enough space or formatting error */ + if (str.size() > 65536) { + assert(0); + return string(""); + } + + str.resize(str.size() * 2, 0); + continue; + } + else if (result >= (int)str.size()) { + /* not enough space */ + str.resize(result + 1, 0); + continue; + } + + return string(&str[0]); + } } -bool string_iequals(const string& a, const string& b) +bool string_iequals(const string &a, const string &b) { - if(a.size() == b.size()) { - for(size_t i = 0; i < a.size(); i++) - if(toupper(a[i]) != toupper(b[i])) - return false; + if (a.size() == b.size()) { + for (size_t i = 0; i < a.size(); i++) + if (toupper(a[i]) != toupper(b[i])) + return false; - return true; - } + return true; + } - return false; + return false; } -void string_split(vector<string>& tokens, - const string& str, - const string& separators, +void string_split(vector<string> &tokens, + const string &str, + const string &separators, bool skip_empty_tokens) { - size_t token_start = 0, token_length = 0; - for(size_t i = 0; i < str.size(); ++i) { - const char ch = str[i]; - if(separators.find(ch) == string::npos) { - /* Current character is not a separator, - * append it to token by increasing token length. - */ - ++token_length; - } - else { - /* Current character is a separator, - * append current token to the list. - */ - if(!skip_empty_tokens || token_length > 0) { - string token = str.substr(token_start, token_length); - tokens.push_back(token); - } - token_start = i + 1; - token_length = 0; - } - } - /* Append token from the tail of the string if exists. */ - if(token_length) { - string token = str.substr(token_start, token_length); - tokens.push_back(token); - } + size_t token_start = 0, token_length = 0; + for (size_t i = 0; i < str.size(); ++i) { + const char ch = str[i]; + if (separators.find(ch) == string::npos) { + /* Current character is not a separator, + * append it to token by increasing token length. + */ + ++token_length; + } + else { + /* Current character is a separator, + * append current token to the list. + */ + if (!skip_empty_tokens || token_length > 0) { + string token = str.substr(token_start, token_length); + tokens.push_back(token); + } + token_start = i + 1; + token_length = 0; + } + } + /* Append token from the tail of the string if exists. */ + if (token_length) { + string token = str.substr(token_start, token_length); + tokens.push_back(token); + } } -bool string_startswith(const string& s, const char *start) +bool string_startswith(const string &s, const char *start) { - size_t len = strlen(start); + size_t len = strlen(start); - if(len > s.size()) - return 0; - else - return strncmp(s.c_str(), start, len) == 0; + if (len > s.size()) + return 0; + else + return strncmp(s.c_str(), start, len) == 0; } -bool string_endswith(const string& s, const char *end) +bool string_endswith(const string &s, const char *end) { - size_t len = strlen(end); + size_t len = strlen(end); - if(len > s.size()) - return 0; - else - return strncmp(s.c_str() + s.size() - len, end, len) == 0; + if (len > s.size()) + return 0; + else + return strncmp(s.c_str() + s.size() - len, end, len) == 0; } -string string_strip(const string& s) +string string_strip(const string &s) { - string result = s; - result.erase(0, result.find_first_not_of(' ')); - result.erase(result.find_last_not_of(' ') + 1); - return result; - + string result = s; + result.erase(0, result.find_first_not_of(' ')); + result.erase(result.find_last_not_of(' ') + 1); + return result; } -void string_replace(string& haystack, const string& needle, const string& other) +void string_replace(string &haystack, const string &needle, const string &other) { - size_t i = 0, index; - while((index = haystack.find(needle, i)) != string::npos) { - haystack.replace(index, needle.size(), other); - i = index + other.size(); - } + size_t i = 0, index; + while ((index = haystack.find(needle, i)) != string::npos) { + haystack.replace(index, needle.size(), other); + i = index + other.size(); + } } string string_remove_trademark(const string &s) { - string result = s; + string result = s; - /* Special case, so we don;t leave sequential spaces behind. */ - /* TODO(sergey): Consider using regex perhaps? */ - string_replace(result, " (TM)", ""); - string_replace(result, " (R)", ""); + /* Special case, so we don;t leave sequential spaces behind. */ + /* TODO(sergey): Consider using regex perhaps? */ + string_replace(result, " (TM)", ""); + string_replace(result, " (R)", ""); - string_replace(result, "(TM)", ""); - string_replace(result, "(R)", ""); + string_replace(result, "(TM)", ""); + string_replace(result, "(R)", ""); - return string_strip(result); + return string_strip(result); } string string_from_bool(bool var) { - if(var) - return "True"; - else - return "False"; + if (var) + return "True"; + else + return "False"; } string to_string(const char *str) { - return string(str); + return string(str); } /* Wide char strings helpers for Windows. */ #ifdef _WIN32 -wstring string_to_wstring(const string& str) +wstring string_to_wstring(const string &str) { - const int length_wc = MultiByteToWideChar(CP_UTF8, - 0, - str.c_str(), - str.length(), - NULL, - 0); - wstring str_wc(length_wc, 0); - MultiByteToWideChar(CP_UTF8, - 0, - str.c_str(), - str.length(), - &str_wc[0], - length_wc); - return str_wc; + const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); + wstring str_wc(length_wc, 0); + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); + return str_wc; } -string string_from_wstring(const wstring& str) +string string_from_wstring(const wstring &str) { - int length_mb = WideCharToMultiByte(CP_UTF8, - 0, - str.c_str(), - str.size(), - NULL, - 0, - NULL, NULL); - string str_mb(length_mb, 0); - WideCharToMultiByte(CP_UTF8, - 0, - str.c_str(), - str.size(), - &str_mb[0], - length_mb, - NULL, NULL); - return str_mb; + int length_mb = WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), NULL, 0, NULL, NULL); + string str_mb(length_mb, 0); + WideCharToMultiByte(CP_UTF8, 0, str.c_str(), str.size(), &str_mb[0], length_mb, NULL, NULL); + return str_mb; } -string string_to_ansi(const string& str) +string string_to_ansi(const string &str) { - const int length_wc = MultiByteToWideChar(CP_UTF8, - 0, - str.c_str(), - str.length(), - NULL, - 0); - wstring str_wc(length_wc, 0); - MultiByteToWideChar(CP_UTF8, - 0, - str.c_str(), - str.length(), - &str_wc[0], - length_wc); - - int length_mb = WideCharToMultiByte(CP_ACP, - 0, - str_wc.c_str(), - str_wc.size(), - NULL, - 0, - NULL, NULL); - - string str_mb(length_mb, 0); - WideCharToMultiByte(CP_ACP, - 0, - str_wc.c_str(), - str_wc.size(), - &str_mb[0], - length_mb, - NULL, NULL); - - return str_mb; + const int length_wc = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), NULL, 0); + wstring str_wc(length_wc, 0); + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), str.length(), &str_wc[0], length_wc); + + int length_mb = WideCharToMultiByte( + CP_ACP, 0, str_wc.c_str(), str_wc.size(), NULL, 0, NULL, NULL); + + string str_mb(length_mb, 0); + WideCharToMultiByte(CP_ACP, 0, str_wc.c_str(), str_wc.size(), &str_mb[0], length_mb, NULL, NULL); + + return str_mb; } -#endif /* _WIN32 */ +#endif /* _WIN32 */ string string_human_readable_size(size_t size) { - static const char suffixes[] = "BKMGTPEZY"; + static const char suffixes[] = "BKMGTPEZY"; - const char* suffix = suffixes; - size_t r = 0; + const char *suffix = suffixes; + size_t r = 0; - while(size >= 1024) { - r = size % 1024; - size /= 1024; - suffix++; - } + while (size >= 1024) { + r = size % 1024; + size /= 1024; + suffix++; + } - if(*suffix != 'B') - return string_printf("%.2f%c", double(size*1024+r)/1024.0, *suffix); - else - return string_printf("%zu", size); + if (*suffix != 'B') + return string_printf("%.2f%c", double(size * 1024 + r) / 1024.0, *suffix); + else + return string_printf("%zu", size); } string string_human_readable_number(size_t num) { - if(num == 0) { - return "0"; - } + if (num == 0) { + return "0"; + } - /* Add thousands separators. */ - char buf[32]; + /* Add thousands separators. */ + char buf[32]; - char* p = buf+31; - *p = '\0'; + char *p = buf + 31; + *p = '\0'; - int i = -1; - while(num) { - if(++i && i % 3 == 0) - *(--p) = ','; + int i = -1; + while (num) { + if (++i && i % 3 == 0) + *(--p) = ','; - *(--p) = '0' + (num % 10); + *(--p) = '0' + (num % 10); - num /= 10; - } + num /= 10; + } - return p; + return p; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h index 02584fe5113..f71145741c9 100644 --- a/intern/cycles/util/util_string.h +++ b/intern/cycles/util/util_string.h @@ -25,30 +25,30 @@ CCL_NAMESPACE_BEGIN +using std::istringstream; +using std::ostringstream; using std::string; using std::stringstream; -using std::ostringstream; -using std::istringstream; using std::to_string; #ifdef __GNUC__ -#define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2))) +# define PRINTF_ATTRIBUTE __attribute__((format(printf, 1, 2))) #else -#define PRINTF_ATTRIBUTE +# define PRINTF_ATTRIBUTE #endif string string_printf(const char *format, ...) PRINTF_ATTRIBUTE; -bool string_iequals(const string& a, const string& b); -void string_split(vector<string>& tokens, - const string& str, - const string& separators = "\t ", +bool string_iequals(const string &a, const string &b); +void string_split(vector<string> &tokens, + const string &str, + const string &separators = "\t ", bool skip_empty_tokens = true); -void string_replace(string& haystack, const string& needle, const string& other); -bool string_startswith(const string& s, const char *start); -bool string_endswith(const string& s, const char *end); -string string_strip(const string& s); -string string_remove_trademark(const string& s); +void string_replace(string &haystack, const string &needle, const string &other); +bool string_startswith(const string &s, const char *start); +bool string_endswith(const string &s, const char *end); +string string_strip(const string &s); +string string_remove_trademark(const string &s); string string_from_bool(const bool var); string to_string(const char *str); @@ -61,9 +61,9 @@ string to_string(const char *str); */ #ifdef _WIN32 using std::wstring; -wstring string_to_wstring(const string& path); -string string_from_wstring(const wstring& path); -string string_to_ansi(const string& str); +wstring string_to_wstring(const string &path); +string string_from_wstring(const wstring &path); +string string_to_ansi(const string &str); #endif /* Make a string from a size in bytes in human readable form */ @@ -73,4 +73,4 @@ string string_human_readable_number(size_t num); CCL_NAMESPACE_END -#endif /* __UTIL_STRING_H__ */ +#endif /* __UTIL_STRING_H__ */ diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index 2a5c4a8f012..0cd991c6231 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -26,7 +26,7 @@ OIIO_NAMESPACE_USING #ifdef _WIN32 -# if(!defined(FREE_WINDOWS)) +# if (!defined(FREE_WINDOWS)) # include <intrin.h> # endif # include "util_windows.h" @@ -43,333 +43,332 @@ CCL_NAMESPACE_BEGIN bool system_cpu_ensure_initialized() { - static bool is_initialized = false; - static bool result = false; - if(is_initialized) { - return result; - } - is_initialized = true; - const NUMAAPI_Result numa_result = numaAPI_Initialize(); - result = (numa_result == NUMAAPI_SUCCESS); - return result; + static bool is_initialized = false; + static bool result = false; + if (is_initialized) { + return result; + } + is_initialized = true; + const NUMAAPI_Result numa_result = numaAPI_Initialize(); + result = (numa_result == NUMAAPI_SUCCESS); + return result; } /* Fallback solution, which doesn't use NUMA/CPU groups. */ static int system_cpu_thread_count_fallback() { #ifdef _WIN32 - SYSTEM_INFO info; - GetSystemInfo(&info); - return info.dwNumberOfProcessors; + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; #elif defined(__APPLE__) - int count; - size_t len = sizeof(count); - int mib[2] = { CTL_HW, HW_NCPU }; - sysctl(mib, 2, &count, &len, NULL, 0); - return count; + int count; + size_t len = sizeof(count); + int mib[2] = {CTL_HW, HW_NCPU}; + sysctl(mib, 2, &count, &len, NULL, 0); + return count; #else - return sysconf(_SC_NPROCESSORS_ONLN); + return sysconf(_SC_NPROCESSORS_ONLN); #endif } int system_cpu_thread_count() { - const int num_nodes = system_cpu_num_numa_nodes(); - int num_threads = 0; - for(int node = 0; node < num_nodes; ++node) { - if(!system_cpu_is_numa_node_available(node)) { - continue; - } - num_threads += system_cpu_num_numa_node_processors(node); - } - return num_threads; + const int num_nodes = system_cpu_num_numa_nodes(); + int num_threads = 0; + for (int node = 0; node < num_nodes; ++node) { + if (!system_cpu_is_numa_node_available(node)) { + continue; + } + num_threads += system_cpu_num_numa_node_processors(node); + } + return num_threads; } int system_cpu_num_numa_nodes() { - if(!system_cpu_ensure_initialized()) { - /* Fallback to a single node with all the threads. */ - return 1; - } - return numaAPI_GetNumNodes(); + if (!system_cpu_ensure_initialized()) { + /* Fallback to a single node with all the threads. */ + return 1; + } + return numaAPI_GetNumNodes(); } bool system_cpu_is_numa_node_available(int node) { - if(!system_cpu_ensure_initialized()) { - return true; - } - return numaAPI_IsNodeAvailable(node); + if (!system_cpu_ensure_initialized()) { + return true; + } + return numaAPI_IsNodeAvailable(node); } int system_cpu_num_numa_node_processors(int node) { - if(!system_cpu_ensure_initialized()) { - return system_cpu_thread_count_fallback(); - } - return numaAPI_GetNumNodeProcessors(node); + if (!system_cpu_ensure_initialized()) { + return system_cpu_thread_count_fallback(); + } + return numaAPI_GetNumNodeProcessors(node); } bool system_cpu_run_thread_on_node(int node) { - if(!system_cpu_ensure_initialized()) { - return true; - } - return numaAPI_RunThreadOnNode(node); + if (!system_cpu_ensure_initialized()) { + return true; + } + return numaAPI_RunThreadOnNode(node); } int system_console_width() { - int columns = 0; + int columns = 0; #ifdef _WIN32 - CONSOLE_SCREEN_BUFFER_INFO csbi; - if(GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { - columns = csbi.dwSize.X; - } + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) { + columns = csbi.dwSize.X; + } #else - struct winsize w; - if(ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { - columns = w.ws_col; - } + struct winsize w; + if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0) { + columns = w.ws_col; + } #endif - return (columns > 0) ? columns : 80; + return (columns > 0) ? columns : 80; } int system_cpu_num_active_group_processors() { - if(!system_cpu_ensure_initialized()) { - return system_cpu_thread_count_fallback(); - } - return numaAPI_GetNumCurrentNodesProcessors(); + if (!system_cpu_ensure_initialized()) { + return system_cpu_thread_count_fallback(); + } + return numaAPI_GetNumCurrentNodesProcessors(); } #if !defined(_WIN32) || defined(FREE_WINDOWS) static void __cpuid(int data[4], int selector) { -#if defined(__x86_64__) - asm("cpuid" : "=a" (data[0]), "=b" (data[1]), "=c" (data[2]), "=d" (data[3]) : "a"(selector)); -#elif defined(__i386__) - asm("pushl %%ebx \n\t" - "cpuid \n\t" - "movl %%ebx, %1 \n\t" - "popl %%ebx \n\t" - : "=a" (data[0]), "=r" (data[1]), "=c" (data[2]), "=d" (data[3]) - : "a"(selector) - : "ebx"); -#else - data[0] = data[1] = data[2] = data[3] = 0; -#endif +# if defined(__x86_64__) + asm("cpuid" : "=a"(data[0]), "=b"(data[1]), "=c"(data[2]), "=d"(data[3]) : "a"(selector)); +# elif defined(__i386__) + asm("pushl %%ebx \n\t" + "cpuid \n\t" + "movl %%ebx, %1 \n\t" + "popl %%ebx \n\t" + : "=a"(data[0]), "=r"(data[1]), "=c"(data[2]), "=d"(data[3]) + : "a"(selector) + : "ebx"); +# else + data[0] = data[1] = data[2] = data[3] = 0; +# endif } #endif string system_cpu_brand_string() { - char buf[48] = {0}; - int result[4] = {0}; + char buf[48] = {0}; + int result[4] = {0}; - __cpuid(result, 0x80000000); + __cpuid(result, 0x80000000); - if(result[0] >= (int)0x80000004) { - __cpuid((int*)(buf+0), 0x80000002); - __cpuid((int*)(buf+16), 0x80000003); - __cpuid((int*)(buf+32), 0x80000004); + if (result[0] >= (int)0x80000004) { + __cpuid((int *)(buf + 0), 0x80000002); + __cpuid((int *)(buf + 16), 0x80000003); + __cpuid((int *)(buf + 32), 0x80000004); - string brand = buf; + string brand = buf; - /* make it a bit more presentable */ - brand = string_remove_trademark(brand); + /* make it a bit more presentable */ + brand = string_remove_trademark(brand); - return brand; - } + return brand; + } - return "Unknown CPU"; + return "Unknown CPU"; } int system_cpu_bits() { - return (sizeof(void*)*8); + return (sizeof(void *) * 8); } #if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(_M_IX86) struct CPUCapabilities { - bool x64; - bool mmx; - bool sse; - bool sse2; - bool sse3; - bool ssse3; - bool sse41; - bool sse42; - bool sse4a; - bool avx; - bool f16c; - bool avx2; - bool xop; - bool fma3; - bool fma4; - bool bmi1; - bool bmi2; + bool x64; + bool mmx; + bool sse; + bool sse2; + bool sse3; + bool ssse3; + bool sse41; + bool sse42; + bool sse4a; + bool avx; + bool f16c; + bool avx2; + bool xop; + bool fma3; + bool fma4; + bool bmi1; + bool bmi2; }; -static CPUCapabilities& system_cpu_capabilities() +static CPUCapabilities &system_cpu_capabilities() { - static CPUCapabilities caps; - static bool caps_init = false; - - if(!caps_init) { - int result[4], num; - - memset(&caps, 0, sizeof(caps)); - - __cpuid(result, 0); - num = result[0]; - - if(num >= 1) { - __cpuid(result, 0x00000001); - caps.mmx = (result[3] & ((int)1 << 23)) != 0; - caps.sse = (result[3] & ((int)1 << 25)) != 0; - caps.sse2 = (result[3] & ((int)1 << 26)) != 0; - caps.sse3 = (result[2] & ((int)1 << 0)) != 0; - - caps.ssse3 = (result[2] & ((int)1 << 9)) != 0; - caps.sse41 = (result[2] & ((int)1 << 19)) != 0; - caps.sse42 = (result[2] & ((int)1 << 20)) != 0; - - caps.fma3 = (result[2] & ((int)1 << 12)) != 0; - caps.avx = false; - bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0; - bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0; - - if( os_uses_xsave_xrestore && cpu_avx_support) { - // Check if the OS will save the YMM registers - uint32_t xcr_feature_mask; -#if defined(__GNUC__) - int edx; /* not used */ - /* actual opcode for xgetbv */ - __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (xcr_feature_mask) , "=d" (edx) : "c" (0) ); -#elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) - xcr_feature_mask = (uint32_t)_xgetbv(_XCR_XFEATURE_ENABLED_MASK); /* min VS2010 SP1 compiler is required */ -#else - xcr_feature_mask = 0; -#endif - caps.avx = (xcr_feature_mask & 0x6) == 0x6; - } + static CPUCapabilities caps; + static bool caps_init = false; + + if (!caps_init) { + int result[4], num; + + memset(&caps, 0, sizeof(caps)); + + __cpuid(result, 0); + num = result[0]; + + if (num >= 1) { + __cpuid(result, 0x00000001); + caps.mmx = (result[3] & ((int)1 << 23)) != 0; + caps.sse = (result[3] & ((int)1 << 25)) != 0; + caps.sse2 = (result[3] & ((int)1 << 26)) != 0; + caps.sse3 = (result[2] & ((int)1 << 0)) != 0; + + caps.ssse3 = (result[2] & ((int)1 << 9)) != 0; + caps.sse41 = (result[2] & ((int)1 << 19)) != 0; + caps.sse42 = (result[2] & ((int)1 << 20)) != 0; + + caps.fma3 = (result[2] & ((int)1 << 12)) != 0; + caps.avx = false; + bool os_uses_xsave_xrestore = (result[2] & ((int)1 << 27)) != 0; + bool cpu_avx_support = (result[2] & ((int)1 << 28)) != 0; + + if (os_uses_xsave_xrestore && cpu_avx_support) { + // Check if the OS will save the YMM registers + uint32_t xcr_feature_mask; +# if defined(__GNUC__) + int edx; /* not used */ + /* actual opcode for xgetbv */ + __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr_feature_mask), "=d"(edx) : "c"(0)); +# elif defined(_MSC_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + xcr_feature_mask = (uint32_t)_xgetbv( + _XCR_XFEATURE_ENABLED_MASK); /* min VS2010 SP1 compiler is required */ +# else + xcr_feature_mask = 0; +# endif + caps.avx = (xcr_feature_mask & 0x6) == 0x6; + } - caps.f16c = (result[2] & ((int)1 << 29)) != 0; + caps.f16c = (result[2] & ((int)1 << 29)) != 0; - __cpuid(result, 0x00000007); - caps.bmi1 = (result[1] & ((int)1 << 3)) != 0; - caps.bmi2 = (result[1] & ((int)1 << 8)) != 0; - caps.avx2 = (result[1] & ((int)1 << 5)) != 0; - } + __cpuid(result, 0x00000007); + caps.bmi1 = (result[1] & ((int)1 << 3)) != 0; + caps.bmi2 = (result[1] & ((int)1 << 8)) != 0; + caps.avx2 = (result[1] & ((int)1 << 5)) != 0; + } - caps_init = true; - } + caps_init = true; + } - return caps; + return caps; } bool system_cpu_support_sse2() { - CPUCapabilities& caps = system_cpu_capabilities(); - return caps.sse && caps.sse2; + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2; } bool system_cpu_support_sse3() { - CPUCapabilities& caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3; + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3; } bool system_cpu_support_sse41() { - CPUCapabilities& caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41; + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41; } bool system_cpu_support_avx() { - CPUCapabilities& caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && - caps.sse41 && caps.avx; + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx; } bool system_cpu_support_avx2() { - CPUCapabilities& caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && - caps.avx && caps.f16c && caps.avx2 && caps.fma3 && caps.bmi1 && - caps.bmi2; + CPUCapabilities &caps = system_cpu_capabilities(); + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && + caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2; } #else bool system_cpu_support_sse2() { - return false; + return false; } bool system_cpu_support_sse3() { - return false; + return false; } bool system_cpu_support_sse41() { - return false; + return false; } bool system_cpu_support_avx() { - return false; + return false; } bool system_cpu_support_avx2() { - return false; + return false; } #endif -bool system_call_self(const vector<string>& args) +bool system_call_self(const vector<string> &args) { - /* Escape program and arguments in case they contain spaces. */ - string cmd = "\"" + Sysutil::this_program_path() + "\""; + /* Escape program and arguments in case they contain spaces. */ + string cmd = "\"" + Sysutil::this_program_path() + "\""; - for(int i = 0; i < args.size(); i++) { - cmd += " \"" + args[i] + "\""; - } + for (int i = 0; i < args.size(); i++) { + cmd += " \"" + args[i] + "\""; + } #ifdef _WIN32 - /* Use cmd /S to avoid issues with spaces in arguments. */ - cmd = "cmd /S /C \"" + cmd + " > nul \""; + /* Use cmd /S to avoid issues with spaces in arguments. */ + cmd = "cmd /S /C \"" + cmd + " > nul \""; #else - /* Quiet output. */ - cmd += " > /dev/null"; + /* Quiet output. */ + cmd += " > /dev/null"; #endif - return (system(cmd.c_str()) == 0); + return (system(cmd.c_str()) == 0); } size_t system_physical_ram() { #ifdef _WIN32 - MEMORYSTATUSEX ram; - ram.dwLength = sizeof (ram); - GlobalMemoryStatusEx(&ram); - return ram.ullTotalPhys * 1024; + MEMORYSTATUSEX ram; + ram.dwLength = sizeof(ram); + GlobalMemoryStatusEx(&ram); + return ram.ullTotalPhys * 1024; #elif defined(__APPLE__) - uint64_t ram = 0; - size_t len = sizeof(ram); - if(sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { - return ram; - } - return 0; + uint64_t ram = 0; + size_t len = sizeof(ram); + if (sysctlbyname("hw.memsize", &ram, &len, NULL, 0) == 0) { + return ram; + } + return 0; #else - size_t ps = sysconf(_SC_PAGESIZE); - size_t pn = sysconf(_SC_PHYS_PAGES); - return ps * pn; + size_t ps = sysconf(_SC_PAGESIZE); + size_t pn = sysconf(_SC_PHYS_PAGES); + return ps * pn; #endif } diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h index 2590b31a59d..c4db8b74339 100644 --- a/intern/cycles/util/util_system.h +++ b/intern/cycles/util/util_system.h @@ -63,8 +63,8 @@ bool system_cpu_support_avx2(); size_t system_physical_ram(); /* Start a new process of the current application with the given arguments. */ -bool system_call_self(const vector<string>& args); +bool system_call_self(const vector<string> &args); CCL_NAMESPACE_END -#endif /* __UTIL_SYSTEM_H__ */ +#endif /* __UTIL_SYSTEM_H__ */ diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index ce166af206a..4b11ce73ea9 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -23,10 +23,14 @@ //#define THREADING_DEBUG_ENABLED #ifdef THREADING_DEBUG_ENABLED -#include <stdio.h> -#define THREADING_DEBUG(...) do { printf(__VA_ARGS__); fflush(stdout); } while(0) +# include <stdio.h> +# define THREADING_DEBUG(...) \ + do { \ + printf(__VA_ARGS__); \ + fflush(stdout); \ + } while (0) #else -#define THREADING_DEBUG(...) +# define THREADING_DEBUG(...) #endif CCL_NAMESPACE_BEGIN @@ -35,156 +39,156 @@ CCL_NAMESPACE_BEGIN TaskPool::TaskPool() { - num_tasks_handled = 0; - num = 0; - do_cancel = false; + num_tasks_handled = 0; + num = 0; + do_cancel = false; } TaskPool::~TaskPool() { - stop(); + stop(); } void TaskPool::push(Task *task, bool front) { - TaskScheduler::Entry entry; + TaskScheduler::Entry entry; - entry.task = task; - entry.pool = this; + entry.task = task; + entry.pool = this; - TaskScheduler::push(entry, front); + TaskScheduler::push(entry, front); } -void TaskPool::push(const TaskRunFunction& run, bool front) +void TaskPool::push(const TaskRunFunction &run, bool front) { - push(new Task(run), front); + push(new Task(run), front); } void TaskPool::wait_work(Summary *stats) { - thread_scoped_lock num_lock(num_mutex); + thread_scoped_lock num_lock(num_mutex); - while(num != 0) { - num_lock.unlock(); + while (num != 0) { + num_lock.unlock(); - thread_scoped_lock queue_lock(TaskScheduler::queue_mutex); + thread_scoped_lock queue_lock(TaskScheduler::queue_mutex); - /* find task from this pool. if we get a task from another pool, - * we can get into deadlock */ - TaskScheduler::Entry work_entry; - bool found_entry = false; - list<TaskScheduler::Entry>::iterator it; + /* find task from this pool. if we get a task from another pool, + * we can get into deadlock */ + TaskScheduler::Entry work_entry; + bool found_entry = false; + list<TaskScheduler::Entry>::iterator it; - for(it = TaskScheduler::queue.begin(); it != TaskScheduler::queue.end(); it++) { - TaskScheduler::Entry& entry = *it; + for (it = TaskScheduler::queue.begin(); it != TaskScheduler::queue.end(); it++) { + TaskScheduler::Entry &entry = *it; - if(entry.pool == this) { - work_entry = entry; - found_entry = true; - TaskScheduler::queue.erase(it); - break; - } - } + if (entry.pool == this) { + work_entry = entry; + found_entry = true; + TaskScheduler::queue.erase(it); + break; + } + } - queue_lock.unlock(); + queue_lock.unlock(); - /* if found task, do it, otherwise wait until other tasks are done */ - if(found_entry) { - /* run task */ - work_entry.task->run(0); + /* if found task, do it, otherwise wait until other tasks are done */ + if (found_entry) { + /* run task */ + work_entry.task->run(0); - /* delete task */ - delete work_entry.task; + /* delete task */ + delete work_entry.task; - /* notify pool task was done */ - num_decrease(1); - } + /* notify pool task was done */ + num_decrease(1); + } - num_lock.lock(); - if(num == 0) - break; + num_lock.lock(); + if (num == 0) + break; - if(!found_entry) { - THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::wait_work !found_entry\n", num); - num_cond.wait(num_lock); - THREADING_DEBUG("num==%d, condition wait done in TaskPool::wait_work !found_entry\n", num); - } - } + if (!found_entry) { + THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::wait_work !found_entry\n", num); + num_cond.wait(num_lock); + THREADING_DEBUG("num==%d, condition wait done in TaskPool::wait_work !found_entry\n", num); + } + } - if(stats != NULL) { - stats->time_total = time_dt() - start_time; - stats->num_tasks_handled = num_tasks_handled; - } + if (stats != NULL) { + stats->time_total = time_dt() - start_time; + stats->num_tasks_handled = num_tasks_handled; + } } void TaskPool::cancel() { - do_cancel = true; + do_cancel = true; - TaskScheduler::clear(this); + TaskScheduler::clear(this); - { - thread_scoped_lock num_lock(num_mutex); + { + thread_scoped_lock num_lock(num_mutex); - while(num) { - THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::cancel\n", num); - num_cond.wait(num_lock); - THREADING_DEBUG("num==%d condition wait done in TaskPool::cancel\n", num); - } - } + while (num) { + THREADING_DEBUG("num==%d, Waiting for condition in TaskPool::cancel\n", num); + num_cond.wait(num_lock); + THREADING_DEBUG("num==%d condition wait done in TaskPool::cancel\n", num); + } + } - do_cancel = false; + do_cancel = false; } void TaskPool::stop() { - TaskScheduler::clear(this); + TaskScheduler::clear(this); - assert(num == 0); + assert(num == 0); } bool TaskPool::canceled() { - return do_cancel; + return do_cancel; } bool TaskPool::finished() { - thread_scoped_lock num_lock(num_mutex); - return num == 0; + thread_scoped_lock num_lock(num_mutex); + return num == 0; } void TaskPool::num_decrease(int done) { - num_mutex.lock(); - num -= done; + num_mutex.lock(); + num -= done; - assert(num >= 0); - if(num == 0) { - THREADING_DEBUG("num==%d, notifying all in TaskPool::num_decrease\n", num); - num_cond.notify_all(); - } + assert(num >= 0); + if (num == 0) { + THREADING_DEBUG("num==%d, notifying all in TaskPool::num_decrease\n", num); + num_cond.notify_all(); + } - num_mutex.unlock(); + num_mutex.unlock(); } void TaskPool::num_increase() { - thread_scoped_lock num_lock(num_mutex); - if(num_tasks_handled == 0) { - start_time = time_dt(); - } - num++; - num_tasks_handled++; - THREADING_DEBUG("num==%d, notifying all in TaskPool::num_increase\n", num); - num_cond.notify_all(); + thread_scoped_lock num_lock(num_mutex); + if (num_tasks_handled == 0) { + start_time = time_dt(); + } + num++; + num_tasks_handled++; + THREADING_DEBUG("num==%d, notifying all in TaskPool::num_increase\n", num); + num_cond.notify_all(); } /* Task Scheduler */ thread_mutex TaskScheduler::mutex; int TaskScheduler::users = 0; -vector<thread*> TaskScheduler::threads; +vector<thread *> TaskScheduler::threads; bool TaskScheduler::do_exit = false; list<TaskScheduler::Entry> TaskScheduler::queue; @@ -198,412 +202,401 @@ namespace { * that node. * If node is not available, then the corresponding number of processors is * zero. */ -void get_per_node_num_processors(vector<int>* num_per_node_processors) -{ - const int num_nodes = system_cpu_num_numa_nodes(); - if(num_nodes == 0) { - LOG(ERROR) << "Zero available NUMA nodes, is not supposed to happen."; - return; - } - num_per_node_processors->resize(num_nodes); - for(int node = 0; node < num_nodes; ++node) { - if(!system_cpu_is_numa_node_available(node)) { - (*num_per_node_processors)[node] = 0; - continue; - } - (*num_per_node_processors)[node] = - system_cpu_num_numa_node_processors(node); - } +void get_per_node_num_processors(vector<int> *num_per_node_processors) +{ + const int num_nodes = system_cpu_num_numa_nodes(); + if (num_nodes == 0) { + LOG(ERROR) << "Zero available NUMA nodes, is not supposed to happen."; + return; + } + num_per_node_processors->resize(num_nodes); + for (int node = 0; node < num_nodes; ++node) { + if (!system_cpu_is_numa_node_available(node)) { + (*num_per_node_processors)[node] = 0; + continue; + } + (*num_per_node_processors)[node] = system_cpu_num_numa_node_processors(node); + } } /* Calculate total number of processors on all available nodes. * This is similar to system_cpu_thread_count(), but uses pre-calculated number * of processors on each of the node, avoiding extra system calls and checks for * the node availability. */ -int get_num_total_processors(const vector<int>& num_per_node_processors) +int get_num_total_processors(const vector<int> &num_per_node_processors) { - int num_total_processors = 0; - foreach(int num_node_processors, num_per_node_processors) { - num_total_processors += num_node_processors; - } - return num_total_processors; + int num_total_processors = 0; + foreach (int num_node_processors, num_per_node_processors) { + num_total_processors += num_node_processors; + } + return num_total_processors; } /* Compute NUMA node for every thread to run on, for the best performance. */ vector<int> distribute_threads_on_nodes(const int num_threads) { - /* Start with all threads unassigned to any specific NUMA node. */ - vector<int> thread_nodes(num_threads, -1); - const int num_active_group_processors = - system_cpu_num_active_group_processors(); - VLOG(1) << "Detected " << num_active_group_processors << " processors " - << "in active group."; - if(num_active_group_processors >= num_threads) { - /* If the current thread is set up in a way that its affinity allows to - * use at least requested number of threads we do not explicitly set - * affinity to the worker therads. - * This way we allow users to manually edit affinity of the parent - * thread, and here we follow that affinity. This way it's possible to - * have two Cycles/Blender instances running manually set to a different - * dies on a CPU. */ - VLOG(1) << "Not setting thread group affinity."; - return thread_nodes; - } - vector<int> num_per_node_processors; - get_per_node_num_processors(&num_per_node_processors); - if(num_per_node_processors.size() == 0) { - /* Error was already repported, here we can't do anything, so we simply - * leave default affinity to all the worker threads. */ - return thread_nodes; - } - const int num_nodes = num_per_node_processors.size(); - int thread_index = 0; - /* First pass: fill in all the nodes to their maximum. - * - * If there is less threads than the overall nodes capacity, some of the - * nodes or parts of them will idle. - * - * TODO(sergey): Consider picking up fastest nodes if number of threads - * fits on them. For example, on Threadripper2 we might consider using nodes - * 0 and 2 if user requested 32 render threads. */ - const int num_total_node_processors = - get_num_total_processors(num_per_node_processors); - int current_node_index = 0; - while(thread_index < num_total_node_processors && - thread_index < num_threads) { - const int num_node_processors = - num_per_node_processors[current_node_index]; - for(int processor_index = 0; - processor_index < num_node_processors; - ++processor_index) - { - VLOG(1) << "Scheduling thread " << thread_index << " to node " - << current_node_index << "."; - thread_nodes[thread_index] = current_node_index; - ++thread_index; - if(thread_index == num_threads) { - /* All threads are scheduled on their nodes. */ - return thread_nodes; - } - } - ++current_node_index; - } - /* Second pass: keep scheduling threads to each node one by one, uniformly - * fillign them in. - * This is where things becomes tricky to predict for the maximum - * performance: on the one hand this avoids too much threading overhead on - * few nodes, but for the final performance having all the overhead on one - * node might be better idea (since other nodes will have better chance of - * rendering faster). - * But more tricky is that nodes might have difference capacity, so we might - * want to do some weighted scheduling. For example, if node 0 has 16 - * processors and node 1 has 32 processors, we'd better schedule 1 extra - * thread on node 0 and 2 extra threads on node 1. */ - current_node_index = 0; - while(thread_index < num_threads) { - /* Skip unavailable nodes. */ - /* TODO(sergey): Add sanity check against deadlock. */ - while(num_per_node_processors[current_node_index] == 0) { - current_node_index = (current_node_index + 1) % num_nodes; - } - VLOG(1) << "Scheduling thread " << thread_index << " to node " - << current_node_index << "."; - ++thread_index; - current_node_index = (current_node_index + 1) % num_nodes; - } - - return thread_nodes; + /* Start with all threads unassigned to any specific NUMA node. */ + vector<int> thread_nodes(num_threads, -1); + const int num_active_group_processors = system_cpu_num_active_group_processors(); + VLOG(1) << "Detected " << num_active_group_processors << " processors " + << "in active group."; + if (num_active_group_processors >= num_threads) { + /* If the current thread is set up in a way that its affinity allows to + * use at least requested number of threads we do not explicitly set + * affinity to the worker therads. + * This way we allow users to manually edit affinity of the parent + * thread, and here we follow that affinity. This way it's possible to + * have two Cycles/Blender instances running manually set to a different + * dies on a CPU. */ + VLOG(1) << "Not setting thread group affinity."; + return thread_nodes; + } + vector<int> num_per_node_processors; + get_per_node_num_processors(&num_per_node_processors); + if (num_per_node_processors.size() == 0) { + /* Error was already repported, here we can't do anything, so we simply + * leave default affinity to all the worker threads. */ + return thread_nodes; + } + const int num_nodes = num_per_node_processors.size(); + int thread_index = 0; + /* First pass: fill in all the nodes to their maximum. + * + * If there is less threads than the overall nodes capacity, some of the + * nodes or parts of them will idle. + * + * TODO(sergey): Consider picking up fastest nodes if number of threads + * fits on them. For example, on Threadripper2 we might consider using nodes + * 0 and 2 if user requested 32 render threads. */ + const int num_total_node_processors = get_num_total_processors(num_per_node_processors); + int current_node_index = 0; + while (thread_index < num_total_node_processors && thread_index < num_threads) { + const int num_node_processors = num_per_node_processors[current_node_index]; + for (int processor_index = 0; processor_index < num_node_processors; ++processor_index) { + VLOG(1) << "Scheduling thread " << thread_index << " to node " << current_node_index << "."; + thread_nodes[thread_index] = current_node_index; + ++thread_index; + if (thread_index == num_threads) { + /* All threads are scheduled on their nodes. */ + return thread_nodes; + } + } + ++current_node_index; + } + /* Second pass: keep scheduling threads to each node one by one, uniformly + * fillign them in. + * This is where things becomes tricky to predict for the maximum + * performance: on the one hand this avoids too much threading overhead on + * few nodes, but for the final performance having all the overhead on one + * node might be better idea (since other nodes will have better chance of + * rendering faster). + * But more tricky is that nodes might have difference capacity, so we might + * want to do some weighted scheduling. For example, if node 0 has 16 + * processors and node 1 has 32 processors, we'd better schedule 1 extra + * thread on node 0 and 2 extra threads on node 1. */ + current_node_index = 0; + while (thread_index < num_threads) { + /* Skip unavailable nodes. */ + /* TODO(sergey): Add sanity check against deadlock. */ + while (num_per_node_processors[current_node_index] == 0) { + current_node_index = (current_node_index + 1) % num_nodes; + } + VLOG(1) << "Scheduling thread " << thread_index << " to node " << current_node_index << "."; + ++thread_index; + current_node_index = (current_node_index + 1) % num_nodes; + } + + return thread_nodes; } } // namespace void TaskScheduler::init(int num_threads) { - thread_scoped_lock lock(mutex); - /* Multiple cycles instances can use this task scheduler, sharing the same - * threads, so we keep track of the number of users. */ - ++users; - if(users != 1) { - return; - } - do_exit = false; - const bool use_auto_threads = (num_threads == 0); - if(use_auto_threads) { - /* Automatic number of threads. */ - num_threads = system_cpu_thread_count(); - } - VLOG(1) << "Creating pool of " << num_threads << " threads."; - - /* Compute distribution on NUMA nodes. */ - vector<int> thread_nodes = distribute_threads_on_nodes(num_threads); - - /* Launch threads that will be waiting for work. */ - threads.resize(num_threads); - for(int thread_index = 0; thread_index < num_threads; ++thread_index) { - threads[thread_index] = new thread( - function_bind(&TaskScheduler::thread_run, thread_index + 1), - thread_nodes[thread_index]); - } + thread_scoped_lock lock(mutex); + /* Multiple cycles instances can use this task scheduler, sharing the same + * threads, so we keep track of the number of users. */ + ++users; + if (users != 1) { + return; + } + do_exit = false; + const bool use_auto_threads = (num_threads == 0); + if (use_auto_threads) { + /* Automatic number of threads. */ + num_threads = system_cpu_thread_count(); + } + VLOG(1) << "Creating pool of " << num_threads << " threads."; + + /* Compute distribution on NUMA nodes. */ + vector<int> thread_nodes = distribute_threads_on_nodes(num_threads); + + /* Launch threads that will be waiting for work. */ + threads.resize(num_threads); + for (int thread_index = 0; thread_index < num_threads; ++thread_index) { + threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run, thread_index + 1), + thread_nodes[thread_index]); + } } void TaskScheduler::exit() { - thread_scoped_lock lock(mutex); - users--; - if(users == 0) { - VLOG(1) << "De-initializing thread pool of task scheduler."; - /* stop all waiting threads */ - TaskScheduler::queue_mutex.lock(); - do_exit = true; - TaskScheduler::queue_cond.notify_all(); - TaskScheduler::queue_mutex.unlock(); - - /* delete threads */ - foreach(thread *t, threads) { - t->join(); - delete t; - } - threads.clear(); - } + thread_scoped_lock lock(mutex); + users--; + if (users == 0) { + VLOG(1) << "De-initializing thread pool of task scheduler."; + /* stop all waiting threads */ + TaskScheduler::queue_mutex.lock(); + do_exit = true; + TaskScheduler::queue_cond.notify_all(); + TaskScheduler::queue_mutex.unlock(); + + /* delete threads */ + foreach (thread *t, threads) { + t->join(); + delete t; + } + threads.clear(); + } } void TaskScheduler::free_memory() { - assert(users == 0); - threads.free_memory(); + assert(users == 0); + threads.free_memory(); } -bool TaskScheduler::thread_wait_pop(Entry& entry) +bool TaskScheduler::thread_wait_pop(Entry &entry) { - thread_scoped_lock queue_lock(queue_mutex); + thread_scoped_lock queue_lock(queue_mutex); - while(queue.empty() && !do_exit) - queue_cond.wait(queue_lock); + while (queue.empty() && !do_exit) + queue_cond.wait(queue_lock); - if(queue.empty()) { - assert(do_exit); - return false; - } + if (queue.empty()) { + assert(do_exit); + return false; + } - entry = queue.front(); - queue.pop_front(); + entry = queue.front(); + queue.pop_front(); - return true; + return true; } void TaskScheduler::thread_run(int thread_id) { - Entry entry; + Entry entry; - /* todo: test affinity/denormal mask */ + /* todo: test affinity/denormal mask */ - /* keep popping off tasks */ - while(thread_wait_pop(entry)) { - /* run task */ - entry.task->run(thread_id); + /* keep popping off tasks */ + while (thread_wait_pop(entry)) { + /* run task */ + entry.task->run(thread_id); - /* delete task */ - delete entry.task; + /* delete task */ + delete entry.task; - /* notify pool task was done */ - entry.pool->num_decrease(1); - } + /* notify pool task was done */ + entry.pool->num_decrease(1); + } } -void TaskScheduler::push(Entry& entry, bool front) +void TaskScheduler::push(Entry &entry, bool front) { - entry.pool->num_increase(); + entry.pool->num_increase(); - /* add entry to queue */ - TaskScheduler::queue_mutex.lock(); - if(front) - TaskScheduler::queue.push_front(entry); - else - TaskScheduler::queue.push_back(entry); + /* add entry to queue */ + TaskScheduler::queue_mutex.lock(); + if (front) + TaskScheduler::queue.push_front(entry); + else + TaskScheduler::queue.push_back(entry); - TaskScheduler::queue_cond.notify_one(); - TaskScheduler::queue_mutex.unlock(); + TaskScheduler::queue_cond.notify_one(); + TaskScheduler::queue_mutex.unlock(); } void TaskScheduler::clear(TaskPool *pool) { - thread_scoped_lock queue_lock(TaskScheduler::queue_mutex); + thread_scoped_lock queue_lock(TaskScheduler::queue_mutex); - /* erase all tasks from this pool from the queue */ - list<Entry>::iterator it = queue.begin(); - int done = 0; + /* erase all tasks from this pool from the queue */ + list<Entry>::iterator it = queue.begin(); + int done = 0; - while(it != queue.end()) { - Entry& entry = *it; + while (it != queue.end()) { + Entry &entry = *it; - if(entry.pool == pool) { - done++; - delete entry.task; + if (entry.pool == pool) { + done++; + delete entry.task; - it = queue.erase(it); - } - else - it++; - } + it = queue.erase(it); + } + else + it++; + } - queue_lock.unlock(); + queue_lock.unlock(); - /* notify done */ - pool->num_decrease(done); + /* notify done */ + pool->num_decrease(done); } /* Dedicated Task Pool */ DedicatedTaskPool::DedicatedTaskPool() { - do_cancel = false; - do_exit = false; - num = 0; + do_cancel = false; + do_exit = false; + num = 0; - worker_thread = new thread(function_bind(&DedicatedTaskPool::thread_run, this)); + worker_thread = new thread(function_bind(&DedicatedTaskPool::thread_run, this)); } DedicatedTaskPool::~DedicatedTaskPool() { - stop(); - worker_thread->join(); - delete worker_thread; + stop(); + worker_thread->join(); + delete worker_thread; } void DedicatedTaskPool::push(Task *task, bool front) { - num_increase(); + num_increase(); - /* add task to queue */ - queue_mutex.lock(); - if(front) - queue.push_front(task); - else - queue.push_back(task); + /* add task to queue */ + queue_mutex.lock(); + if (front) + queue.push_front(task); + else + queue.push_back(task); - queue_cond.notify_one(); - queue_mutex.unlock(); + queue_cond.notify_one(); + queue_mutex.unlock(); } -void DedicatedTaskPool::push(const TaskRunFunction& run, bool front) +void DedicatedTaskPool::push(const TaskRunFunction &run, bool front) { - push(new Task(run), front); + push(new Task(run), front); } void DedicatedTaskPool::wait() { - thread_scoped_lock num_lock(num_mutex); + thread_scoped_lock num_lock(num_mutex); - while(num) - num_cond.wait(num_lock); + while (num) + num_cond.wait(num_lock); } void DedicatedTaskPool::cancel() { - do_cancel = true; + do_cancel = true; - clear(); - wait(); + clear(); + wait(); - do_cancel = false; + do_cancel = false; } void DedicatedTaskPool::stop() { - clear(); + clear(); - do_exit = true; - queue_cond.notify_all(); + do_exit = true; + queue_cond.notify_all(); - wait(); + wait(); - assert(num == 0); + assert(num == 0); } bool DedicatedTaskPool::canceled() { - return do_cancel; + return do_cancel; } void DedicatedTaskPool::num_decrease(int done) { - thread_scoped_lock num_lock(num_mutex); - num -= done; + thread_scoped_lock num_lock(num_mutex); + num -= done; - assert(num >= 0); - if(num == 0) - num_cond.notify_all(); + assert(num >= 0); + if (num == 0) + num_cond.notify_all(); } void DedicatedTaskPool::num_increase() { - thread_scoped_lock num_lock(num_mutex); - num++; - num_cond.notify_all(); + thread_scoped_lock num_lock(num_mutex); + num++; + num_cond.notify_all(); } -bool DedicatedTaskPool::thread_wait_pop(Task*& task) +bool DedicatedTaskPool::thread_wait_pop(Task *&task) { - thread_scoped_lock queue_lock(queue_mutex); + thread_scoped_lock queue_lock(queue_mutex); - while(queue.empty() && !do_exit) - queue_cond.wait(queue_lock); + while (queue.empty() && !do_exit) + queue_cond.wait(queue_lock); - if(queue.empty()) { - assert(do_exit); - return false; - } + if (queue.empty()) { + assert(do_exit); + return false; + } - task = queue.front(); - queue.pop_front(); + task = queue.front(); + queue.pop_front(); - return true; + return true; } void DedicatedTaskPool::thread_run() { - Task *task; + Task *task; - /* keep popping off tasks */ - while(thread_wait_pop(task)) { - /* run task */ - task->run(0); + /* keep popping off tasks */ + while (thread_wait_pop(task)) { + /* run task */ + task->run(0); - /* delete task */ - delete task; + /* delete task */ + delete task; - /* notify task was done */ - num_decrease(1); - } + /* notify task was done */ + num_decrease(1); + } } void DedicatedTaskPool::clear() { - thread_scoped_lock queue_lock(queue_mutex); + thread_scoped_lock queue_lock(queue_mutex); - /* erase all tasks from the queue */ - list<Task*>::iterator it = queue.begin(); - int done = 0; + /* erase all tasks from the queue */ + list<Task *>::iterator it = queue.begin(); + int done = 0; - while(it != queue.end()) { - done++; - delete *it; + while (it != queue.end()) { + done++; + delete *it; - it = queue.erase(it); - } + it = queue.erase(it); + } - queue_lock.unlock(); + queue_lock.unlock(); - /* notify done */ - num_decrease(done); + /* notify done */ + num_decrease(done); } string TaskPool::Summary::full_report() const { - string report = ""; - report += string_printf("Total time: %f\n", time_total); - report += string_printf("Tasks handled: %d\n", num_tasks_handled); - return report; + string report = ""; + report += string_printf("Total time: %f\n", time_total); + report += string_printf("Tasks handled: %d\n", num_tasks_handled); + return report; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h index a7e19d1ab75..fd30a33d8ef 100644 --- a/intern/cycles/util/util_task.h +++ b/intern/cycles/util/util_task.h @@ -48,15 +48,18 @@ typedef function<void(int thread_id)> TaskRunFunction; * * Base class for tasks to be executed in threads. */ -class Task -{ -public: - Task() {}; - explicit Task(const TaskRunFunction& run_) : run(run_) {} - - virtual ~Task() {} - - TaskRunFunction run; +class Task { + public: + Task(){}; + explicit Task(const TaskRunFunction &run_) : run(run_) + { + } + + virtual ~Task() + { + } + + TaskRunFunction run; }; /* Task Pool @@ -68,54 +71,53 @@ public: * The run callback that actually executes the task may be created like this: * function_bind(&MyClass::task_execute, this, _1, _2) */ -class TaskPool -{ -public: - struct Summary { - /* Time spent to handle all tasks. */ - double time_total; +class TaskPool { + public: + struct Summary { + /* Time spent to handle all tasks. */ + double time_total; - /* Number of all tasks handled by this pool. */ - int num_tasks_handled; + /* Number of all tasks handled by this pool. */ + int num_tasks_handled; - /* A full multiline description of the state of the pool after - * all work is done. - */ - string full_report() const; - }; + /* A full multiline description of the state of the pool after + * all work is done. + */ + string full_report() const; + }; - TaskPool(); - ~TaskPool(); + TaskPool(); + ~TaskPool(); - void push(Task *task, bool front = false); - void push(const TaskRunFunction& run, bool front = false); + void push(Task *task, bool front = false); + void push(const TaskRunFunction &run, bool front = false); - void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */ - void cancel(); /* cancel all tasks, keep worker threads running */ - void stop(); /* stop all worker threads */ - bool finished(); /* check if all work has been completed */ + void wait_work(Summary *stats = NULL); /* work and wait until all tasks are done */ + void cancel(); /* cancel all tasks, keep worker threads running */ + void stop(); /* stop all worker threads */ + bool finished(); /* check if all work has been completed */ - bool canceled(); /* for worker threads, test if canceled */ + bool canceled(); /* for worker threads, test if canceled */ -protected: - friend class TaskScheduler; + protected: + friend class TaskScheduler; - void num_decrease(int done); - void num_increase(); + void num_decrease(int done); + void num_increase(); - thread_mutex num_mutex; - thread_condition_variable num_cond; + thread_mutex num_mutex; + thread_condition_variable num_cond; - int num; - bool do_cancel; + int num; + bool do_cancel; - /* ** Statistics ** */ + /* ** Statistics ** */ - /* Time time stamp of first task pushed. */ - double start_time; + /* Time time stamp of first task pushed. */ + double start_time; - /* Number of all tasks handled by this pool. */ - int num_tasks_handled; + /* Number of all tasks handled by this pool. */ + int num_tasks_handled; }; /* Task Scheduler @@ -123,41 +125,46 @@ protected: * Central scheduler that holds running threads ready to execute tasks. A singe * queue holds the task from all pools. */ -class TaskScheduler -{ -public: - static void init(int num_threads = 0); - static void exit(); - static void free_memory(); - - /* number of threads that can work on task */ - static int num_threads() { return threads.size(); } - - /* test if any session is using the scheduler */ - static bool active() { return users != 0; } - -protected: - friend class TaskPool; - - struct Entry { - Task *task; - TaskPool *pool; - }; - - static thread_mutex mutex; - static int users; - static vector<thread*> threads; - static bool do_exit; - - static list<Entry> queue; - static thread_mutex queue_mutex; - static thread_condition_variable queue_cond; - - static void thread_run(int thread_id); - static bool thread_wait_pop(Entry& entry); - - static void push(Entry& entry, bool front); - static void clear(TaskPool *pool); +class TaskScheduler { + public: + static void init(int num_threads = 0); + static void exit(); + static void free_memory(); + + /* number of threads that can work on task */ + static int num_threads() + { + return threads.size(); + } + + /* test if any session is using the scheduler */ + static bool active() + { + return users != 0; + } + + protected: + friend class TaskPool; + + struct Entry { + Task *task; + TaskPool *pool; + }; + + static thread_mutex mutex; + static int users; + static vector<thread *> threads; + static bool do_exit; + + static list<Entry> queue; + static thread_mutex queue_mutex; + static thread_condition_variable queue_cond; + + static void thread_run(int thread_id); + static bool thread_wait_pop(Entry &entry); + + static void push(Entry &entry, bool front); + static void clear(TaskPool *pool); }; /* Dedicated Task Pool @@ -167,42 +174,41 @@ protected: * The run callback that actually executes the task may be created like this: * function_bind(&MyClass::task_execute, this, _1, _2) */ -class DedicatedTaskPool -{ -public: - DedicatedTaskPool(); - ~DedicatedTaskPool(); +class DedicatedTaskPool { + public: + DedicatedTaskPool(); + ~DedicatedTaskPool(); - void push(Task *task, bool front = false); - void push(const TaskRunFunction& run, bool front = false); + void push(Task *task, bool front = false); + void push(const TaskRunFunction &run, bool front = false); - void wait(); /* wait until all tasks are done */ - void cancel(); /* cancel all tasks, keep worker thread running */ - void stop(); /* stop worker thread */ + void wait(); /* wait until all tasks are done */ + void cancel(); /* cancel all tasks, keep worker thread running */ + void stop(); /* stop worker thread */ - bool canceled(); /* for worker thread, test if canceled */ + bool canceled(); /* for worker thread, test if canceled */ -protected: - void num_decrease(int done); - void num_increase(); + protected: + void num_decrease(int done); + void num_increase(); - void thread_run(); - bool thread_wait_pop(Task*& entry); + void thread_run(); + bool thread_wait_pop(Task *&entry); - void clear(); + void clear(); - thread_mutex num_mutex; - thread_condition_variable num_cond; + thread_mutex num_mutex; + thread_condition_variable num_cond; - list<Task*> queue; - thread_mutex queue_mutex; - thread_condition_variable queue_cond; + list<Task *> queue; + thread_mutex queue_mutex; + thread_condition_variable queue_cond; - int num; - bool do_cancel; - bool do_exit; + int num; + bool do_cancel; + bool do_exit; - thread *worker_thread; + thread *worker_thread; }; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_texture.h b/intern/cycles/util/util_texture.h index 233cfe33305..5ce16e0095a 100644 --- a/intern/cycles/util/util_texture.h +++ b/intern/cycles/util/util_texture.h @@ -34,29 +34,29 @@ CCL_NAMESPACE_BEGIN /* Interpolation types for textures * cuda also use texture space to store other objects */ typedef enum InterpolationType { - INTERPOLATION_NONE = -1, - INTERPOLATION_LINEAR = 0, - INTERPOLATION_CLOSEST = 1, - INTERPOLATION_CUBIC = 2, - INTERPOLATION_SMART = 3, + INTERPOLATION_NONE = -1, + INTERPOLATION_LINEAR = 0, + INTERPOLATION_CLOSEST = 1, + INTERPOLATION_CUBIC = 2, + INTERPOLATION_SMART = 3, - INTERPOLATION_NUM_TYPES, + INTERPOLATION_NUM_TYPES, } InterpolationType; /* Texture types * Since we store the type in the lower bits of a flat index, * the shift and bit mask constant below need to be kept in sync. */ typedef enum ImageDataType { - IMAGE_DATA_TYPE_FLOAT4 = 0, - IMAGE_DATA_TYPE_BYTE4 = 1, - IMAGE_DATA_TYPE_HALF4 = 2, - IMAGE_DATA_TYPE_FLOAT = 3, - IMAGE_DATA_TYPE_BYTE = 4, - IMAGE_DATA_TYPE_HALF = 5, - IMAGE_DATA_TYPE_USHORT4 = 6, - IMAGE_DATA_TYPE_USHORT = 7, + IMAGE_DATA_TYPE_FLOAT4 = 0, + IMAGE_DATA_TYPE_BYTE4 = 1, + IMAGE_DATA_TYPE_HALF4 = 2, + IMAGE_DATA_TYPE_FLOAT = 3, + IMAGE_DATA_TYPE_BYTE = 4, + IMAGE_DATA_TYPE_HALF = 5, + IMAGE_DATA_TYPE_USHORT4 = 6, + IMAGE_DATA_TYPE_USHORT = 7, - IMAGE_DATA_NUM_TYPES + IMAGE_DATA_NUM_TYPES } ImageDataType; #define IMAGE_DATA_TYPE_SHIFT 3 @@ -66,27 +66,27 @@ typedef enum ImageDataType { * * Defines how the image is extrapolated past its original bounds. */ typedef enum ExtensionType { - /* Cause the image to repeat horizontally and vertically. */ - EXTENSION_REPEAT = 0, - /* Extend by repeating edge pixels of the image. */ - EXTENSION_EXTEND = 1, - /* Clip to image size and set exterior pixels as transparent. */ - EXTENSION_CLIP = 2, + /* Cause the image to repeat horizontally and vertically. */ + EXTENSION_REPEAT = 0, + /* Extend by repeating edge pixels of the image. */ + EXTENSION_EXTEND = 1, + /* Clip to image size and set exterior pixels as transparent. */ + EXTENSION_CLIP = 2, - EXTENSION_NUM_TYPES, + EXTENSION_NUM_TYPES, } ExtensionType; typedef struct TextureInfo { - /* Pointer, offset or texture depending on device. */ - uint64_t data; - /* Buffer number for OpenCL. */ - uint cl_buffer; - /* Interpolation and extension type. */ - uint interpolation, extension; - /* Dimensions. */ - uint width, height, depth; + /* Pointer, offset or texture depending on device. */ + uint64_t data; + /* Buffer number for OpenCL. */ + uint cl_buffer; + /* Interpolation and extension type. */ + uint interpolation, extension; + /* Dimensions. */ + uint width, height, depth; } TextureInfo; CCL_NAMESPACE_END -#endif /* __UTIL_TEXTURE_H__ */ +#endif /* __UTIL_TEXTURE_H__ */ diff --git a/intern/cycles/util/util_thread.cpp b/intern/cycles/util/util_thread.cpp index f3c6077f6b7..cccde5ae7d5 100644 --- a/intern/cycles/util/util_thread.cpp +++ b/intern/cycles/util/util_thread.cpp @@ -21,54 +21,51 @@ CCL_NAMESPACE_BEGIN -thread::thread(function<void()> run_cb, int node) - : run_cb_(run_cb), - joined_(false), - node_(node) +thread::thread(function<void()> run_cb, int node) : run_cb_(run_cb), joined_(false), node_(node) { #ifdef __APPLE__ - /* Set the stack size to 2MB to match Linux. The default 512KB on macOS is - * too small for Embree, and consistent stack size also makes things more - * predictable in general. */ - pthread_attr_t attribute; - pthread_attr_init(&attribute); - pthread_attr_setstacksize(&attribute, 1024*1024*2); - pthread_create(&pthread_id, &attribute, run, (void*)this); + /* Set the stack size to 2MB to match Linux. The default 512KB on macOS is + * too small for Embree, and consistent stack size also makes things more + * predictable in general. */ + pthread_attr_t attribute; + pthread_attr_init(&attribute); + pthread_attr_setstacksize(&attribute, 1024 * 1024 * 2); + pthread_create(&pthread_id, &attribute, run, (void *)this); #else - std_thread = std::thread(&thread::run, this); + std_thread = std::thread(&thread::run, this); #endif } thread::~thread() { - if(!joined_) { - join(); - } + if (!joined_) { + join(); + } } void *thread::run(void *arg) { - thread *self = (thread*)(arg); - if (self->node_ != -1) { - system_cpu_run_thread_on_node(self->node_); - } - self->run_cb_(); - return NULL; + thread *self = (thread *)(arg); + if (self->node_ != -1) { + system_cpu_run_thread_on_node(self->node_); + } + self->run_cb_(); + return NULL; } bool thread::join() { - joined_ = true; + joined_ = true; #ifdef __APPLE__ - return pthread_join(pthread_id, NULL) == 0; + return pthread_join(pthread_id, NULL) == 0; #else - try { - std_thread.join(); - return true; - } - catch (const std::system_error&) { - return false; - } + try { + std_thread.join(); + return true; + } + catch (const std::system_error &) { + return false; + } #endif } diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h index 793d44130b6..18ec5b32144 100644 --- a/intern/cycles/util/util_thread.h +++ b/intern/cycles/util/util_thread.h @@ -45,103 +45,115 @@ typedef std::condition_variable thread_condition_variable; * custom stack size on macOS. */ class thread { -public: - /* NOTE: Node index of -1 means that affinity will be inherited from the - * parent thread and no override on top of that will happen. */ - thread(function<void()> run_cb, int node = -1); - ~thread(); + public: + /* NOTE: Node index of -1 means that affinity will be inherited from the + * parent thread and no override on top of that will happen. */ + thread(function<void()> run_cb, int node = -1); + ~thread(); - static void *run(void *arg); - bool join(); + static void *run(void *arg); + bool join(); -protected: - function<void()> run_cb_; + protected: + function<void()> run_cb_; #ifdef __APPLE__ - pthread_t pthread_id; + pthread_t pthread_id; #else - std::thread std_thread; + std::thread std_thread; #endif - bool joined_; - int node_; + bool joined_; + int node_; }; /* Own wrapper around pthread's spin lock to make it's use easier. */ class thread_spin_lock { -public: + public: #ifdef __APPLE__ - inline thread_spin_lock() { - spin_ = OS_SPINLOCK_INIT; - } - - inline void lock() { - OSSpinLockLock(&spin_); - } - - inline void unlock() { - OSSpinLockUnlock(&spin_); - } + inline thread_spin_lock() + { + spin_ = OS_SPINLOCK_INIT; + } + + inline void lock() + { + OSSpinLockLock(&spin_); + } + + inline void unlock() + { + OSSpinLockUnlock(&spin_); + } #elif defined(_WIN32) - inline thread_spin_lock() { - const DWORD SPIN_COUNT = 50000; - InitializeCriticalSectionAndSpinCount(&cs_, SPIN_COUNT); - } - - inline ~thread_spin_lock() { - DeleteCriticalSection(&cs_); - } - - inline void lock() { - EnterCriticalSection(&cs_); - } - - inline void unlock() { - LeaveCriticalSection(&cs_); - } + inline thread_spin_lock() + { + const DWORD SPIN_COUNT = 50000; + InitializeCriticalSectionAndSpinCount(&cs_, SPIN_COUNT); + } + + inline ~thread_spin_lock() + { + DeleteCriticalSection(&cs_); + } + + inline void lock() + { + EnterCriticalSection(&cs_); + } + + inline void unlock() + { + LeaveCriticalSection(&cs_); + } #else - inline thread_spin_lock() { - pthread_spin_init(&spin_, 0); - } - - inline ~thread_spin_lock() { - pthread_spin_destroy(&spin_); - } - - inline void lock() { - pthread_spin_lock(&spin_); - } - - inline void unlock() { - pthread_spin_unlock(&spin_); - } + inline thread_spin_lock() + { + pthread_spin_init(&spin_, 0); + } + + inline ~thread_spin_lock() + { + pthread_spin_destroy(&spin_); + } + + inline void lock() + { + pthread_spin_lock(&spin_); + } + + inline void unlock() + { + pthread_spin_unlock(&spin_); + } #endif -protected: + protected: #ifdef __APPLE__ - OSSpinLock spin_; + OSSpinLock spin_; #elif defined(_WIN32) - CRITICAL_SECTION cs_; + CRITICAL_SECTION cs_; #else - pthread_spinlock_t spin_; + pthread_spinlock_t spin_; #endif }; class thread_scoped_spin_lock { -public: - explicit thread_scoped_spin_lock(thread_spin_lock& lock) - : lock_(lock) { - lock_.lock(); - } + public: + explicit thread_scoped_spin_lock(thread_spin_lock &lock) : lock_(lock) + { + lock_.lock(); + } - ~thread_scoped_spin_lock() { - lock_.unlock(); - } + ~thread_scoped_spin_lock() + { + lock_.unlock(); + } - /* TODO(sergey): Implement manual control over lock/unlock. */ + /* TODO(sergey): Implement manual control over lock/unlock. */ -protected: - thread_spin_lock& lock_; + protected: + thread_spin_lock &lock_; }; CCL_NAMESPACE_END -#endif /* __UTIL_THREAD_H__ */ +#endif /* __UTIL_THREAD_H__ */ diff --git a/intern/cycles/util/util_time.cpp b/intern/cycles/util/util_time.cpp index 9983fdd1df3..1641395d07e 100644 --- a/intern/cycles/util/util_time.cpp +++ b/intern/cycles/util/util_time.cpp @@ -32,44 +32,44 @@ CCL_NAMESPACE_BEGIN #ifdef _WIN32 double time_dt() { - __int64 frequency, counter; + __int64 frequency, counter; - QueryPerformanceFrequency((LARGE_INTEGER*)&frequency); - QueryPerformanceCounter((LARGE_INTEGER*)&counter); + QueryPerformanceFrequency((LARGE_INTEGER *)&frequency); + QueryPerformanceCounter((LARGE_INTEGER *)&counter); - return (double)counter/(double)frequency; + return (double)counter / (double)frequency; } void time_sleep(double t) { - Sleep((int)(t*1000)); + Sleep((int)(t * 1000)); } #else double time_dt() { - struct timeval now; - gettimeofday(&now, NULL); + struct timeval now; + gettimeofday(&now, NULL); - return now.tv_sec + now.tv_usec*1e-6; + return now.tv_sec + now.tv_usec * 1e-6; } /* sleep t seconds */ void time_sleep(double t) { - /* get whole seconds */ - int s = (int)t; + /* get whole seconds */ + int s = (int)t; - if(s >= 1) { - sleep(s); + if (s >= 1) { + sleep(s); - /* adjust parameter to remove whole seconds */ - t -= s; - } + /* adjust parameter to remove whole seconds */ + t -= s; + } - /* get microseconds */ - int us = (int)(t * 1e6); - if(us > 0) - usleep(us); + /* get microseconds */ + int us = (int)(t * 1e6); + if (us > 0) + usleep(us); } #endif @@ -77,63 +77,63 @@ void time_sleep(double t) string time_human_readable_from_seconds(const double seconds) { - const int h = (((int)seconds) / (60 * 60)); - const int m = (((int)seconds) / 60) % 60; - const int s = (((int)seconds) % 60); - const int r = (((int)(seconds * 100)) % 100); - - if(h > 0) { - return string_printf("%.2d:%.2d:%.2d.%.2d", h, m, s, r); - } - else { - return string_printf("%.2d:%.2d.%.2d", m, s, r); - } + const int h = (((int)seconds) / (60 * 60)); + const int m = (((int)seconds) / 60) % 60; + const int s = (((int)seconds) % 60); + const int r = (((int)(seconds * 100)) % 100); + + if (h > 0) { + return string_printf("%.2d:%.2d:%.2d.%.2d", h, m, s, r); + } + else { + return string_printf("%.2d:%.2d.%.2d", m, s, r); + } } -double time_human_readable_to_seconds(const string& time_string) +double time_human_readable_to_seconds(const string &time_string) { - /* Those are multiplies of a corresponding token surrounded by : in the - * time string, which denotes how to convert value to seconds. - * Effectively: seconds, minutes, hours, days in seconds. */ - const int multipliers[] = {1, 60, 60*60, 24*60*60}; - const int num_multiplies = sizeof(multipliers) / sizeof(*multipliers); - if(time_string.empty()) { - return 0.0; - } - double result = 0.0; - /* Split fractions of a second from the encoded time. */ - vector<string> fraction_tokens; - string_split(fraction_tokens, time_string, ".", false); - const int num_fraction_tokens = fraction_tokens.size(); - if(num_fraction_tokens == 0) { - /* Time string is malformed. */ - return 0.0; - } - else if(fraction_tokens.size() == 1) { - /* There is no fraction of a second specified, the rest of the code - * handles this normally. */ - } - else if(fraction_tokens.size() == 2) { - result = atof(fraction_tokens[1].c_str()); - result *= pow(0.1, fraction_tokens[1].length()); - } - else { - /* This is not a valid string, the result can not be reliable. */ - return 0.0; - } - /* Split hours, minutes and seconds. - * Hours part is optional. */ - vector<string> tokens; - string_split(tokens, fraction_tokens[0], ":", false); - const int num_tokens = tokens.size(); - if(num_tokens > num_multiplies) { - /* Can not reliably represent the value. */ - return 0.0; - } - for(int i = 0; i < num_tokens; ++i) { - result += atoi(tokens[num_tokens - i - 1].c_str()) * multipliers[i]; - } - return result; + /* Those are multiplies of a corresponding token surrounded by : in the + * time string, which denotes how to convert value to seconds. + * Effectively: seconds, minutes, hours, days in seconds. */ + const int multipliers[] = {1, 60, 60 * 60, 24 * 60 * 60}; + const int num_multiplies = sizeof(multipliers) / sizeof(*multipliers); + if (time_string.empty()) { + return 0.0; + } + double result = 0.0; + /* Split fractions of a second from the encoded time. */ + vector<string> fraction_tokens; + string_split(fraction_tokens, time_string, ".", false); + const int num_fraction_tokens = fraction_tokens.size(); + if (num_fraction_tokens == 0) { + /* Time string is malformed. */ + return 0.0; + } + else if (fraction_tokens.size() == 1) { + /* There is no fraction of a second specified, the rest of the code + * handles this normally. */ + } + else if (fraction_tokens.size() == 2) { + result = atof(fraction_tokens[1].c_str()); + result *= pow(0.1, fraction_tokens[1].length()); + } + else { + /* This is not a valid string, the result can not be reliable. */ + return 0.0; + } + /* Split hours, minutes and seconds. + * Hours part is optional. */ + vector<string> tokens; + string_split(tokens, fraction_tokens[0], ":", false); + const int num_tokens = tokens.size(); + if (num_tokens > num_multiplies) { + /* Can not reliably represent the value. */ + return 0.0; + } + for (int i = 0; i < num_tokens; ++i) { + result += atoi(tokens[num_tokens - i - 1].c_str()) * multipliers[i]; + } + return result; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h index ed4dd5154d7..bc9a7414312 100644 --- a/intern/cycles/util/util_time.h +++ b/intern/cycles/util/util_time.h @@ -32,38 +32,38 @@ void time_sleep(double t); /* Scoped timer. */ class scoped_timer { -public: - explicit scoped_timer(double *value = NULL) : value_(value) - { - time_start_ = time_dt(); - } - - ~scoped_timer() - { - if(value_ != NULL) { - *value_ = get_time(); - } - } - - double get_start() const - { - return time_start_; - } - - double get_time() const - { - return time_dt() - time_start_; - } - -protected: - double *value_; - double time_start_; + public: + explicit scoped_timer(double *value = NULL) : value_(value) + { + time_start_ = time_dt(); + } + + ~scoped_timer() + { + if (value_ != NULL) { + *value_ = get_time(); + } + } + + double get_start() const + { + return time_start_; + } + + double get_time() const + { + return time_dt() - time_start_; + } + + protected: + double *value_; + double time_start_; }; /* Make human readable string from time, compatible with Blender metadata. */ string time_human_readable_from_seconds(const double seconds); -double time_human_readable_to_seconds(const string& str); +double time_human_readable_to_seconds(const string &str); CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp index c72e729a2e5..6a9bfbea4ca 100644 --- a/intern/cycles/util/util_transform.cpp +++ b/intern/cycles/util/util_transform.cpp @@ -58,232 +58,230 @@ CCL_NAMESPACE_BEGIN static bool transform_matrix4_gj_inverse(float R[][4], float M[][4]) { - /* forward elimination */ - for(int i = 0; i < 4; i++) { - int pivot = i; - float pivotsize = M[i][i]; - - if(pivotsize < 0) - pivotsize = -pivotsize; - - for(int j = i + 1; j < 4; j++) { - float tmp = M[j][i]; - - if(tmp < 0) - tmp = -tmp; - - if(tmp > pivotsize) { - pivot = j; - pivotsize = tmp; - } - } - - if(UNLIKELY(pivotsize == 0.0f)) - return false; - - if(pivot != i) { - for(int j = 0; j < 4; j++) { - float tmp; - - tmp = M[i][j]; - M[i][j] = M[pivot][j]; - M[pivot][j] = tmp; - - tmp = R[i][j]; - R[i][j] = R[pivot][j]; - R[pivot][j] = tmp; - } - } - - for(int j = i + 1; j < 4; j++) { - float f = M[j][i] / M[i][i]; - - for(int k = 0; k < 4; k++) { - M[j][k] -= f*M[i][k]; - R[j][k] -= f*R[i][k]; - } - } - } - - /* backward substitution */ - for(int i = 3; i >= 0; --i) { - float f; - - if(UNLIKELY((f = M[i][i]) == 0.0f)) - return false; - - for(int j = 0; j < 4; j++) { - M[i][j] /= f; - R[i][j] /= f; - } - - for(int j = 0; j < i; j++) { - f = M[j][i]; - - for(int k = 0; k < 4; k++) { - M[j][k] -= f*M[i][k]; - R[j][k] -= f*R[i][k]; - } - } - } - - return true; + /* forward elimination */ + for (int i = 0; i < 4; i++) { + int pivot = i; + float pivotsize = M[i][i]; + + if (pivotsize < 0) + pivotsize = -pivotsize; + + for (int j = i + 1; j < 4; j++) { + float tmp = M[j][i]; + + if (tmp < 0) + tmp = -tmp; + + if (tmp > pivotsize) { + pivot = j; + pivotsize = tmp; + } + } + + if (UNLIKELY(pivotsize == 0.0f)) + return false; + + if (pivot != i) { + for (int j = 0; j < 4; j++) { + float tmp; + + tmp = M[i][j]; + M[i][j] = M[pivot][j]; + M[pivot][j] = tmp; + + tmp = R[i][j]; + R[i][j] = R[pivot][j]; + R[pivot][j] = tmp; + } + } + + for (int j = i + 1; j < 4; j++) { + float f = M[j][i] / M[i][i]; + + for (int k = 0; k < 4; k++) { + M[j][k] -= f * M[i][k]; + R[j][k] -= f * R[i][k]; + } + } + } + + /* backward substitution */ + for (int i = 3; i >= 0; --i) { + float f; + + if (UNLIKELY((f = M[i][i]) == 0.0f)) + return false; + + for (int j = 0; j < 4; j++) { + M[i][j] /= f; + R[i][j] /= f; + } + + for (int j = 0; j < i; j++) { + f = M[j][i]; + + for (int k = 0; k < 4; k++) { + M[j][k] -= f * M[i][k]; + R[j][k] -= f * R[i][k]; + } + } + } + + return true; } -ProjectionTransform projection_inverse(const ProjectionTransform& tfm) +ProjectionTransform projection_inverse(const ProjectionTransform &tfm) { - ProjectionTransform tfmR = projection_identity(); - float M[4][4], R[4][4]; + ProjectionTransform tfmR = projection_identity(); + float M[4][4], R[4][4]; - memcpy(R, &tfmR, sizeof(R)); - memcpy(M, &tfm, sizeof(M)); + memcpy(R, &tfmR, sizeof(R)); + memcpy(M, &tfm, sizeof(M)); - if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { - /* matrix is degenerate (e.g. 0 scale on some axis), ideally we should - * never be in this situation, but try to invert it anyway with tweak */ - M[0][0] += 1e-8f; - M[1][1] += 1e-8f; - M[2][2] += 1e-8f; + if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { + /* matrix is degenerate (e.g. 0 scale on some axis), ideally we should + * never be in this situation, but try to invert it anyway with tweak */ + M[0][0] += 1e-8f; + M[1][1] += 1e-8f; + M[2][2] += 1e-8f; - if(UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { - return projection_identity(); - } - } + if (UNLIKELY(!transform_matrix4_gj_inverse(R, M))) { + return projection_identity(); + } + } - memcpy(&tfmR, R, sizeof(R)); + memcpy(&tfmR, R, sizeof(R)); - return tfmR; + return tfmR; } -Transform transform_inverse(const Transform& tfm) +Transform transform_inverse(const Transform &tfm) { - ProjectionTransform projection(tfm); - return projection_to_transform(projection_inverse(projection)); + ProjectionTransform projection(tfm); + return projection_to_transform(projection_inverse(projection)); } -Transform transform_transposed_inverse(const Transform& tfm) +Transform transform_transposed_inverse(const Transform &tfm) { - ProjectionTransform projection(tfm); - ProjectionTransform iprojection = projection_inverse(projection); - return projection_to_transform(projection_transpose(iprojection)); + ProjectionTransform projection(tfm); + ProjectionTransform iprojection = projection_inverse(projection); + return projection_to_transform(projection_transpose(iprojection)); } /* Motion Transform */ -float4 transform_to_quat(const Transform& tfm) +float4 transform_to_quat(const Transform &tfm) { - double trace = (double)(tfm[0][0] + tfm[1][1] + tfm[2][2]); - float4 qt; + double trace = (double)(tfm[0][0] + tfm[1][1] + tfm[2][2]); + float4 qt; - if(trace > 0.0) { - double s = sqrt(trace + 1.0); + if (trace > 0.0) { + double s = sqrt(trace + 1.0); - qt.w = (float)(s/2.0); - s = 0.5/s; + qt.w = (float)(s / 2.0); + s = 0.5 / s; - qt.x = (float)((double)(tfm[2][1] - tfm[1][2]) * s); - qt.y = (float)((double)(tfm[0][2] - tfm[2][0]) * s); - qt.z = (float)((double)(tfm[1][0] - tfm[0][1]) * s); - } - else { - int i = 0; + qt.x = (float)((double)(tfm[2][1] - tfm[1][2]) * s); + qt.y = (float)((double)(tfm[0][2] - tfm[2][0]) * s); + qt.z = (float)((double)(tfm[1][0] - tfm[0][1]) * s); + } + else { + int i = 0; - if(tfm[1][1] > tfm[i][i]) - i = 1; - if(tfm[2][2] > tfm[i][i]) - i = 2; + if (tfm[1][1] > tfm[i][i]) + i = 1; + if (tfm[2][2] > tfm[i][i]) + i = 2; - int j = (i + 1)%3; - int k = (j + 1)%3; + int j = (i + 1) % 3; + int k = (j + 1) % 3; - double s = sqrt((double)(tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0); + double s = sqrt((double)(tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0); - double q[3]; - q[i] = s * 0.5; - if(s != 0.0) - s = 0.5/s; + double q[3]; + q[i] = s * 0.5; + if (s != 0.0) + s = 0.5 / s; - double w = (double)(tfm[k][j] - tfm[j][k]) * s; - q[j] = (double)(tfm[j][i] + tfm[i][j]) * s; - q[k] = (double)(tfm[k][i] + tfm[i][k]) * s; + double w = (double)(tfm[k][j] - tfm[j][k]) * s; + q[j] = (double)(tfm[j][i] + tfm[i][j]) * s; + q[k] = (double)(tfm[k][i] + tfm[i][k]) * s; - qt.x = (float)q[0]; - qt.y = (float)q[1]; - qt.z = (float)q[2]; - qt.w = (float)w; - } + qt.x = (float)q[0]; + qt.y = (float)q[1]; + qt.z = (float)q[2]; + qt.w = (float)w; + } - return qt; + return qt; } static void transform_decompose(DecomposedTransform *decomp, const Transform *tfm) { - /* extract translation */ - decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f); - - /* extract rotation */ - Transform M = *tfm; - M.x.w = 0.0f; M.y.w = 0.0f; M.z.w = 0.0f; - - Transform R = M; - float norm; - int iteration = 0; - - do { - Transform Rnext; - Transform Rit = transform_transposed_inverse(R); - - for(int i = 0; i < 3; i++) - for(int j = 0; j < 4; j++) - Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]); - - norm = 0.0f; - for(int i = 0; i < 3; i++) { - norm = max(norm, - fabsf(R[i][0] - Rnext[i][0]) + - fabsf(R[i][1] - Rnext[i][1]) + - fabsf(R[i][2] - Rnext[i][2])); - } - - R = Rnext; - iteration++; - } while(iteration < 100 && norm > 1e-4f); - - if(transform_negative_scale(R)) - R = R * transform_scale(-1.0f, -1.0f, -1.0f); - - decomp->x = transform_to_quat(R); - - /* extract scale and pack it */ - Transform scale = transform_inverse(R) * M; - decomp->y.w = scale.x.x; - decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y); - decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z); + /* extract translation */ + decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f); + + /* extract rotation */ + Transform M = *tfm; + M.x.w = 0.0f; + M.y.w = 0.0f; + M.z.w = 0.0f; + + Transform R = M; + float norm; + int iteration = 0; + + do { + Transform Rnext; + Transform Rit = transform_transposed_inverse(R); + + for (int i = 0; i < 3; i++) + for (int j = 0; j < 4; j++) + Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]); + + norm = 0.0f; + for (int i = 0; i < 3; i++) { + norm = max(norm, + fabsf(R[i][0] - Rnext[i][0]) + fabsf(R[i][1] - Rnext[i][1]) + + fabsf(R[i][2] - Rnext[i][2])); + } + + R = Rnext; + iteration++; + } while (iteration < 100 && norm > 1e-4f); + + if (transform_negative_scale(R)) + R = R * transform_scale(-1.0f, -1.0f, -1.0f); + + decomp->x = transform_to_quat(R); + + /* extract scale and pack it */ + Transform scale = transform_inverse(R) * M; + decomp->y.w = scale.x.x; + decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y); + decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z); } void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size) { - for(size_t i = 0; i < size; i++) { - transform_decompose(decomp + i, motion + i); - - if(i > 0) { - /* Ensure rotation around shortest angle, negated quaternions are the same - * but this means we don't have to do the check in quat_interpolate */ - if(dot(decomp[i-1].x, decomp[i].x) < 0.0f) - decomp[i-1].x = -decomp[i-1].x; - } - } + for (size_t i = 0; i < size; i++) { + transform_decompose(decomp + i, motion + i); + + if (i > 0) { + /* Ensure rotation around shortest angle, negated quaternions are the same + * but this means we don't have to do the check in quat_interpolate */ + if (dot(decomp[i - 1].x, decomp[i].x) < 0.0f) + decomp[i - 1].x = -decomp[i - 1].x; + } + } } -Transform transform_from_viewplane(BoundBox2D& viewplane) +Transform transform_from_viewplane(BoundBox2D &viewplane) { - return - transform_scale(1.0f / (viewplane.right - viewplane.left), - 1.0f / (viewplane.top - viewplane.bottom), - 1.0f) * - transform_translate(-viewplane.left, - -viewplane.bottom, - 0.0f); + return transform_scale(1.0f / (viewplane.right - viewplane.left), + 1.0f / (viewplane.top - viewplane.bottom), + 1.0f) * + transform_translate(-viewplane.left, -viewplane.bottom, 0.0f); } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index e4cadd3e81a..13ca27c2fed 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -18,7 +18,7 @@ #define __UTIL_TRANSFORM_H__ #ifndef __KERNEL_GPU__ -#include <string.h> +# include <string.h> #endif #include "util/util_math.h" @@ -29,11 +29,17 @@ CCL_NAMESPACE_BEGIN /* Affine transformation, stored as 4x3 matrix. */ typedef struct Transform { - float4 x, y, z; + float4 x, y, z; #ifndef __KERNEL_GPU__ - float4 operator[](int i) const { return *(&x + i); } - float4& operator[](int i) { return *(&x + i); } + float4 operator[](int i) const + { + return *(&x + i); + } + float4 &operator[](int i) + { + return *(&x + i); + } #endif } Transform; @@ -42,267 +48,268 @@ typedef struct Transform { * rotation (4), then translation (3), then 3x3 scale matrix (9). */ typedef struct DecomposedTransform { - float4 x, y, z, w; + float4 x, y, z, w; } DecomposedTransform; /* Functions */ ccl_device_inline float3 transform_point(const Transform *t, const float3 a) { - /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ + /* TODO(sergey): Disabled for now, causes crashes in certain cases. */ #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) - ssef x, y, z, w, aa; - aa = a.m128; + ssef x, y, z, w, aa; + aa = a.m128; - x = _mm_loadu_ps(&t->x.x); - y = _mm_loadu_ps(&t->y.x); - z = _mm_loadu_ps(&t->z.x); - w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); + x = _mm_loadu_ps(&t->x.x); + y = _mm_loadu_ps(&t->y.x); + z = _mm_loadu_ps(&t->z.x); + w = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); - _MM_TRANSPOSE4_PS(x, y, z, w); + _MM_TRANSPOSE4_PS(x, y, z, w); - ssef tmp = shuffle<0>(aa) * x; - tmp = madd(shuffle<1>(aa), y, tmp); - tmp = madd(shuffle<2>(aa), z, tmp); - tmp += w; + ssef tmp = shuffle<0>(aa) * x; + tmp = madd(shuffle<1>(aa), y, tmp); + tmp = madd(shuffle<2>(aa), z, tmp); + tmp += w; - return float3(tmp.m128); + return float3(tmp.m128); #else - float3 c = make_float3( - a.x*t->x.x + a.y*t->x.y + a.z*t->x.z + t->x.w, - a.x*t->y.x + a.y*t->y.y + a.z*t->y.z + t->y.w, - a.x*t->z.x + a.y*t->z.y + a.z*t->z.z + t->z.w); + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w); - return c; + return c; #endif } ccl_device_inline float3 transform_direction(const Transform *t, const float3 a) { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) - ssef x, y, z, w, aa; - aa = a.m128; - x = _mm_loadu_ps(&t->x.x); - y = _mm_loadu_ps(&t->y.x); - z = _mm_loadu_ps(&t->z.x); - w = _mm_setzero_ps(); + ssef x, y, z, w, aa; + aa = a.m128; + x = _mm_loadu_ps(&t->x.x); + y = _mm_loadu_ps(&t->y.x); + z = _mm_loadu_ps(&t->z.x); + w = _mm_setzero_ps(); - _MM_TRANSPOSE4_PS(x, y, z, w); + _MM_TRANSPOSE4_PS(x, y, z, w); - ssef tmp = shuffle<0>(aa) * x; - tmp = madd(shuffle<1>(aa), y, tmp); - tmp = madd(shuffle<2>(aa), z, tmp); + ssef tmp = shuffle<0>(aa) * x; + tmp = madd(shuffle<1>(aa), y, tmp); + tmp = madd(shuffle<2>(aa), z, tmp); - return float3(tmp.m128); + return float3(tmp.m128); #else - float3 c = make_float3( - a.x*t->x.x + a.y*t->x.y + a.z*t->x.z, - a.x*t->y.x + a.y*t->y.y + a.z*t->y.z, - a.x*t->z.x + a.y*t->z.y + a.z*t->z.z); + float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, + a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, + a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); - return c; + return c; #endif } ccl_device_inline float3 transform_direction_transposed(const Transform *t, const float3 a) { - float3 x = make_float3(t->x.x, t->y.x, t->z.x); - float3 y = make_float3(t->x.y, t->y.y, t->z.y); - float3 z = make_float3(t->x.z, t->y.z, t->z.z); + float3 x = make_float3(t->x.x, t->y.x, t->z.x); + float3 y = make_float3(t->x.y, t->y.y, t->z.y); + float3 z = make_float3(t->x.z, t->y.z, t->z.z); - return make_float3(dot(x, a), dot(y, a), dot(z, a)); + return make_float3(dot(x, a), dot(y, a), dot(z, a)); } -ccl_device_inline Transform make_transform(float a, float b, float c, float d, - float e, float f, float g, float h, - float i, float j, float k, float l) +ccl_device_inline Transform make_transform(float a, + float b, + float c, + float d, + float e, + float f, + float g, + float h, + float i, + float j, + float k, + float l) { - Transform t; - - t.x.x = a; t.x.y = b; t.x.z = c; t.x.w = d; - t.y.x = e; t.y.y = f; t.y.z = g; t.y.w = h; - t.z.x = i; t.z.y = j; t.z.z = k; t.z.w = l; - - return t; + Transform t; + + t.x.x = a; + t.x.y = b; + t.x.z = c; + t.x.w = d; + t.y.x = e; + t.y.y = f; + t.y.z = g; + t.y.w = h; + t.z.x = i; + t.z.y = j; + t.z.z = k; + t.z.w = l; + + return t; } /* Constructs a coordinate frame from a normalized normal. */ ccl_device_inline Transform make_transform_frame(float3 N) { - const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N); - const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N); - const float3 dx = normalize((dot(dx0,dx0) > dot(dx1,dx1))? dx0: dx1); - const float3 dy = normalize(cross(N, dx)); - return make_transform(dx.x, dx.y, dx.z, 0.0f, - dy.x, dy.y, dy.z, 0.0f, - N.x , N.y, N.z, 0.0f); + const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N); + const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N); + const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? dx0 : dx1); + const float3 dy = normalize(cross(N, dx)); + return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f); } #ifndef __KERNEL_GPU__ ccl_device_inline Transform operator*(const Transform a, const Transform b) { - float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f); - float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f); - float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f); - float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f); + float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f); + float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f); + float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f); + float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f); - Transform t; - t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w)); - t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w)); - t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w)); + Transform t; + t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w)); + t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w)); + t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w)); - return t; + return t; } -ccl_device_inline void print_transform(const char *label, const Transform& t) +ccl_device_inline void print_transform(const char *label, const Transform &t) { - print_float4(label, t.x); - print_float4(label, t.y); - print_float4(label, t.z); - printf("\n"); + print_float4(label, t.x); + print_float4(label, t.y); + print_float4(label, t.z); + printf("\n"); } ccl_device_inline Transform transform_translate(float3 t) { - return make_transform( - 1, 0, 0, t.x, - 0, 1, 0, t.y, - 0, 0, 1, t.z); + return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z); } ccl_device_inline Transform transform_translate(float x, float y, float z) { - return transform_translate(make_float3(x, y, z)); + return transform_translate(make_float3(x, y, z)); } ccl_device_inline Transform transform_scale(float3 s) { - return make_transform( - s.x, 0, 0, 0, - 0, s.y, 0, 0, - 0, 0, s.z, 0); + return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0); } ccl_device_inline Transform transform_scale(float x, float y, float z) { - return transform_scale(make_float3(x, y, z)); + return transform_scale(make_float3(x, y, z)); } ccl_device_inline Transform transform_rotate(float angle, float3 axis) { - float s = sinf(angle); - float c = cosf(angle); - float t = 1.0f - c; - - axis = normalize(axis); - - return make_transform( - axis.x*axis.x*t + c, - axis.x*axis.y*t - s*axis.z, - axis.x*axis.z*t + s*axis.y, - 0.0f, - - axis.y*axis.x*t + s*axis.z, - axis.y*axis.y*t + c, - axis.y*axis.z*t - s*axis.x, - 0.0f, - - axis.z*axis.x*t - s*axis.y, - axis.z*axis.y*t + s*axis.x, - axis.z*axis.z*t + c, - 0.0f); + float s = sinf(angle); + float c = cosf(angle); + float t = 1.0f - c; + + axis = normalize(axis); + + return make_transform(axis.x * axis.x * t + c, + axis.x * axis.y * t - s * axis.z, + axis.x * axis.z * t + s * axis.y, + 0.0f, + + axis.y * axis.x * t + s * axis.z, + axis.y * axis.y * t + c, + axis.y * axis.z * t - s * axis.x, + 0.0f, + + axis.z * axis.x * t - s * axis.y, + axis.z * axis.y * t + s * axis.x, + axis.z * axis.z * t + c, + 0.0f); } /* Euler is assumed to be in XYZ order. */ ccl_device_inline Transform transform_euler(float3 euler) { - return - transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) * - transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) * - transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)); + return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) * + transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) * + transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)); } ccl_device_inline Transform transform_identity() { - return transform_scale(1.0f, 1.0f, 1.0f); + return transform_scale(1.0f, 1.0f, 1.0f); } -ccl_device_inline bool operator==(const Transform& A, const Transform& B) +ccl_device_inline bool operator==(const Transform &A, const Transform &B) { - return memcmp(&A, &B, sizeof(Transform)) == 0; + return memcmp(&A, &B, sizeof(Transform)) == 0; } -ccl_device_inline bool operator!=(const Transform& A, const Transform& B) +ccl_device_inline bool operator!=(const Transform &A, const Transform &B) { - return !(A == B); + return !(A == B); } ccl_device_inline float3 transform_get_column(const Transform *t, int column) { - return make_float3(t->x[column], t->y[column], t->z[column]); + return make_float3(t->x[column], t->y[column], t->z[column]); } ccl_device_inline void transform_set_column(Transform *t, int column, float3 value) { - t->x[column] = value.x; - t->y[column] = value.y; - t->z[column] = value.z; + t->x[column] = value.x; + t->y[column] = value.y; + t->z[column] = value.z; } -Transform transform_inverse(const Transform& a); -Transform transform_transposed_inverse(const Transform& a); +Transform transform_inverse(const Transform &a); +Transform transform_transposed_inverse(const Transform &a); -ccl_device_inline bool transform_uniform_scale(const Transform& tfm, float& scale) +ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale) { - /* the epsilon here is quite arbitrary, but this function is only used for - * surface area and bump, where we expect it to not be so sensitive */ - float eps = 1e-6f; - - float sx = len_squared(float4_to_float3(tfm.x)); - float sy = len_squared(float4_to_float3(tfm.y)); - float sz = len_squared(float4_to_float3(tfm.z)); - float stx = len_squared(transform_get_column(&tfm, 0)); - float sty = len_squared(transform_get_column(&tfm, 1)); - float stz = len_squared(transform_get_column(&tfm, 2)); - - if(fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && - fabsf(sx - stx) < eps && fabsf(sx - sty) < eps && - fabsf(sx - stz) < eps) - { - scale = sx; - return true; - } - - return false; + /* the epsilon here is quite arbitrary, but this function is only used for + * surface area and bump, where we expect it to not be so sensitive */ + float eps = 1e-6f; + + float sx = len_squared(float4_to_float3(tfm.x)); + float sy = len_squared(float4_to_float3(tfm.y)); + float sz = len_squared(float4_to_float3(tfm.z)); + float stx = len_squared(transform_get_column(&tfm, 0)); + float sty = len_squared(transform_get_column(&tfm, 1)); + float stz = len_squared(transform_get_column(&tfm, 2)); + + if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps && + fabsf(sx - sty) < eps && fabsf(sx - stz) < eps) { + scale = sx; + return true; + } + + return false; } -ccl_device_inline bool transform_negative_scale(const Transform& tfm) +ccl_device_inline bool transform_negative_scale(const Transform &tfm) { - float3 c0 = transform_get_column(&tfm, 0); - float3 c1 = transform_get_column(&tfm, 1); - float3 c2 = transform_get_column(&tfm, 2); + float3 c0 = transform_get_column(&tfm, 0); + float3 c1 = transform_get_column(&tfm, 1); + float3 c2 = transform_get_column(&tfm, 2); - return (dot(cross(c0, c1), c2) < 0.0f); + return (dot(cross(c0, c1), c2) < 0.0f); } -ccl_device_inline Transform transform_clear_scale(const Transform& tfm) +ccl_device_inline Transform transform_clear_scale(const Transform &tfm) { - Transform ntfm = tfm; + Transform ntfm = tfm; - transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0))); - transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1))); - transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2))); + transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0))); + transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1))); + transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2))); - return ntfm; + return ntfm; } ccl_device_inline Transform transform_empty() { - return make_transform( - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0); + return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } #endif @@ -311,90 +318,101 @@ ccl_device_inline Transform transform_empty() ccl_device_inline float4 quat_interpolate(float4 q1, float4 q2, float t) { - /* use simpe nlerp instead of slerp. it's faster and almost the same */ - return normalize((1.0f - t)*q1 + t*q2); + /* use simpe nlerp instead of slerp. it's faster and almost the same */ + return normalize((1.0f - t) * q1 + t * q2); #if 0 - /* note: this does not ensure rotation around shortest angle, q1 and q2 - * are assumed to be matched already in transform_motion_decompose */ - float costheta = dot(q1, q2); - - /* possible optimization: it might be possible to precompute theta/qperp */ - - if(costheta > 0.9995f) { - /* linear interpolation in degenerate case */ - return normalize((1.0f - t)*q1 + t*q2); - } - else { - /* slerp */ - float theta = acosf(clamp(costheta, -1.0f, 1.0f)); - float4 qperp = normalize(q2 - q1 * costheta); - float thetap = theta * t; - return q1 * cosf(thetap) + qperp * sinf(thetap); - } + /* note: this does not ensure rotation around shortest angle, q1 and q2 + * are assumed to be matched already in transform_motion_decompose */ + float costheta = dot(q1, q2); + + /* possible optimization: it might be possible to precompute theta/qperp */ + + if(costheta > 0.9995f) { + /* linear interpolation in degenerate case */ + return normalize((1.0f - t)*q1 + t*q2); + } + else { + /* slerp */ + float theta = acosf(clamp(costheta, -1.0f, 1.0f)); + float4 qperp = normalize(q2 - q1 * costheta); + float thetap = theta * t; + return q1 * cosf(thetap) + qperp * sinf(thetap); + } #endif } ccl_device_inline Transform transform_quick_inverse(Transform M) { - /* possible optimization: can we avoid doing this altogether and construct - * the inverse matrix directly from negated translation, transposed rotation, - * scale can be inverted but what about shearing? */ - Transform R; - float det = M.x.x*(M.z.z*M.y.y - M.z.y*M.y.z) - M.y.x*(M.z.z*M.x.y - M.z.y*M.x.z) + M.z.x*(M.y.z*M.x.y - M.y.y*M.x.z); - if(det == 0.0f) { - M.x.x += 1e-8f; - M.y.y += 1e-8f; - M.z.z += 1e-8f; - det = M.x.x*(M.z.z*M.y.y - M.z.y*M.y.z) - M.y.x*(M.z.z*M.x.y - M.z.y*M.x.z) + M.z.x*(M.y.z*M.x.y - M.y.y*M.x.z); - } - det = (det != 0.0f)? 1.0f/det: 0.0f; - - float3 Rx = det*make_float3(M.z.z*M.y.y - M.z.y*M.y.z, M.z.y*M.x.z - M.z.z*M.x.y, M.y.z*M.x.y - M.y.y*M.x.z); - float3 Ry = det*make_float3(M.z.x*M.y.z - M.z.z*M.y.x, M.z.z*M.x.x - M.z.x*M.x.z, M.y.x*M.x.z - M.y.z*M.x.x); - float3 Rz = det*make_float3(M.z.y*M.y.x - M.z.x*M.y.y, M.z.x*M.x.y - M.z.y*M.x.x, M.y.y*M.x.x - M.y.x*M.x.y); - float3 T = -make_float3(M.x.w, M.y.w, M.z.w); - - R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T)); - R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T)); - R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T)); - - return R; + /* possible optimization: can we avoid doing this altogether and construct + * the inverse matrix directly from negated translation, transposed rotation, + * scale can be inverted but what about shearing? */ + Transform R; + float det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + + M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); + if (det == 0.0f) { + M.x.x += 1e-8f; + M.y.y += 1e-8f; + M.z.z += 1e-8f; + det = M.x.x * (M.z.z * M.y.y - M.z.y * M.y.z) - M.y.x * (M.z.z * M.x.y - M.z.y * M.x.z) + + M.z.x * (M.y.z * M.x.y - M.y.y * M.x.z); + } + det = (det != 0.0f) ? 1.0f / det : 0.0f; + + float3 Rx = det * make_float3(M.z.z * M.y.y - M.z.y * M.y.z, + M.z.y * M.x.z - M.z.z * M.x.y, + M.y.z * M.x.y - M.y.y * M.x.z); + float3 Ry = det * make_float3(M.z.x * M.y.z - M.z.z * M.y.x, + M.z.z * M.x.x - M.z.x * M.x.z, + M.y.x * M.x.z - M.y.z * M.x.x); + float3 Rz = det * make_float3(M.z.y * M.y.x - M.z.x * M.y.y, + M.z.x * M.x.y - M.z.y * M.x.x, + M.y.y * M.x.x - M.y.x * M.x.y); + float3 T = -make_float3(M.x.w, M.y.w, M.z.w); + + R.x = make_float4(Rx.x, Rx.y, Rx.z, dot(Rx, T)); + R.y = make_float4(Ry.x, Ry.y, Ry.z, dot(Ry, T)); + R.z = make_float4(Rz.x, Rz.y, Rz.z, dot(Rz, T)); + + return R; } ccl_device_inline void transform_compose(Transform *tfm, const DecomposedTransform *decomp) { - /* rotation */ - float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc; - - q0 = M_SQRT2_F * decomp->x.w; - q1 = M_SQRT2_F * decomp->x.x; - q2 = M_SQRT2_F * decomp->x.y; - q3 = M_SQRT2_F * decomp->x.z; - - qda = q0*q1; - qdb = q0*q2; - qdc = q0*q3; - qaa = q1*q1; - qab = q1*q2; - qac = q1*q3; - qbb = q2*q2; - qbc = q2*q3; - qcc = q3*q3; - - float3 rotation_x = make_float3(1.0f-qbb-qcc, -qdc+qab, qdb+qac); - float3 rotation_y = make_float3(qdc+qab, 1.0f-qaa-qcc, -qda+qbc); - float3 rotation_z = make_float3(-qdb+qac, qda+qbc, 1.0f-qaa-qbb); - - /* scale */ - float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y); - float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z); - float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w); - - /* compose with translation */ - tfm->x = make_float4(dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); - tfm->y = make_float4(dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); - tfm->z = make_float4(dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); + /* rotation */ + float q0, q1, q2, q3, qda, qdb, qdc, qaa, qab, qac, qbb, qbc, qcc; + + q0 = M_SQRT2_F * decomp->x.w; + q1 = M_SQRT2_F * decomp->x.x; + q2 = M_SQRT2_F * decomp->x.y; + q3 = M_SQRT2_F * decomp->x.z; + + qda = q0 * q1; + qdb = q0 * q2; + qdc = q0 * q3; + qaa = q1 * q1; + qab = q1 * q2; + qac = q1 * q3; + qbb = q2 * q2; + qbc = q2 * q3; + qcc = q3 * q3; + + float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac); + float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc); + float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb); + + /* scale */ + float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y); + float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z); + float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w); + + /* compose with translation */ + tfm->x = make_float4( + dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x); + tfm->y = make_float4( + dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y); + tfm->z = make_float4( + dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z); } /* Interpolate from array of decomposed transforms. */ @@ -403,62 +421,60 @@ ccl_device void transform_motion_array_interpolate(Transform *tfm, uint numsteps, float time) { - /* Figure out which steps we need to interpolate. */ - int maxstep = numsteps-1; - int step = min((int)(time*maxstep), maxstep-1); - float t = time*maxstep - step; - - const ccl_global DecomposedTransform *a = motion + step; - const ccl_global DecomposedTransform *b = motion + step + 1; - - /* Interpolate rotation, translation and scale. */ - DecomposedTransform decomp; - decomp.x = quat_interpolate(a->x, b->x, t); - decomp.y = (1.0f - t)*a->y + t*b->y; - decomp.z = (1.0f - t)*a->z + t*b->z; - decomp.w = (1.0f - t)*a->w + t*b->w; - - /* Compose rotation, translation, scale into matrix. */ - transform_compose(tfm, &decomp); + /* Figure out which steps we need to interpolate. */ + int maxstep = numsteps - 1; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + const ccl_global DecomposedTransform *a = motion + step; + const ccl_global DecomposedTransform *b = motion + step + 1; + + /* Interpolate rotation, translation and scale. */ + DecomposedTransform decomp; + decomp.x = quat_interpolate(a->x, b->x, t); + decomp.y = (1.0f - t) * a->y + t * b->y; + decomp.z = (1.0f - t) * a->z + t * b->z; + decomp.w = (1.0f - t) * a->w + t * b->w; + + /* Compose rotation, translation, scale into matrix. */ + transform_compose(tfm, &decomp); } #ifndef __KERNEL_GPU__ -#ifdef WITH_EMBREE -ccl_device void transform_motion_array_interpolate_straight(Transform *tfm, - const ccl_global DecomposedTransform *motion, - uint numsteps, - float time) +# ifdef WITH_EMBREE +ccl_device void transform_motion_array_interpolate_straight( + Transform *tfm, const ccl_global DecomposedTransform *motion, uint numsteps, float time) { - /* Figure out which steps we need to interpolate. */ - int maxstep = numsteps - 1; - int step = min((int)(time*maxstep), maxstep - 1); - float t = time * maxstep - step; - - const ccl_global DecomposedTransform *a = motion + step; - const ccl_global DecomposedTransform *b = motion + step + 1; - Transform step1, step2; - - transform_compose(&step1, a); - transform_compose(&step2, b); - - /* matrix lerp */ - tfm->x = (1.0f - t) * step1.x + t * step2.x; - tfm->y = (1.0f - t) * step1.y + t * step2.y; - tfm->z = (1.0f - t) * step1.z + t * step2.z; + /* Figure out which steps we need to interpolate. */ + int maxstep = numsteps - 1; + int step = min((int)(time * maxstep), maxstep - 1); + float t = time * maxstep - step; + + const ccl_global DecomposedTransform *a = motion + step; + const ccl_global DecomposedTransform *b = motion + step + 1; + Transform step1, step2; + + transform_compose(&step1, a); + transform_compose(&step2, b); + + /* matrix lerp */ + tfm->x = (1.0f - t) * step1.x + t * step2.x; + tfm->y = (1.0f - t) * step1.y + t * step2.y; + tfm->z = (1.0f - t) * step1.z + t * step2.z; } -#endif +# endif class BoundBox2D; -ccl_device_inline bool operator==(const DecomposedTransform& A, const DecomposedTransform& B) +ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B) { - return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0; + return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0; } -float4 transform_to_quat(const Transform& tfm); +float4 transform_to_quat(const Transform &tfm); void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size); -Transform transform_from_viewplane(BoundBox2D& viewplane); +Transform transform_from_viewplane(BoundBox2D &viewplane); #endif @@ -469,14 +485,14 @@ Transform transform_from_viewplane(BoundBox2D& viewplane); #ifdef __KERNEL_OPENCL__ -#define OPENCL_TRANSFORM_ADDRSPACE_GLUE(a, b) a ## b -#define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(function) \ -ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(function, _addrspace)( \ - ccl_addr_space const Transform *t, const float3 a) \ -{ \ - Transform private_tfm = *t; \ - return function(&private_tfm, a); \ -} +# define OPENCL_TRANSFORM_ADDRSPACE_GLUE(a, b) a##b +# define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(function) \ + ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(function, _addrspace)( \ + ccl_addr_space const Transform *t, const float3 a) \ + { \ + Transform private_tfm = *t; \ + return function(&private_tfm, a); \ + } OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_point) OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction) @@ -495,4 +511,4 @@ OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed) CCL_NAMESPACE_END -#endif /* __UTIL_TRANSFORM_H__ */ +#endif /* __UTIL_TRANSFORM_H__ */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 535048d8f8c..48e9983ac8f 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -74,31 +74,31 @@ typedef int64_t ssize_t; # else typedef int32_t ssize_t; # endif -# endif /* _WIN32 */ +# endif /* _WIN32 */ /* Generic Memory Pointer */ typedef uint64_t device_ptr; -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ ccl_device_inline size_t align_up(size_t offset, size_t alignment) { - return (offset + alignment - 1) & ~(alignment - 1); + return (offset + alignment - 1) & ~(alignment - 1); } ccl_device_inline size_t divide_up(size_t x, size_t y) { - return (x + y - 1) / y; + return (x + y - 1) / y; } ccl_device_inline size_t round_up(size_t x, size_t multiple) { - return ((x + multiple - 1) / multiple) * multiple; + return ((x + multiple - 1) / multiple) * multiple; } ccl_device_inline size_t round_down(size_t x, size_t multiple) { - return (x / multiple) * multiple; + return (x / multiple) * multiple; } CCL_NAMESPACE_END @@ -150,10 +150,10 @@ CCL_NAMESPACE_END # include "util/util_sseb.h" # include "util/util_ssei.h" # include "util/util_ssef.h" -#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) -# include "util/util_avxb.h" -# include "util/util_avxf.h" -#endif +# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__) +# include "util/util_avxb.h" +# include "util/util_avxf.h" +# endif #endif -#endif /* __UTIL_TYPES_H__ */ +#endif /* __UTIL_TYPES_H__ */ diff --git a/intern/cycles/util/util_types_float2.h b/intern/cycles/util/util_types_float2.h index ec7a1f717a1..3760bf579b6 100644 --- a/intern/cycles/util/util_types_float2.h +++ b/intern/cycles/util/util_types_float2.h @@ -25,16 +25,16 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct float2 { - float x, y; + float x, y; - __forceinline float operator[](int i) const; - __forceinline float& operator[](int i); + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); }; ccl_device_inline float2 make_float2(float x, float y); -ccl_device_inline void print_float2(const char *label, const float2& a); -#endif /* __KERNEL_GPU__ */ +ccl_device_inline void print_float2(const char *label, const float2 &a); +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_FLOAT2_H__ */ +#endif /* __UTIL_TYPES_FLOAT2_H__ */ diff --git a/intern/cycles/util/util_types_float2_impl.h b/intern/cycles/util/util_types_float2_impl.h index 782dda195eb..7810d2a8781 100644 --- a/intern/cycles/util/util_types_float2_impl.h +++ b/intern/cycles/util/util_types_float2_impl.h @@ -30,30 +30,30 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ __forceinline float float2::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); } -__forceinline float& float2::operator[](int i) +__forceinline float &float2::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); } ccl_device_inline float2 make_float2(float x, float y) { - float2 a = {x, y}; - return a; + float2 a = {x, y}; + return a; } -ccl_device_inline void print_float2(const char *label, const float2& a) +ccl_device_inline void print_float2(const char *label, const float2 &a) { - printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); + printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_FLOAT2_IMPL_H__ */ +#endif /* __UTIL_TYPES_FLOAT2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float3.h b/intern/cycles/util/util_types_float3.h index ed2300e7996..694a600bf5c 100644 --- a/intern/cycles/util/util_types_float3.h +++ b/intern/cycles/util/util_types_float3.h @@ -24,34 +24,37 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -struct ccl_try_align(16) float3 { -#ifdef __KERNEL_SSE__ - union { - __m128 m128; - struct { float x, y, z, w; }; - }; - - __forceinline float3(); - __forceinline float3(const float3& a); - __forceinline explicit float3(const __m128& a); - - __forceinline operator const __m128&() const; - __forceinline operator __m128&(); - - __forceinline float3& operator =(const float3& a); -#else /* __KERNEL_SSE__ */ - float x, y, z, w; -#endif /* __KERNEL_SSE__ */ - - __forceinline float operator[](int i) const; - __forceinline float& operator[](int i); +struct ccl_try_align(16) float3 +{ +# ifdef __KERNEL_SSE__ + union { + __m128 m128; + struct { + float x, y, z, w; + }; + }; + + __forceinline float3(); + __forceinline float3(const float3 &a); + __forceinline explicit float3(const __m128 &a); + + __forceinline operator const __m128 &() const; + __forceinline operator __m128 &(); + + __forceinline float3 &operator=(const float3 &a); +# else /* __KERNEL_SSE__ */ + float x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); }; ccl_device_inline float3 make_float3(float f); ccl_device_inline float3 make_float3(float x, float y, float z); -ccl_device_inline void print_float3(const char *label, const float3& a); -#endif /* __KERNEL_GPU__ */ +ccl_device_inline void print_float3(const char *label, const float3 &a); +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_FLOAT3_H__ */ +#endif /* __UTIL_TYPES_FLOAT3_H__ */ diff --git a/intern/cycles/util/util_types_float3_impl.h b/intern/cycles/util/util_types_float3_impl.h index 2e840a5c399..ab25fb4c975 100644 --- a/intern/cycles/util/util_types_float3_impl.h +++ b/intern/cycles/util/util_types_float3_impl.h @@ -28,78 +28,76 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -#ifdef __KERNEL_SSE__ +# ifdef __KERNEL_SSE__ __forceinline float3::float3() { } -__forceinline float3::float3(const float3& a) - : m128(a.m128) +__forceinline float3::float3(const float3 &a) : m128(a.m128) { } -__forceinline float3::float3(const __m128& a) - : m128(a) +__forceinline float3::float3(const __m128 &a) : m128(a) { } -__forceinline float3::operator const __m128&() const +__forceinline float3::operator const __m128 &() const { - return m128; + return m128; } -__forceinline float3::operator __m128&() +__forceinline float3::operator __m128 &() { - return m128; + return m128; } -__forceinline float3& float3::operator =(const float3& a) +__forceinline float3 &float3::operator=(const float3 &a) { - m128 = a.m128; - return *this; + m128 = a.m128; + return *this; } -#endif /* __KERNEL_SSE__ */ +# endif /* __KERNEL_SSE__ */ __forceinline float float3::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); } -__forceinline float& float3::operator[](int i) +__forceinline float &float3::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); } ccl_device_inline float3 make_float3(float f) { -#ifdef __KERNEL_SSE__ - float3 a(_mm_set1_ps(f)); -#else - float3 a = {f, f, f, f}; -#endif - return a; +# ifdef __KERNEL_SSE__ + float3 a(_mm_set1_ps(f)); +# else + float3 a = {f, f, f, f}; +# endif + return a; } ccl_device_inline float3 make_float3(float x, float y, float z) { -#ifdef __KERNEL_SSE__ - float3 a(_mm_set_ps(0.0f, z, y, x)); -#else - float3 a = {x, y, z, 0.0f}; -#endif - return a; +# ifdef __KERNEL_SSE__ + float3 a(_mm_set_ps(0.0f, z, y, x)); +# else + float3 a = {x, y, z, 0.0f}; +# endif + return a; } -ccl_device_inline void print_float3(const char *label, const float3& a) +ccl_device_inline void print_float3(const char *label, const float3 &a) { - printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); + printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_FLOAT3_IMPL_H__ */ +#endif /* __UTIL_TYPES_FLOAT3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float4.h b/intern/cycles/util/util_types_float4.h index 5c10d483c2e..c29e6e15bc3 100644 --- a/intern/cycles/util/util_types_float4.h +++ b/intern/cycles/util/util_types_float4.h @@ -26,35 +26,38 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct int4; -struct ccl_try_align(16) float4 { -#ifdef __KERNEL_SSE__ - union { - __m128 m128; - struct { float x, y, z, w; }; - }; - - __forceinline float4(); - __forceinline explicit float4(const __m128& a); - - __forceinline operator const __m128&() const; - __forceinline operator __m128&(); - - __forceinline float4& operator =(const float4& a); - -#else /* __KERNEL_SSE__ */ - float x, y, z, w; -#endif /* __KERNEL_SSE__ */ - - __forceinline float operator[](int i) const; - __forceinline float& operator[](int i); +struct ccl_try_align(16) float4 +{ +# ifdef __KERNEL_SSE__ + union { + __m128 m128; + struct { + float x, y, z, w; + }; + }; + + __forceinline float4(); + __forceinline explicit float4(const __m128 &a); + + __forceinline operator const __m128 &() const; + __forceinline operator __m128 &(); + + __forceinline float4 &operator=(const float4 &a); + +# else /* __KERNEL_SSE__ */ + float x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); }; ccl_device_inline float4 make_float4(float f); ccl_device_inline float4 make_float4(float x, float y, float z, float w); -ccl_device_inline float4 make_float4(const int4& i); -ccl_device_inline void print_float4(const char *label, const float4& a); -#endif /* __KERNEL_GPU__ */ +ccl_device_inline float4 make_float4(const int4 &i); +ccl_device_inline void print_float4(const char *label, const float4 &a); +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_FLOAT4_H__ */ +#endif /* __UTIL_TYPES_FLOAT4_H__ */ diff --git a/intern/cycles/util/util_types_float4_impl.h b/intern/cycles/util/util_types_float4_impl.h index a83148031f1..05a1feee5b2 100644 --- a/intern/cycles/util/util_types_float4_impl.h +++ b/intern/cycles/util/util_types_float4_impl.h @@ -28,85 +28,82 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -#ifdef __KERNEL_SSE__ +# ifdef __KERNEL_SSE__ __forceinline float4::float4() { } -__forceinline float4::float4(const __m128& a) - : m128(a) +__forceinline float4::float4(const __m128 &a) : m128(a) { } -__forceinline float4::operator const __m128&() const +__forceinline float4::operator const __m128 &() const { - return m128; + return m128; } -__forceinline float4::operator __m128&() +__forceinline float4::operator __m128 &() { - return m128; + return m128; } -__forceinline float4& float4::operator =(const float4& a) +__forceinline float4 &float4::operator=(const float4 &a) { - m128 = a.m128; - return *this; + m128 = a.m128; + return *this; } -#endif /* __KERNEL_SSE__ */ +# endif /* __KERNEL_SSE__ */ __forceinline float float4::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); } -__forceinline float& float4::operator[](int i) +__forceinline float &float4::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); } ccl_device_inline float4 make_float4(float f) { -#ifdef __KERNEL_SSE__ - float4 a(_mm_set1_ps(f)); -#else - float4 a = {f, f, f, f}; -#endif - return a; +# ifdef __KERNEL_SSE__ + float4 a(_mm_set1_ps(f)); +# else + float4 a = {f, f, f, f}; +# endif + return a; } ccl_device_inline float4 make_float4(float x, float y, float z, float w) { -#ifdef __KERNEL_SSE__ - float4 a(_mm_set_ps(w, z, y, x)); -#else - float4 a = {x, y, z, w}; -#endif - return a; +# ifdef __KERNEL_SSE__ + float4 a(_mm_set_ps(w, z, y, x)); +# else + float4 a = {x, y, z, w}; +# endif + return a; } -ccl_device_inline float4 make_float4(const int4& i) +ccl_device_inline float4 make_float4(const int4 &i) { -#ifdef __KERNEL_SSE__ - float4 a(_mm_cvtepi32_ps(i.m128)); -#else - float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; -#endif - return a; +# ifdef __KERNEL_SSE__ + float4 a(_mm_cvtepi32_ps(i.m128)); +# else + float4 a = {(float)i.x, (float)i.y, (float)i.z, (float)i.w}; +# endif + return a; } -ccl_device_inline void print_float4(const char *label, const float4& a) +ccl_device_inline void print_float4(const char *label, const float4 &a) { - printf("%s: %.8f %.8f %.8f %.8f\n", - label, - (double)a.x, (double)a.y, (double)a.z, (double)a.w); + printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w); } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_FLOAT4_IMPL_H__ */ +#endif /* __UTIL_TYPES_FLOAT4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_float8.h b/intern/cycles/util/util_types_float8.h index 08720b8ff48..7289e3298c3 100644 --- a/intern/cycles/util/util_types_float8.h +++ b/intern/cycles/util/util_types_float8.h @@ -37,35 +37,38 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -struct ccl_try_align(32) float8 { -#ifdef __KERNEL_AVX2__ - union { - __m256 m256; - struct { float a, b, c, d, e, f, g, h; }; - }; +struct ccl_try_align(32) float8 +{ +# ifdef __KERNEL_AVX2__ + union { + __m256 m256; + struct { + float a, b, c, d, e, f, g, h; + }; + }; - __forceinline float8(); - __forceinline float8(const float8& a); - __forceinline explicit float8(const __m256& a); + __forceinline float8(); + __forceinline float8(const float8 &a); + __forceinline explicit float8(const __m256 &a); - __forceinline operator const __m256&() const; - __forceinline operator __m256&(); + __forceinline operator const __m256 &() const; + __forceinline operator __m256 &(); - __forceinline float8& operator =(const float8& a); + __forceinline float8 &operator=(const float8 &a); -#else /* __KERNEL_AVX2__ */ - float a, b, c, d, e, f, g, h; -#endif /* __KERNEL_AVX2__ */ +# else /* __KERNEL_AVX2__ */ + float a, b, c, d, e, f, g, h; +# endif /* __KERNEL_AVX2__ */ - __forceinline float operator[](int i) const; - __forceinline float& operator[](int i); + __forceinline float operator[](int i) const; + __forceinline float &operator[](int i); }; ccl_device_inline float8 make_float8(float f); -ccl_device_inline float8 make_float8(float a, float b, float c, float d, - float e, float f, float g, float h); -#endif /* __KERNEL_GPU__ */ +ccl_device_inline float8 +make_float8(float a, float b, float c, float d, float e, float f, float g, float h); +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_FLOAT8_H__ */ +#endif /* __UTIL_TYPES_FLOAT8_H__ */ diff --git a/intern/cycles/util/util_types_float8_impl.h b/intern/cycles/util/util_types_float8_impl.h index 84fe233c334..8ce3d81b1bb 100644 --- a/intern/cycles/util/util_types_float8_impl.h +++ b/intern/cycles/util/util_types_float8_impl.h @@ -40,75 +40,73 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -#ifdef __KERNEL_AVX2__ +# ifdef __KERNEL_AVX2__ __forceinline float8::float8() { } -__forceinline float8::float8(const float8& f) - : m256(f.m256) +__forceinline float8::float8(const float8 &f) : m256(f.m256) { } -__forceinline float8::float8(const __m256& f) - : m256(f) +__forceinline float8::float8(const __m256 &f) : m256(f) { } -__forceinline float8::operator const __m256&() const +__forceinline float8::operator const __m256 &() const { - return m256; + return m256; } -__forceinline float8::operator __m256&() +__forceinline float8::operator __m256 &() { - return m256; + return m256; } -__forceinline float8& float8::operator =(const float8& f) +__forceinline float8 &float8::operator=(const float8 &f) { - m256 = f.m256; - return *this; + m256 = f.m256; + return *this; } -#endif /* __KERNEL_AVX2__ */ +# endif /* __KERNEL_AVX2__ */ __forceinline float float8::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); } -__forceinline float& float8::operator[](int i) +__forceinline float &float8::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); } ccl_device_inline float8 make_float8(float f) { -#ifdef __KERNEL_AVX2__ - float8 r(_mm256_set1_ps(f)); -#else - float8 r = {f, f, f, f, f, f, f, f}; -#endif - return r; +# ifdef __KERNEL_AVX2__ + float8 r(_mm256_set1_ps(f)); +# else + float8 r = {f, f, f, f, f, f, f, f}; +# endif + return r; } -ccl_device_inline float8 make_float8(float a, float b, float c, float d, - float e, float f, float g, float h) +ccl_device_inline float8 +make_float8(float a, float b, float c, float d, float e, float f, float g, float h) { -#ifdef __KERNEL_AVX2__ - float8 r(_mm256_set_ps(a, b, c, d, e, f, g, h)); -#else - float8 r = {a, b, c, d, e, f, g, h}; -#endif - return r; +# ifdef __KERNEL_AVX2__ + float8 r(_mm256_set_ps(a, b, c, d, e, f, g, h)); +# else + float8 r = {a, b, c, d, e, f, g, h}; +# endif + return r; } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */ +#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int2.h b/intern/cycles/util/util_types_int2.h index 82e860f89eb..8811e5ec7c2 100644 --- a/intern/cycles/util/util_types_int2.h +++ b/intern/cycles/util/util_types_int2.h @@ -25,15 +25,15 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct int2 { - int x, y; + int x, y; - __forceinline int operator[](int i) const; - __forceinline int& operator[](int i); + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); }; ccl_device_inline int2 make_int2(int x, int y); -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_INT2_H__ */ +#endif /* __UTIL_TYPES_INT2_H__ */ diff --git a/intern/cycles/util/util_types_int2_impl.h b/intern/cycles/util/util_types_int2_impl.h index c7d3942e723..ce95d4f14e5 100644 --- a/intern/cycles/util/util_types_int2_impl.h +++ b/intern/cycles/util/util_types_int2_impl.h @@ -26,25 +26,25 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ int int2::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); } -int& int2::operator[](int i) +int &int2::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); } ccl_device_inline int2 make_int2(int x, int y) { - int2 a = {x, y}; - return a; + int2 a = {x, y}; + return a; } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_INT2_IMPL_H__ */ +#endif /* __UTIL_TYPES_INT2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int3.h b/intern/cycles/util/util_types_int3.h index f68074b982b..09edc09dff3 100644 --- a/intern/cycles/util/util_types_int3.h +++ b/intern/cycles/util/util_types_int3.h @@ -24,34 +24,37 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -struct ccl_try_align(16) int3 { -#ifdef __KERNEL_SSE__ - union { - __m128i m128; - struct { int x, y, z, w; }; - }; - - __forceinline int3(); - __forceinline int3(const int3& a); - __forceinline explicit int3(const __m128i& a); - - __forceinline operator const __m128i&() const; - __forceinline operator __m128i&(); - - __forceinline int3& operator =(const int3& a); -#else /* __KERNEL_SSE__ */ - int x, y, z, w; -#endif /* __KERNEL_SSE__ */ - - __forceinline int operator[](int i) const; - __forceinline int& operator[](int i); +struct ccl_try_align(16) int3 +{ +# ifdef __KERNEL_SSE__ + union { + __m128i m128; + struct { + int x, y, z, w; + }; + }; + + __forceinline int3(); + __forceinline int3(const int3 &a); + __forceinline explicit int3(const __m128i &a); + + __forceinline operator const __m128i &() const; + __forceinline operator __m128i &(); + + __forceinline int3 &operator=(const int3 &a); +# else /* __KERNEL_SSE__ */ + int x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); }; ccl_device_inline int3 make_int3(int i); ccl_device_inline int3 make_int3(int x, int y, int z); -ccl_device_inline void print_int3(const char *label, const int3& a); -#endif /* __KERNEL_GPU__ */ +ccl_device_inline void print_int3(const char *label, const int3 &a); +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_INT3_H__ */ +#endif /* __UTIL_TYPES_INT3_H__ */ diff --git a/intern/cycles/util/util_types_int3_impl.h b/intern/cycles/util/util_types_int3_impl.h index 1b195ca753f..080c892640b 100644 --- a/intern/cycles/util/util_types_int3_impl.h +++ b/intern/cycles/util/util_types_int3_impl.h @@ -28,79 +28,77 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -#ifdef __KERNEL_SSE__ +# ifdef __KERNEL_SSE__ __forceinline int3::int3() { } -__forceinline int3::int3(const __m128i& a) - : m128(a) +__forceinline int3::int3(const __m128i &a) : m128(a) { } -__forceinline int3::int3(const int3& a) - : m128(a.m128) +__forceinline int3::int3(const int3 &a) : m128(a.m128) { } -__forceinline int3::operator const __m128i&() const +__forceinline int3::operator const __m128i &() const { - return m128; + return m128; } -__forceinline int3::operator __m128i&() +__forceinline int3::operator __m128i &() { - return m128; + return m128; } -__forceinline int3& int3::operator =(const int3& a) +__forceinline int3 &int3::operator=(const int3 &a) { - m128 = a.m128; - return *this; + m128 = a.m128; + return *this; } -#endif /* __KERNEL_SSE__ */ +# endif /* __KERNEL_SSE__ */ __forceinline int int3::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); } -__forceinline int& int3::operator[](int i) +__forceinline int &int3::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); } ccl_device_inline int3 make_int3(int i) { -#ifdef __KERNEL_SSE__ - int3 a(_mm_set1_epi32(i)); -#else - int3 a = {i, i, i, i}; -#endif - return a; +# ifdef __KERNEL_SSE__ + int3 a(_mm_set1_epi32(i)); +# else + int3 a = {i, i, i, i}; +# endif + return a; } ccl_device_inline int3 make_int3(int x, int y, int z) { -#ifdef __KERNEL_SSE__ - int3 a(_mm_set_epi32(0, z, y, x)); -#else - int3 a = {x, y, z, 0}; -#endif +# ifdef __KERNEL_SSE__ + int3 a(_mm_set_epi32(0, z, y, x)); +# else + int3 a = {x, y, z, 0}; +# endif - return a; + return a; } -ccl_device_inline void print_int3(const char *label, const int3& a) +ccl_device_inline void print_int3(const char *label, const int3 &a) { - printf("%s: %d %d %d\n", label, a.x, a.y, a.z); + printf("%s: %d %d %d\n", label, a.x, a.y, a.z); } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_INT3_IMPL_H__ */ +#endif /* __UTIL_TYPES_INT3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_int4.h b/intern/cycles/util/util_types_int4.h index 52e6fed8c14..5c7917cf5d6 100644 --- a/intern/cycles/util/util_types_int4.h +++ b/intern/cycles/util/util_types_int4.h @@ -28,36 +28,39 @@ CCL_NAMESPACE_BEGIN struct float3; struct float4; -struct ccl_try_align(16) int4 { -#ifdef __KERNEL_SSE__ - union { - __m128i m128; - struct { int x, y, z, w; }; - }; - - __forceinline int4(); - __forceinline int4(const int4& a); - __forceinline explicit int4(const __m128i& a); - - __forceinline operator const __m128i&() const; - __forceinline operator __m128i&(); - - __forceinline int4& operator=(const int4& a); -#else /* __KERNEL_SSE__ */ - int x, y, z, w; -#endif /* __KERNEL_SSE__ */ - - __forceinline int operator[](int i) const; - __forceinline int& operator[](int i); +struct ccl_try_align(16) int4 +{ +# ifdef __KERNEL_SSE__ + union { + __m128i m128; + struct { + int x, y, z, w; + }; + }; + + __forceinline int4(); + __forceinline int4(const int4 &a); + __forceinline explicit int4(const __m128i &a); + + __forceinline operator const __m128i &() const; + __forceinline operator __m128i &(); + + __forceinline int4 &operator=(const int4 &a); +# else /* __KERNEL_SSE__ */ + int x, y, z, w; +# endif /* __KERNEL_SSE__ */ + + __forceinline int operator[](int i) const; + __forceinline int &operator[](int i); }; ccl_device_inline int4 make_int4(int i); ccl_device_inline int4 make_int4(int x, int y, int z, int w); -ccl_device_inline int4 make_int4(const float3& f); -ccl_device_inline int4 make_int4(const float4& f); -ccl_device_inline void print_int4(const char *label, const int4& a); -#endif /* __KERNEL_GPU__ */ +ccl_device_inline int4 make_int4(const float3 &f); +ccl_device_inline int4 make_int4(const float4 &f); +ccl_device_inline void print_int4(const char *label, const int4 &a); +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_INT4_H__ */ +#endif /* __UTIL_TYPES_INT4_H__ */ diff --git a/intern/cycles/util/util_types_int4_impl.h b/intern/cycles/util/util_types_int4_impl.h index c058f86c400..c6f6ff23a17 100644 --- a/intern/cycles/util/util_types_int4_impl.h +++ b/intern/cycles/util/util_types_int4_impl.h @@ -28,98 +28,96 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -#ifdef __KERNEL_SSE__ +# ifdef __KERNEL_SSE__ __forceinline int4::int4() { } -__forceinline int4::int4(const int4& a) - : m128(a.m128) +__forceinline int4::int4(const int4 &a) : m128(a.m128) { } -__forceinline int4::int4(const __m128i& a) - : m128(a) +__forceinline int4::int4(const __m128i &a) : m128(a) { } -__forceinline int4::operator const __m128i&() const +__forceinline int4::operator const __m128i &() const { - return m128; + return m128; } -__forceinline int4::operator __m128i&() +__forceinline int4::operator __m128i &() { - return m128; + return m128; } -__forceinline int4& int4::operator=(const int4& a) +__forceinline int4 &int4::operator=(const int4 &a) { - m128 = a.m128; - return *this; + m128 = a.m128; + return *this; } -#endif /* __KERNEL_SSE__ */ +# endif /* __KERNEL_SSE__ */ __forceinline int int4::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); } -__forceinline int& int4::operator[](int i) +__forceinline int &int4::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); } ccl_device_inline int4 make_int4(int i) { -#ifdef __KERNEL_SSE__ - int4 a(_mm_set1_epi32(i)); -#else - int4 a = {i, i, i, i}; -#endif - return a; +# ifdef __KERNEL_SSE__ + int4 a(_mm_set1_epi32(i)); +# else + int4 a = {i, i, i, i}; +# endif + return a; } ccl_device_inline int4 make_int4(int x, int y, int z, int w) { -#ifdef __KERNEL_SSE__ - int4 a(_mm_set_epi32(w, z, y, x)); -#else - int4 a = {x, y, z, w}; -#endif - return a; +# ifdef __KERNEL_SSE__ + int4 a(_mm_set_epi32(w, z, y, x)); +# else + int4 a = {x, y, z, w}; +# endif + return a; } -ccl_device_inline int4 make_int4(const float3& f) +ccl_device_inline int4 make_int4(const float3 &f) { -#ifdef __KERNEL_SSE__ - int4 a(_mm_cvtps_epi32(f.m128)); -#else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -#endif - return a; +# ifdef __KERNEL_SSE__ + int4 a(_mm_cvtps_epi32(f.m128)); +# else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +# endif + return a; } -ccl_device_inline int4 make_int4(const float4& f) +ccl_device_inline int4 make_int4(const float4 &f) { -#ifdef __KERNEL_SSE__ - int4 a(_mm_cvtps_epi32(f.m128)); -#else - int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; -#endif - return a; +# ifdef __KERNEL_SSE__ + int4 a(_mm_cvtps_epi32(f.m128)); +# else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +# endif + return a; } -ccl_device_inline void print_int4(const char *label, const int4& a) +ccl_device_inline void print_int4(const char *label, const int4 &a) { - printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); + printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_INT4_IMPL_H__ */ +#endif /* __UTIL_TYPES_INT4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar2.h b/intern/cycles/util/util_types_uchar2.h index f618a2234ca..8cc486e3e48 100644 --- a/intern/cycles/util/util_types_uchar2.h +++ b/intern/cycles/util/util_types_uchar2.h @@ -25,15 +25,15 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct uchar2 { - uchar x, y; + uchar x, y; - __forceinline uchar operator[](int i) const; - __forceinline uchar& operator[](int i); + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); }; ccl_device_inline uchar2 make_uchar2(uchar x, uchar y); -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UCHAR2_H__ */ +#endif /* __UTIL_TYPES_UCHAR2_H__ */ diff --git a/intern/cycles/util/util_types_uchar2_impl.h b/intern/cycles/util/util_types_uchar2_impl.h index d5f196d0ce0..16968c32dd9 100644 --- a/intern/cycles/util/util_types_uchar2_impl.h +++ b/intern/cycles/util/util_types_uchar2_impl.h @@ -26,25 +26,25 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ uchar uchar2::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); } -uchar& uchar2::operator[](int i) +uchar &uchar2::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); } ccl_device_inline uchar2 make_uchar2(uchar x, uchar y) { - uchar2 a = {x, y}; - return a; + uchar2 a = {x, y}; + return a; } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UCHAR2_IMPL_H__ */ +#endif /* __UTIL_TYPES_UCHAR2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar3.h b/intern/cycles/util/util_types_uchar3.h index 1e3644e6fd6..5838c437c70 100644 --- a/intern/cycles/util/util_types_uchar3.h +++ b/intern/cycles/util/util_types_uchar3.h @@ -25,15 +25,15 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct uchar3 { - uchar x, y, z; + uchar x, y, z; - __forceinline uchar operator[](int i) const; - __forceinline uchar& operator[](int i); + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); }; ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z); -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UCHAR3_H__ */ +#endif /* __UTIL_TYPES_UCHAR3_H__ */ diff --git a/intern/cycles/util/util_types_uchar3_impl.h b/intern/cycles/util/util_types_uchar3_impl.h index 611021efb7f..aa31b725731 100644 --- a/intern/cycles/util/util_types_uchar3_impl.h +++ b/intern/cycles/util/util_types_uchar3_impl.h @@ -26,25 +26,25 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ uchar uchar3::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); } -uchar& uchar3::operator[](int i) +uchar &uchar3::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); } ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) { - uchar3 a = {x, y, z}; - return a; + uchar3 a = {x, y, z}; + return a; } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UCHAR3_IMPL_H__ */ +#endif /* __UTIL_TYPES_UCHAR3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uchar4.h b/intern/cycles/util/util_types_uchar4.h index 3802cebbfb9..22b6a1ac705 100644 --- a/intern/cycles/util/util_types_uchar4.h +++ b/intern/cycles/util/util_types_uchar4.h @@ -25,15 +25,15 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct uchar4 { - uchar x, y, z, w; + uchar x, y, z, w; - __forceinline uchar operator[](int i) const; - __forceinline uchar& operator[](int i); + __forceinline uchar operator[](int i) const; + __forceinline uchar &operator[](int i); }; ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w); -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UCHAR4_H__ */ +#endif /* __UTIL_TYPES_UCHAR4_H__ */ diff --git a/intern/cycles/util/util_types_uchar4_impl.h b/intern/cycles/util/util_types_uchar4_impl.h index 03039f60c54..79879f176a6 100644 --- a/intern/cycles/util/util_types_uchar4_impl.h +++ b/intern/cycles/util/util_types_uchar4_impl.h @@ -26,25 +26,25 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ uchar uchar4::operator[](int i) const { - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); } -uchar& uchar4::operator[](int i) +uchar &uchar4::operator[](int i) { - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); } ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) { - uchar4 a = {x, y, z, w}; - return a; + uchar4 a = {x, y, z, w}; + return a; } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UCHAR4_IMPL_H__ */ +#endif /* __UTIL_TYPES_UCHAR4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uint2.h b/intern/cycles/util/util_types_uint2.h index c4a31899614..abcb8ee5346 100644 --- a/intern/cycles/util/util_types_uint2.h +++ b/intern/cycles/util/util_types_uint2.h @@ -25,15 +25,15 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct uint2 { - uint x, y; + uint x, y; - __forceinline uint operator[](uint i) const; - __forceinline uint& operator[](uint i); + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); }; ccl_device_inline uint2 make_uint2(uint x, uint y); -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UINT2_H__ */ +#endif /* __UTIL_TYPES_UINT2_H__ */ diff --git a/intern/cycles/util/util_types_uint2_impl.h b/intern/cycles/util/util_types_uint2_impl.h index b50ffa2667f..db62bd99b89 100644 --- a/intern/cycles/util/util_types_uint2_impl.h +++ b/intern/cycles/util/util_types_uint2_impl.h @@ -26,23 +26,23 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ __forceinline uint uint2::operator[](uint i) const { - util_assert(i < 2); - return *(&x + i); + util_assert(i < 2); + return *(&x + i); } -__forceinline uint& uint2::operator[](uint i) +__forceinline uint &uint2::operator[](uint i) { - util_assert(i < 2); - return *(&x + i); + util_assert(i < 2); + return *(&x + i); } ccl_device_inline uint2 make_uint2(uint x, uint y) { - uint2 a = {x, y}; - return a; + uint2 a = {x, y}; + return a; } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UINT2_IMPL_H__ */ +#endif /* __UTIL_TYPES_UINT2_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uint3.h b/intern/cycles/util/util_types_uint3.h index aeeecd2df06..436d870b621 100644 --- a/intern/cycles/util/util_types_uint3.h +++ b/intern/cycles/util/util_types_uint3.h @@ -25,15 +25,15 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct uint3 { - uint x, y, z; + uint x, y, z; - __forceinline uint operator[](uint i) const; - __forceinline uint& operator[](uint i); + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); }; ccl_device_inline uint3 make_uint3(uint x, uint y, uint z); -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UINT3_H__ */ +#endif /* __UTIL_TYPES_UINT3_H__ */ diff --git a/intern/cycles/util/util_types_uint3_impl.h b/intern/cycles/util/util_types_uint3_impl.h index 26005d5baff..d188fa06e2a 100644 --- a/intern/cycles/util/util_types_uint3_impl.h +++ b/intern/cycles/util/util_types_uint3_impl.h @@ -26,23 +26,23 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ __forceinline uint uint3::operator[](uint i) const { - util_assert(i < 3); - return *(&x + i); + util_assert(i < 3); + return *(&x + i); } -__forceinline uint& uint3::operator[](uint i) +__forceinline uint &uint3::operator[](uint i) { - util_assert(i < 3); - return *(&x + i); + util_assert(i < 3); + return *(&x + i); } ccl_device_inline uint3 make_uint3(uint x, uint y, uint z) { - uint3 a = {x, y, z}; - return a; + uint3 a = {x, y, z}; + return a; } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UINT3_IMPL_H__ */ +#endif /* __UTIL_TYPES_UINT3_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_uint4.h b/intern/cycles/util/util_types_uint4.h index 2d3a7bb85e4..57f2859fedf 100644 --- a/intern/cycles/util/util_types_uint4.h +++ b/intern/cycles/util/util_types_uint4.h @@ -25,15 +25,15 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct uint4 { - uint x, y, z, w; + uint x, y, z, w; - __forceinline uint operator[](uint i) const; - __forceinline uint& operator[](uint i); + __forceinline uint operator[](uint i) const; + __forceinline uint &operator[](uint i); }; ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UINT4_H__ */ +#endif /* __UTIL_TYPES_UINT4_H__ */ diff --git a/intern/cycles/util/util_types_uint4_impl.h b/intern/cycles/util/util_types_uint4_impl.h index 6d48131a446..bac8d23030d 100644 --- a/intern/cycles/util/util_types_uint4_impl.h +++ b/intern/cycles/util/util_types_uint4_impl.h @@ -26,23 +26,23 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ __forceinline uint uint4::operator[](uint i) const { - util_assert(i < 3); - return *(&x + i); + util_assert(i < 3); + return *(&x + i); } -__forceinline uint& uint4::operator[](uint i) +__forceinline uint &uint4::operator[](uint i) { - util_assert(i < 3); - return *(&x + i); + util_assert(i < 3); + return *(&x + i); } ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) { - uint4 a = {x, y, z, w}; - return a; + uint4 a = {x, y, z, w}; + return a; } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_UINT4_IMPL_H__ */ +#endif /* __UTIL_TYPES_UINT4_IMPL_H__ */ diff --git a/intern/cycles/util/util_types_ushort4.h b/intern/cycles/util/util_types_ushort4.h index fc234b8abe8..476ceec622c 100644 --- a/intern/cycles/util/util_types_ushort4.h +++ b/intern/cycles/util/util_types_ushort4.h @@ -26,11 +26,11 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ struct ushort4 { - uint16_t x, y, z, w; + uint16_t x, y, z, w; }; #endif CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_USHORT4_H__ */ +#endif /* __UTIL_TYPES_USHORT4_H__ */ diff --git a/intern/cycles/util/util_types_vector3.h b/intern/cycles/util/util_types_vector3.h index 12acf9dc959..728c7ca62a1 100644 --- a/intern/cycles/util/util_types_vector3.h +++ b/intern/cycles/util/util_types_vector3.h @@ -24,18 +24,16 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -template<typename T> -class vector3 -{ -public: - T x, y, z; +template<typename T> class vector3 { + public: + T x, y, z; - __forceinline vector3(); - __forceinline vector3(const T& a); - __forceinline vector3(const T& x, const T& y, const T& z); + __forceinline vector3(); + __forceinline vector3(const T &a); + __forceinline vector3(const T &x, const T &y, const T &z); }; -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_VECTOR3_H__ */ +#endif /* __UTIL_TYPES_VECTOR3_H__ */ diff --git a/intern/cycles/util/util_types_vector3_impl.h b/intern/cycles/util/util_types_vector3_impl.h index 2f6b8368540..33ba53e20b2 100644 --- a/intern/cycles/util/util_types_vector3_impl.h +++ b/intern/cycles/util/util_types_vector3_impl.h @@ -24,24 +24,20 @@ CCL_NAMESPACE_BEGIN #ifndef __KERNEL_GPU__ -template<typename T> -ccl_always_inline vector3<T>::vector3() +template<typename T> ccl_always_inline vector3<T>::vector3() { } -template<typename T> -ccl_always_inline vector3<T>::vector3(const T& a) - : x(a), y(a), z(a) +template<typename T> ccl_always_inline vector3<T>::vector3(const T &a) : x(a), y(a), z(a) { } template<typename T> -ccl_always_inline vector3<T>::vector3(const T& x, const T& y, const T& z) - : x(x), y(y), z(z) +ccl_always_inline vector3<T>::vector3(const T &x, const T &y, const T &z) : x(x), y(y), z(z) { } -#endif /* __KERNEL_GPU__ */ +#endif /* __KERNEL_GPU__ */ CCL_NAMESPACE_END -#endif /* __UTIL_TYPES_VECTOR3_IMPL_H__ */ +#endif /* __UTIL_TYPES_VECTOR3_IMPL_H__ */ diff --git a/intern/cycles/util/util_unique_ptr.h b/intern/cycles/util/util_unique_ptr.h index 1ceae73172e..3aaaf083eff 100644 --- a/intern/cycles/util/util_unique_ptr.h +++ b/intern/cycles/util/util_unique_ptr.h @@ -25,4 +25,4 @@ using std::unique_ptr; CCL_NAMESPACE_END -#endif /* __UTIL_UNIQUE_PTR_H__ */ +#endif /* __UTIL_UNIQUE_PTR_H__ */ diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index 18fa231d6e7..437478d64d3 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -32,30 +32,28 @@ CCL_NAMESPACE_BEGIN * - Use own allocator which keeps track of used/peak memory. * - Have method to ensure capacity is re-set to 0. */ -template<typename value_type, - typename allocator_type = GuardedAllocator<value_type> > -class vector : public std::vector<value_type, allocator_type> -{ -public: - typedef std::vector<value_type, allocator_type> BaseClass; - - /* Inherit all constructors from base class. */ - using BaseClass::vector; - - /* Try as hard as possible to use zero memory. */ - void free_memory() - { - BaseClass::resize(0); - BaseClass::shrink_to_fit(); - } - - /* Some external API might demand working with std::vector. */ - operator std::vector<value_type>() - { - return std::vector<value_type>(this->begin(), this->end()); - } +template<typename value_type, typename allocator_type = GuardedAllocator<value_type>> +class vector : public std::vector<value_type, allocator_type> { + public: + typedef std::vector<value_type, allocator_type> BaseClass; + + /* Inherit all constructors from base class. */ + using BaseClass::vector; + + /* Try as hard as possible to use zero memory. */ + void free_memory() + { + BaseClass::resize(0); + BaseClass::shrink_to_fit(); + } + + /* Some external API might demand working with std::vector. */ + operator std::vector<value_type>() + { + return std::vector<value_type>(this->begin(), this->end()); + } }; CCL_NAMESPACE_END -#endif /* __UTIL_VECTOR_H__ */ +#endif /* __UTIL_VECTOR_H__ */ diff --git a/intern/cycles/util/util_version.h b/intern/cycles/util/util_version.h index 980c5a269e6..38829d3a29c 100644 --- a/intern/cycles/util/util_version.h +++ b/intern/cycles/util/util_version.h @@ -21,17 +21,15 @@ CCL_NAMESPACE_BEGIN -#define CYCLES_VERSION_MAJOR 1 -#define CYCLES_VERSION_MINOR 9 -#define CYCLES_VERSION_PATCH 0 +#define CYCLES_VERSION_MAJOR 1 +#define CYCLES_VERSION_MINOR 9 +#define CYCLES_VERSION_PATCH 0 -#define CYCLES_MAKE_VERSION_STRING2(a,b,c) #a "." #b "." #c -#define CYCLES_MAKE_VERSION_STRING(a,b,c) CYCLES_MAKE_VERSION_STRING2(a,b,c) +#define CYCLES_MAKE_VERSION_STRING2(a, b, c) #a "." #b "." #c +#define CYCLES_MAKE_VERSION_STRING(a, b, c) CYCLES_MAKE_VERSION_STRING2(a, b, c) #define CYCLES_VERSION_STRING \ - CYCLES_MAKE_VERSION_STRING(CYCLES_VERSION_MAJOR, \ - CYCLES_VERSION_MINOR, \ - CYCLES_VERSION_PATCH) + CYCLES_MAKE_VERSION_STRING(CYCLES_VERSION_MAJOR, CYCLES_VERSION_MINOR, CYCLES_VERSION_PATCH) CCL_NAMESPACE_END -#endif /* __UTIL_VERSION_H__ */ +#endif /* __UTIL_VERSION_H__ */ diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp index c0ddc2a88bf..f23174fd6dc 100644 --- a/intern/cycles/util/util_view.cpp +++ b/intern/cycles/util/util_view.cpp @@ -24,9 +24,9 @@ #include "util/util_view.h" #ifdef __APPLE__ -#include <GLUT/glut.h> +# include <GLUT/glut.h> #else -#include <GL/glut.h> +# include <GL/glut.h> #endif CCL_NAMESPACE_BEGIN @@ -34,241 +34,249 @@ CCL_NAMESPACE_BEGIN /* structs */ struct View { - ViewInitFunc initf; - ViewExitFunc exitf; - ViewResizeFunc resize; - ViewDisplayFunc display; - ViewKeyboardFunc keyboard; - ViewMotionFunc motion; + ViewInitFunc initf; + ViewExitFunc exitf; + ViewResizeFunc resize; + ViewDisplayFunc display; + ViewKeyboardFunc keyboard; + ViewMotionFunc motion; - bool first_display; - bool redraw; + bool first_display; + bool redraw; - int mouseX, mouseY; - int mouseBut0, mouseBut2; + int mouseX, mouseY; + int mouseBut0, mouseBut2; - int width, height; + int width, height; } V; /* public */ static void view_display_text(int x, int y, const char *text) { - const char *c; + const char *c; - glRasterPos3f(x, y, 0); + glRasterPos3f(x, y, 0); - for(c = text; *c != '\0'; c++) - glutBitmapCharacter(GLUT_BITMAP_HELVETICA_10, *c); + for (c = text; *c != '\0'; c++) + glutBitmapCharacter(GLUT_BITMAP_HELVETICA_10, *c); } void view_display_info(const char *info) { - const int height = 20; + const int height = 20; - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glColor4f(0.1f, 0.1f, 0.1f, 0.8f); - glRectf(0.0f, V.height - height, V.width, V.height); - glDisable(GL_BLEND); + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glColor4f(0.1f, 0.1f, 0.1f, 0.8f); + glRectf(0.0f, V.height - height, V.width, V.height); + glDisable(GL_BLEND); - glColor3f(0.5f, 0.5f, 0.5f); + glColor3f(0.5f, 0.5f, 0.5f); - view_display_text(10, 7 + V.height - height, info); + view_display_text(10, 7 + V.height - height, info); - glColor3f(1.0f, 1.0f, 1.0f); + glColor3f(1.0f, 1.0f, 1.0f); } void view_display_help() { - const int w = (int)((float)V.width / 1.15f); - const int h = (int)((float)V.height / 1.15f); + const int w = (int)((float)V.width / 1.15f); + const int h = (int)((float)V.height / 1.15f); - const int x1 = (V.width - w) / 2; - const int x2 = x1 + w; + const int x1 = (V.width - w) / 2; + const int x2 = x1 + w; - const int y1 = (V.height - h) / 2; - const int y2 = y1 + h; + const int y1 = (V.height - h) / 2; + const int y2 = y1 + h; - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glColor4f(0.5f, 0.5f, 0.5f, 0.8f); - glRectf(x1, y1, x2, y2); - glDisable(GL_BLEND); + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glColor4f(0.5f, 0.5f, 0.5f, 0.8f); + glRectf(x1, y1, x2, y2); + glDisable(GL_BLEND); - glColor3f(0.8f, 0.8f, 0.8f); + glColor3f(0.8f, 0.8f, 0.8f); - string info = string("Cycles Renderer ") + CYCLES_VERSION_STRING; + string info = string("Cycles Renderer ") + CYCLES_VERSION_STRING; - view_display_text(x1+20, y2-20, info.c_str()); - view_display_text(x1+20, y2-40, "(C) 2011-2016 Blender Foundation"); - view_display_text(x1+20, y2-80, "Controls:"); - view_display_text(x1+20, y2-100, "h: Info/Help"); - view_display_text(x1+20, y2-120, "r: Reset"); - view_display_text(x1+20, y2-140, "p: Pause"); - view_display_text(x1+20, y2-160, "esc: Cancel"); - view_display_text(x1+20, y2-180, "q: Quit program"); + view_display_text(x1 + 20, y2 - 20, info.c_str()); + view_display_text(x1 + 20, y2 - 40, "(C) 2011-2016 Blender Foundation"); + view_display_text(x1 + 20, y2 - 80, "Controls:"); + view_display_text(x1 + 20, y2 - 100, "h: Info/Help"); + view_display_text(x1 + 20, y2 - 120, "r: Reset"); + view_display_text(x1 + 20, y2 - 140, "p: Pause"); + view_display_text(x1 + 20, y2 - 160, "esc: Cancel"); + view_display_text(x1 + 20, y2 - 180, "q: Quit program"); - view_display_text(x1+20, y2-210, "i: Interactive mode"); - view_display_text(x1+20, y2-230, "Left mouse: Move camera"); - view_display_text(x1+20, y2-250, "Right mouse: Rotate camera"); - view_display_text(x1+20, y2-270, "W/A/S/D: Move camera"); - view_display_text(x1+20, y2-290, "0/1/2/3: Set max bounces"); + view_display_text(x1 + 20, y2 - 210, "i: Interactive mode"); + view_display_text(x1 + 20, y2 - 230, "Left mouse: Move camera"); + view_display_text(x1 + 20, y2 - 250, "Right mouse: Rotate camera"); + view_display_text(x1 + 20, y2 - 270, "W/A/S/D: Move camera"); + view_display_text(x1 + 20, y2 - 290, "0/1/2/3: Set max bounces"); - glColor3f(1.0f, 1.0f, 1.0f); + glColor3f(1.0f, 1.0f, 1.0f); } static void view_display() { - if(V.first_display) { - if(V.initf) V.initf(); - if(V.exitf) atexit(V.exitf); + if (V.first_display) { + if (V.initf) + V.initf(); + if (V.exitf) + atexit(V.exitf); - V.first_display = false; - } + V.first_display = false; + } - glClearColor(0.05f, 0.05f, 0.05f, 0.0f); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glClearColor(0.05f, 0.05f, 0.05f, 0.0f); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); - gluOrtho2D(0, V.width, 0, V.height); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + gluOrtho2D(0, V.width, 0, V.height); - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); - glRasterPos3f(0, 0, 0); + glRasterPos3f(0, 0, 0); - if(V.display) - V.display(); + if (V.display) + V.display(); - glutSwapBuffers(); + glutSwapBuffers(); } static void view_reshape(int width, int height) { - if(width <= 0 || height <= 0) - return; + if (width <= 0 || height <= 0) + return; - V.width = width; - V.height = height; + V.width = width; + V.height = height; - glViewport(0, 0, width, height); + glViewport(0, 0, width, height); - glMatrixMode(GL_PROJECTION); - glLoadIdentity(); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); - glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); - if(V.resize) - V.resize(width, height); + if (V.resize) + V.resize(width, height); } static void view_keyboard(unsigned char key, int x, int y) { - if(V.keyboard) - V.keyboard(key); - - if(key == 'm') - printf("mouse %d %d\n", x, y); - if(key == 'q') { - if(V.exitf) V.exitf(); - exit(0); - } + if (V.keyboard) + V.keyboard(key); + + if (key == 'm') + printf("mouse %d %d\n", x, y); + if (key == 'q') { + if (V.exitf) + V.exitf(); + exit(0); + } } static void view_mouse(int button, int state, int x, int y) { - if(button == 0) { - if(state == GLUT_DOWN) { - V.mouseX = x; - V.mouseY = y; - V.mouseBut0 = 1; - } - else if(state == GLUT_UP) { - V.mouseBut0 = 0; - } - } - else if(button == 2) { - if(state == GLUT_DOWN) { - V.mouseX = x; - V.mouseY = y; - V.mouseBut2 = 1; - } - else if(state == GLUT_UP) { - V.mouseBut2 = 0; - } - } + if (button == 0) { + if (state == GLUT_DOWN) { + V.mouseX = x; + V.mouseY = y; + V.mouseBut0 = 1; + } + else if (state == GLUT_UP) { + V.mouseBut0 = 0; + } + } + else if (button == 2) { + if (state == GLUT_DOWN) { + V.mouseX = x; + V.mouseY = y; + V.mouseBut2 = 1; + } + else if (state == GLUT_UP) { + V.mouseBut2 = 0; + } + } } static void view_motion(int x, int y) { - const int but = V.mouseBut0? 0:2; - const int distX = x - V.mouseX; - const int distY = y - V.mouseY; + const int but = V.mouseBut0 ? 0 : 2; + const int distX = x - V.mouseX; + const int distY = y - V.mouseY; - if(V.motion) - V.motion(distX, distY, but); + if (V.motion) + V.motion(distX, distY, but); - V.mouseX = x; - V.mouseY = y; + V.mouseX = x; + V.mouseY = y; } static void view_idle() { - if(V.redraw) { - V.redraw = false; - glutPostRedisplay(); - } + if (V.redraw) { + V.redraw = false; + glutPostRedisplay(); + } - time_sleep(0.1); + time_sleep(0.1); } -void view_main_loop(const char *title, int width, int height, - ViewInitFunc initf, ViewExitFunc exitf, - ViewResizeFunc resize, ViewDisplayFunc display, - ViewKeyboardFunc keyboard, ViewMotionFunc motion) +void view_main_loop(const char *title, + int width, + int height, + ViewInitFunc initf, + ViewExitFunc exitf, + ViewResizeFunc resize, + ViewDisplayFunc display, + ViewKeyboardFunc keyboard, + ViewMotionFunc motion) { - const char *name = "app"; - char *argv = (char*)name; - int argc = 1; - - memset(&V, 0, sizeof(V)); - V.width = width; - V.height = height; - V.first_display = true; - V.redraw = false; - V.initf = initf; - V.exitf = exitf; - V.resize = resize; - V.display = display; - V.keyboard = keyboard; - V.motion = motion; - - glutInit(&argc, &argv); - glutInitWindowSize(width, height); - glutInitWindowPosition(0, 0); - glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH); - glutCreateWindow(title); - - glewInit(); - - view_reshape(width, height); - - glutDisplayFunc(view_display); - glutIdleFunc(view_idle); - glutReshapeFunc(view_reshape); - glutKeyboardFunc(view_keyboard); - glutMouseFunc(view_mouse); - glutMotionFunc(view_motion); - - glutMainLoop(); + const char *name = "app"; + char *argv = (char *)name; + int argc = 1; + + memset(&V, 0, sizeof(V)); + V.width = width; + V.height = height; + V.first_display = true; + V.redraw = false; + V.initf = initf; + V.exitf = exitf; + V.resize = resize; + V.display = display; + V.keyboard = keyboard; + V.motion = motion; + + glutInit(&argc, &argv); + glutInitWindowSize(width, height); + glutInitWindowPosition(0, 0); + glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH); + glutCreateWindow(title); + + glewInit(); + + view_reshape(width, height); + + glutDisplayFunc(view_display); + glutIdleFunc(view_idle); + glutReshapeFunc(view_reshape); + glutKeyboardFunc(view_keyboard); + glutMouseFunc(view_mouse); + glutMotionFunc(view_motion); + + glutMainLoop(); } void view_redraw() { - V.redraw = true; + V.redraw = true; } CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_view.h b/intern/cycles/util/util_view.h index ae50b098b39..ad5c53ee5d5 100644 --- a/intern/cycles/util/util_view.h +++ b/intern/cycles/util/util_view.h @@ -29,10 +29,15 @@ typedef void (*ViewDisplayFunc)(); typedef void (*ViewKeyboardFunc)(unsigned char key); typedef void (*ViewMotionFunc)(int x, int y, int button); -void view_main_loop(const char *title, int width, int height, - ViewInitFunc initf, ViewExitFunc exitf, - ViewResizeFunc resize, ViewDisplayFunc display, - ViewKeyboardFunc keyboard, ViewMotionFunc motion); +void view_main_loop(const char *title, + int width, + int height, + ViewInitFunc initf, + ViewExitFunc exitf, + ViewResizeFunc resize, + ViewDisplayFunc display, + ViewKeyboardFunc keyboard, + ViewMotionFunc motion); void view_display_info(const char *info); void view_display_help(); @@ -40,4 +45,4 @@ void view_redraw(); CCL_NAMESPACE_END -#endif /*__UTIL_VIEW_H__*/ +#endif /*__UTIL_VIEW_H__*/ diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h index bd1bc85adff..0d85c5437f6 100644 --- a/intern/cycles/util/util_windows.h +++ b/intern/cycles/util/util_windows.h @@ -19,18 +19,18 @@ #ifdef _WIN32 -#ifndef NOGDI -# define NOGDI -#endif -#ifndef NOMINMAX -# define NOMINMAX -#endif -#ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -#endif +# ifndef NOGDI +# define NOGDI +# endif +# ifndef NOMINMAX +# define NOMINMAX +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif -#include <windows.h> +# include <windows.h> -#endif /* _WIN32 */ +#endif /* _WIN32 */ -#endif /* __UTIL_WINDOWS_H__ */ +#endif /* __UTIL_WINDOWS_H__ */ diff --git a/intern/cycles/util/util_xml.h b/intern/cycles/util/util_xml.h index c8a3a495f30..6f06f17937b 100644 --- a/intern/cycles/util/util_xml.h +++ b/intern/cycles/util/util_xml.h @@ -38,4 +38,4 @@ using PUGIXML_NAMESPACE::xml_parse_result; CCL_NAMESPACE_END -#endif /* __UTIL_XML_H__ */ +#endif /* __UTIL_XML_H__ */ |