diff options
Diffstat (limited to 'intern/cycles/util')
55 files changed, 1666 insertions, 383 deletions
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 6120e7e8456..0acb9e9304c 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -1,16 +1,16 @@ set(INC . + ../../glew-mx ) set(INC_SYS - ${GLEW_INCLUDE_PATH} - ${OPENGL_INCLUDE_DIR} + ${GLEW_INCLUDE_DIR} ) set(SRC + util_aligned_malloc.cpp util_cache.cpp - util_dynlib.cpp util_logging.cpp util_md5.cpp util_path.cpp @@ -22,6 +22,10 @@ set(SRC util_transform.cpp ) +if(NOT CYCLES_STANDALONE_REPOSITORY) + add_definitions(-DWITH_GLEW_MX) +endif() + if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI) list(APPEND SRC util_view.cpp @@ -30,11 +34,12 @@ endif() set(SRC_HEADERS util_algorithm.h + util_aligned_malloc.h util_args.h + util_atomic.h util_boundbox.h util_cache.h util_debug.h - util_dynlib.h util_foreach.h util_function.h util_half.h @@ -44,6 +49,7 @@ set(SRC_HEADERS util_logging.h util_map.h util_math.h + util_math_fast.h util_md5.h util_opengl.h util_optimization.h @@ -68,7 +74,18 @@ set(SRC_HEADERS util_xml.h ) +if(WITH_CYCLES_DEBUG) + list(APPEND SRC + util_guarded_allocator.cpp + ) + list(APPEND SRC_HEADERS + util_guarded_allocator.h + ) +endif() + include_directories(${INC}) include_directories(SYSTEM ${INC_SYS}) +add_definitions(${GL_DEFINITIONS}) + add_library(cycles_util ${SRC} ${SRC_HEADERS}) diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h index 5865f3f04bb..5c79c00cc98 100644 --- a/intern/cycles/util/util_algorithm.h +++ b/intern/cycles/util/util_algorithm.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_ALGORITHM_H__ diff --git a/intern/cycles/util/util_aligned_malloc.cpp b/intern/cycles/util/util_aligned_malloc.cpp new file mode 100644 index 00000000000..b161a55c15e --- /dev/null +++ b/intern/cycles/util/util_aligned_malloc.cpp @@ -0,0 +1,83 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util_aligned_malloc.h" +#include "util_guarded_allocator.h" + +#include <cassert> + +/* Adopted from Libmv. */ + +#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__) +/* Needed for memalign on Linux and _aligned_alloc on Windows. */ +# ifdef FREE_WINDOWS +/* Make sure _aligned_malloc is included. */ +# ifdef __MSVCRT_VERSION__ +# undef __MSVCRT_VERSION__ +# endif +# define __MSVCRT_VERSION__ 0x0700 +# endif /* FREE_WINDOWS */ +# include <malloc.h> +#else +/* Apple's malloc is 16-byte aligned, and does not have malloc.h, so include + * stdilb instead. + */ +# include <cstdlib> +#endif + +CCL_NAMESPACE_BEGIN + +void *util_aligned_malloc(size_t size, int alignment) +{ +#ifdef WITH_BLENDER_GUARDEDALLOC + return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc"); +#endif +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#elif defined(__APPLE__) + /* On Mac OS X, both the heap and the stack are guaranteed 16-byte aligned so + * they work natively with SSE types with no further work. 
+ */ + assert(alignment == 16); + return malloc(size); +#elif defined(__FreeBSD__) || defined(__NetBSD__) + void *result; + if(posix_memalign(&result, alignment, size)) { + /* Non-zero means allocation error + * either no allocation or bad alignment value. + */ + return NULL; + } + return result; +#else /* This is for Linux. */ + return memalign(alignment, size); +#endif +} + +void util_aligned_free(void *ptr) +{ +#if defined(WITH_BLENDER_GUARDEDALLOC) + if(ptr != NULL) { + MEM_freeN(ptr); + } +#elif defined(_WIN32) + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_dynlib.h b/intern/cycles/util/util_aligned_malloc.h index b30cf98c1b9..ecc0f28c376 100644 --- a/intern/cycles/util/util_dynlib.h +++ b/intern/cycles/util/util_aligned_malloc.h @@ -1,5 +1,5 @@ /* - * Copyright 2011-2013 Blender Foundation + * Copyright 2011-2015 Blender Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -11,21 +11,22 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ -#ifndef __UTIL_DYNLIB_H__ -#define __UTIL_DYNLIB_H__ +#ifndef __UTIL_ALIGNED_MALLOC_H__ +#define __UTIL_ALIGNED_MALLOC_H__ + +#include "util_types.h" CCL_NAMESPACE_BEGIN -struct DynamicLibrary; +/* Allocate block of size bytes at least aligned to a given value. */ +void *util_aligned_malloc(size_t size, int alignment); -DynamicLibrary *dynamic_library_open(const char *name); -void *dynamic_library_find(DynamicLibrary *lib, const char *name); -void dynamic_library_close(DynamicLibrary *lib); +/* Free memory allocated by util_aligned_malloc. 
*/ +void util_aligned_free(void *ptr); CCL_NAMESPACE_END -#endif /* __UTIL_DYNLIB_H__ */ - +#endif /* __UTIL_ALIGNED_MALLOC_H__ */ diff --git a/intern/cycles/util/util_args.h b/intern/cycles/util/util_args.h index 55933c895bf..a53fc061758 100644 --- a/intern/cycles/util/util_args.h +++ b/intern/cycles/util/util_args.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_ARGS_H__ diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h new file mode 100644 index 00000000000..1d1e2963348 --- /dev/null +++ b/intern/cycles/util/util_atomic.h @@ -0,0 +1,65 @@ +/* + * Copyright 2014 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_ATOMIC_H__ +#define __UTIL_ATOMIC_H__ + +#ifndef __KERNEL_GPU__ + +/* Using atomic ops header from Blender. 
*/ +#include "atomic_ops.h" + +ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value) +{ + size_t prev_value = *maximum_value; + while(prev_value < value) { + if(atomic_cas_z(maximum_value, prev_value, value) != prev_value) { + break; + } + } +} + +#else /* __KERNEL_GPU__ */ + +#ifdef __KERNEL_OPENCL__ + +/* Float atomics implementation credits: + * http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html + */ +ccl_device_inline void atomic_add_float(volatile ccl_global float *source, + const float operand) +{ + union { + unsigned int int_value; + float float_value; + } new_value; + union { + unsigned int int_value; + float float_value; + } prev_value; + do { + prev_value.float_value = *source; + new_value.float_value = prev_value.float_value + operand; + } while(atomic_cmpxchg((volatile ccl_global unsigned int *)source, + prev_value.int_value, + new_value.int_value) != prev_value.int_value); +} + +#endif /* __KERNEL_OPENCL__ */ + +#endif /* __KERNEL_GPU__ */ + +#endif /* __UTIL_ATOMIC_H__ */ diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h index a71e0399619..cef5adc0a61 100644 --- a/intern/cycles/util/util_boundbox.h +++ b/intern/cycles/util/util_boundbox.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_BOUNDBOX_H__ diff --git a/intern/cycles/util/util_cache.cpp b/intern/cycles/util/util_cache.cpp index e20c3a67b75..5eebfb18155 100644 --- a/intern/cycles/util/util_cache.cpp +++ b/intern/cycles/util/util_cache.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include <stdio.h> @@ -24,12 +24,6 @@ #include "util_path.h" #include "util_types.h" -#include <boost/version.hpp> - -#if (BOOST_VERSION < 104400) -# define BOOST_FILESYSTEM_VERSION 2 -#endif - #include <boost/filesystem.hpp> #include <boost/algorithm/string.hpp> diff --git a/intern/cycles/util/util_cache.h b/intern/cycles/util/util_cache.h index bfb2877a22b..343fa36817d 100644 --- a/intern/cycles/util/util_cache.h +++ b/intern/cycles/util/util_cache.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_CACHE_H__ @@ -105,7 +105,7 @@ public: return false; } - if(!size) + if((size == 0) || (size % sizeof(T)) != 0) return false; data.resize(size/sizeof(T)); diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h index 53b3d72de67..d3598f84b94 100644 --- a/intern/cycles/util/util_color.h +++ b/intern/cycles/util/util_color.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_COLOR_H__ diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h index 79fac506b98..6b61a49fcc3 100644 --- a/intern/cycles/util/util_debug.h +++ b/intern/cycles/util/util_debug.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_DEBUG_H__ diff --git a/intern/cycles/util/util_dynlib.cpp b/intern/cycles/util/util_dynlib.cpp deleted file mode 100644 index 587cad607c8..00000000000 --- a/intern/cycles/util/util_dynlib.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2011-2013 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License - */ - -#include <stdlib.h> - -#include "util_dynlib.h" - -#ifdef _WIN32 - -#include <windows.h> - -CCL_NAMESPACE_BEGIN - -struct DynamicLibrary { - HMODULE module; -}; - -DynamicLibrary *dynamic_library_open(const char *name) -{ - HMODULE module = LoadLibrary(name); - - if(!module) - return NULL; - - DynamicLibrary *lib = new DynamicLibrary(); - lib->module = module; - - return lib; -} - -void *dynamic_library_find(DynamicLibrary *lib, const char *name) -{ - return (void*)GetProcAddress(lib->module, name); -} - -void dynamic_library_close(DynamicLibrary *lib) -{ - FreeLibrary(lib->module); - delete lib; -} - -CCL_NAMESPACE_END - -#else - -#include <dlfcn.h> - -CCL_NAMESPACE_BEGIN - -struct DynamicLibrary { - void *module; -}; - -DynamicLibrary *dynamic_library_open(const char *name) -{ - void *module = dlopen(name, RTLD_NOW); - - if(!module) - return NULL; - - DynamicLibrary *lib = new DynamicLibrary(); - lib->module = module; - - return lib; -} - -void *dynamic_library_find(DynamicLibrary 
*lib, const char *name) -{ - return dlsym(lib->module, name); -} - -void dynamic_library_close(DynamicLibrary *lib) -{ - dlclose(lib->module); - delete lib; -} - -CCL_NAMESPACE_END - -#endif - diff --git a/intern/cycles/util/util_foreach.h b/intern/cycles/util/util_foreach.h index df3277fbf37..4f7337107b3 100644 --- a/intern/cycles/util/util_foreach.h +++ b/intern/cycles/util/util_foreach.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_FOREACH_H__ @@ -19,8 +19,12 @@ /* Use Boost to get nice foreach() loops for STL data structures. */ -#include <boost/foreach.hpp> -#define foreach BOOST_FOREACH +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) +# define foreach(x, y) for(x : y) +#else +# include <boost/foreach.hpp> +# define foreach BOOST_FOREACH +#endif #endif /* __UTIL_FOREACH_H__ */ diff --git a/intern/cycles/util/util_function.h b/intern/cycles/util/util_function.h index 6aa014a08a6..6d0f0b444a9 100644 --- a/intern/cycles/util/util_function.h +++ b/intern/cycles/util/util_function.h @@ -11,20 +11,39 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_FUNCTION_H__ #define __UTIL_FUNCTION_H__ -#include <boost/bind.hpp> -#include <boost/function.hpp> +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) +# include <functional> +#else +# include <boost/bind.hpp> +# include <boost/function.hpp> +#endif CCL_NAMESPACE_BEGIN +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) +# define function_bind std::bind +# define function_null nullptr +using std::function; +using std::placeholders::_1; +using std::placeholders::_2; +using std::placeholders::_3; +using std::placeholders::_4; +using std::placeholders::_5; +using std::placeholders::_6; +using std::placeholders::_7; +using std::placeholders::_8; +using std::placeholders::_9; +#else using boost::function; -#define function_bind boost::bind - +# define function_bind boost::bind +# define function_null NULL +#endif CCL_NAMESPACE_END #endif /* __UTIL_FUNCTION_H__ */ diff --git a/intern/cycles/util/util_guarded_allocator.cpp b/intern/cycles/util/util_guarded_allocator.cpp new file mode 100644 index 00000000000..8de6e254cbf --- /dev/null +++ b/intern/cycles/util/util_guarded_allocator.cpp @@ -0,0 +1,49 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "util_guarded_allocator.h" +#include "util_stats.h" + +CCL_NAMESPACE_BEGIN + +static Stats global_stats; + +/* Internal API. 
*/ + +void util_guarded_mem_alloc(size_t n) +{ + global_stats.mem_alloc(n); +} + +void util_guarded_mem_free(size_t n) +{ + global_stats.mem_free(n); +} + +/* Public API. */ + +size_t util_guarded_get_mem_used(void) +{ + return global_stats.mem_used; +} + +size_t util_guarded_get_mem_peak(void) +{ + return global_stats.mem_peak; +} + + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h new file mode 100644 index 00000000000..2df717253e3 --- /dev/null +++ b/intern/cycles/util/util_guarded_allocator.h @@ -0,0 +1,86 @@ +/* + * Copyright 2011-2015 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_GUARDED_ALLOCATOR_H__ +#define __UTIL_GUARDED_ALLOCATOR_H__ + +/* Define this in order to use Blender's guarded allocator to keep + * track of allocated buffers, their sizes and peak memory usage. + * + * This is usually a bad level call, but it's really handy to keep + * track of overall peak memory consumption during the scene + * synchronization step. + */ +#undef WITH_BLENDER_GUARDEDALLOC + +#include <memory> + +#include "util_types.h" + +#ifdef WITH_BLENDER_GUARDEDALLOC +# include "../../guardedalloc/MEM_guardedalloc.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Internal use only. */ +void util_guarded_mem_alloc(size_t n); +void util_guarded_mem_free(size_t n); + +/* Guarded allocator for the use with STL. 
*/ +template <typename T> +class GuardedAllocator : public std::allocator<T> { +public: + template<typename _Tp1> + struct rebind { + typedef GuardedAllocator<_Tp1> other; + }; + + T *allocate(size_t n, const void *hint = 0) + { + util_guarded_mem_alloc(n * sizeof(T)); +#ifdef WITH_BLENDER_GUARDEDALLOC + (void)hint; + return (T*)MEM_mallocN_aligned(n * sizeof(T), 16, "Cycles Alloc"); +#else + return std::allocator<T>::allocate(n, hint); +#endif + } + + void deallocate(T *p, size_t n) + { + util_guarded_mem_free(n * sizeof(T)); +#ifdef WITH_BLENDER_GUARDEDALLOC + MEM_freeN((void*)p); +#else + std::allocator<T>::deallocate(p, n); +#endif + } + + GuardedAllocator() : std::allocator<T>() { } + GuardedAllocator(const GuardedAllocator &a) : std::allocator<T>(a) { } + template <class U> + GuardedAllocator(const GuardedAllocator<U> &a) : std::allocator<T>(a) { } + ~GuardedAllocator() { } +}; + +/* Get memory usage and peak from the guarded STL allocator. */ +size_t util_guarded_get_mem_used(void); +size_t util_guarded_get_mem_peak(void); + +CCL_NAMESPACE_END + +#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */ diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h index 397133618be..f4bac9888a5 100644 --- a/intern/cycles/util/util_half.h +++ b/intern/cycles/util/util_half.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_HALF_H__ @@ -56,7 +56,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale) * assumes no negative, no nan, no inf, and sets denormal to 0 */ union { uint i; float f; } in; float fscale = f[i] * scale; - in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f; + in.f = (fscale > 0.0f)? ((fscale < 65504.0f)? 
fscale: 65504.0f): 0.0f; int x = in.i; int absolute = x & 0x7FFFFFFF; @@ -68,20 +68,20 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale) } #else /* same as above with SSE */ - const ssef mm_scale = ssef(scale); - const ssei mm_38800000 = ssei(0x38800000); - const ssei mm_7FFF = ssei(0x7FFF); - const ssei mm_7FFFFFFF = ssei(0x7FFFFFFF); - const ssei mm_C8000000 = ssei(0xC8000000); - - ssef mm_fscale = load4f(f) * mm_scale; - ssei x = cast(min(max(mm_fscale, ssef(0.0f)), ssef(65500.0f))); - ssei absolute = x & mm_7FFFFFFF; - ssei Z = absolute + mm_C8000000; - ssei result = andnot(absolute < mm_38800000, Z); - ssei rh = (result >> 13) & mm_7FFF; - - _mm_storel_pi((__m64*)h, _mm_castsi128_ps(_mm_packs_epi32(rh, rh))); + ssef fscale = load4f(f) * scale; + ssef x = min(max(fscale, 0.0f), 65504.0f); + +#ifdef __KERNEL_AVX2__ + ssei rpack = _mm_cvtps_ph(x, 0); +#else + ssei absolute = cast(x) & 0x7FFFFFFF; + ssei Z = absolute + 0xC8000000; + ssei result = andnot(absolute < 0x38800000, Z); + ssei rshift = (result >> 13) & 0x7FFF; + ssei rpack = _mm_packs_epi32(rshift, rshift); +#endif + + _mm_storel_pi((__m64*)h, _mm_castsi128_ps(rpack)); #endif } diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h index edd2448efa4..3ff2802b46d 100644 --- a/intern/cycles/util/util_hash.h +++ b/intern/cycles/util/util_hash.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_HASH_H__ @@ -53,7 +53,7 @@ static inline uint hash_string(const char *str) { uint i = 0, c; - while ((c = *str++)) + while((c = *str++)) i = i * 37 + c; return i; diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h index 4b8140e7a7a..bb8a31c6fec 100644 --- a/intern/cycles/util/util_image.h +++ b/intern/cycles/util/util_image.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_IMAGE_H__ diff --git a/intern/cycles/util/util_list.h b/intern/cycles/util/util_list.h index 2aa0b7381e3..6cb27e6defe 100644 --- a/intern/cycles/util/util_list.h +++ b/intern/cycles/util/util_list.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_LIST_H__ diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp index 0722f16cf45..03041723e15 100644 --- a/intern/cycles/util/util_logging.cpp +++ b/intern/cycles/util/util_logging.cpp @@ -11,15 +11,63 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #include <util_logging.h> #include "util_math.h" +#include <stdio.h> +#ifdef _MSC_VER +# define snprintf _snprintf +#endif + CCL_NAMESPACE_BEGIN +void util_logging_init(const char *argv0) +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + + /* Make it so FATAL messages are always print into console. */ + char severity_fatal[32]; + snprintf(severity_fatal, sizeof(severity_fatal), "%d", + google::GLOG_FATAL); + + google::InitGoogleLogging(argv0); + SetCommandLineOption("logtostderr", "1"); + SetCommandLineOption("v", "0"); + SetCommandLineOption("stderrthreshold", severity_fatal); + SetCommandLineOption("minloglevel", severity_fatal); +#else + (void) argv0; +#endif +} + +void util_logging_start(void) +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + SetCommandLineOption("logtostderr", "1"); + SetCommandLineOption("v", "2"); + SetCommandLineOption("stderrthreshold", "1"); + SetCommandLineOption("minloglevel", "0"); +#endif +} + +void util_logging_verbosity_set(int verbosity) +{ +#ifdef WITH_CYCLES_LOGGING + using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption; + char val[10]; + snprintf(val, sizeof(val), "%d", verbosity); + SetCommandLineOption("v", val); +#else + (void) verbosity; +#endif +} + std::ostream& operator <<(std::ostream &os, const float3 &value) { diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h index 991789e7460..7fc42ac355a 100644 --- a/intern/cycles/util/util_logging.h +++ b/intern/cycles/util/util_logging.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_LOGGING_H__ @@ -43,7 +43,11 @@ public: #endif -class float3; +struct float3; + +void util_logging_init(const char *argv0); +void util_logging_start(void); +void util_logging_verbosity_set(int verbosity); std::ostream& operator <<(std::ostream &os, const float3 &value); diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h index 77500e4712d..46c2885f8b0 100644 --- a/intern/cycles/util/util_map.h +++ b/intern/cycles/util/util_map.h @@ -11,20 +11,45 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_MAP_H__ #define __UTIL_MAP_H__ #include <map> -#include <boost/tr1/unordered_map.hpp> + +#if defined(CYCLES_TR1_UNORDERED_MAP) +# include <tr1/unordered_map> +#endif + +#if defined(CYCLES_STD_UNORDERED_MAP) || defined(CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE) +# include <unordered_map> +#endif + +#if !defined(CYCLES_NO_UNORDERED_MAP) && !defined(CYCLES_TR1_UNORDERED_MAP) && \ + !defined(CYCLES_STD_UNORDERED_MAP) && !defined(CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE) // NOLINT +# error One of: CYCLES_NO_UNORDERED_MAP, CYCLES_TR1_UNORDERED_MAP,\ + CYCLES_STD_UNORDERED_MAP, CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE must be defined! 
// NOLINT +#endif + CCL_NAMESPACE_BEGIN using std::map; using std::pair; + +#if defined(CYCLES_NO_UNORDERED_MAP) +typedef std::map unordered_map; +#endif + +#if defined(CYCLES_TR1_UNORDERED_MAP) || defined(CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE) using std::tr1::unordered_map; +#endif + +#if defined(CYCLES_STD_UNORDERED_MAP) +using std::unordered_map; +#endif CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h index c332e1709db..7d6dfd34e0e 100644 --- a/intern/cycles/util/util_math.h +++ b/intern/cycles/util/util_math.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_MATH_H__ @@ -71,6 +71,13 @@ CCL_NAMESPACE_BEGIN #define M_SQRT2_F ((float)1.41421356237309504880) /* sqrt(2) */ #endif +#ifndef M_LN2_F +#define M_LN2_F ((float)0.6931471805599453) /* ln(2) */ +#endif + +#ifndef M_LN10_F +#define M_LN10_F ((float)2.3025850929940457) /* ln(10) */ +#endif /* Scalar */ @@ -124,6 +131,24 @@ ccl_device_inline double min(double a, double b) return (a < b)? a: b; } +/* These 2 guys are templated for usage with registers data. + * + * NOTE: Since this is CPU-only functions it is ok to use references here. + * But for other devices we'll need to be careful about this. 
+ */ + +template<typename T> +ccl_device_inline T min4(const T& a, const T& b, const T& c, const T& d) +{ + return min(min(a,b),min(c,d)); +} + +template<typename T> +ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d) +{ + return max(max(a,b),max(c,d)); +} + #endif ccl_device_inline float min4(float a, float b, float c, float d) @@ -150,6 +175,15 @@ ccl_device_inline float clamp(float a, float mn, float mx) #endif +#ifndef __KERNEL_CUDA__ + +ccl_device_inline float saturate(float a) +{ + return clamp(a, 0.0f, 1.0f); +} + +#endif + ccl_device_inline int float_to_int(float f) { return (int)f; @@ -314,6 +348,12 @@ ccl_device_inline float2 normalize_len(const float2 a, float *t) return a/(*t); } +ccl_device_inline float2 safe_normalize(const float2 a) +{ + float t = len(a); + return (t)? a/t: a; +} + ccl_device_inline bool operator==(const float2 a, const float2 b) { return (a.x == b.x && a.y == b.y); @@ -510,6 +550,12 @@ ccl_device_inline float3 normalize_len(const float3 a, float *t) return a/(*t); } +ccl_device_inline float3 safe_normalize(const float3 a) +{ + float t = len(a); + return (t)? a/t: a; +} + #ifndef __KERNEL_OPENCL__ ccl_device_inline bool operator==(const float3 a, const float3 b) @@ -817,6 +863,12 @@ ccl_device_inline float4 normalize(const float4 a) return a/len(a); } +ccl_device_inline float4 safe_normalize(const float4 a) +{ + float t = len(a); + return (t)? 
a/t: a; +} + ccl_device_inline float4 min(float4 a, float4 b) { #ifdef __KERNEL_SSE__ @@ -1395,10 +1447,9 @@ ccl_device bool ray_triangle_intersect_uv( return true; } -ccl_device bool ray_quad_intersect( - float3 ray_P, float3 ray_D, float ray_t, - float3 quad_P, float3 quad_u, float3 quad_v, - float3 *isect_P, float *isect_t) +ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D, float ray_t, + float3 quad_P, float3 quad_u, float3 quad_v, + float3 *isect_P, float *isect_t) { float3 v0 = quad_P - quad_u*0.5f - quad_v*0.5f; float3 v1 = quad_P + quad_u*0.5f - quad_v*0.5f; @@ -1414,23 +1465,52 @@ ccl_device bool ray_quad_intersect( } /* projections */ -ccl_device bool map_to_sphere(float *r_u, float *r_v, - const float x, const float y, const float z) +ccl_device_inline float2 map_to_tube(const float3 co) { - float len = sqrtf(x * x + y * y + z * z); + float len, u, v; + len = sqrtf(co.x * co.x + co.y * co.y); if(len > 0.0f) { - if(UNLIKELY(x == 0.0f && y == 0.0f)) { - *r_u = 0.0f; /* othwise domain error */ + u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; + v = (co.z + 1.0f) * 0.5f; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); +} + +ccl_device_inline float2 map_to_sphere(const float3 co) +{ + float l = len(co); + float u, v; + if(l > 0.0f) { + if(UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { + u = 0.0f; /* othwise domain error */ } else { - *r_u = (1.0f - atan2f(x, y) / M_PI_F) / 2.0f; + u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f; } - *r_v = 1.0f - safe_acosf(z / len) / M_PI_F; - return true; + v = 1.0f - safe_acosf(co.z / l) / M_PI_F; } else { - *r_v = *r_u = 0.0f; /* to avoid un-initialized variables */ - return false; + u = v = 0.0f; + } + return make_float2(u, v); +} + +ccl_device_inline int util_max_axis(float3 vec) +{ + if(vec.x > vec.y) { + if(vec.x > vec.z) + return 0; + else + return 2; + } + else { + if(vec.y > vec.z) + return 1; + else + return 2; } } diff --git a/intern/cycles/util/util_math_fast.h 
b/intern/cycles/util/util_math_fast.h new file mode 100644 index 00000000000..c1a1be603f4 --- /dev/null +++ b/intern/cycles/util/util_math_fast.h @@ -0,0 +1,611 @@ +/* + * Adapted from OpenImageIO library with this license: + * + * Copyright 2008-2014 Larry Gritz and the other authors and contributors. + * All Rights Reserved. + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the software's owners nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * (This is the Modified BSD License) + * + * A few bits here are based upon code from NVIDIA that was also released + * under the same modified BSD license, and marked as: + * Copyright 2004 NVIDIA Corporation. All Rights Reserved. + * + * Some parts of this file were first open-sourced in Open Shading Language, + * then later moved here. The original copyright notice was: + * Copyright (c) 2009-2014 Sony Pictures Imageworks Inc., et al. + * + * Many of the math functions were copied from or inspired by other + * public domain sources or open source packages with compatible licenses. + * The individual functions give references were applicable. + */ + +#ifndef __UTIL_FAST_MATH__ +#define __UTIL_FAST_MATH__ + +CCL_NAMESPACE_BEGIN + +/* TODO(sergey): Make sure it does not conflict with SSE intrinsics. */ +ccl_device_inline float madd(const float a, const float b, const float c) +{ + /* NOTE: In the future we may want to explicitly ask for a fused + * multiply-add in a specialized version for float. + * + * NOTE: GCC/ICC will turn this (for float) into a FMA unless + * explicitly asked not to, clang seems to leave the code alone. + */ + return a * b + c; +} + +/* + * FAST & APPROXIMATE MATH + * + * The functions named "fast_*" provide a set of replacements to libm that + * are much faster at the expense of some accuracy and robust handling of + * extreme values. One design goal for these approximation was to avoid + * branches as much as possible and operate on single precision values only + * so that SIMD versions should be straightforward ports We also try to + * implement "safe" semantics (ie: clamp to valid range where possible) + * natively since wrapping these inline calls in another layer would be + * wasteful. + * + * Some functions are fast_safe_*, which is both a faster approximation as + * well as clamped input domain to ensure no NaN, Inf, or divide by zero. + */ + +/* Round to nearest integer, returning as an int. 
*/ +ccl_device_inline int fast_rint(float x) +{ + /* used by sin/cos/tan range reduction. */ +#ifdef __KERNEL_SSE4__ + /* Single roundps instruction on SSE4.1+ (for gcc/clang at least). NOTE(review): the SSE4.1 guard seen in util_optimization.h is spelled __KERNEL_SSE41__; confirm __KERNEL_SSE4__ is actually defined somewhere. */ + return float_to_int(rintf(x)); +#else + /* emulate rounding by adding/subtracting 0.5. */ + return float_to_int(x + copysignf(0.5f, x)); +#endif +} + +ccl_device float fast_sinf(float x) +{ + /* Very accurate argument reduction from SLEEF, + * starts failing around x=262000 + * + * Results on: [-2pi,2pi]. + * + * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp, + * 1.19209e-07 max error + */ + int q = fast_rint(x * M_1_PI_F); + float qf = q; + x = madd(qf, -0.78515625f*4, x); + x = madd(qf, -0.00024187564849853515625f*4, x); + x = madd(qf, -3.7747668102383613586e-08f*4, x); + x = madd(qf, -1.2816720341285448015e-12f*4, x); + x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */ + float s = x * x; + if((q & 1) != 0) x = -x; + /* This polynomial approximation has very low error on [-pi/2,+pi/2] + * 1.19209e-07 max error in total over [-2pi,+2pi]. */ + float u = 2.6083159809786593541503e-06f; + u = madd(u, s, -0.0001981069071916863322258f); + u = madd(u, s, +0.00833307858556509017944336f); + u = madd(u, s, -0.166666597127914428710938f); + u = madd(s, u * x, x); + /* For large x, the argument reduction can fail and the polynomial can be + * evaluated with arguments outside the valid interval. Just clamp the bad + * values away (setting to 0.0f means no branches need to be generated). */ + if(fabsf(u) > 1.0f) { + u = 0.0f; + } + return u; +} + +ccl_device float fast_cosf(float x) +{ + /* Same argument reduction as fast_sinf(). */ + int q = fast_rint(x * M_1_PI_F); + float qf = q; + x = madd(qf, -0.78515625f*4, x); + x = madd(qf, -0.00024187564849853515625f*4, x); + x = madd(qf, -3.7747668102383613586e-08f*4, x); + x = madd(qf, -1.2816720341285448015e-12f*4, x); + x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. 
*/ + float s = x * x; + /* Polynomial from SLEEF's sincosf, max error is + * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */ + float u = -2.71811842367242206819355e-07f; + u = madd(u, s, +2.47990446951007470488548e-05f); + u = madd(u, s, -0.00138888787478208541870117f); + u = madd(u, s, +0.0416666641831398010253906f); + u = madd(u, s, -0.5f); + u = madd(u, s, +1.0f); + if((q & 1) != 0) { + u = -u; + } + if(fabsf(u) > 1.0f) { + u = 0.0f; + } + return u; +} + +ccl_device void fast_sincosf(float x, float* sine, float* cosine) +{ + /* Same argument reduction as fast_sin. */ + int q = fast_rint(x * M_1_PI_F); + float qf = q; + x = madd(qf, -0.78515625f*4, x); + x = madd(qf, -0.00024187564849853515625f*4, x); + x = madd(qf, -3.7747668102383613586e-08f*4, x); + x = madd(qf, -1.2816720341285448015e-12f*4, x); + x = M_PI_2_F - (M_PI_2_F - x); // crush denormals + float s = x * x; + /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. */ + if((q & 1) != 0) { + x = -x; + } + float su = 2.6083159809786593541503e-06f; + su = madd(su, s, -0.0001981069071916863322258f); + su = madd(su, s, +0.00833307858556509017944336f); + su = madd(su, s, -0.166666597127914428710938f); + su = madd(s, su * x, x); + float cu = -2.71811842367242206819355e-07f; + cu = madd(cu, s, +2.47990446951007470488548e-05f); + cu = madd(cu, s, -0.00138888787478208541870117f); + cu = madd(cu, s, +0.0416666641831398010253906f); + cu = madd(cu, s, -0.5f); + cu = madd(cu, s, +1.0f); + if((q & 1) != 0) { + cu = -cu; + } + if(fabsf(su) > 1.0f) { + su = 0.0f; + } + if(fabsf(cu) > 1.0f) { + cu = 0.0f; + } + *sine = su; + *cosine = cu; +} + +/* NOTE: this approximation is only valid on [-8192.0,+8192.0], it starts + * becoming really poor outside of this range because the reciprocal amplifies + * errors. + */ +ccl_device float fast_tanf(float x) +{ + /* Derived from SLEEF implementation. 
+ * + * Note that we cannot apply the "denormal crush" trick everywhere because + * we sometimes need to take the reciprocal of the polynomial + */ + int q = fast_rint(x * 2.0f * M_1_PI_F); + float qf = q; + x = madd(qf, -0.78515625f*2, x); + x = madd(qf, -0.00024187564849853515625f*2, x); + x = madd(qf, -3.7747668102383613586e-08f*2, x); + x = madd(qf, -1.2816720341285448015e-12f*2, x); + if((q & 1) == 0) { + /* Crush denormals (only if we aren't inverting the result later). */ + x = M_PI_4_F - (M_PI_4_F - x); + } + float s = x * x; + float u = 0.00927245803177356719970703f; + u = madd(u, s, 0.00331984995864331722259521f); + u = madd(u, s, 0.0242998078465461730957031f); + u = madd(u, s, 0.0534495301544666290283203f); + u = madd(u, s, 0.133383005857467651367188f); + u = madd(u, s, 0.333331853151321411132812f); + u = madd(s, u * x, x); + if((q & 1) != 0) { + u = -1.0f / u; + } + return u; +} + +/* Fast, approximate sin(x*M_PI) with maximum absolute error of 0.000918954611. + * + * Adapted from http://devmaster.net/posts/9648/fast-and-accurate-sine-cosine#comment-76773 + */ +ccl_device float fast_sinpif(float x) +{ + /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */ + const float z = x - ((x + 25165824.0f) - 25165824.0f); + const float y = z - z * fabsf(z); + const float Q = 3.10396624f; + const float P = 3.584135056f; /* P = 16-4*Q */ + return y * (Q + P * fabsf(y)); + + /* The original article used used inferior constants for Q and P and + * so had max error 1.091e-3. + * + * The optimal value for Q was determined by exhaustive search, minimizing + * the absolute numerical error relative to float(std::sin(double(phi*M_PI))) + * over the interval [0,2] (which is where most of the invocations happen). + * + * The basic idea of this approximation starts with the coarse approximation: + * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x)) + * + * This approximation always _over_ estimates the target. 
On the otherhand, + * the curve: + * sin(pi*x) ~= f(x) * abs(f(x)) / 4 + * + * always lies _under_ the target. Thus we can simply numerically search for + * the optimal constant to LERP these curves into a more precise + * approximation. + * + * After folding the constants together and simplifying the resulting math, + * we end up with the compact implementation above. + * + * NOTE: this function actually computes sin(x * pi) which avoids one or two + * mults in many cases and guarantees exact values at integer periods. + */ +} + +/* Fast approximate cos(x*M_PI) with ~0.1% absolute error. */ +ccl_device_inline float fast_cospif(float x) +{ + return fast_sinpif(x+0.5f); +} + +ccl_device float fast_acosf(float x) +{ + const float f = fabsf(x); + /* clamp and crush denormals. */ + const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; + /* Based on http://www.pouet.net/topic.php?which=9132&page=2 + * 85% accurate (ulp 0) + * Examined 2130706434 values of acos: 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush" + * Examined 2130706434 values of acos: 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush" + */ + const float a = sqrtf(1.0f - m) * + (1.5707963267f + m * (-0.213300989f + m * + (0.077980478f + m * -0.02164095f))); + return x < 0 ? M_PI_F - a : a; +} + +ccl_device float fast_asinf(float x) +{ + /* Based on acosf approximation above. + * Max error is 4.51133e-05 (ulps are higher because we are consistently off + * by a little amount). + */ + const float f = fabsf(x); + /* Clamp and crush denormals. */ + const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f; + const float a = M_PI_2_F - sqrtf(1.0f - m) * + (1.5707963267f + m * (-0.213300989f + m * + (0.077980478f + m * -0.02164095f))); + return copysignf(a, x); +} + +ccl_device float fast_atanf(float x) +{ + const float a = fabsf(x); + const float k = a > 1.0f ? 1 / a : a; + const float s = 1.0f - (1.0f - k); /* Crush denormals. 
*/ + const float t = s * s; + /* http://mathforum.org/library/drmath/view/62672.html + * Examined 4278190080 values of atan: 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals) + * Examined 4278190080 values of atan: 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals) + */ + float r = s * madd(0.43157974f, t, 1.0f) / + madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); + if(a > 1.0f) { + /* TODO(sergey): Is it M_PI_2_F? */ + r = 1.570796326794896557998982f - r; + } + return copysignf(r, x); +} + +ccl_device float fast_atan2f(float y, float x) +{ + /* Based on atan approximation above. + * + * The special cases around 0 and infinity were tested explicitly. + * + * The only case not handled correctly is x=NaN,y=0 which returns 0 instead + * of nan. + */ + const float a = fabsf(x); + const float b = fabsf(y); + + const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a)); + const float s = 1.0f - (1.0f - k); /* Crush denormals */ + const float t = s * s; + + float r = s * madd(0.43157974f, t, 1.0f) / + madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f); + + if(b > a) { + /* Account for arg reduction. */ + /* TODO(sergey): Is it M_PI_2_F? */ + r = 1.570796326794896557998982f - r; + } + /* Test sign bit of x. */ + if(__float_as_uint(x) & 0x80000000u) { + r = M_PI_F - r; + } + return copysignf(r, y); +} + +/* Based on: + * + * https://github.com/LiraNuna/glsl-sse2/blob/master/source/vec4.h + * + */ +ccl_device float fast_log2f(float x) +{ + /* NOTE: clamp to avoid special cases and make result "safe" from large + * negative values/nans. */ + x = clamp(x, FLT_MIN, FLT_MAX); + unsigned bits = __float_as_uint(x); + int exponent = (int)(bits >> 23) - 127; + float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f; + /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]: + * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error. 
+ * ulp histogram: + * 0 = 97.46% + * 1 = 2.29% + * 2 = 0.11% + */ + float f2 = f * f; + float f4 = f2 * f2; + float hi = madd(f, -0.00931049621349f, 0.05206469089414f); + float lo = madd(f, 0.47868480909345f, -0.72116591947498f); + hi = madd(f, hi, -0.13753123777116f); + hi = madd(f, hi, 0.24187369696082f); + hi = madd(f, hi, -0.34730547155299f); + lo = madd(f, lo, 1.442689881667200f); + return ((f4 * hi) + (f * lo)) + exponent; +} + +ccl_device_inline float fast_logf(float x) +{ + /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]: + * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error. + */ + return fast_log2f(x) * M_LN2_F; +} + +ccl_device_inline float fast_log10(float x) +{ + /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]: + * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error. + */ + return fast_log2f(x) * M_LN2_F / M_LN10_F; +} + +ccl_device float fast_logb(float x) +{ + /* Don't bother with denormals. */ + x = fabsf(x); + x = clamp(x, FLT_MIN, FLT_MAX); + unsigned bits = __float_as_uint(x); + return (int)(bits >> 23) - 127; +} + +ccl_device float fast_exp2f(float x) +{ + /* Clamp to safe range for final addition. */ + x = clamp(x, -126.0f, 126.0f); + /* Range reduction. */ + int m = (int)x; x -= m; + x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */ + /* 5th degree polynomial generated with sollya + * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff, + * 232 max ulp. + * + * ulp histogram: + * 0 = 87.81% + * 1 = 4.18% + */ + float r = 1.33336498402e-3f; + r = madd(x, r, 9.810352697968e-3f); + r = madd(x, r, 5.551834031939e-2f); + r = madd(x, r, 0.2401793301105f); + r = madd(x, r, 0.693144857883f); + r = madd(x, r, 1.0f); + /* Multiply by 2 ^ m by adding in the exponent. */ + /* NOTE: left-shift of negative number is undefined behavior. 
*/ + return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23)); +} + +ccl_device_inline float fast_expf(float x) +{ + /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]: + * 2.6666452 avg ulp diff, 230 max ulp. + */ + return fast_exp2f(x / M_LN2_F); +} + +ccl_device_inline float fast_exp10(float x) +{ + /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]: + * 2.71732409 avg ulp diff, 232 max ulp. + */ + return fast_exp2f(x * M_LN10_F / M_LN2_F); +} + +ccl_device_inline float fast_expm1f(float x) +{ + if(fabsf(x) < 1e-5f) { + x = 1.0f - (1.0f - x); /* Crush denormals. */ + return madd(0.5f, x * x, x); + } + else { + return fast_expf(x) - 1.0f; + } +} + +ccl_device float fast_sinhf(float x) +{ + float a = fabsf(x); + if(a > 1.0f) { + /* Examined 53389559 values of sinh on [1,87.3300018]: + * 33.6886442 avg ulp diff, 178 max ulp. */ + float e = fast_expf(a); + return copysignf(0.5f * e - 0.5f / e, x); + } + else { + a = 1.0f - (1.0f - a); /* Crush denorms. */ + float a2 = a * a; + /* Degree 7 polynomial generated with sollya. */ + /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */ + float r = 2.03945513931e-4f; + r = madd(r, a2, 8.32990277558e-3f); + r = madd(r, a2, 0.1666673421859f); + r = madd(r * a, a2, a); + return copysignf(r, x); + } +} + +ccl_device_inline float fast_coshf(float x) +{ + /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]: + * 1.78256726 avg ulp diff, 178 max ulp. + */ + float e = fast_expf(fabsf(x)); + return 0.5f * e + 0.5f / e; +} + +ccl_device_inline float fast_tanhf(float x) +{ + /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]: + * 3.12924e-06 max error. + */ + /* NOTE: ulp error is high because of sub-optimal handling around the origin. 
*/ + float e = fast_expf(2.0f * fabsf(x)); + return copysignf(1.0f - 2.0f / (1.0f + e), x); +} + +ccl_device float fast_safe_powf(float x, float y) +{ + if(y == 0) return 1.0f; /* x^0=1 */ + if(x == 0) return 0.0f; /* 0^y=0 */ + float sign = 1.0f; + if(x < 0.0f) { + /* if x is negative, only deal with integer powers + * powf returns NaN for non-integers, we will return 0 instead. + */ + int ybits = __float_as_int(y) & 0x7fffffff; + if(ybits >= 0x4b800000) { + // always even int, keep positive + } + else if(ybits >= 0x3f800000) { + /* Bigger than 1, check. */ + int k = (ybits >> 23) - 127; /* Get exponent. */ + int j = ybits >> (23 - k); /* Shift out possible fractional bits. */ + if((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */ + /* +1 for even, -1 for odd. */ + sign = __int_as_float(0x3f800000 | (j << 31)); + } + else { + /* Not an integer. */ + return 0.0f; + } + } + else { + /* Not an integer. */ + return 0.0f; + } + } + return sign * fast_exp2f(y * fast_log2f(fabsf(x))); +} + +/* TODO(sergey): Check speed with our erf functions implementation from + * bsdf_microfaset.h. + */ + +ccl_device_inline float fast_erff(float x) +{ + /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */ + /* Abramowitz and Stegun, 7.1.28. */ + const float a1 = 0.0705230784f; + const float a2 = 0.0422820123f; + const float a3 = 0.0092705272f; + const float a4 = 0.0001520143f; + const float a5 = 0.0002765672f; + const float a6 = 0.0000430638f; + const float a = fabsf(x); + const float b = 1.0f - (1.0f - a); /* Crush denormals. */ + const float r = madd(madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f); + const float s = r * r; /* ^2 */ + const float t = s * s; /* ^4 */ + const float u = t * t; /* ^8 */ + const float v = u * u; /* ^16 */ + return copysignf(1.0f - 1.0f / v, x); +} + +ccl_device_inline float fast_erfcf(float x) +{ + /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error. 
+ * + * ulp histogram: + * + * 0 = 80.30% + */ + return 1.0f - fast_erff(x); +} + +ccl_device_inline float fast_ierff(float x) +{ + /* From: Approximating the erfinv function by Mike Giles. */ + /* To avoid trouble at the limit, clamp input to 1-eps. */ + float a = fabsf(x); + if(a > 0.99999994f) { + a = 0.99999994f; + } + float w = -fast_logf((1.0f - a) * (1.0f + a)), p; + if(w < 5.0f) { + w = w - 2.5f; + p = 2.81022636e-08f; + p = madd(p, w, 3.43273939e-07f); + p = madd(p, w, -3.5233877e-06f); + p = madd(p, w, -4.39150654e-06f); + p = madd(p, w, 0.00021858087f); + p = madd(p, w, -0.00125372503f); + p = madd(p, w, -0.00417768164f); + p = madd(p, w, 0.246640727f); + p = madd(p, w, 1.50140941f); + } + else { + w = sqrtf(w) - 3.0f; + p = -0.000200214257f; + p = madd(p, w, 0.000100950558f); + p = madd(p, w, 0.00134934322f); + p = madd(p, w, -0.00367342844f); + p = madd(p, w, 0.00573950773f); + p = madd(p, w, -0.0076224613f); + p = madd(p, w, 0.00943887047f); + p = madd(p, w, 1.00167406f); + p = madd(p, w, 2.83297682f); + } + return p * x; +} + +CCL_NAMESPACE_END + +#endif /* __UTIL_FAST_MATH__ */ diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp index add0d18c742..b2a32c45287 100644 --- a/intern/cycles/util/util_md5.cpp +++ b/intern/cycles/util/util_md5.cpp @@ -152,8 +152,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/) * a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ #define F(x, y, z) (((x) & (y)) | (~(x) & (z))) #define SET(a, b, c, d, k, s, Ti)\ - t = a + F(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b + t = a + F(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 0, 7, T1); SET(d, a, b, c, 1, 12, T2); @@ -178,8 +178,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/) * a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). 
*/ #define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) #define SET(a, b, c, d, k, s, Ti)\ - t = a + G(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b + t = a + G(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 1, 5, T17); SET(d, a, b, c, 6, 9, T18); @@ -230,8 +230,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/) * a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ #define I(x, y, z) ((y) ^ ((x) | ~(z))) #define SET(a, b, c, d, k, s, Ti)\ - t = a + I(b,c,d) + X[k] + Ti;\ - a = ROTATE_LEFT(t, s) + b + t = a + I(b,c,d) + X[k] + Ti;\ + a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 0, 6, T49); SET(d, a, b, c, 7, 10, T50); diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h index 04a3e039c9d..0b5462e0a09 100644 --- a/intern/cycles/util/util_opengl.h +++ b/intern/cycles/util/util_opengl.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_OPENGL_H__ @@ -20,7 +20,12 @@ /* OpenGL header includes, used everywhere we use OpenGL, to deal with * platform differences in one central place. */ -#include <GL/glew.h> +#ifdef WITH_GLEW_MX +# include "glew-mx.h" +#else +# include <GL/glew.h> +# define mxCreateContext() glewInit() +# define mxMakeCurrentContext(x) (x) +#endif #endif /* __UTIL_OPENGL_H__ */ - diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h index 2feb3d6ab7e..c951c35fc76 100644 --- a/intern/cycles/util/util_optimization.h +++ b/intern/cycles/util/util_optimization.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_OPTIMIZATION_H__ @@ -102,34 +102,16 @@ #ifdef _MSC_VER #include <intrin.h> #else - -#ifdef __KERNEL_SSE2__ -#include <xmmintrin.h> /* SSE 1 */ -#include <emmintrin.h> /* SSE 2 */ -#endif - -#ifdef __KERNEL_SSE3__ -#include <pmmintrin.h> /* SSE 3 */ -#endif - -#ifdef __KERNEL_SSSE3__ -#include <tmmintrin.h> /* SSSE 3 */ -#endif - -#ifdef __KERNEL_SSE41__ -#include <smmintrin.h> /* SSE 4.1 */ -#endif - -#ifdef __KERNEL_AVX__ -#include <immintrin.h> /* AVX */ -#endif - +#include <x86intrin.h> #endif #else /* MinGW64 has conflicting declarations for these SSE headers in <windows.h>. * Since we can't avoid including <windows.h>, better only include that */ +#define NOGDI +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN #include <windows.h> #endif diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h index ef20ff0fcd2..69bcbf80a78 100644 --- a/intern/cycles/util/util_param.h +++ b/intern/cycles/util/util_param.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_PARAM_H__ diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp index 85d19b6a325..e8f1ec81763 100644 --- a/intern/cycles/util/util_path.cpp +++ b/intern/cycles/util/util_path.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #include "util_debug.h" @@ -25,12 +25,6 @@ OIIO_NAMESPACE_USING #include <stdio.h> -#include <boost/version.hpp> - -#if (BOOST_VERSION < 104400) -# define BOOST_FILESYSTEM_VERSION 2 -#endif - #include <boost/filesystem.hpp> #include <boost/algorithm/string.hpp> @@ -41,21 +35,31 @@ static string cached_user_path = ""; static boost::filesystem::path to_boost(const string& path) { -#ifdef _MSC_VER - std::wstring path_utf16 = Strutil::utf8_to_utf16(path.c_str()); - return boost::filesystem::path(path_utf16.c_str()); -#else return boost::filesystem::path(path.c_str()); -#endif } static string from_boost(const boost::filesystem::path& path) { -#ifdef _MSC_VER - return Strutil::utf16_to_utf8(path.wstring().c_str()); -#else return path.string().c_str(); -#endif +} + +static char *path_specials(const string& sub) /* Environment override for the "shader"/"kernel" sub-path, or NULL when none applies. */ +{ + static bool env_init = false; /* Environment is read once and cached in the statics below. */ + static char *env_shader_path; + static char *env_kernel_path; + if(!env_init) { + env_shader_path = getenv("CYCLES_SHADER_PATH"); + env_kernel_path = getenv("CYCLES_KERNEL_PATH"); + env_init = true; + } + if(env_shader_path != NULL && sub == "shader") { + return env_shader_path; + } + else if(env_kernel_path != NULL && sub == "kernel") { /* Test the kernel variable itself so CYCLES_KERNEL_PATH works without CYCLES_SHADER_PATH. */ + return env_kernel_path; + } + return NULL; } void path_init(const string& path, const string& user_path) @@ -71,6 +75,10 @@ void path_init(const string& path, const string& user_path) string path_get(const string& sub) { + char *special = path_specials(sub); + if(special != NULL) + return special; + if(cached_path == "") cached_path = path_dirname(Sysutil::this_program_path()); @@ -87,11 +95,7 @@ string path_user_get(const string& sub) string path_filename(const string& path) { -#if (BOOST_FILESYSTEM_VERSION == 2) - return to_boost(path).filename(); -#else return from_boost(to_boost(path).filename()); -#endif } string path_dirname(const string& path) @@ -259,14 +263,7 @@ string path_source_replace_includes(const string& source_, const string& path) FILE *path_fopen(const string& path, const 
string& mode) { -#ifdef _WIN32 - std::wstring path_utf16 = Strutil::utf8_to_utf16(path); - std::wstring mode_utf16 = Strutil::utf8_to_utf16(mode); - - return _wfopen(path_utf16.c_str(), mode_utf16.c_str()); -#else return fopen(path.c_str(), mode.c_str()); -#endif } void path_cache_clear_except(const string& name, const set<string>& except) @@ -277,11 +274,7 @@ void path_cache_clear_except(const string& name, const set<string>& except) boost::filesystem::directory_iterator it(dir), it_end; for(; it != it_end; it++) { -#if (BOOST_FILESYSTEM_VERSION == 2) - string filename = from_boost(it->path().filename()); -#else string filename = from_boost(it->path().filename().string()); -#endif if(boost::starts_with(filename, name)) if(except.find(filename) == except.end()) diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h index fd9ea11740d..b81d71d1c0f 100644 --- a/intern/cycles/util/util_path.h +++ b/intern/cycles/util/util_path.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_PATH_H__ diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h index e721a3f5047..0b35142ddb3 100644 --- a/intern/cycles/util/util_progress.h +++ b/intern/cycles/util/util_progress.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_PROGRESS_H__ @@ -38,15 +38,18 @@ public: sample = 0; start_time = time_dt(); total_time = 0.0f; + render_time = 0.0f; tile_time = 0.0f; status = "Initializing"; substatus = ""; sync_status = ""; sync_substatus = ""; - update_cb = NULL; + update_cb = function_null; cancel = false; cancel_message = ""; - cancel_cb = NULL; + error = false; + error_message = ""; + cancel_cb = function_null; } Progress(Progress& progress) @@ -59,7 +62,7 @@ public: thread_scoped_lock lock(progress.progress_mutex); progress.get_status(status, substatus); - progress.get_tile(tile, total_time, tile_time); + progress.get_tile(tile, total_time, render_time, tile_time); sample = progress.get_sample(); @@ -71,7 +74,9 @@ public: tile = 0; sample = 0; start_time = time_dt(); + render_start_time = time_dt(); total_time = 0.0f; + render_time = 0.0f; tile_time = 0.0f; status = "Initializing"; substatus = ""; @@ -79,6 +84,8 @@ public: sync_substatus = ""; cancel = false; cancel_message = ""; + error = false; + error_message = ""; } /* cancel */ @@ -103,11 +110,33 @@ public: return cancel_message; } - void set_cancel_callback(boost::function<void(void)> function) + void set_cancel_callback(function<void(void)> function) { cancel_cb = function; } + /* error */ + void set_error(const string& error_message_) + { + thread_scoped_lock lock(progress_mutex); + error_message = error_message_; + error = true; + /* If error happens we also stop rendering. 
*/ + cancel_message = error_message_; + cancel = true; + } + + bool get_error() + { + return error; + } + + string get_error_message() + { + thread_scoped_lock lock(progress_mutex); + return error_message; + } + /* tile and timing information */ void set_start_time(double start_time_) @@ -117,24 +146,39 @@ public: start_time = start_time_; } + void set_render_start_time(double render_start_time_) + { + thread_scoped_lock lock(progress_mutex); + + render_start_time = render_start_time_; + } + void set_tile(int tile_, double tile_time_) { thread_scoped_lock lock(progress_mutex); tile = tile_; total_time = time_dt() - start_time; + render_time = time_dt() - render_start_time; tile_time = tile_time_; } - void get_tile(int& tile_, double& total_time_, double& tile_time_) + void get_tile(int& tile_, double& total_time_, double& render_time_, double& tile_time_) { thread_scoped_lock lock(progress_mutex); tile_ = tile; total_time_ = (total_time > 0.0)? total_time: 0.0; + render_time_ = (render_time > 0.0)? render_time: 0.0; tile_time_ = tile_time; } + void get_time(double& total_time_, double& render_time_) + { + total_time_ = (total_time > 0.0)? total_time: 0.0; + render_time_ = (render_time > 0.0)? 
render_time: 0.0; + } + void reset_sample() { thread_scoped_lock lock(progress_mutex); @@ -169,6 +213,7 @@ public: status = status_; substatus = substatus_; total_time = time_dt() - start_time; + render_time = time_dt() - render_start_time; } set_update(); @@ -180,6 +225,7 @@ public: thread_scoped_lock lock(progress_mutex); substatus = substatus_; total_time = time_dt() - start_time; + render_time = time_dt() - render_start_time; } set_update(); @@ -192,6 +238,7 @@ public: sync_status = status_; sync_substatus = substatus_; total_time = time_dt() - start_time; + render_time = time_dt() - render_start_time; } set_update(); @@ -204,6 +251,7 @@ public: thread_scoped_lock lock(progress_mutex); sync_substatus = substatus_; total_time = time_dt() - start_time; + render_time = time_dt() - render_start_time; } set_update(); @@ -233,7 +281,7 @@ public: } } - void set_update_callback(boost::function<void(void)> function) + void set_update_callback(function<void(void)> function) { update_cb = function; } @@ -241,14 +289,14 @@ public: protected: thread_mutex progress_mutex; thread_mutex update_mutex; - boost::function<void(void)> update_cb; - boost::function<void(void)> cancel_cb; + function<void(void)> update_cb; + function<void(void)> cancel_cb; int tile; /* counter for rendered tiles */ int sample; /* counter of rendered samples, global for all tiles */ - double start_time; - double total_time; + double start_time, render_start_time; + double total_time, render_time; double tile_time; string status; @@ -259,6 +307,9 @@ protected: volatile bool cancel; string cancel_message; + + volatile bool error; + string error_message; }; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_set.h b/intern/cycles/util/util_set.h index 6078114e714..b3cb8dd8af5 100644 --- a/intern/cycles/util/util_set.h +++ b/intern/cycles/util/util_set.h @@ -11,20 +11,26 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_SET_H__ #define __UTIL_SET_H__ #include <set> -#include <boost/tr1/unordered_set.hpp> - +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) +# include <unordered_set> +#else +# include <boost/tr1/unordered_set.hpp> +#endif CCL_NAMESPACE_BEGIN using std::set; +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) +using std::unordered_set; +#else using std::tr1::unordered_set; - +#endif CCL_NAMESPACE_END #endif /* __UTIL_SET_H__ */ diff --git a/intern/cycles/util/util_simd.cpp b/intern/cycles/util/util_simd.cpp index 0436823e62a..eb9e32800e1 100644 --- a/intern/cycles/util/util_simd.cpp +++ b/intern/cycles/util/util_simd.cpp @@ -12,7 +12,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifdef WITH_KERNEL_SSE2 diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h index 39506a6359b..a1c35b7174d 100644 --- a/intern/cycles/util/util_simd.h +++ b/intern/cycles/util/util_simd.h @@ -12,7 +12,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_SIMD_TYPES_H__ @@ -58,8 +58,12 @@ __forceinline operator int ( ) const { return std::numeric_limits< /* Intrinsics Functions */ #if defined(__BMI__) && defined(__GNUC__) -#define _tzcnt_u32 __tzcnt_u32 -#define _tzcnt_u64 __tzcnt_u64 +# ifndef _tzcnt_u32 +# define _tzcnt_u32 __tzcnt_u32 +# endif +# ifndef _tzcnt_u64 +# define _tzcnt_u64 __tzcnt_u64 +# endif #endif #if defined(__LZCNT__) @@ -133,7 +137,7 @@ __forceinline int clz(const int x) #if defined(__KERNEL_AVX2__) return _lzcnt_u32(x); #else - if (UNLIKELY(x == 0)) return 32; + if(UNLIKELY(x == 0)) return 32; return 31 - __bsr(x); #endif } @@ -286,7 +290,7 @@ __forceinline int clz(const int x) #if defined(__KERNEL_AVX2__) return _lzcnt_u32(x); #else - if (UNLIKELY(x == 0)) return 32; + if(UNLIKELY(x == 0)) return 32; return 31 - __bsr(x); #endif } @@ -358,7 +362,7 @@ __forceinline __m128i _mm_mullo_epi32( __m128i value, __m128i input ) { char* _r = (char*)(&rvalue + 1); char* _v = (char*)(& value + 1); char* _i = (char*)(& input + 1); - for ( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32*)(_r + i)) = *((int32*)(_v + i))* *((int32*)(_i + i)); + for( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32*)(_r + i)) = *((int32*)(_v + i))* *((int32*)(_i + i)); return rvalue; } diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h index be510256dd3..6e669701f3b 100644 --- a/intern/cycles/util/util_sseb.h +++ b/intern/cycles/util/util_sseb.h @@ -12,7 +12,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_SSEB_H__ @@ -119,14 +119,29 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb sh return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); } +template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a ) { + return _mm_movelh_ps(a, a); +} + +template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a ) { + return _mm_movehl_ps(a, a); +} + template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a, const sseb& b ) { return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } +template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a, const sseb& b ) { + return _mm_movelh_ps(a, b); +} + +template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a, const sseb& b ) { + return _mm_movehl_ps(b, a); +} + #if defined(__KERNEL_SSE3__) template<> __forceinline const sseb shuffle<0, 0, 2, 2>( const sseb& a ) { return _mm_moveldup_ps(a); } template<> __forceinline const sseb shuffle<1, 1, 3, 3>( const sseb& a ) { return _mm_movehdup_ps(a); } -template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a ) { return _mm_castpd_ps(_mm_movedup_pd (a)); } #endif #if defined(__KERNEL_SSE41__) @@ -153,6 +168,16 @@ __forceinline bool none ( const sseb& b ) { return _mm_movemask_ps(b) == 0x __forceinline size_t movemask( const sseb& a ) { return _mm_movemask_ps(a); } +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_sseb(const char *label, const sseb &a) +{ + printf("%s: %d %d %d %d\n", + label, a[0], a[1], a[2], a[3]); +} + #endif CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h index f4236cc616e..e625fa63568 100644 --- a/intern/cycles/util/util_ssef.h +++ b/intern/cycles/util/util_ssef.h @@ -12,7 +12,7 @@ * distributed under the 
License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_SSEF_H__ @@ -119,6 +119,9 @@ __forceinline const ssef operator^(const ssef& a, const ssei& b) { return _mm_xo __forceinline const ssef operator&(const ssef& a, const ssef& b) { return _mm_and_ps(a.m128,b.m128); } __forceinline const ssef operator&(const ssef& a, const ssei& b) { return _mm_and_ps(a.m128,_mm_castsi128_ps(b.m128)); } +__forceinline const ssef operator|(const ssef& a, const ssef& b) { return _mm_or_ps(a.m128,b.m128); } +__forceinline const ssef operator|(const ssef& a, const ssei& b) { return _mm_or_ps(a.m128,_mm_castsi128_ps(b.m128)); } + __forceinline const ssef andnot(const ssef& a, const ssef& b) { return _mm_andnot_ps(a.m128,b.m128); } __forceinline const ssef min(const ssef& a, const ssef& b) { return _mm_min_ps(a.m128,b.m128); } @@ -159,8 +162,8 @@ __forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { re #else __forceinline const ssef madd (const ssef& a, const ssef& b, const ssef& c) { return a*b+c; } __forceinline const ssef msub (const ssef& a, const ssef& b, const ssef& c) { return a*b-c; } -__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return -a*b-c;} -__forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { return c-a*b; } +__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return c-a*b;} +__forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { return -a*b-c; } #endif //////////////////////////////////////////////////////////////////////////////// @@ -267,10 +270,30 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssef sh return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0))); } 
+template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef& a) { + return _mm_movelh_ps(a, a); +} + +template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef& a) { + return _mm_movehl_ps(a, a); +} + template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssef shuffle(const ssef& a, const ssef& b) { return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0)); } +template<size_t i0> __forceinline const ssef shuffle(const ssef& a, const ssef& b) { + return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0)); +} + +template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef& a, const ssef& b) { + return _mm_movelh_ps(a, b); +} + +template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef& a, const ssef& b) { + return _mm_movehl_ps(b, a); +} + #if defined(__KERNEL_SSSE3__) __forceinline const ssef shuffle8(const ssef& a, const ssei& shuf) { return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf)); @@ -280,18 +303,19 @@ __forceinline const ssef shuffle8(const ssef& a, const ssei& shuf) { #if defined(__KERNEL_SSE3__) template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef& b) { return _mm_moveldup_ps(b); } template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef& b) { return _mm_movehdup_ps(b); } -template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef& b) { return _mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))); } #endif template<size_t i0> __forceinline const ssef shuffle(const ssef& b) { return shuffle<i0,i0,i0,i0>(b); } -#if defined(__KERNEL_SSE41__) && !defined(__GNUC__) -template<size_t i> __forceinline float extract (const ssef& a) { return _mm_cvtss_f32(_mm_extract_ps(a,i)); } -#else -template<size_t i> __forceinline float extract (const ssef& a) { return _mm_cvtss_f32(shuffle<i,i,i,i>(a)); } +#if defined(__KERNEL_AVX__) +__forceinline const ssef shuffle(const ssef& a, const ssei& shuf) { + return _mm_permutevar_ps(a, shuf); +} #endif + +template<size_t i> __forceinline float 
extract (const ssef& a) { return _mm_cvtss_f32(shuffle<i,i,i,i>(a)); } template<> __forceinline float extract<0>(const ssef& a) { return _mm_cvtss_f32(a); } #if defined(__KERNEL_SSE41__) @@ -347,6 +371,8 @@ __forceinline size_t select_max(const ssef& v) { return __bsf(movemask(v == vred __forceinline size_t select_min(const sseb& valid, const ssef& v) { const ssef a = select(valid,v,ssef(pos_inf)); return __bsf(movemask(valid &(a == vreduce_min(a)))); } __forceinline size_t select_max(const sseb& valid, const ssef& v) { const ssef a = select(valid,v,ssef(neg_inf)); return __bsf(movemask(valid &(a == vreduce_max(a)))); } +__forceinline size_t movemask( const ssef& a ) { return _mm_movemask_ps(a); } + //////////////////////////////////////////////////////////////////////////////// /// Memory load and store operations //////////////////////////////////////////////////////////////////////////////// @@ -580,6 +606,20 @@ ccl_device_inline const ssef set_sign_bit(const ssef &a) return a ^ cast(ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31)); } +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + +ccl_device_inline void print_ssef(const char *label, const ssef &a) +{ + printf("%s: %.8f %.8f %.8f %.8f\n", + label, + (double)a[0], + (double)a[1], + (double)a[2], + (double)a[3]); +} + #endif CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h index 5f5a8686e35..5f62569268c 100644 --- a/intern/cycles/util/util_ssei.h +++ b/intern/cycles/util/util_ssei.h @@ -12,7 +12,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_SSEI_H__ @@ -190,8 +190,8 @@ __forceinline const ssei select( const int mask, const ssei& t, const ssei& f ) // Movement/Shifting/Shuffling Functions //////////////////////////////////////////////////////////////////////////////// -__forceinline ssei unpacklo( const ssei& a, const ssei& b ) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a.m128), _mm_castsi128_ps(b.m128))); } -__forceinline ssei unpackhi( const ssei& a, const ssei& b ) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a.m128), _mm_castsi128_ps(b.m128))); } +__forceinline ssei unpacklo( const ssei& a, const ssei& b ) { return _mm_unpacklo_epi32(a, b); } +__forceinline ssei unpackhi( const ssei& a, const ssei& b ) { return _mm_unpackhi_epi32(a, b); } template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei shuffle( const ssei& a ) { return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)); @@ -201,12 +201,6 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei sh return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0))); } -#if defined(__KERNEL_SSE3__) -template<> __forceinline const ssei shuffle<0, 0, 2, 2>( const ssei& a ) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(a))); } -template<> __forceinline const ssei shuffle<1, 1, 3, 3>( const ssei& a ) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(a))); } -template<> __forceinline const ssei shuffle<0, 1, 0, 1>( const ssei& a ) { return _mm_castpd_si128(_mm_movedup_pd (_mm_castsi128_pd(a))); } -#endif - template<size_t i0> __forceinline const ssei shuffle( const ssei& b ) { return shuffle<i0,i0,i0,i0>(b); } @@ -286,6 +280,16 @@ __forceinline void store4i_nt(void* ptr, const ssei& v) { #endif } +//////////////////////////////////////////////////////////////////////////////// +/// Debug Functions +//////////////////////////////////////////////////////////////////////////////// + 
+ccl_device_inline void print_ssei(const char *label, const ssei &a) +{ + printf("%s: %d %d %d %d\n", + label, a[0], a[1], a[2], a[3]); +} + #endif CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h index 8758b823084..ce27067dc5e 100644 --- a/intern/cycles/util/util_stats.h +++ b/intern/cycles/util/util_stats.h @@ -11,12 +11,14 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_STATS_H__ #define __UTIL_STATS_H__ +#include "util_atomic.h" + CCL_NAMESPACE_BEGIN class Stats { @@ -24,14 +26,13 @@ public: Stats() : mem_used(0), mem_peak(0) {} void mem_alloc(size_t size) { - mem_used += size; - if(mem_used > mem_peak) - mem_peak = mem_used; + atomic_add_z(&mem_used, size); + atomic_update_max_z(&mem_peak, mem_used); } void mem_free(size_t size) { assert(mem_used >= size); - mem_used -= size; + atomic_sub_z(&mem_used, size); } size_t mem_used; diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp index f38d8d3282f..66856dd8331 100644 --- a/intern/cycles/util/util_string.cpp +++ b/intern/cycles/util/util_string.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #include <stdarg.h> @@ -105,5 +105,22 @@ string string_strip(const string& s) } +void string_replace(string& haystack, const string& needle, const string& other) +{ + size_t i; + + while((i = haystack.find(needle)) != string::npos) + haystack.replace(i, needle.length(), other); +} + +string string_remove_trademark(const string &s) +{ + string result = s; + string_replace(result, "(TM)", ""); + string_replace(result, "(R)", ""); + + return string_strip(result); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h index 6808f085834..6cb8d8df1e1 100644 --- a/intern/cycles/util/util_string.h +++ b/intern/cycles/util/util_string.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_STRING_H__ @@ -40,8 +40,10 @@ string string_printf(const char *format, ...) PRINTF_ATTRIBUTE; bool string_iequals(const string& a, const string& b); void string_split(vector<string>& tokens, const string& str, const string& separators = "\t "); +void string_replace(string& haystack, const string& needle, const string& other); bool string_endswith(const string& s, const char *end); string string_strip(const string& s); +string string_remove_trademark(const string& s); CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index 7c0445577e2..cc88320b68e 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -11,11 +11,12 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "util_system.h" #include "util_types.h" +#include "util_string.h" #ifdef _WIN32 #if(!defined(FREE_WINDOWS)) @@ -75,14 +76,6 @@ static void __cpuid(int data[4], int selector) } #endif -static void replace_string(string& haystack, const string& needle, const string& other) -{ - size_t i; - - while((i = haystack.find(needle)) != string::npos) - haystack.replace(i, needle.length(), other); -} - string system_cpu_brand_string() { char buf[48]; @@ -98,10 +91,7 @@ string system_cpu_brand_string() string brand = buf; /* make it a bit more presentable */ - replace_string(brand, "(TM)", ""); - replace_string(brand, "(R)", ""); - - brand = string_strip(brand); + brand = string_remove_trademark(brand); return brand; } @@ -127,6 +117,7 @@ struct CPUCapabilities { bool sse42; bool sse4a; bool avx; + bool f16c; bool avx2; bool xop; bool fma3; @@ -135,24 +126,42 @@ struct CPUCapabilities { bool bmi2; }; +static void system_cpu_capabilities_override(CPUCapabilities *caps) +{ + /* Only capabilities which affect the Cycles kernel. 
*/ + if(getenv("CYCLES_CPU_NO_AVX2")) { + caps->avx2 = false; + } + if(getenv("CYCLES_CPU_NO_AVX")) { + caps->avx = false; + } + if(getenv("CYCLES_CPU_NO_SSE41")) { + caps->sse41 = false; + } + if(getenv("CYCLES_CPU_NO_SSE3")) { + caps->sse3 = false; + } + if(getenv("CYCLES_CPU_NO_SSE2")) { + caps->sse2 = false; + } + if(getenv("CYCLES_CPU_NO_SSE")) { + caps->sse = false; + } +} + static CPUCapabilities& system_cpu_capabilities() { static CPUCapabilities caps; static bool caps_init = false; if(!caps_init) { - int result[4], num; //, num_ex; + int result[4], num; memset(&caps, 0, sizeof(caps)); __cpuid(result, 0); num = result[0]; -#if 0 - __cpuid(result, 0x80000000); - num_ex = result[0]; -#endif - if(num >= 1) { __cpuid(result, 0x00000001); caps.mmx = (result[3] & ((int)1 << 23)) != 0; @@ -184,21 +193,15 @@ static CPUCapabilities& system_cpu_capabilities() caps.avx = (xcr_feature_mask & 0x6) == 0x6; } + caps.f16c = (result[2] & ((int)1 << 29)) != 0; + __cpuid(result, 0x00000007); caps.bmi1 = (result[1] & ((int)1 << 3)) != 0; caps.bmi2 = (result[1] & ((int)1 << 8)) != 0; caps.avx2 = (result[1] & ((int)1 << 5)) != 0; } -#if 0 - if(num_ex >= 0x80000001) { - __cpuid(result, 0x80000001); - caps.x64 = (result[3] & ((int)1 << 29)) != 0; - caps.sse4a = (result[2] & ((int)1 << 6)) != 0; - caps.fma4 = (result[2] & ((int)1 << 16)) != 0; - caps.xop = (result[2] & ((int)1 << 11)) != 0; - } -#endif + system_cpu_capabilities_override(&caps); caps_init = true; } @@ -232,7 +235,7 @@ bool system_cpu_support_avx() bool system_cpu_support_avx2() { CPUCapabilities& caps = system_cpu_capabilities(); - return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2; + return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2; } #else diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h index 
0e8868c7dfc..4e7e00f85fd 100644 --- a/intern/cycles/util/util_system.h +++ b/intern/cycles/util/util_system.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_SYSTEM_H__ diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index 14a81ecbb05..d56553d1d4a 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include "util_debug.h" @@ -237,7 +237,7 @@ bool TaskScheduler::thread_wait_pop(Entry& entry) return true; } -void TaskScheduler::thread_run(int thread_id) +void TaskScheduler::thread_run(int /*thread_id*/) { Entry entry; diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h index 42a1e2f5a58..debcff3b776 100644 --- a/intern/cycles/util/util_task.h +++ b/intern/cycles/util/util_task.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #ifndef __UTIL_TASK_H__ @@ -27,7 +27,7 @@ class Task; class TaskPool; class TaskScheduler; -typedef boost::function<void(void)> TaskRunFunction; +typedef function<void(void)> TaskRunFunction; /* Task * diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h index f2698d043fb..9c19235d41d 100644 --- a/intern/cycles/util/util_thread.h +++ b/intern/cycles/util/util_thread.h @@ -11,13 +11,20 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_THREAD_H__ #define __UTIL_THREAD_H__ -#include <boost/thread.hpp> +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) +# include <thread> +# include <mutex> +# include <condition_variable> +# include <functional> +#else +# include <boost/thread.hpp> +#endif #include <pthread.h> #include <queue> @@ -25,18 +32,24 @@ CCL_NAMESPACE_BEGIN +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) +typedef std::mutex thread_mutex; +typedef std::unique_lock<std::mutex> thread_scoped_lock; +typedef std::condition_variable thread_condition_variable; +#else /* use boost for mutexes */ - typedef boost::mutex thread_mutex; typedef boost::mutex::scoped_lock thread_scoped_lock; typedef boost::condition_variable thread_condition_variable; +#endif /* own pthread based implementation, to avoid boost version conflicts with * dynamically loaded blender plugins */ class thread { public: - thread(boost::function<void(void)> run_cb_) + thread(function<void(void)> run_cb_) + { joined = false; run_cb = run_cb_; @@ -63,7 +76,7 @@ public: } protected: - boost::function<void(void)> run_cb; + function<void(void)> run_cb; pthread_t pthread_id; bool joined; }; diff --git a/intern/cycles/util/util_time.cpp b/intern/cycles/util/util_time.cpp index 
dd91b024940..964f9f1a7af 100644 --- a/intern/cycles/util/util_time.cpp +++ b/intern/cycles/util/util_time.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #include <stdlib.h> @@ -71,7 +71,7 @@ void time_sleep(double t) /* get microseconds */ int us = (int)(t * 1e6); - if (us > 0) + if(us > 0) usleep(us); } diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h index 3df17272e2f..14ffea7f3da 100644 --- a/intern/cycles/util/util_time.h +++ b/intern/cycles/util/util_time.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_TIME_H__ diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp index 14613558501..acaca69464c 100644 --- a/intern/cycles/util/util_transform.cpp +++ b/intern/cycles/util/util_transform.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ /* @@ -46,9 +46,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "util_math.h" #include "util_transform.h" +#include "util_boundbox.h" +#include "util_math.h" + CCL_NAMESPACE_BEGIN /* Transform Inverse */ @@ -271,5 +273,15 @@ void transform_motion_decompose(DecompMotionTransform *decomp, const MotionTrans decomp->post_y = post.y; } -CCL_NAMESPACE_END +Transform transform_from_viewplane(BoundBox2D& viewplane) +{ + return + transform_scale(1.0f / (viewplane.right - viewplane.left), + 1.0f / (viewplane.top - viewplane.bottom), + 1.0f) * + transform_translate(-viewplane.left, + -viewplane.bottom, + 0.0f); +} +CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h index 5b3dbe42f69..ba8d04b5c16 100644 --- a/intern/cycles/util/util_transform.h +++ b/intern/cycles/util/util_transform.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_TRANSFORM_H__ @@ -55,6 +55,11 @@ typedef struct DecompMotionTransform { float4 post_x, post_y; } DecompMotionTransform; +typedef struct PerspectiveMotionTransform { + Transform pre; + Transform post; +} PerspectiveMotionTransform; + /* Functions */ ccl_device_inline float3 transform_perspective(const Transform *t, const float3 a) @@ -216,12 +221,13 @@ ccl_device_inline Transform transform_rotate(float angle, float3 axis) 0.0f, 0.0f, 0.0f, 1.0f); } +/* Euler is assumed to be in XYZ order. 
*/ ccl_device_inline Transform transform_euler(float3 euler) { return - transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)) * + transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) * transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) * - transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)); + transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)); } ccl_device_inline Transform transform_orthographic(float znear, float zfar) @@ -448,6 +454,8 @@ ccl_device void transform_motion_interpolate(Transform *tfm, const DecompMotionT #ifndef __KERNEL_GPU__ +class BoundBox2D; + ccl_device_inline bool operator==(const MotionTransform& A, const MotionTransform& B) { return (A.pre == B.pre && A.post == B.post); @@ -455,9 +463,41 @@ ccl_device_inline bool operator==(const MotionTransform& A, const MotionTransfor float4 transform_to_quat(const Transform& tfm); void transform_motion_decompose(DecompMotionTransform *decomp, const MotionTransform *motion, const Transform *mid); +Transform transform_from_viewplane(BoundBox2D& viewplane); #endif +/* TODO(sergey): This is only for until we've got OpenCL 2.0 + * on all devices we consider supported. It'll be replaced with + * generic address space. 
+ */ + +#ifdef __KERNEL_OPENCL__ + +#define OPENCL_TRANSFORM_ADDRSPACE_GLUE(a, b) a ## b +#define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(function) \ +ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(function, _addrspace)( \ + ccl_addr_space const Transform *t, const float3 a) \ +{ \ + Transform private_tfm = *t; \ + return function(&private_tfm, a); \ +} + +OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_point) +OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction) +OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed) + +# undef OPENCL_TRANSFORM_ADDRSPACE_DECLARE +# undef OPENCL_TRANSFORM_ADDRSPACE_GLUE +# define transform_point_auto transform_point_addrspace +# define transform_direction_auto transform_direction_addrspace +# define transform_direction_transposed_auto transform_direction_transposed_addrspace +#else +# define transform_point_auto transform_point +# define transform_direction_auto transform_direction +# define transform_direction_transposed_auto transform_direction_transposed +#endif + CCL_NAMESPACE_END #endif /* __UTIL_TRANSFORM_H__ */ diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h index 2a199e591bf..6f474f873a6 100644 --- a/intern/cycles/util/util_types.h +++ b/intern/cycles/util/util_types.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_TYPES_H__ @@ -33,11 +33,7 @@ #ifndef __KERNEL_GPU__ -# ifdef NDEBUG -# define ccl_device static inline -# else -# define ccl_device static -# endif +#define ccl_device static inline #define ccl_device_noinline static #define ccl_global #define ccl_constant @@ -53,11 +49,7 @@ #define ccl_try_align(...) 
/* not support for function arguments (error C2719) */ #endif #define ccl_may_alias -# ifdef NDEBUG -# define ccl_always_inline __forceinline -# else -# define ccl_always_inline -# endif +#define ccl_always_inline __forceinline #define ccl_maybe_unused #else @@ -272,6 +264,19 @@ struct ccl_try_align(16) float4 { __forceinline float& operator[](int i) { return *(&x + i); } }; +template<typename T> +class vector3 +{ +public: + T x, y, z; + + ccl_always_inline vector3() {} + ccl_always_inline vector3(const T& a) + : x(a), y(a), z(a) {} + ccl_always_inline vector3(const T& x, const T& y, const T& z) + : x(x), y(y), z(z) {} +}; + #endif #ifndef __KERNEL_GPU__ @@ -465,6 +470,19 @@ enum InterpolationType { INTERPOLATION_SMART = 3, }; +/* Extension types for textures. + * + * Defines how the image is extrapolated past its original bounds. + */ +enum ExtensionType { + /* Cause the image to repeat horizontally and vertically. */ + EXTENSION_REPEAT = 0, + /* Extend by repeating edge pixels of the image. */ + EXTENSION_EXTEND = 1, + /* Clip to image size and set exterior pixels as transparent. */ + EXTENSION_CLIP = 2, +}; + /* macros */ /* hints for branch prediction, only use in code that runs a _lot_ */ @@ -476,18 +494,32 @@ enum InterpolationType { # define UNLIKELY(x) (x) #endif +#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800)) +# define HAS_CPP11_FEATURES +#endif + +#if defined(__GNUC__) || defined(__clang__) +# if defined(HAS_CPP11_FEATURES) +/* Some magic to be sure we don't have reference in the type. */ +template<typename T> static inline T decltype_helper(T x) { return x; } +# define TYPEOF(x) decltype(decltype_helper(x)) +# else +# define TYPEOF(x) typeof(x) +# endif +#endif + /* Causes warning: * incompatible types when assigning to type 'Foo' from type 'Bar' * ... 
the compiler optimizes away the temp var */ #ifdef __GNUC__ #define CHECK_TYPE(var, type) { \ - typeof(var) *__tmp; \ + TYPEOF(var) *__tmp; \ __tmp = (type *)NULL; \ (void)__tmp; \ } (void)0 #define CHECK_TYPE_PAIR(var_a, var_b) { \ - typeof(var_a) *__tmp; \ + TYPEOF(var_a) *__tmp; \ __tmp = (typeof(var_b) *)NULL; \ (void)__tmp; \ } (void)0 diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index cc6e8a371ed..ee1f997721d 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_VECTOR_H__ @@ -19,34 +19,74 @@ /* Vector */ -#include <string.h> +#include <cassert> +#include <cstring> #include <vector> +#include "util_aligned_malloc.h" #include "util_types.h" -CCL_NAMESPACE_BEGIN +#ifdef WITH_CYCLES_DEBUG +# include "util_guarded_allocator.h" +#endif -using std::vector; +CCL_NAMESPACE_BEGIN -static inline void *malloc_aligned(size_t size, size_t alignment) +/* Vector + * + * Own subclass-ed vestion of std::vector. Subclass is needed because: + * + * - When building with WITH_CYCLES_DEBUG we need to use own allocator which + * keeps track of used/peak memory. + * + * - Have method to ensure capacity is re-set to 0. + */ +template<typename value_type, +#ifdef WITH_CYCLES_DEBUG + typename allocator_type = GuardedAllocator<value_type> +#else + typename allocator_type = std::allocator<value_type> +#endif + > +class vector : public std::vector<value_type, allocator_type> { - void *data = (void*)malloc(size + sizeof(void*) + alignment - 1); +public: + /* Default constructor. 
*/ + explicit vector() : std::vector<value_type, allocator_type>() { } - union { void *ptr; size_t offset; } u; - u.ptr = (char*)data + sizeof(void*); - u.offset = (u.offset + alignment - 1) & ~(alignment - 1); - *(((void**)u.ptr) - 1) = data; + /* Fill constructor. */ + explicit vector(size_t n, const value_type& val = value_type()) + : std::vector<value_type, allocator_type>(n, val) { } - return u.ptr; -} + /* Range constructor. */ + template <class InputIterator> + vector(InputIterator first, InputIterator last) + : std::vector<value_type, allocator_type>(first, last) { } -static inline void free_aligned(void *ptr) -{ - if(ptr) { - void *data = *(((void**)ptr) - 1); - free(data); + /* Copy constructor. */ + vector(const vector &x) : std::vector<value_type, allocator_type>(x) { } + + void shrink_to_fit(void) + { +#if __cplusplus < 201103L + vector<value_type>().swap(*this); +#else + std::vector<value_type, allocator_type>::shrink_to_fit(); +#endif } -} + + void free_memory(void) + { + std::vector<value_type, allocator_type>::resize(0); + shrink_to_fit(); + } + + /* Some external API might demand working with std::vector. 
*/ + operator std::vector<value_type>() + { + return std::vector<value_type>(*this); + } +}; /* Array * @@ -65,6 +105,7 @@ public: { data = NULL; datasize = 0; + capacity = 0; } array(size_t newsize) @@ -72,10 +113,12 @@ public: if(newsize == 0) { data = NULL; datasize = 0; + capacity = 0; } else { - data = (T*)malloc_aligned(sizeof(T)*newsize, alignment); + data = (T*)util_aligned_malloc(sizeof(T)*newsize, alignment); datasize = newsize; + capacity = datasize; } } @@ -89,11 +132,13 @@ public: if(from.datasize == 0) { data = NULL; datasize = 0; + capacity = 0; } else { - data = (T*)malloc_aligned(sizeof(T)*from.datasize, alignment); + data = (T*)util_aligned_malloc(sizeof(T)*from.datasize, alignment); memcpy(data, from.data, from.datasize*sizeof(T)); datasize = from.datasize; + capacity = datasize; } return *this; @@ -102,13 +147,11 @@ public: array& operator=(const vector<T>& from) { datasize = from.size(); + capacity = datasize; data = NULL; if(datasize > 0) { - data = (T*)malloc_aligned(sizeof(T)*datasize, alignment); - memcpy(data, &from[0], datasize*sizeof(T)); - free_aligned(data); - data = (T*)malloc_aligned(sizeof(T)*datasize, alignment); + data = (T*)util_aligned_malloc(sizeof(T)*datasize, alignment); memcpy(data, &from[0], datasize*sizeof(T)); } @@ -117,7 +160,7 @@ public: ~array() { - free_aligned(data); + util_aligned_free(data); } void resize(size_t newsize) @@ -126,22 +169,25 @@ public: clear(); } else if(newsize != datasize) { - T *newdata = (T*)malloc_aligned(sizeof(T)*newsize, alignment); - if(data) { - memcpy(newdata, data, ((datasize < newsize)? datasize: newsize)*sizeof(T)); - free_aligned(data); + if(newsize > capacity) { + T *newdata = (T*)util_aligned_malloc(sizeof(T)*newsize, alignment); + if(data) { + memcpy(newdata, data, ((datasize < newsize)? 
datasize: newsize)*sizeof(T)); + util_aligned_free(data); + } + data = newdata; + capacity = newsize; } - - data = newdata; datasize = newsize; } } void clear() { - free_aligned(data); + util_aligned_free(data); data = NULL; datasize = 0; + capacity = 0; } size_t size() const @@ -151,12 +197,26 @@ public: T& operator[](size_t i) const { + assert(i < datasize); return data[i]; } + void reserve(size_t newcapacity) { + if(newcapacity > capacity) { + T *newdata = (T*)util_aligned_malloc(sizeof(T)*newcapacity, alignment); + if(data) { + memcpy(newdata, data, ((datasize < newcapacity)? datasize: newcapacity)*sizeof(T)); + util_aligned_free(data); + } + data = newdata; + capacity = newcapacity; + } + } + protected: T *data; size_t datasize; + size_t capacity; }; CCL_NAMESPACE_END diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp index 6bf9c9ed8c0..9b5cd22fb4a 100644 --- a/intern/cycles/util/util_view.cpp +++ b/intern/cycles/util/util_view.cpp @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. 
*/ #include <stdio.h> @@ -98,7 +98,7 @@ void view_display_help() glColor3f(0.8f, 0.8f, 0.8f); view_display_text(x1+20, y2-20, "Cycles Renderer"); - view_display_text(x1+20, y2-40, "(C) 2011-2014 Blender Foundation"); + view_display_text(x1+20, y2-40, "(C) 2011-2015 Blender Foundation"); view_display_text(x1+20, y2-80, "Controls:"); view_display_text(x1+20, y2-100, "h: Info/Help"); view_display_text(x1+20, y2-120, "r: Reset"); @@ -110,6 +110,7 @@ void view_display_help() view_display_text(x1+20, y2-230, "Left mouse: Move camera"); view_display_text(x1+20, y2-250, "Right mouse: Rotate camera"); view_display_text(x1+20, y2-270, "W/A/S/D: Move camera"); + view_display_text(x1+20, y2-290, "0/1/2/3: Set max bounces"); glColor3f(1.0f, 1.0f, 1.0f); } @@ -248,7 +249,7 @@ void view_main_loop(const char *title, int width, int height, glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH); glutCreateWindow(title); - glewInit(); + mxMakeCurrentContext(mxCreateContext()); view_reshape(width, height); diff --git a/intern/cycles/util/util_view.h b/intern/cycles/util/util_view.h index 65d890eb6af..5def0564175 100644 --- a/intern/cycles/util/util_view.h +++ b/intern/cycles/util/util_view.h @@ -11,7 +11,7 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_VIEW_H__ diff --git a/intern/cycles/util/util_xml.h b/intern/cycles/util/util_xml.h index 9d1ebc2114a..cfd0afc95f7 100644 --- a/intern/cycles/util/util_xml.h +++ b/intern/cycles/util/util_xml.h @@ -11,15 +11,15 @@ * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and - * limitations under the License + * limitations under the License. */ #ifndef __UTIL_XML_H__ #define __UTIL_XML_H__ -/* PugiXML from OpenImageIO is used for XML parsing. */ +/* PugiXML is used for XML parsing. */ -#include <OpenImageIO/pugixml.hpp> +#include <pugixml.hpp> CCL_NAMESPACE_BEGIN |