Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/CMakeLists.txt25
-rw-r--r--intern/cycles/util/util_algorithm.h2
-rw-r--r--intern/cycles/util/util_aligned_malloc.cpp83
-rw-r--r--intern/cycles/util/util_aligned_malloc.h (renamed from intern/cycles/util/util_dynlib.h)21
-rw-r--r--intern/cycles/util/util_args.h2
-rw-r--r--intern/cycles/util/util_atomic.h65
-rw-r--r--intern/cycles/util/util_boundbox.h2
-rw-r--r--intern/cycles/util/util_cache.cpp8
-rw-r--r--intern/cycles/util/util_cache.h4
-rw-r--r--intern/cycles/util/util_color.h2
-rw-r--r--intern/cycles/util/util_debug.h2
-rw-r--r--intern/cycles/util/util_dynlib.cpp94
-rw-r--r--intern/cycles/util/util_foreach.h10
-rw-r--r--intern/cycles/util/util_function.h29
-rw-r--r--intern/cycles/util/util_guarded_allocator.cpp49
-rw-r--r--intern/cycles/util/util_guarded_allocator.h86
-rw-r--r--intern/cycles/util/util_half.h32
-rw-r--r--intern/cycles/util/util_hash.h4
-rw-r--r--intern/cycles/util/util_image.h2
-rw-r--r--intern/cycles/util/util_list.h2
-rw-r--r--intern/cycles/util/util_logging.cpp50
-rw-r--r--intern/cycles/util/util_logging.h8
-rw-r--r--intern/cycles/util/util_map.h29
-rw-r--r--intern/cycles/util/util_math.h110
-rw-r--r--intern/cycles/util/util_math_fast.h611
-rw-r--r--intern/cycles/util/util_md5.cpp12
-rw-r--r--intern/cycles/util/util_opengl.h11
-rw-r--r--intern/cycles/util/util_optimization.h28
-rw-r--r--intern/cycles/util/util_param.h2
-rw-r--r--intern/cycles/util/util_path.cpp55
-rw-r--r--intern/cycles/util/util_path.h2
-rw-r--r--intern/cycles/util/util_progress.h73
-rw-r--r--intern/cycles/util/util_set.h14
-rw-r--r--intern/cycles/util/util_simd.cpp2
-rw-r--r--intern/cycles/util/util_simd.h16
-rw-r--r--intern/cycles/util/util_sseb.h29
-rw-r--r--intern/cycles/util/util_ssef.h56
-rw-r--r--intern/cycles/util/util_ssei.h22
-rw-r--r--intern/cycles/util/util_stats.h11
-rw-r--r--intern/cycles/util/util_string.cpp19
-rw-r--r--intern/cycles/util/util_string.h4
-rw-r--r--intern/cycles/util/util_system.cpp61
-rw-r--r--intern/cycles/util/util_system.h2
-rw-r--r--intern/cycles/util/util_task.cpp4
-rw-r--r--intern/cycles/util/util_task.h4
-rw-r--r--intern/cycles/util/util_thread.h23
-rw-r--r--intern/cycles/util/util_time.cpp4
-rw-r--r--intern/cycles/util/util_time.h2
-rw-r--r--intern/cycles/util/util_transform.cpp18
-rw-r--r--intern/cycles/util/util_transform.h46
-rw-r--r--intern/cycles/util/util_types.h58
-rw-r--r--intern/cycles/util/util_vector.h124
-rw-r--r--intern/cycles/util/util_view.cpp7
-rw-r--r--intern/cycles/util/util_view.h2
-rw-r--r--intern/cycles/util/util_xml.h6
55 files changed, 1666 insertions, 383 deletions
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 6120e7e8456..0acb9e9304c 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -1,16 +1,16 @@
set(INC
.
+ ../../glew-mx
)
set(INC_SYS
- ${GLEW_INCLUDE_PATH}
- ${OPENGL_INCLUDE_DIR}
+ ${GLEW_INCLUDE_DIR}
)
set(SRC
+ util_aligned_malloc.cpp
util_cache.cpp
- util_dynlib.cpp
util_logging.cpp
util_md5.cpp
util_path.cpp
@@ -22,6 +22,10 @@ set(SRC
util_transform.cpp
)
+if(NOT CYCLES_STANDALONE_REPOSITORY)
+ add_definitions(-DWITH_GLEW_MX)
+endif()
+
if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI)
list(APPEND SRC
util_view.cpp
@@ -30,11 +34,12 @@ endif()
set(SRC_HEADERS
util_algorithm.h
+ util_aligned_malloc.h
util_args.h
+ util_atomic.h
util_boundbox.h
util_cache.h
util_debug.h
- util_dynlib.h
util_foreach.h
util_function.h
util_half.h
@@ -44,6 +49,7 @@ set(SRC_HEADERS
util_logging.h
util_map.h
util_math.h
+ util_math_fast.h
util_md5.h
util_opengl.h
util_optimization.h
@@ -68,7 +74,18 @@ set(SRC_HEADERS
util_xml.h
)
+if(WITH_CYCLES_DEBUG)
+ list(APPEND SRC
+ util_guarded_allocator.cpp
+ )
+ list(APPEND SRC_HEADERS
+ util_guarded_allocator.h
+ )
+endif()
+
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
+add_definitions(${GL_DEFINITIONS})
+
add_library(cycles_util ${SRC} ${SRC_HEADERS})
diff --git a/intern/cycles/util/util_algorithm.h b/intern/cycles/util/util_algorithm.h
index 5865f3f04bb..5c79c00cc98 100644
--- a/intern/cycles/util/util_algorithm.h
+++ b/intern/cycles/util/util_algorithm.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_ALGORITHM_H__
diff --git a/intern/cycles/util/util_aligned_malloc.cpp b/intern/cycles/util/util_aligned_malloc.cpp
new file mode 100644
index 00000000000..b161a55c15e
--- /dev/null
+++ b/intern/cycles/util/util_aligned_malloc.cpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util_aligned_malloc.h"
+#include "util_guarded_allocator.h"
+
+#include <cassert>
+
+/* Adopted from Libmv. */
+
+#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__NetBSD__)
+/* Needed for memalign on Linux and _aligned_alloc on Windows. */
+# ifdef FREE_WINDOWS
+/* Make sure _aligned_malloc is included. */
+# ifdef __MSVCRT_VERSION__
+# undef __MSVCRT_VERSION__
+# endif
+# define __MSVCRT_VERSION__ 0x0700
+# endif /* FREE_WINDOWS */
+# include <malloc.h>
+#else
+/* Apple's malloc is 16-byte aligned, and does not have malloc.h, so include
+ * stdilb instead.
+ */
+# include <cstdlib>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+void *util_aligned_malloc(size_t size, int alignment)
+{
+#ifdef WITH_BLENDER_GUARDEDALLOC
+ return MEM_mallocN_aligned(size, alignment, "Cycles Aligned Alloc");
+#endif
+#ifdef _WIN32
+ return _aligned_malloc(size, alignment);
+#elif defined(__APPLE__)
+ /* On Mac OS X, both the heap and the stack are guaranteed 16-byte aligned so
+ * they work natively with SSE types with no further work.
+ */
+ assert(alignment == 16);
+ return malloc(size);
+#elif defined(__FreeBSD__) || defined(__NetBSD__)
+ void *result;
+ if(posix_memalign(&result, alignment, size)) {
+ /* Non-zero means allocation error
+ * either no allocation or bad alignment value.
+ */
+ return NULL;
+ }
+ return result;
+#else /* This is for Linux. */
+ return memalign(alignment, size);
+#endif
+}
+
+void util_aligned_free(void *ptr)
+{
+#if defined(WITH_BLENDER_GUARDEDALLOC)
+ if(ptr != NULL) {
+ MEM_freeN(ptr);
+ }
+#elif defined(_WIN32)
+ _aligned_free(ptr);
+#else
+ free(ptr);
+#endif
+}
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_dynlib.h b/intern/cycles/util/util_aligned_malloc.h
index b30cf98c1b9..ecc0f28c376 100644
--- a/intern/cycles/util/util_dynlib.h
+++ b/intern/cycles/util/util_aligned_malloc.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2011-2013 Blender Foundation
+ * Copyright 2011-2015 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -11,21 +11,22 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
-#ifndef __UTIL_DYNLIB_H__
-#define __UTIL_DYNLIB_H__
+#ifndef __UTIL_ALIGNED_MALLOC_H__
+#define __UTIL_ALIGNED_MALLOC_H__
+
+#include "util_types.h"
CCL_NAMESPACE_BEGIN
-struct DynamicLibrary;
+/* Allocate block of size bytes at least aligned to a given value. */
+void *util_aligned_malloc(size_t size, int alignment);
-DynamicLibrary *dynamic_library_open(const char *name);
-void *dynamic_library_find(DynamicLibrary *lib, const char *name);
-void dynamic_library_close(DynamicLibrary *lib);
+/* Free memory allocated by util_aligned_malloc. */
+void util_aligned_free(void *ptr);
CCL_NAMESPACE_END
-#endif /* __UTIL_DYNLIB_H__ */
-
+#endif /* __UTIL_ALIGNED_MALLOC_H__ */
diff --git a/intern/cycles/util/util_args.h b/intern/cycles/util/util_args.h
index 55933c895bf..a53fc061758 100644
--- a/intern/cycles/util/util_args.h
+++ b/intern/cycles/util/util_args.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_ARGS_H__
diff --git a/intern/cycles/util/util_atomic.h b/intern/cycles/util/util_atomic.h
new file mode 100644
index 00000000000..1d1e2963348
--- /dev/null
+++ b/intern/cycles/util/util_atomic.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2014 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_ATOMIC_H__
+#define __UTIL_ATOMIC_H__
+
+#ifndef __KERNEL_GPU__
+
+/* Using atomic ops header from Blender. */
+#include "atomic_ops.h"
+
+ATOMIC_INLINE void atomic_update_max_z(size_t *maximum_value, size_t value)
+{
+ size_t prev_value = *maximum_value;
+ while(prev_value < value) {
+ if(atomic_cas_z(maximum_value, prev_value, value) != prev_value) {
+ break;
+ }
+ }
+}
+
+#else /* __KERNEL_GPU__ */
+
+#ifdef __KERNEL_OPENCL__
+
+/* Float atomics implementation credits:
+ * http://suhorukov.blogspot.in/2011/12/opencl-11-atomic-operations-on-floating.html
+ */
+ccl_device_inline void atomic_add_float(volatile ccl_global float *source,
+ const float operand)
+{
+ union {
+ unsigned int int_value;
+ float float_value;
+ } new_value;
+ union {
+ unsigned int int_value;
+ float float_value;
+ } prev_value;
+ do {
+ prev_value.float_value = *source;
+ new_value.float_value = prev_value.float_value + operand;
+ } while(atomic_cmpxchg((volatile ccl_global unsigned int *)source,
+ prev_value.int_value,
+ new_value.int_value) != prev_value.int_value);
+}
+
+#endif /* __KERNEL_OPENCL__ */
+
+#endif /* __KERNEL_GPU__ */
+
+#endif /* __UTIL_ATOMIC_H__ */
diff --git a/intern/cycles/util/util_boundbox.h b/intern/cycles/util/util_boundbox.h
index a71e0399619..cef5adc0a61 100644
--- a/intern/cycles/util/util_boundbox.h
+++ b/intern/cycles/util/util_boundbox.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_BOUNDBOX_H__
diff --git a/intern/cycles/util/util_cache.cpp b/intern/cycles/util/util_cache.cpp
index e20c3a67b75..5eebfb18155 100644
--- a/intern/cycles/util/util_cache.cpp
+++ b/intern/cycles/util/util_cache.cpp
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#include <stdio.h>
@@ -24,12 +24,6 @@
#include "util_path.h"
#include "util_types.h"
-#include <boost/version.hpp>
-
-#if (BOOST_VERSION < 104400)
-# define BOOST_FILESYSTEM_VERSION 2
-#endif
-
#include <boost/filesystem.hpp>
#include <boost/algorithm/string.hpp>
diff --git a/intern/cycles/util/util_cache.h b/intern/cycles/util/util_cache.h
index bfb2877a22b..343fa36817d 100644
--- a/intern/cycles/util/util_cache.h
+++ b/intern/cycles/util/util_cache.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_CACHE_H__
@@ -105,7 +105,7 @@ public:
return false;
}
- if(!size)
+ if((size == 0) || (size % sizeof(T)) != 0)
return false;
data.resize(size/sizeof(T));
diff --git a/intern/cycles/util/util_color.h b/intern/cycles/util/util_color.h
index 53b3d72de67..d3598f84b94 100644
--- a/intern/cycles/util/util_color.h
+++ b/intern/cycles/util/util_color.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_COLOR_H__
diff --git a/intern/cycles/util/util_debug.h b/intern/cycles/util/util_debug.h
index 79fac506b98..6b61a49fcc3 100644
--- a/intern/cycles/util/util_debug.h
+++ b/intern/cycles/util/util_debug.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_DEBUG_H__
diff --git a/intern/cycles/util/util_dynlib.cpp b/intern/cycles/util/util_dynlib.cpp
deleted file mode 100644
index 587cad607c8..00000000000
--- a/intern/cycles/util/util_dynlib.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright 2011-2013 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License
- */
-
-#include <stdlib.h>
-
-#include "util_dynlib.h"
-
-#ifdef _WIN32
-
-#include <windows.h>
-
-CCL_NAMESPACE_BEGIN
-
-struct DynamicLibrary {
- HMODULE module;
-};
-
-DynamicLibrary *dynamic_library_open(const char *name)
-{
- HMODULE module = LoadLibrary(name);
-
- if(!module)
- return NULL;
-
- DynamicLibrary *lib = new DynamicLibrary();
- lib->module = module;
-
- return lib;
-}
-
-void *dynamic_library_find(DynamicLibrary *lib, const char *name)
-{
- return (void*)GetProcAddress(lib->module, name);
-}
-
-void dynamic_library_close(DynamicLibrary *lib)
-{
- FreeLibrary(lib->module);
- delete lib;
-}
-
-CCL_NAMESPACE_END
-
-#else
-
-#include <dlfcn.h>
-
-CCL_NAMESPACE_BEGIN
-
-struct DynamicLibrary {
- void *module;
-};
-
-DynamicLibrary *dynamic_library_open(const char *name)
-{
- void *module = dlopen(name, RTLD_NOW);
-
- if(!module)
- return NULL;
-
- DynamicLibrary *lib = new DynamicLibrary();
- lib->module = module;
-
- return lib;
-}
-
-void *dynamic_library_find(DynamicLibrary *lib, const char *name)
-{
- return dlsym(lib->module, name);
-}
-
-void dynamic_library_close(DynamicLibrary *lib)
-{
- dlclose(lib->module);
- delete lib;
-}
-
-CCL_NAMESPACE_END
-
-#endif
-
diff --git a/intern/cycles/util/util_foreach.h b/intern/cycles/util/util_foreach.h
index df3277fbf37..4f7337107b3 100644
--- a/intern/cycles/util/util_foreach.h
+++ b/intern/cycles/util/util_foreach.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_FOREACH_H__
@@ -19,8 +19,12 @@
/* Use Boost to get nice foreach() loops for STL data structures. */
-#include <boost/foreach.hpp>
-#define foreach BOOST_FOREACH
+#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+# define foreach(x, y) for(x : y)
+#else
+# include <boost/foreach.hpp>
+# define foreach BOOST_FOREACH
+#endif
#endif /* __UTIL_FOREACH_H__ */
diff --git a/intern/cycles/util/util_function.h b/intern/cycles/util/util_function.h
index 6aa014a08a6..6d0f0b444a9 100644
--- a/intern/cycles/util/util_function.h
+++ b/intern/cycles/util/util_function.h
@@ -11,20 +11,39 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_FUNCTION_H__
#define __UTIL_FUNCTION_H__
-#include <boost/bind.hpp>
-#include <boost/function.hpp>
+#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+# include <functional>
+#else
+# include <boost/bind.hpp>
+# include <boost/function.hpp>
+#endif
CCL_NAMESPACE_BEGIN
+#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+# define function_bind std::bind
+# define function_null nullptr
+using std::function;
+using std::placeholders::_1;
+using std::placeholders::_2;
+using std::placeholders::_3;
+using std::placeholders::_4;
+using std::placeholders::_5;
+using std::placeholders::_6;
+using std::placeholders::_7;
+using std::placeholders::_8;
+using std::placeholders::_9;
+#else
using boost::function;
-#define function_bind boost::bind
-
+# define function_bind boost::bind
+# define function_null NULL
+#endif
CCL_NAMESPACE_END
#endif /* __UTIL_FUNCTION_H__ */
diff --git a/intern/cycles/util/util_guarded_allocator.cpp b/intern/cycles/util/util_guarded_allocator.cpp
new file mode 100644
index 00000000000..8de6e254cbf
--- /dev/null
+++ b/intern/cycles/util/util_guarded_allocator.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util_guarded_allocator.h"
+#include "util_stats.h"
+
+CCL_NAMESPACE_BEGIN
+
+static Stats global_stats;
+
+/* Internal API. */
+
+void util_guarded_mem_alloc(size_t n)
+{
+ global_stats.mem_alloc(n);
+}
+
+void util_guarded_mem_free(size_t n)
+{
+ global_stats.mem_free(n);
+}
+
+/* Public API. */
+
+size_t util_guarded_get_mem_used(void)
+{
+ return global_stats.mem_used;
+}
+
+size_t util_guarded_get_mem_peak(void)
+{
+ return global_stats.mem_peak;
+}
+
+
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_guarded_allocator.h b/intern/cycles/util/util_guarded_allocator.h
new file mode 100644
index 00000000000..2df717253e3
--- /dev/null
+++ b/intern/cycles/util/util_guarded_allocator.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2011-2015 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_GUARDED_ALLOCATOR_H__
+#define __UTIL_GUARDED_ALLOCATOR_H__
+
+/* Define this in order to use Blender's guarded allocator to keep
+ * track of allocated buffers, their sizes and peak memory usage.
+ *
+ * This is usually a bad level call, but it's really handy to keep
+ * track of overall peak memory consumption during the scene
+ * synchronization step.
+ */
+#undef WITH_BLENDER_GUARDEDALLOC
+
+#include <memory>
+
+#include "util_types.h"
+
+#ifdef WITH_BLENDER_GUARDEDALLOC
+# include "../../guardedalloc/MEM_guardedalloc.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Internal use only. */
+void util_guarded_mem_alloc(size_t n);
+void util_guarded_mem_free(size_t n);
+
+/* Guarded allocator for the use with STL. */
+template <typename T>
+class GuardedAllocator : public std::allocator<T> {
+public:
+ template<typename _Tp1>
+ struct rebind {
+ typedef GuardedAllocator<_Tp1> other;
+ };
+
+ T *allocate(size_t n, const void *hint = 0)
+ {
+ util_guarded_mem_alloc(n * sizeof(T));
+#ifdef WITH_BLENDER_GUARDEDALLOC
+ (void)hint;
+ return (T*)MEM_mallocN_aligned(n * sizeof(T), 16, "Cycles Alloc");
+#else
+ return std::allocator<T>::allocate(n, hint);
+#endif
+ }
+
+ void deallocate(T *p, size_t n)
+ {
+ util_guarded_mem_free(n * sizeof(T));
+#ifdef WITH_BLENDER_GUARDEDALLOC
+ MEM_freeN((void*)p);
+#else
+ std::allocator<T>::deallocate(p, n);
+#endif
+ }
+
+ GuardedAllocator() : std::allocator<T>() { }
+ GuardedAllocator(const GuardedAllocator &a) : std::allocator<T>(a) { }
+ template <class U>
+ GuardedAllocator(const GuardedAllocator<U> &a) : std::allocator<T>(a) { }
+ ~GuardedAllocator() { }
+};
+
+/* Get memory usage and peak from the guarded STL allocator. */
+size_t util_guarded_get_mem_used(void);
+size_t util_guarded_get_mem_peak(void);
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_GUARDED_ALLOCATOR_H__ */
diff --git a/intern/cycles/util/util_half.h b/intern/cycles/util/util_half.h
index 397133618be..f4bac9888a5 100644
--- a/intern/cycles/util/util_half.h
+++ b/intern/cycles/util/util_half.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_HALF_H__
@@ -56,7 +56,7 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
* assumes no negative, no nan, no inf, and sets denormal to 0 */
union { uint i; float f; } in;
float fscale = f[i] * scale;
- in.f = (fscale > 0.0f)? ((fscale < 65500.0f)? fscale: 65500.0f): 0.0f;
+ in.f = (fscale > 0.0f)? ((fscale < 65504.0f)? fscale: 65504.0f): 0.0f;
int x = in.i;
int absolute = x & 0x7FFFFFFF;
@@ -68,20 +68,20 @@ ccl_device_inline void float4_store_half(half *h, float4 f, float scale)
}
#else
/* same as above with SSE */
- const ssef mm_scale = ssef(scale);
- const ssei mm_38800000 = ssei(0x38800000);
- const ssei mm_7FFF = ssei(0x7FFF);
- const ssei mm_7FFFFFFF = ssei(0x7FFFFFFF);
- const ssei mm_C8000000 = ssei(0xC8000000);
-
- ssef mm_fscale = load4f(f) * mm_scale;
- ssei x = cast(min(max(mm_fscale, ssef(0.0f)), ssef(65500.0f)));
- ssei absolute = x & mm_7FFFFFFF;
- ssei Z = absolute + mm_C8000000;
- ssei result = andnot(absolute < mm_38800000, Z);
- ssei rh = (result >> 13) & mm_7FFF;
-
- _mm_storel_pi((__m64*)h, _mm_castsi128_ps(_mm_packs_epi32(rh, rh)));
+ ssef fscale = load4f(f) * scale;
+ ssef x = min(max(fscale, 0.0f), 65504.0f);
+
+#ifdef __KERNEL_AVX2__
+ ssei rpack = _mm_cvtps_ph(x, 0);
+#else
+ ssei absolute = cast(x) & 0x7FFFFFFF;
+ ssei Z = absolute + 0xC8000000;
+ ssei result = andnot(absolute < 0x38800000, Z);
+ ssei rshift = (result >> 13) & 0x7FFF;
+ ssei rpack = _mm_packs_epi32(rshift, rshift);
+#endif
+
+ _mm_storel_pi((__m64*)h, _mm_castsi128_ps(rpack));
#endif
}
diff --git a/intern/cycles/util/util_hash.h b/intern/cycles/util/util_hash.h
index edd2448efa4..3ff2802b46d 100644
--- a/intern/cycles/util/util_hash.h
+++ b/intern/cycles/util/util_hash.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_HASH_H__
@@ -53,7 +53,7 @@ static inline uint hash_string(const char *str)
{
uint i = 0, c;
- while ((c = *str++))
+ while((c = *str++))
i = i * 37 + c;
return i;
diff --git a/intern/cycles/util/util_image.h b/intern/cycles/util/util_image.h
index 4b8140e7a7a..bb8a31c6fec 100644
--- a/intern/cycles/util/util_image.h
+++ b/intern/cycles/util/util_image.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_IMAGE_H__
diff --git a/intern/cycles/util/util_list.h b/intern/cycles/util/util_list.h
index 2aa0b7381e3..6cb27e6defe 100644
--- a/intern/cycles/util/util_list.h
+++ b/intern/cycles/util/util_list.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_LIST_H__
diff --git a/intern/cycles/util/util_logging.cpp b/intern/cycles/util/util_logging.cpp
index 0722f16cf45..03041723e15 100644
--- a/intern/cycles/util/util_logging.cpp
+++ b/intern/cycles/util/util_logging.cpp
@@ -11,15 +11,63 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#include <util_logging.h>
#include "util_math.h"
+#include <stdio.h>
+#ifdef _MSC_VER
+# define snprintf _snprintf
+#endif
+
CCL_NAMESPACE_BEGIN
+void util_logging_init(const char *argv0)
+{
+#ifdef WITH_CYCLES_LOGGING
+ using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption;
+
+ /* Make it so FATAL messages are always print into console. */
+ char severity_fatal[32];
+ snprintf(severity_fatal, sizeof(severity_fatal), "%d",
+ google::GLOG_FATAL);
+
+ google::InitGoogleLogging(argv0);
+ SetCommandLineOption("logtostderr", "1");
+ SetCommandLineOption("v", "0");
+ SetCommandLineOption("stderrthreshold", severity_fatal);
+ SetCommandLineOption("minloglevel", severity_fatal);
+#else
+ (void) argv0;
+#endif
+}
+
+void util_logging_start(void)
+{
+#ifdef WITH_CYCLES_LOGGING
+ using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption;
+ SetCommandLineOption("logtostderr", "1");
+ SetCommandLineOption("v", "2");
+ SetCommandLineOption("stderrthreshold", "1");
+ SetCommandLineOption("minloglevel", "0");
+#endif
+}
+
+void util_logging_verbosity_set(int verbosity)
+{
+#ifdef WITH_CYCLES_LOGGING
+ using CYCLES_GFLAGS_NAMESPACE::SetCommandLineOption;
+ char val[10];
+ snprintf(val, sizeof(val), "%d", verbosity);
+ SetCommandLineOption("v", val);
+#else
+ (void) verbosity;
+#endif
+}
+
std::ostream& operator <<(std::ostream &os,
const float3 &value)
{
diff --git a/intern/cycles/util/util_logging.h b/intern/cycles/util/util_logging.h
index 991789e7460..7fc42ac355a 100644
--- a/intern/cycles/util/util_logging.h
+++ b/intern/cycles/util/util_logging.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_LOGGING_H__
@@ -43,7 +43,11 @@ public:
#endif
-class float3;
+struct float3;
+
+void util_logging_init(const char *argv0);
+void util_logging_start(void);
+void util_logging_verbosity_set(int verbosity);
std::ostream& operator <<(std::ostream &os,
const float3 &value);
diff --git a/intern/cycles/util/util_map.h b/intern/cycles/util/util_map.h
index 77500e4712d..46c2885f8b0 100644
--- a/intern/cycles/util/util_map.h
+++ b/intern/cycles/util/util_map.h
@@ -11,20 +11,45 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_MAP_H__
#define __UTIL_MAP_H__
#include <map>
-#include <boost/tr1/unordered_map.hpp>
+
+#if defined(CYCLES_TR1_UNORDERED_MAP)
+# include <tr1/unordered_map>
+#endif
+
+#if defined(CYCLES_STD_UNORDERED_MAP) || defined(CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE)
+# include <unordered_map>
+#endif
+
+#if !defined(CYCLES_NO_UNORDERED_MAP) && !defined(CYCLES_TR1_UNORDERED_MAP) && \
+ !defined(CYCLES_STD_UNORDERED_MAP) && !defined(CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE) // NOLINT
+# error One of: CYCLES_NO_UNORDERED_MAP, CYCLES_TR1_UNORDERED_MAP,\
+ CYCLES_STD_UNORDERED_MAP, CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE must be defined! // NOLINT
+#endif
+
CCL_NAMESPACE_BEGIN
using std::map;
using std::pair;
+
+#if defined(CYCLES_NO_UNORDERED_MAP)
+typedef std::map unordered_map;
+#endif
+
+#if defined(CYCLES_TR1_UNORDERED_MAP) || defined(CYCLES_STD_UNORDERED_MAP_IN_TR1_NAMESPACE)
using std::tr1::unordered_map;
+#endif
+
+#if defined(CYCLES_STD_UNORDERED_MAP)
+using std::unordered_map;
+#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_math.h b/intern/cycles/util/util_math.h
index c332e1709db..7d6dfd34e0e 100644
--- a/intern/cycles/util/util_math.h
+++ b/intern/cycles/util/util_math.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_MATH_H__
@@ -71,6 +71,13 @@ CCL_NAMESPACE_BEGIN
#define M_SQRT2_F ((float)1.41421356237309504880) /* sqrt(2) */
#endif
+#ifndef M_LN2_F
+#define M_LN2_F ((float)0.6931471805599453) /* ln(2) */
+#endif
+
+#ifndef M_LN10_F
+#define M_LN10_F ((float)2.3025850929940457) /* ln(10) */
+#endif
/* Scalar */
@@ -124,6 +131,24 @@ ccl_device_inline double min(double a, double b)
return (a < b)? a: b;
}
+/* These 2 guys are templated for usage with registers data.
+ *
+ * NOTE: Since this is CPU-only functions it is ok to use references here.
+ * But for other devices we'll need to be careful about this.
+ */
+
+template<typename T>
+ccl_device_inline T min4(const T& a, const T& b, const T& c, const T& d)
+{
+ return min(min(a,b),min(c,d));
+}
+
+template<typename T>
+ccl_device_inline T max4(const T& a, const T& b, const T& c, const T& d)
+{
+ return max(max(a,b),max(c,d));
+}
+
#endif
ccl_device_inline float min4(float a, float b, float c, float d)
@@ -150,6 +175,15 @@ ccl_device_inline float clamp(float a, float mn, float mx)
#endif
+#ifndef __KERNEL_CUDA__
+
+ccl_device_inline float saturate(float a)
+{
+ return clamp(a, 0.0f, 1.0f);
+}
+
+#endif
+
ccl_device_inline int float_to_int(float f)
{
return (int)f;
@@ -314,6 +348,12 @@ ccl_device_inline float2 normalize_len(const float2 a, float *t)
return a/(*t);
}
+ccl_device_inline float2 safe_normalize(const float2 a)
+{
+ float t = len(a);
+ return (t)? a/t: a;
+}
+
ccl_device_inline bool operator==(const float2 a, const float2 b)
{
return (a.x == b.x && a.y == b.y);
@@ -510,6 +550,12 @@ ccl_device_inline float3 normalize_len(const float3 a, float *t)
return a/(*t);
}
+ccl_device_inline float3 safe_normalize(const float3 a)
+{
+ float t = len(a);
+ return (t)? a/t: a;
+}
+
#ifndef __KERNEL_OPENCL__
ccl_device_inline bool operator==(const float3 a, const float3 b)
@@ -817,6 +863,12 @@ ccl_device_inline float4 normalize(const float4 a)
return a/len(a);
}
+ccl_device_inline float4 safe_normalize(const float4 a)
+{
+ float t = len(a);
+ return (t)? a/t: a;
+}
+
ccl_device_inline float4 min(float4 a, float4 b)
{
#ifdef __KERNEL_SSE__
@@ -1395,10 +1447,9 @@ ccl_device bool ray_triangle_intersect_uv(
return true;
}
-ccl_device bool ray_quad_intersect(
- float3 ray_P, float3 ray_D, float ray_t,
- float3 quad_P, float3 quad_u, float3 quad_v,
- float3 *isect_P, float *isect_t)
+ccl_device bool ray_quad_intersect(float3 ray_P, float3 ray_D, float ray_t,
+ float3 quad_P, float3 quad_u, float3 quad_v,
+ float3 *isect_P, float *isect_t)
{
float3 v0 = quad_P - quad_u*0.5f - quad_v*0.5f;
float3 v1 = quad_P + quad_u*0.5f - quad_v*0.5f;
@@ -1414,23 +1465,52 @@ ccl_device bool ray_quad_intersect(
}
/* projections */
-ccl_device bool map_to_sphere(float *r_u, float *r_v,
- const float x, const float y, const float z)
+ccl_device_inline float2 map_to_tube(const float3 co)
{
- float len = sqrtf(x * x + y * y + z * z);
+ float len, u, v;
+ len = sqrtf(co.x * co.x + co.y * co.y);
if(len > 0.0f) {
- if(UNLIKELY(x == 0.0f && y == 0.0f)) {
- *r_u = 0.0f; /* othwise domain error */
+ u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f;
+ v = (co.z + 1.0f) * 0.5f;
+ }
+ else {
+ u = v = 0.0f;
+ }
+ return make_float2(u, v);
+}
+
+ccl_device_inline float2 map_to_sphere(const float3 co)
+{
+ float l = len(co);
+ float u, v;
+ if(l > 0.0f) {
+ if(UNLIKELY(co.x == 0.0f && co.y == 0.0f)) {
+ u = 0.0f; /* othwise domain error */
}
else {
- *r_u = (1.0f - atan2f(x, y) / M_PI_F) / 2.0f;
+ u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f;
}
- *r_v = 1.0f - safe_acosf(z / len) / M_PI_F;
- return true;
+ v = 1.0f - safe_acosf(co.z / l) / M_PI_F;
}
else {
- *r_v = *r_u = 0.0f; /* to avoid un-initialized variables */
- return false;
+ u = v = 0.0f;
+ }
+ return make_float2(u, v);
+}
+
+ccl_device_inline int util_max_axis(float3 vec)
+{
+ if(vec.x > vec.y) {
+ if(vec.x > vec.z)
+ return 0;
+ else
+ return 2;
+ }
+ else {
+ if(vec.y > vec.z)
+ return 1;
+ else
+ return 2;
}
}
diff --git a/intern/cycles/util/util_math_fast.h b/intern/cycles/util/util_math_fast.h
new file mode 100644
index 00000000000..c1a1be603f4
--- /dev/null
+++ b/intern/cycles/util/util_math_fast.h
@@ -0,0 +1,611 @@
+/*
+ * Adapted from OpenImageIO library with this license:
+ *
+ * Copyright 2008-2014 Larry Gritz and the other authors and contributors.
+ * All Rights Reserved.
+
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the software's owners nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * (This is the Modified BSD License)
+ *
+ * A few bits here are based upon code from NVIDIA that was also released
+ * under the same modified BSD license, and marked as:
+ * Copyright 2004 NVIDIA Corporation. All Rights Reserved.
+ *
+ * Some parts of this file were first open-sourced in Open Shading Language,
+ * then later moved here. The original copyright notice was:
+ * Copyright (c) 2009-2014 Sony Pictures Imageworks Inc., et al.
+ *
+ * Many of the math functions were copied from or inspired by other
+ * public domain sources or open source packages with compatible licenses.
+ * The individual functions give references were applicable.
+ */
+
+#ifndef __UTIL_FAST_MATH__
+#define __UTIL_FAST_MATH__
+
+CCL_NAMESPACE_BEGIN
+
+/* TODO(sergey): Make sure it does not conflict with SSE intrinsics. */
+ccl_device_inline float madd(const float a, const float b, const float c)
+{
+ /* NOTE: In the future we may want to explicitly ask for a fused
+ * multiply-add in a specialized version for float.
+ *
+ * NOTE: GCC/ICC will turn this (for float) into a FMA unless
+ * explicitly asked not to, clang seems to leave the code alone.
+ */
+ return a * b + c;
+}
+
+/*
+ * FAST & APPROXIMATE MATH
+ *
+ * The functions named "fast_*" provide a set of replacements to libm that
+ * are much faster at the expense of some accuracy and robust handling of
+ * extreme values. One design goal for these approximation was to avoid
+ * branches as much as possible and operate on single precision values only
+ * so that SIMD versions should be straightforward ports We also try to
+ * implement "safe" semantics (ie: clamp to valid range where possible)
+ * natively since wrapping these inline calls in another layer would be
+ * wasteful.
+ *
+ * Some functions are fast_safe_*, which is both a faster approximation as
+ * well as clamped input domain to ensure no NaN, Inf, or divide by zero.
+ */
+
+/* Round to nearest integer, returning as an int. */
+ccl_device_inline int fast_rint(float x)
+{
+ /* used by sin/cos/tan range reduction. */
+#ifdef __KERNEL_SSE4__
+ /* Single roundps instruction on SSE4.1+ (for gcc/clang at least). */
+ return float_to_int(rintf(x));
+#else
+ /* emulate rounding by adding/substracting 0.5. */
+ return float_to_int(x + copysignf(0.5f, x));
+#endif
+}
+
+ccl_device float fast_sinf(float x)
+{
+ /* Very accurate argument reduction from SLEEF,
+ * starts failing around x=262000
+ *
+ * Results on: [-2pi,2pi].
+ *
+ * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp,
+ * 1.19209e-07 max error
+ */
+ int q = fast_rint(x * M_1_PI_F);
+ float qf = q;
+ x = madd(qf, -0.78515625f*4, x);
+ x = madd(qf, -0.00024187564849853515625f*4, x);
+ x = madd(qf, -3.7747668102383613586e-08f*4, x);
+ x = madd(qf, -1.2816720341285448015e-12f*4, x);
+ x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */
+ float s = x * x;
+ if((q & 1) != 0) x = -x;
+ /* This polynomial approximation has very low error on [-pi/2,+pi/2]
+ * 1.19209e-07 max error in total over [-2pi,+2pi]. */
+ float u = 2.6083159809786593541503e-06f;
+ u = madd(u, s, -0.0001981069071916863322258f);
+ u = madd(u, s, +0.00833307858556509017944336f);
+ u = madd(u, s, -0.166666597127914428710938f);
+ u = madd(s, u * x, x);
+ /* For large x, the argument reduction can fail and the polynomial can be
+ * evaluated with arguments outside the valid internal. Just clamp the bad
+ * values away (setting to 0.0f means no branches need to be generated). */
+ if(fabsf(u) > 1.0f) {
+ u = 0.0f;
+ }
+ return u;
+}
+
+ccl_device float fast_cosf(float x)
+{
+ /* Same argument reduction as fast_sinf(). */
+ int q = fast_rint(x * M_1_PI_F);
+ float qf = q;
+ x = madd(qf, -0.78515625f*4, x);
+ x = madd(qf, -0.00024187564849853515625f*4, x);
+ x = madd(qf, -3.7747668102383613586e-08f*4, x);
+ x = madd(qf, -1.2816720341285448015e-12f*4, x);
+ x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. */
+ float s = x * x;
+ /* Polynomial from SLEEF's sincosf, max error is
+ * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */
+ float u = -2.71811842367242206819355e-07f;
+ u = madd(u, s, +2.47990446951007470488548e-05f);
+ u = madd(u, s, -0.00138888787478208541870117f);
+ u = madd(u, s, +0.0416666641831398010253906f);
+ u = madd(u, s, -0.5f);
+ u = madd(u, s, +1.0f);
+ if((q & 1) != 0) {
+ u = -u;
+ }
+ if(fabsf(u) > 1.0f) {
+ u = 0.0f;
+ }
+ return u;
+}
+
+ccl_device void fast_sincosf(float x, float* sine, float* cosine)
+{
+ /* Same argument reduction as fast_sin. */
+ int q = fast_rint(x * M_1_PI_F);
+ float qf = q;
+ x = madd(qf, -0.78515625f*4, x);
+ x = madd(qf, -0.00024187564849853515625f*4, x);
+ x = madd(qf, -3.7747668102383613586e-08f*4, x);
+ x = madd(qf, -1.2816720341285448015e-12f*4, x);
+ x = M_PI_2_F - (M_PI_2_F - x); // crush denormals
+ float s = x * x;
+ /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. */
+ if((q & 1) != 0) {
+ x = -x;
+ }
+ float su = 2.6083159809786593541503e-06f;
+ su = madd(su, s, -0.0001981069071916863322258f);
+ su = madd(su, s, +0.00833307858556509017944336f);
+ su = madd(su, s, -0.166666597127914428710938f);
+ su = madd(s, su * x, x);
+ float cu = -2.71811842367242206819355e-07f;
+ cu = madd(cu, s, +2.47990446951007470488548e-05f);
+ cu = madd(cu, s, -0.00138888787478208541870117f);
+ cu = madd(cu, s, +0.0416666641831398010253906f);
+ cu = madd(cu, s, -0.5f);
+ cu = madd(cu, s, +1.0f);
+ if((q & 1) != 0) {
+ cu = -cu;
+ }
+ if(fabsf(su) > 1.0f) {
+ su = 0.0f;
+ }
+ if(fabsf(cu) > 1.0f) {
+ cu = 0.0f;
+ }
+ *sine = su;
+ *cosine = cu;
+}
+
+/* NOTE: this approximation is only valid on [-8192.0,+8192.0], it starts
+ * becoming really poor outside of this range because the reciprocal amplifies
+ * errors.
+ */
+ccl_device float fast_tanf(float x)
+{
+ /* Derived from SLEEF implementation.
+ *
+ * Note that we cannot apply the "denormal crush" trick everywhere because
+ * we sometimes need to take the reciprocal of the polynomial
+ */
+ int q = fast_rint(x * 2.0f * M_1_PI_F);
+ float qf = q;
+ x = madd(qf, -0.78515625f*2, x);
+ x = madd(qf, -0.00024187564849853515625f*2, x);
+ x = madd(qf, -3.7747668102383613586e-08f*2, x);
+ x = madd(qf, -1.2816720341285448015e-12f*2, x);
+ if((q & 1) == 0) {
+ /* Crush denormals (only if we aren't inverting the result later). */
+ x = M_PI_4_F - (M_PI_4_F - x);
+ }
+ float s = x * x;
+ float u = 0.00927245803177356719970703f;
+ u = madd(u, s, 0.00331984995864331722259521f);
+ u = madd(u, s, 0.0242998078465461730957031f);
+ u = madd(u, s, 0.0534495301544666290283203f);
+ u = madd(u, s, 0.133383005857467651367188f);
+ u = madd(u, s, 0.333331853151321411132812f);
+ u = madd(s, u * x, x);
+ if((q & 1) != 0) {
+ u = -1.0f / u;
+ }
+ return u;
+}
+
+/* Fast, approximate sin(x*M_PI) with maximum absolute error of 0.000918954611.
+ *
+ * Adapted from http://devmaster.net/posts/9648/fast-and-accurate-sine-cosine#comment-76773
+ */
+ccl_device float fast_sinpif(float x)
+{
+ /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */
+ const float z = x - ((x + 25165824.0f) - 25165824.0f);
+ const float y = z - z * fabsf(z);
+ const float Q = 3.10396624f;
+ const float P = 3.584135056f; /* P = 16-4*Q */
+ return y * (Q + P * fabsf(y));
+
+ /* The original article used used inferior constants for Q and P and
+ * so had max error 1.091e-3.
+ *
+ * The optimal value for Q was determined by exhaustive search, minimizing
+ * the absolute numerical error relative to float(std::sin(double(phi*M_PI)))
+ * over the interval [0,2] (which is where most of the invocations happen).
+ *
+ * The basic idea of this approximation starts with the coarse approximation:
+ * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x))
+ *
+ * This approximation always _over_ estimates the target. On the otherhand,
+ * the curve:
+ * sin(pi*x) ~= f(x) * abs(f(x)) / 4
+ *
+ * always lies _under_ the target. Thus we can simply numerically search for
+ * the optimal constant to LERP these curves into a more precise
+ * approximation.
+ *
+ * After folding the constants together and simplifying the resulting math,
+ * we end up with the compact implementation above.
+ *
+ * NOTE: this function actually computes sin(x * pi) which avoids one or two
+ * mults in many cases and guarantees exact values at integer periods.
+ */
+}
+
+/* Fast approximate cos(x*M_PI) with ~0.1% absolute error. */
+ccl_device_inline float fast_cospif(float x)
+{
+ return fast_sinpif(x+0.5f);
+}
+
+ccl_device float fast_acosf(float x)
+{
+ const float f = fabsf(x);
+ /* clamp and crush denormals. */
+ const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
+ /* Based on http://www.pouet.net/topic.php?which=9132&page=2
+ * 85% accurate (ulp 0)
+ * Examined 2130706434 values of acos: 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush"
+ * Examined 2130706434 values of acos: 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush"
+ */
+ const float a = sqrtf(1.0f - m) *
+ (1.5707963267f + m * (-0.213300989f + m *
+ (0.077980478f + m * -0.02164095f)));
+ return x < 0 ? M_PI_F - a : a;
+}
+
+ccl_device float fast_asinf(float x)
+{
+ /* Based on acosf approximation above.
+ * Max error is 4.51133e-05 (ulps are higher because we are consistently off
+ * by a little amount).
+ */
+ const float f = fabsf(x);
+ /* Clamp and crush denormals. */
+ const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
+ const float a = M_PI_2_F - sqrtf(1.0f - m) *
+ (1.5707963267f + m * (-0.213300989f + m *
+ (0.077980478f + m * -0.02164095f)));
+ return copysignf(a, x);
+}
+
+ccl_device float fast_atanf(float x)
+{
+ const float a = fabsf(x);
+ const float k = a > 1.0f ? 1 / a : a;
+ const float s = 1.0f - (1.0f - k); /* Crush denormals. */
+ const float t = s * s;
+ /* http://mathforum.org/library/drmath/view/62672.html
+ * Examined 4278190080 values of atan: 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals)
+ * Examined 4278190080 values of atan: 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals)
+ */
+ float r = s * madd(0.43157974f, t, 1.0f) /
+ madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f);
+ if(a > 1.0f) {
+ /* TODO(sergey): Is it M_PI_2_F? */
+ r = 1.570796326794896557998982f - r;
+ }
+ return copysignf(r, x);
+}
+
+ccl_device float fast_atan2f(float y, float x)
+{
+ /* Based on atan approximation above.
+ *
+ * The special cases around 0 and infinity were tested explicitly.
+ *
+ * The only case not handled correctly is x=NaN,y=0 which returns 0 instead
+ * of nan.
+ */
+ const float a = fabsf(x);
+ const float b = fabsf(y);
+
+ const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a));
+ const float s = 1.0f - (1.0f - k); /* Crush denormals */
+ const float t = s * s;
+
+ float r = s * madd(0.43157974f, t, 1.0f) /
+ madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f);
+
+ if(b > a) {
+ /* Account for arg reduction. */
+ /* TODO(sergey): Is it M_PI_2_F? */
+ r = 1.570796326794896557998982f - r;
+ }
+ /* Test sign bit of x. */
+ if(__float_as_uint(x) & 0x80000000u) {
+ r = M_PI_F - r;
+ }
+ return copysignf(r, y);
+}
+
+/* Based on:
+ *
+ * https://github.com/LiraNuna/glsl-sse2/blob/master/source/vec4.h
+ *
+ */
+ccl_device float fast_log2f(float x)
+{
+ /* NOTE: clamp to avoid special cases and make result "safe" from large
+ * negative values/nans. */
+ x = clamp(x, FLT_MIN, FLT_MAX);
+ unsigned bits = __float_as_uint(x);
+ int exponent = (int)(bits >> 23) - 127;
+ float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f;
+ /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]:
+ * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error.
+ * ulp histogram:
+ * 0 = 97.46%
+ * 1 = 2.29%
+ * 2 = 0.11%
+ */
+ float f2 = f * f;
+ float f4 = f2 * f2;
+ float hi = madd(f, -0.00931049621349f, 0.05206469089414f);
+ float lo = madd(f, 0.47868480909345f, -0.72116591947498f);
+ hi = madd(f, hi, -0.13753123777116f);
+ hi = madd(f, hi, 0.24187369696082f);
+ hi = madd(f, hi, -0.34730547155299f);
+ lo = madd(f, lo, 1.442689881667200f);
+ return ((f4 * hi) + (f * lo)) + exponent;
+}
+
+ccl_device_inline float fast_logf(float x)
+{
+ /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]:
+ * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error.
+ */
+ return fast_log2f(x) * M_LN2_F;
+}
+
+ccl_device_inline float fast_log10(float x)
+{
+ /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]:
+ * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error.
+ */
+ return fast_log2f(x) * M_LN2_F / M_LN10_F;
+}
+
+ccl_device float fast_logb(float x)
+{
+ /* Don't bother with denormals. */
+ x = fabsf(x);
+ x = clamp(x, FLT_MIN, FLT_MAX);
+ unsigned bits = __float_as_uint(x);
+ return (int)(bits >> 23) - 127;
+}
+
+ccl_device float fast_exp2f(float x)
+{
+ /* Clamp to safe range for final addition. */
+ x = clamp(x, -126.0f, 126.0f);
+ /* Range reduction. */
+ int m = (int)x; x -= m;
+ x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */
+ /* 5th degree polynomial generated with sollya
+ * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff,
+ * 232 max ulp.
+ *
+ * ulp histogram:
+ * 0 = 87.81%
+ * 1 = 4.18%
+ */
+ float r = 1.33336498402e-3f;
+ r = madd(x, r, 9.810352697968e-3f);
+ r = madd(x, r, 5.551834031939e-2f);
+ r = madd(x, r, 0.2401793301105f);
+ r = madd(x, r, 0.693144857883f);
+ r = madd(x, r, 1.0f);
+ /* Multiply by 2 ^ m by adding in the exponent. */
+ /* NOTE: left-shift of negative number is undefined behavior. */
+ return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23));
+}
+
+ccl_device_inline float fast_expf(float x)
+{
+ /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]:
+ * 2.6666452 avg ulp diff, 230 max ulp.
+ */
+ return fast_exp2f(x / M_LN2_F);
+}
+
+ccl_device_inline float fast_exp10(float x)
+{
+ /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]:
+ * 2.71732409 avg ulp diff, 232 max ulp.
+ */
+ return fast_exp2f(x * M_LN10_F / M_LN2_F);
+}
+
+ccl_device_inline float fast_expm1f(float x)
+{
+ if(fabsf(x) < 1e-5f) {
+ x = 1.0f - (1.0f - x); /* Crush denormals. */
+ return madd(0.5f, x * x, x);
+ }
+ else {
+ return fast_expf(x) - 1.0f;
+ }
+}
+
+ccl_device float fast_sinhf(float x)
+{
+ float a = fabsf(x);
+ if(a > 1.0f) {
+ /* Examined 53389559 values of sinh on [1,87.3300018]:
+ * 33.6886442 avg ulp diff, 178 max ulp. */
+ float e = fast_expf(a);
+ return copysignf(0.5f * e - 0.5f / e, x);
+ }
+ else {
+ a = 1.0f - (1.0f - a); /* Crush denorms. */
+ float a2 = a * a;
+ /* Degree 7 polynomial generated with sollya. */
+ /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */
+ float r = 2.03945513931e-4f;
+ r = madd(r, a2, 8.32990277558e-3f);
+ r = madd(r, a2, 0.1666673421859f);
+ r = madd(r * a, a2, a);
+ return copysignf(r, x);
+ }
+}
+
+ccl_device_inline float fast_coshf(float x)
+{
+ /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]:
+ * 1.78256726 avg ulp diff, 178 max ulp.
+ */
+ float e = fast_expf(fabsf(x));
+ return 0.5f * e + 0.5f / e;
+}
+
+ccl_device_inline float fast_tanhf(float x)
+{
+ /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]:
+ * 3.12924e-06 max error.
+ */
+ /* NOTE: ulp error is high because of sub-optimal handling around the origin. */
+ float e = fast_expf(2.0f * fabsf(x));
+ return copysignf(1.0f - 2.0f / (1.0f + e), x);
+}
+
+ccl_device float fast_safe_powf(float x, float y)
+{
+ if(y == 0) return 1.0f; /* x^1=1 */
+ if(x == 0) return 0.0f; /* 0^y=0 */
+ float sign = 1.0f;
+ if(x < 0.0f) {
+ /* if x is negative, only deal with integer powers
+ * powf returns NaN for non-integers, we will return 0 instead.
+ */
+ int ybits = __float_as_int(y) & 0x7fffffff;
+ if(ybits >= 0x4b800000) {
+ // always even int, keep positive
+ }
+ else if(ybits >= 0x3f800000) {
+ /* Bigger than 1, check. */
+ int k = (ybits >> 23) - 127; /* Get exponent. */
+ int j = ybits >> (23 - k); /* Shift out possible fractional bits. */
+ if((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */
+ /* +1 for even, -1 for odd. */
+ sign = __int_as_float(0x3f800000 | (j << 31));
+ }
+ else {
+ /* Not an integer. */
+ return 0.0f;
+ }
+ }
+ else {
+ /* Not an integer. */
+ return 0.0f;
+ }
+ }
+ return sign * fast_exp2f(y * fast_log2f(fabsf(x)));
+}
+
+/* TODO(sergey): Check speed with our erf functions implementation from
+ * bsdf_microfaset.h.
+ */
+
+ccl_device_inline float fast_erff(float x)
+{
+ /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */
+ /* Abramowitz and Stegun, 7.1.28. */
+ const float a1 = 0.0705230784f;
+ const float a2 = 0.0422820123f;
+ const float a3 = 0.0092705272f;
+ const float a4 = 0.0001520143f;
+ const float a5 = 0.0002765672f;
+ const float a6 = 0.0000430638f;
+ const float a = fabsf(x);
+ const float b = 1.0f - (1.0f - a); /* Crush denormals. */
+ const float r = madd(madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f);
+ const float s = r * r; /* ^2 */
+ const float t = s * s; /* ^4 */
+ const float u = t * t; /* ^8 */
+ const float v = u * u; /* ^16 */
+ return copysignf(1.0f - 1.0f / v, x);
+}
+
+ccl_device_inline float fast_erfcf(float x)
+{
+ /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error.
+ *
+ * ulp histogram:
+ *
+ * 0 = 80.30%
+ */
+ return 1.0f - fast_erff(x);
+}
+
+ccl_device_inline float fast_ierff(float x)
+{
+ /* From: Approximating the erfinv function by Mike Giles. */
+ /* To avoid trouble at the limit, clamp input to 1-eps. */
+ float a = fabsf(x);
+ if(a > 0.99999994f) {
+ a = 0.99999994f;
+ }
+ float w = -fast_logf((1.0f - a) * (1.0f + a)), p;
+ if(w < 5.0f) {
+ w = w - 2.5f;
+ p = 2.81022636e-08f;
+ p = madd(p, w, 3.43273939e-07f);
+ p = madd(p, w, -3.5233877e-06f);
+ p = madd(p, w, -4.39150654e-06f);
+ p = madd(p, w, 0.00021858087f);
+ p = madd(p, w, -0.00125372503f);
+ p = madd(p, w, -0.00417768164f);
+ p = madd(p, w, 0.246640727f);
+ p = madd(p, w, 1.50140941f);
+ }
+ else {
+ w = sqrtf(w) - 3.0f;
+ p = -0.000200214257f;
+ p = madd(p, w, 0.000100950558f);
+ p = madd(p, w, 0.00134934322f);
+ p = madd(p, w, -0.00367342844f);
+ p = madd(p, w, 0.00573950773f);
+ p = madd(p, w, -0.0076224613f);
+ p = madd(p, w, 0.00943887047f);
+ p = madd(p, w, 1.00167406f);
+ p = madd(p, w, 2.83297682f);
+ }
+ return p * x;
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_FAST_MATH__ */
diff --git a/intern/cycles/util/util_md5.cpp b/intern/cycles/util/util_md5.cpp
index add0d18c742..b2a32c45287 100644
--- a/intern/cycles/util/util_md5.cpp
+++ b/intern/cycles/util/util_md5.cpp
@@ -152,8 +152,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/)
* a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */
#define F(x, y, z) (((x) & (y)) | (~(x) & (z)))
#define SET(a, b, c, d, k, s, Ti)\
- t = a + F(b,c,d) + X[k] + Ti;\
- a = ROTATE_LEFT(t, s) + b
+ t = a + F(b,c,d) + X[k] + Ti;\
+ a = ROTATE_LEFT(t, s) + b
/* Do the following 16 operations. */
SET(a, b, c, d, 0, 7, T1);
SET(d, a, b, c, 1, 12, T2);
@@ -178,8 +178,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/)
* a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */
#define G(x, y, z) (((x) & (z)) | ((y) & ~(z)))
#define SET(a, b, c, d, k, s, Ti)\
- t = a + G(b,c,d) + X[k] + Ti;\
- a = ROTATE_LEFT(t, s) + b
+ t = a + G(b,c,d) + X[k] + Ti;\
+ a = ROTATE_LEFT(t, s) + b
/* Do the following 16 operations. */
SET(a, b, c, d, 1, 5, T17);
SET(d, a, b, c, 6, 9, T18);
@@ -230,8 +230,8 @@ void MD5Hash::process(const uint8_t *data /*[64]*/)
* a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */
#define I(x, y, z) ((y) ^ ((x) | ~(z)))
#define SET(a, b, c, d, k, s, Ti)\
- t = a + I(b,c,d) + X[k] + Ti;\
- a = ROTATE_LEFT(t, s) + b
+ t = a + I(b,c,d) + X[k] + Ti;\
+ a = ROTATE_LEFT(t, s) + b
/* Do the following 16 operations. */
SET(a, b, c, d, 0, 6, T49);
SET(d, a, b, c, 7, 10, T50);
diff --git a/intern/cycles/util/util_opengl.h b/intern/cycles/util/util_opengl.h
index 04a3e039c9d..0b5462e0a09 100644
--- a/intern/cycles/util/util_opengl.h
+++ b/intern/cycles/util/util_opengl.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_OPENGL_H__
@@ -20,7 +20,12 @@
/* OpenGL header includes, used everywhere we use OpenGL, to deal with
* platform differences in one central place. */
-#include <GL/glew.h>
+#ifdef WITH_GLEW_MX
+# include "glew-mx.h"
+#else
+# include <GL/glew.h>
+# define mxCreateContext() glewInit()
+# define mxMakeCurrentContext(x) (x)
+#endif
#endif /* __UTIL_OPENGL_H__ */
-
diff --git a/intern/cycles/util/util_optimization.h b/intern/cycles/util/util_optimization.h
index 2feb3d6ab7e..c951c35fc76 100644
--- a/intern/cycles/util/util_optimization.h
+++ b/intern/cycles/util/util_optimization.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_OPTIMIZATION_H__
@@ -102,34 +102,16 @@
#ifdef _MSC_VER
#include <intrin.h>
#else
-
-#ifdef __KERNEL_SSE2__
-#include <xmmintrin.h> /* SSE 1 */
-#include <emmintrin.h> /* SSE 2 */
-#endif
-
-#ifdef __KERNEL_SSE3__
-#include <pmmintrin.h> /* SSE 3 */
-#endif
-
-#ifdef __KERNEL_SSSE3__
-#include <tmmintrin.h> /* SSSE 3 */
-#endif
-
-#ifdef __KERNEL_SSE41__
-#include <smmintrin.h> /* SSE 4.1 */
-#endif
-
-#ifdef __KERNEL_AVX__
-#include <immintrin.h> /* AVX */
-#endif
-
+#include <x86intrin.h>
#endif
#else
/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
* Since we can't avoid including <windows.h>, better only include that */
+#define NOGDI
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
diff --git a/intern/cycles/util/util_param.h b/intern/cycles/util/util_param.h
index ef20ff0fcd2..69bcbf80a78 100644
--- a/intern/cycles/util/util_param.h
+++ b/intern/cycles/util/util_param.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_PARAM_H__
diff --git a/intern/cycles/util/util_path.cpp b/intern/cycles/util/util_path.cpp
index 85d19b6a325..e8f1ec81763 100644
--- a/intern/cycles/util/util_path.cpp
+++ b/intern/cycles/util/util_path.cpp
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#include "util_debug.h"
@@ -25,12 +25,6 @@ OIIO_NAMESPACE_USING
#include <stdio.h>
-#include <boost/version.hpp>
-
-#if (BOOST_VERSION < 104400)
-# define BOOST_FILESYSTEM_VERSION 2
-#endif
-
#include <boost/filesystem.hpp>
#include <boost/algorithm/string.hpp>
@@ -41,21 +35,31 @@ static string cached_user_path = "";
static boost::filesystem::path to_boost(const string& path)
{
-#ifdef _MSC_VER
- std::wstring path_utf16 = Strutil::utf8_to_utf16(path.c_str());
- return boost::filesystem::path(path_utf16.c_str());
-#else
return boost::filesystem::path(path.c_str());
-#endif
}
static string from_boost(const boost::filesystem::path& path)
{
-#ifdef _MSC_VER
- return Strutil::utf16_to_utf8(path.wstring().c_str());
-#else
return path.string().c_str();
-#endif
+}
+
+static char *path_specials(const string& sub)
+{
+ static bool env_init = false;
+ static char *env_shader_path;
+ static char *env_kernel_path;
+ if(!env_init) {
+ env_shader_path = getenv("CYCLES_SHADER_PATH");
+ env_kernel_path = getenv("CYCLES_KERNEL_PATH");
+ env_init = true;
+ }
+ if(env_shader_path != NULL && sub == "shader") {
+ return env_shader_path;
+ }
+ else if(env_shader_path != NULL && sub == "kernel") {
+ return env_kernel_path;
+ }
+ return NULL;
}
void path_init(const string& path, const string& user_path)
@@ -71,6 +75,10 @@ void path_init(const string& path, const string& user_path)
string path_get(const string& sub)
{
+ char *special = path_specials(sub);
+ if(special != NULL)
+ return special;
+
if(cached_path == "")
cached_path = path_dirname(Sysutil::this_program_path());
@@ -87,11 +95,7 @@ string path_user_get(const string& sub)
string path_filename(const string& path)
{
-#if (BOOST_FILESYSTEM_VERSION == 2)
- return to_boost(path).filename();
-#else
return from_boost(to_boost(path).filename());
-#endif
}
string path_dirname(const string& path)
@@ -259,14 +263,7 @@ string path_source_replace_includes(const string& source_, const string& path)
FILE *path_fopen(const string& path, const string& mode)
{
-#ifdef _WIN32
- std::wstring path_utf16 = Strutil::utf8_to_utf16(path);
- std::wstring mode_utf16 = Strutil::utf8_to_utf16(mode);
-
- return _wfopen(path_utf16.c_str(), mode_utf16.c_str());
-#else
return fopen(path.c_str(), mode.c_str());
-#endif
}
void path_cache_clear_except(const string& name, const set<string>& except)
@@ -277,11 +274,7 @@ void path_cache_clear_except(const string& name, const set<string>& except)
boost::filesystem::directory_iterator it(dir), it_end;
for(; it != it_end; it++) {
-#if (BOOST_FILESYSTEM_VERSION == 2)
- string filename = from_boost(it->path().filename());
-#else
string filename = from_boost(it->path().filename().string());
-#endif
if(boost::starts_with(filename, name))
if(except.find(filename) == except.end())
diff --git a/intern/cycles/util/util_path.h b/intern/cycles/util/util_path.h
index fd9ea11740d..b81d71d1c0f 100644
--- a/intern/cycles/util/util_path.h
+++ b/intern/cycles/util/util_path.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_PATH_H__
diff --git a/intern/cycles/util/util_progress.h b/intern/cycles/util/util_progress.h
index e721a3f5047..0b35142ddb3 100644
--- a/intern/cycles/util/util_progress.h
+++ b/intern/cycles/util/util_progress.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_PROGRESS_H__
@@ -38,15 +38,18 @@ public:
sample = 0;
start_time = time_dt();
total_time = 0.0f;
+ render_time = 0.0f;
tile_time = 0.0f;
status = "Initializing";
substatus = "";
sync_status = "";
sync_substatus = "";
- update_cb = NULL;
+ update_cb = function_null;
cancel = false;
cancel_message = "";
- cancel_cb = NULL;
+ error = false;
+ error_message = "";
+ cancel_cb = function_null;
}
Progress(Progress& progress)
@@ -59,7 +62,7 @@ public:
thread_scoped_lock lock(progress.progress_mutex);
progress.get_status(status, substatus);
- progress.get_tile(tile, total_time, tile_time);
+ progress.get_tile(tile, total_time, render_time, tile_time);
sample = progress.get_sample();
@@ -71,7 +74,9 @@ public:
tile = 0;
sample = 0;
start_time = time_dt();
+ render_start_time = time_dt();
total_time = 0.0f;
+ render_time = 0.0f;
tile_time = 0.0f;
status = "Initializing";
substatus = "";
@@ -79,6 +84,8 @@ public:
sync_substatus = "";
cancel = false;
cancel_message = "";
+ error = false;
+ error_message = "";
}
/* cancel */
@@ -103,11 +110,33 @@ public:
return cancel_message;
}
- void set_cancel_callback(boost::function<void(void)> function)
+ void set_cancel_callback(function<void(void)> function)
{
cancel_cb = function;
}
+ /* error */
+ void set_error(const string& error_message_)
+ {
+ thread_scoped_lock lock(progress_mutex);
+ error_message = error_message_;
+ error = true;
+ /* If error happens we also stop rendering. */
+ cancel_message = error_message_;
+ cancel = true;
+ }
+
+ bool get_error()
+ {
+ return error;
+ }
+
+ string get_error_message()
+ {
+ thread_scoped_lock lock(progress_mutex);
+ return error_message;
+ }
+
/* tile and timing information */
void set_start_time(double start_time_)
@@ -117,24 +146,39 @@ public:
start_time = start_time_;
}
+ void set_render_start_time(double render_start_time_)
+ {
+ thread_scoped_lock lock(progress_mutex);
+
+ render_start_time = render_start_time_;
+ }
+
void set_tile(int tile_, double tile_time_)
{
thread_scoped_lock lock(progress_mutex);
tile = tile_;
total_time = time_dt() - start_time;
+ render_time = time_dt() - render_start_time;
tile_time = tile_time_;
}
- void get_tile(int& tile_, double& total_time_, double& tile_time_)
+ void get_tile(int& tile_, double& total_time_, double& render_time_, double& tile_time_)
{
thread_scoped_lock lock(progress_mutex);
tile_ = tile;
total_time_ = (total_time > 0.0)? total_time: 0.0;
+ render_time_ = (render_time > 0.0)? render_time: 0.0;
tile_time_ = tile_time;
}
+ void get_time(double& total_time_, double& render_time_)
+ {
+ total_time_ = (total_time > 0.0)? total_time: 0.0;
+ render_time_ = (render_time > 0.0)? render_time: 0.0;
+ }
+
void reset_sample()
{
thread_scoped_lock lock(progress_mutex);
@@ -169,6 +213,7 @@ public:
status = status_;
substatus = substatus_;
total_time = time_dt() - start_time;
+ render_time = time_dt() - render_start_time;
}
set_update();
@@ -180,6 +225,7 @@ public:
thread_scoped_lock lock(progress_mutex);
substatus = substatus_;
total_time = time_dt() - start_time;
+ render_time = time_dt() - render_start_time;
}
set_update();
@@ -192,6 +238,7 @@ public:
sync_status = status_;
sync_substatus = substatus_;
total_time = time_dt() - start_time;
+ render_time = time_dt() - render_start_time;
}
set_update();
@@ -204,6 +251,7 @@ public:
thread_scoped_lock lock(progress_mutex);
sync_substatus = substatus_;
total_time = time_dt() - start_time;
+ render_time = time_dt() - render_start_time;
}
set_update();
@@ -233,7 +281,7 @@ public:
}
}
- void set_update_callback(boost::function<void(void)> function)
+ void set_update_callback(function<void(void)> function)
{
update_cb = function;
}
@@ -241,14 +289,14 @@ public:
protected:
thread_mutex progress_mutex;
thread_mutex update_mutex;
- boost::function<void(void)> update_cb;
- boost::function<void(void)> cancel_cb;
+ function<void(void)> update_cb;
+ function<void(void)> cancel_cb;
int tile; /* counter for rendered tiles */
int sample; /* counter of rendered samples, global for all tiles */
- double start_time;
- double total_time;
+ double start_time, render_start_time;
+ double total_time, render_time;
double tile_time;
string status;
@@ -259,6 +307,9 @@ protected:
volatile bool cancel;
string cancel_message;
+
+ volatile bool error;
+ string error_message;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_set.h b/intern/cycles/util/util_set.h
index 6078114e714..b3cb8dd8af5 100644
--- a/intern/cycles/util/util_set.h
+++ b/intern/cycles/util/util_set.h
@@ -11,20 +11,26 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_SET_H__
#define __UTIL_SET_H__
#include <set>
-#include <boost/tr1/unordered_set.hpp>
-
+#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+# include <unordered_set>
+#else
+# include <boost/tr1/unordered_set.hpp>
+#endif
CCL_NAMESPACE_BEGIN
using std::set;
+#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+using std::unordered_set;
+#else
using std::tr1::unordered_set;
-
+#endif
CCL_NAMESPACE_END
#endif /* __UTIL_SET_H__ */
diff --git a/intern/cycles/util/util_simd.cpp b/intern/cycles/util/util_simd.cpp
index 0436823e62a..eb9e32800e1 100644
--- a/intern/cycles/util/util_simd.cpp
+++ b/intern/cycles/util/util_simd.cpp
@@ -12,7 +12,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifdef WITH_KERNEL_SSE2
diff --git a/intern/cycles/util/util_simd.h b/intern/cycles/util/util_simd.h
index 39506a6359b..a1c35b7174d 100644
--- a/intern/cycles/util/util_simd.h
+++ b/intern/cycles/util/util_simd.h
@@ -12,7 +12,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_SIMD_TYPES_H__
@@ -58,8 +58,12 @@ __forceinline operator int ( ) const { return std::numeric_limits<
/* Intrinsics Functions */
#if defined(__BMI__) && defined(__GNUC__)
-#define _tzcnt_u32 __tzcnt_u32
-#define _tzcnt_u64 __tzcnt_u64
+# ifndef _tzcnt_u32
+# define _tzcnt_u32 __tzcnt_u32
+# endif
+# ifndef _tzcnt_u64
+# define _tzcnt_u64 __tzcnt_u64
+# endif
#endif
#if defined(__LZCNT__)
@@ -133,7 +137,7 @@ __forceinline int clz(const int x)
#if defined(__KERNEL_AVX2__)
return _lzcnt_u32(x);
#else
- if (UNLIKELY(x == 0)) return 32;
+ if(UNLIKELY(x == 0)) return 32;
return 31 - __bsr(x);
#endif
}
@@ -286,7 +290,7 @@ __forceinline int clz(const int x)
#if defined(__KERNEL_AVX2__)
return _lzcnt_u32(x);
#else
- if (UNLIKELY(x == 0)) return 32;
+ if(UNLIKELY(x == 0)) return 32;
return 31 - __bsr(x);
#endif
}
@@ -358,7 +362,7 @@ __forceinline __m128i _mm_mullo_epi32( __m128i value, __m128i input ) {
char* _r = (char*)(&rvalue + 1);
char* _v = (char*)(& value + 1);
char* _i = (char*)(& input + 1);
- for ( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32*)(_r + i)) = *((int32*)(_v + i))* *((int32*)(_i + i));
+ for( ssize_t i = -16 ; i != 0 ; i += 4 ) *((int32*)(_r + i)) = *((int32*)(_v + i))* *((int32*)(_i + i));
return rvalue;
}
diff --git a/intern/cycles/util/util_sseb.h b/intern/cycles/util/util_sseb.h
index be510256dd3..6e669701f3b 100644
--- a/intern/cycles/util/util_sseb.h
+++ b/intern/cycles/util/util_sseb.h
@@ -12,7 +12,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_SSEB_H__
@@ -119,14 +119,29 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb sh
return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0));
}
+template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a ) {
+ return _mm_movelh_ps(a, a);
+}
+
+template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a ) {
+ return _mm_movehl_ps(a, a);
+}
+
template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a, const sseb& b ) {
return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
}
+template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a, const sseb& b ) {
+ return _mm_movelh_ps(a, b);
+}
+
+template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a, const sseb& b ) {
+ return _mm_movehl_ps(b, a);
+}
+
#if defined(__KERNEL_SSE3__)
template<> __forceinline const sseb shuffle<0, 0, 2, 2>( const sseb& a ) { return _mm_moveldup_ps(a); }
template<> __forceinline const sseb shuffle<1, 1, 3, 3>( const sseb& a ) { return _mm_movehdup_ps(a); }
-template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a ) { return _mm_castpd_ps(_mm_movedup_pd (a)); }
#endif
#if defined(__KERNEL_SSE41__)
@@ -153,6 +168,16 @@ __forceinline bool none ( const sseb& b ) { return _mm_movemask_ps(b) == 0x
__forceinline size_t movemask( const sseb& a ) { return _mm_movemask_ps(a); }
+////////////////////////////////////////////////////////////////////////////////
+/// Debug Functions
+////////////////////////////////////////////////////////////////////////////////
+
+ccl_device_inline void print_sseb(const char *label, const sseb &a)
+{
+ printf("%s: %df %df %df %d\n",
+ label, a[0], a[1], a[2], a[3]);
+}
+
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_ssef.h b/intern/cycles/util/util_ssef.h
index f4236cc616e..e625fa63568 100644
--- a/intern/cycles/util/util_ssef.h
+++ b/intern/cycles/util/util_ssef.h
@@ -12,7 +12,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_SSEF_H__
@@ -119,6 +119,9 @@ __forceinline const ssef operator^(const ssef& a, const ssei& b) { return _mm_xo
__forceinline const ssef operator&(const ssef& a, const ssef& b) { return _mm_and_ps(a.m128,b.m128); }
__forceinline const ssef operator&(const ssef& a, const ssei& b) { return _mm_and_ps(a.m128,_mm_castsi128_ps(b.m128)); }
+__forceinline const ssef operator|(const ssef& a, const ssef& b) { return _mm_or_ps(a.m128,b.m128); }
+__forceinline const ssef operator|(const ssef& a, const ssei& b) { return _mm_or_ps(a.m128,_mm_castsi128_ps(b.m128)); }
+
__forceinline const ssef andnot(const ssef& a, const ssef& b) { return _mm_andnot_ps(a.m128,b.m128); }
__forceinline const ssef min(const ssef& a, const ssef& b) { return _mm_min_ps(a.m128,b.m128); }
@@ -159,8 +162,8 @@ __forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { re
#else
__forceinline const ssef madd (const ssef& a, const ssef& b, const ssef& c) { return a*b+c; }
__forceinline const ssef msub (const ssef& a, const ssef& b, const ssef& c) { return a*b-c; }
-__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return -a*b-c;}
-__forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { return c-a*b; }
+__forceinline const ssef nmadd(const ssef& a, const ssef& b, const ssef& c) { return c-a*b;}
+__forceinline const ssef nmsub(const ssef& a, const ssef& b, const ssef& c) { return -a*b-c; }
#endif
////////////////////////////////////////////////////////////////////////////////
@@ -267,10 +270,30 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssef sh
return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0)));
}
+template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef& a) {
+ return _mm_movelh_ps(a, a);
+}
+
+template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef& a) {
+ return _mm_movehl_ps(a, a);
+}
+
template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssef shuffle(const ssef& a, const ssef& b) {
return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
}
+template<size_t i0> __forceinline const ssef shuffle(const ssef& a, const ssef& b) {
+ return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i0, i0, i0, i0));
+}
+
+template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef& a, const ssef& b) {
+ return _mm_movelh_ps(a, b);
+}
+
+template<> __forceinline const ssef shuffle<2, 3, 2, 3>(const ssef& a, const ssef& b) {
+ return _mm_movehl_ps(b, a);
+}
+
#if defined(__KERNEL_SSSE3__)
__forceinline const ssef shuffle8(const ssef& a, const ssei& shuf) {
return _mm_castsi128_ps(_mm_shuffle_epi8(_mm_castps_si128(a), shuf));
@@ -280,18 +303,19 @@ __forceinline const ssef shuffle8(const ssef& a, const ssei& shuf) {
#if defined(__KERNEL_SSE3__)
template<> __forceinline const ssef shuffle<0, 0, 2, 2>(const ssef& b) { return _mm_moveldup_ps(b); }
template<> __forceinline const ssef shuffle<1, 1, 3, 3>(const ssef& b) { return _mm_movehdup_ps(b); }
-template<> __forceinline const ssef shuffle<0, 1, 0, 1>(const ssef& b) { return _mm_castpd_ps(_mm_movedup_pd(_mm_castps_pd(b))); }
#endif
template<size_t i0> __forceinline const ssef shuffle(const ssef& b) {
return shuffle<i0,i0,i0,i0>(b);
}
-#if defined(__KERNEL_SSE41__) && !defined(__GNUC__)
-template<size_t i> __forceinline float extract (const ssef& a) { return _mm_cvtss_f32(_mm_extract_ps(a,i)); }
-#else
-template<size_t i> __forceinline float extract (const ssef& a) { return _mm_cvtss_f32(shuffle<i,i,i,i>(a)); }
+#if defined(__KERNEL_AVX__)
+__forceinline const ssef shuffle(const ssef& a, const ssei& shuf) {
+ return _mm_permutevar_ps(a, shuf);
+}
#endif
+
+template<size_t i> __forceinline float extract (const ssef& a) { return _mm_cvtss_f32(shuffle<i,i,i,i>(a)); }
template<> __forceinline float extract<0>(const ssef& a) { return _mm_cvtss_f32(a); }
#if defined(__KERNEL_SSE41__)
@@ -347,6 +371,8 @@ __forceinline size_t select_max(const ssef& v) { return __bsf(movemask(v == vred
__forceinline size_t select_min(const sseb& valid, const ssef& v) { const ssef a = select(valid,v,ssef(pos_inf)); return __bsf(movemask(valid &(a == vreduce_min(a)))); }
__forceinline size_t select_max(const sseb& valid, const ssef& v) { const ssef a = select(valid,v,ssef(neg_inf)); return __bsf(movemask(valid &(a == vreduce_max(a)))); }
+__forceinline size_t movemask( const ssef& a ) { return _mm_movemask_ps(a); }
+
////////////////////////////////////////////////////////////////////////////////
/// Memory load and store operations
////////////////////////////////////////////////////////////////////////////////
@@ -580,6 +606,20 @@ ccl_device_inline const ssef set_sign_bit(const ssef &a)
return a ^ cast(ssei(S1 << 31, S2 << 31, S3 << 31, S4 << 31));
}
+////////////////////////////////////////////////////////////////////////////////
+/// Debug Functions
+////////////////////////////////////////////////////////////////////////////////
+
+ccl_device_inline void print_ssef(const char *label, const ssef &a)
+{
+ printf("%s: %.8f %.8f %.8f %.8f\n",
+ label,
+ (double)a[0],
+ (double)a[1],
+ (double)a[2],
+ (double)a[3]);
+}
+
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_ssei.h b/intern/cycles/util/util_ssei.h
index 5f5a8686e35..5f62569268c 100644
--- a/intern/cycles/util/util_ssei.h
+++ b/intern/cycles/util/util_ssei.h
@@ -12,7 +12,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_SSEI_H__
@@ -190,8 +190,8 @@ __forceinline const ssei select( const int mask, const ssei& t, const ssei& f )
// Movement/Shifting/Shuffling Functions
////////////////////////////////////////////////////////////////////////////////
-__forceinline ssei unpacklo( const ssei& a, const ssei& b ) { return _mm_castps_si128(_mm_unpacklo_ps(_mm_castsi128_ps(a.m128), _mm_castsi128_ps(b.m128))); }
-__forceinline ssei unpackhi( const ssei& a, const ssei& b ) { return _mm_castps_si128(_mm_unpackhi_ps(_mm_castsi128_ps(a.m128), _mm_castsi128_ps(b.m128))); }
+__forceinline ssei unpacklo( const ssei& a, const ssei& b ) { return _mm_unpacklo_epi32(a, b); }
+__forceinline ssei unpackhi( const ssei& a, const ssei& b ) { return _mm_unpackhi_epi32(a, b); }
template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei shuffle( const ssei& a ) {
return _mm_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0));
@@ -201,12 +201,6 @@ template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const ssei sh
return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _MM_SHUFFLE(i3, i2, i1, i0)));
}
-#if defined(__KERNEL_SSE3__)
-template<> __forceinline const ssei shuffle<0, 0, 2, 2>( const ssei& a ) { return _mm_castps_si128(_mm_moveldup_ps(_mm_castsi128_ps(a))); }
-template<> __forceinline const ssei shuffle<1, 1, 3, 3>( const ssei& a ) { return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(a))); }
-template<> __forceinline const ssei shuffle<0, 1, 0, 1>( const ssei& a ) { return _mm_castpd_si128(_mm_movedup_pd (_mm_castsi128_pd(a))); }
-#endif
-
template<size_t i0> __forceinline const ssei shuffle( const ssei& b ) {
return shuffle<i0,i0,i0,i0>(b);
}
@@ -286,6 +280,16 @@ __forceinline void store4i_nt(void* ptr, const ssei& v) {
#endif
}
+////////////////////////////////////////////////////////////////////////////////
+/// Debug Functions
+////////////////////////////////////////////////////////////////////////////////
+
+ccl_device_inline void print_ssei(const char *label, const ssei &a)
+{
+ printf("%s: %df %df %df %d\n",
+ label, a[0], a[1], a[2], a[3]);
+}
+
#endif
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_stats.h b/intern/cycles/util/util_stats.h
index 8758b823084..ce27067dc5e 100644
--- a/intern/cycles/util/util_stats.h
+++ b/intern/cycles/util/util_stats.h
@@ -11,12 +11,14 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_STATS_H__
#define __UTIL_STATS_H__
+#include "util_atomic.h"
+
CCL_NAMESPACE_BEGIN
class Stats {
@@ -24,14 +26,13 @@ public:
Stats() : mem_used(0), mem_peak(0) {}
void mem_alloc(size_t size) {
- mem_used += size;
- if(mem_used > mem_peak)
- mem_peak = mem_used;
+ atomic_add_z(&mem_used, size);
+ atomic_update_max_z(&mem_peak, mem_used);
}
void mem_free(size_t size) {
assert(mem_used >= size);
- mem_used -= size;
+ atomic_sub_z(&mem_used, size);
}
size_t mem_used;
diff --git a/intern/cycles/util/util_string.cpp b/intern/cycles/util/util_string.cpp
index f38d8d3282f..66856dd8331 100644
--- a/intern/cycles/util/util_string.cpp
+++ b/intern/cycles/util/util_string.cpp
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#include <stdarg.h>
@@ -105,5 +105,22 @@ string string_strip(const string& s)
}
+void string_replace(string& haystack, const string& needle, const string& other)
+{
+ size_t i;
+
+ while((i = haystack.find(needle)) != string::npos)
+ haystack.replace(i, needle.length(), other);
+}
+
+string string_remove_trademark(const string &s)
+{
+ string result = s;
+ string_replace(result, "(TM)", "");
+ string_replace(result, "(R)", "");
+
+ return string_strip(result);
+}
+
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_string.h b/intern/cycles/util/util_string.h
index 6808f085834..6cb8d8df1e1 100644
--- a/intern/cycles/util/util_string.h
+++ b/intern/cycles/util/util_string.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_STRING_H__
@@ -40,8 +40,10 @@ string string_printf(const char *format, ...) PRINTF_ATTRIBUTE;
bool string_iequals(const string& a, const string& b);
void string_split(vector<string>& tokens, const string& str, const string& separators = "\t ");
+void string_replace(string& haystack, const string& needle, const string& other);
bool string_endswith(const string& s, const char *end);
string string_strip(const string& s);
+string string_remove_trademark(const string& s);
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 7c0445577e2..cc88320b68e 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -11,11 +11,12 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#include "util_system.h"
#include "util_types.h"
+#include "util_string.h"
#ifdef _WIN32
#if(!defined(FREE_WINDOWS))
@@ -75,14 +76,6 @@ static void __cpuid(int data[4], int selector)
}
#endif
-static void replace_string(string& haystack, const string& needle, const string& other)
-{
- size_t i;
-
- while((i = haystack.find(needle)) != string::npos)
- haystack.replace(i, needle.length(), other);
-}
-
string system_cpu_brand_string()
{
char buf[48];
@@ -98,10 +91,7 @@ string system_cpu_brand_string()
string brand = buf;
/* make it a bit more presentable */
- replace_string(brand, "(TM)", "");
- replace_string(brand, "(R)", "");
-
- brand = string_strip(brand);
+ brand = string_remove_trademark(brand);
return brand;
}
@@ -127,6 +117,7 @@ struct CPUCapabilities {
bool sse42;
bool sse4a;
bool avx;
+ bool f16c;
bool avx2;
bool xop;
bool fma3;
@@ -135,24 +126,42 @@ struct CPUCapabilities {
bool bmi2;
};
+static void system_cpu_capabilities_override(CPUCapabilities *caps)
+{
+ /* Only capabilities which affects on cycles kernel. */
+ if(getenv("CYCLES_CPU_NO_AVX2")) {
+ caps->avx2 = false;
+ }
+ if(getenv("CYCLES_CPU_NO_AVX")) {
+ caps->avx = false;
+ }
+ if(getenv("CYCLES_CPU_NO_SSE41")) {
+ caps->sse41 = false;
+ }
+ if(getenv("CYCLES_CPU_NO_SSE3")) {
+ caps->sse3 = false;
+ }
+ if(getenv("CYCLES_CPU_NO_SSE2")) {
+ caps->sse2 = false;
+ }
+ if(getenv("CYCLES_CPU_NO_SSE")) {
+ caps->sse = false;
+ }
+}
+
static CPUCapabilities& system_cpu_capabilities()
{
static CPUCapabilities caps;
static bool caps_init = false;
if(!caps_init) {
- int result[4], num; //, num_ex;
+ int result[4], num;
memset(&caps, 0, sizeof(caps));
__cpuid(result, 0);
num = result[0];
-#if 0
- __cpuid(result, 0x80000000);
- num_ex = result[0];
-#endif
-
if(num >= 1) {
__cpuid(result, 0x00000001);
caps.mmx = (result[3] & ((int)1 << 23)) != 0;
@@ -184,21 +193,15 @@ static CPUCapabilities& system_cpu_capabilities()
caps.avx = (xcr_feature_mask & 0x6) == 0x6;
}
+ caps.f16c = (result[2] & ((int)1 << 29)) != 0;
+
__cpuid(result, 0x00000007);
caps.bmi1 = (result[1] & ((int)1 << 3)) != 0;
caps.bmi2 = (result[1] & ((int)1 << 8)) != 0;
caps.avx2 = (result[1] & ((int)1 << 5)) != 0;
}
-#if 0
- if(num_ex >= 0x80000001) {
- __cpuid(result, 0x80000001);
- caps.x64 = (result[3] & ((int)1 << 29)) != 0;
- caps.sse4a = (result[2] & ((int)1 << 6)) != 0;
- caps.fma4 = (result[2] & ((int)1 << 16)) != 0;
- caps.xop = (result[2] & ((int)1 << 11)) != 0;
- }
-#endif
+ system_cpu_capabilities_override(&caps);
caps_init = true;
}
@@ -232,7 +235,7 @@ bool system_cpu_support_avx()
bool system_cpu_support_avx2()
{
CPUCapabilities& caps = system_cpu_capabilities();
- return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2;
+ return caps.sse && caps.sse2 && caps.sse3 && caps.ssse3 && caps.sse41 && caps.avx && caps.f16c && caps.avx2 && caps.fma3 && caps.bmi1 && caps.bmi2;
}
#else
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index 0e8868c7dfc..4e7e00f85fd 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_SYSTEM_H__
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index 14a81ecbb05..d56553d1d4a 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#include "util_debug.h"
@@ -237,7 +237,7 @@ bool TaskScheduler::thread_wait_pop(Entry& entry)
return true;
}
-void TaskScheduler::thread_run(int thread_id)
+void TaskScheduler::thread_run(int /*thread_id*/)
{
Entry entry;
diff --git a/intern/cycles/util/util_task.h b/intern/cycles/util/util_task.h
index 42a1e2f5a58..debcff3b776 100644
--- a/intern/cycles/util/util_task.h
+++ b/intern/cycles/util/util_task.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_TASK_H__
@@ -27,7 +27,7 @@ class Task;
class TaskPool;
class TaskScheduler;
-typedef boost::function<void(void)> TaskRunFunction;
+typedef function<void(void)> TaskRunFunction;
/* Task
*
diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h
index f2698d043fb..9c19235d41d 100644
--- a/intern/cycles/util/util_thread.h
+++ b/intern/cycles/util/util_thread.h
@@ -11,13 +11,20 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_THREAD_H__
#define __UTIL_THREAD_H__
-#include <boost/thread.hpp>
+#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+# include <thread>
+# include <mutex>
+# include <condition_variable>
+# include <functional>
+#else
+# include <boost/thread.hpp>
+#endif
#include <pthread.h>
#include <queue>
@@ -25,18 +32,24 @@
CCL_NAMESPACE_BEGIN
+#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800)
+typedef std::mutex thread_mutex;
+typedef std::unique_lock<std::mutex> thread_scoped_lock;
+typedef std::condition_variable thread_condition_variable;
+#else
/* use boost for mutexes */
-
typedef boost::mutex thread_mutex;
typedef boost::mutex::scoped_lock thread_scoped_lock;
typedef boost::condition_variable thread_condition_variable;
+#endif
/* own pthread based implementation, to avoid boost version conflicts with
* dynamically loaded blender plugins */
class thread {
public:
- thread(boost::function<void(void)> run_cb_)
+ thread(function<void(void)> run_cb_)
+
{
joined = false;
run_cb = run_cb_;
@@ -63,7 +76,7 @@ public:
}
protected:
- boost::function<void(void)> run_cb;
+ function<void(void)> run_cb;
pthread_t pthread_id;
bool joined;
};
diff --git a/intern/cycles/util/util_time.cpp b/intern/cycles/util/util_time.cpp
index dd91b024940..964f9f1a7af 100644
--- a/intern/cycles/util/util_time.cpp
+++ b/intern/cycles/util/util_time.cpp
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#include <stdlib.h>
@@ -71,7 +71,7 @@ void time_sleep(double t)
/* get microseconds */
int us = (int)(t * 1e6);
- if (us > 0)
+ if(us > 0)
usleep(us);
}
diff --git a/intern/cycles/util/util_time.h b/intern/cycles/util/util_time.h
index 3df17272e2f..14ffea7f3da 100644
--- a/intern/cycles/util/util_time.h
+++ b/intern/cycles/util/util_time.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_TIME_H__
diff --git a/intern/cycles/util/util_transform.cpp b/intern/cycles/util/util_transform.cpp
index 14613558501..acaca69464c 100644
--- a/intern/cycles/util/util_transform.cpp
+++ b/intern/cycles/util/util_transform.cpp
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
/*
@@ -46,9 +46,11 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "util_math.h"
#include "util_transform.h"
+#include "util_boundbox.h"
+#include "util_math.h"
+
CCL_NAMESPACE_BEGIN
/* Transform Inverse */
@@ -271,5 +273,15 @@ void transform_motion_decompose(DecompMotionTransform *decomp, const MotionTrans
decomp->post_y = post.y;
}
-CCL_NAMESPACE_END
+Transform transform_from_viewplane(BoundBox2D& viewplane)
+{
+ return
+ transform_scale(1.0f / (viewplane.right - viewplane.left),
+ 1.0f / (viewplane.top - viewplane.bottom),
+ 1.0f) *
+ transform_translate(-viewplane.left,
+ -viewplane.bottom,
+ 0.0f);
+}
+CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_transform.h b/intern/cycles/util/util_transform.h
index 5b3dbe42f69..ba8d04b5c16 100644
--- a/intern/cycles/util/util_transform.h
+++ b/intern/cycles/util/util_transform.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_TRANSFORM_H__
@@ -55,6 +55,11 @@ typedef struct DecompMotionTransform {
float4 post_x, post_y;
} DecompMotionTransform;
+typedef struct PerspectiveMotionTransform {
+ Transform pre;
+ Transform post;
+} PerspectiveMotionTransform;
+
/* Functions */
ccl_device_inline float3 transform_perspective(const Transform *t, const float3 a)
@@ -216,12 +221,13 @@ ccl_device_inline Transform transform_rotate(float angle, float3 axis)
0.0f, 0.0f, 0.0f, 1.0f);
}
+/* Euler is assumed to be in XYZ order. */
ccl_device_inline Transform transform_euler(float3 euler)
{
return
- transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f)) *
+ transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) *
transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) *
- transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f));
+ transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f));
}
ccl_device_inline Transform transform_orthographic(float znear, float zfar)
@@ -448,6 +454,8 @@ ccl_device void transform_motion_interpolate(Transform *tfm, const DecompMotionT
#ifndef __KERNEL_GPU__
+class BoundBox2D;
+
ccl_device_inline bool operator==(const MotionTransform& A, const MotionTransform& B)
{
return (A.pre == B.pre && A.post == B.post);
@@ -455,9 +463,41 @@ ccl_device_inline bool operator==(const MotionTransform& A, const MotionTransfor
float4 transform_to_quat(const Transform& tfm);
void transform_motion_decompose(DecompMotionTransform *decomp, const MotionTransform *motion, const Transform *mid);
+Transform transform_from_viewplane(BoundBox2D& viewplane);
#endif
+/* TODO(sergey): This is only for until we've got OpenCL 2.0
+ * on all devices we consider supported. It'll be replaced with
+ * generic address space.
+ */
+
+#ifdef __KERNEL_OPENCL__
+
+#define OPENCL_TRANSFORM_ADDRSPACE_GLUE(a, b) a ## b
+#define OPENCL_TRANSFORM_ADDRSPACE_DECLARE(function) \
+ccl_device_inline float3 OPENCL_TRANSFORM_ADDRSPACE_GLUE(function, _addrspace)( \
+ ccl_addr_space const Transform *t, const float3 a) \
+{ \
+ Transform private_tfm = *t; \
+ return function(&private_tfm, a); \
+}
+
+OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_point)
+OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction)
+OPENCL_TRANSFORM_ADDRSPACE_DECLARE(transform_direction_transposed)
+
+# undef OPENCL_TRANSFORM_ADDRSPACE_DECLARE
+# undef OPENCL_TRANSFORM_ADDRSPACE_GLUE
+# define transform_point_auto transform_point_addrspace
+# define transform_direction_auto transform_direction_addrspace
+# define transform_direction_transposed_auto transform_direction_transposed_addrspace
+#else
+# define transform_point_auto transform_point
+# define transform_direction_auto transform_direction
+# define transform_direction_transposed_auto transform_direction_transposed
+#endif
+
CCL_NAMESPACE_END
#endif /* __UTIL_TRANSFORM_H__ */
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 2a199e591bf..6f474f873a6 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_TYPES_H__
@@ -33,11 +33,7 @@
#ifndef __KERNEL_GPU__
-# ifdef NDEBUG
-# define ccl_device static inline
-# else
-# define ccl_device static
-# endif
+#define ccl_device static inline
#define ccl_device_noinline static
#define ccl_global
#define ccl_constant
@@ -53,11 +49,7 @@
#define ccl_try_align(...) /* not support for function arguments (error C2719) */
#endif
#define ccl_may_alias
-# ifdef NDEBUG
-# define ccl_always_inline __forceinline
-# else
-# define ccl_always_inline
-# endif
+#define ccl_always_inline __forceinline
#define ccl_maybe_unused
#else
@@ -272,6 +264,19 @@ struct ccl_try_align(16) float4 {
__forceinline float& operator[](int i) { return *(&x + i); }
};
+template<typename T>
+class vector3
+{
+public:
+ T x, y, z;
+
+ ccl_always_inline vector3() {}
+ ccl_always_inline vector3(const T& a)
+ : x(a), y(a), z(a) {}
+ ccl_always_inline vector3(const T& x, const T& y, const T& z)
+ : x(x), y(y), z(z) {}
+};
+
#endif
#ifndef __KERNEL_GPU__
@@ -465,6 +470,19 @@ enum InterpolationType {
INTERPOLATION_SMART = 3,
};
+/* Extension types for textures.
+ *
+ * Defines how the image is extrapolated past its original bounds.
+ */
+enum ExtensionType {
+ /* Cause the image to repeat horizontally and vertically. */
+ EXTENSION_REPEAT = 0,
+ /* Extend by repeating edge pixels of the image. */
+ EXTENSION_EXTEND = 1,
+ /* Clip to image size and set exterior pixels as transparent. */
+ EXTENSION_CLIP = 2,
+};
+
/* macros */
/* hints for branch prediction, only use in code that runs a _lot_ */
@@ -476,18 +494,32 @@ enum InterpolationType {
# define UNLIKELY(x) (x)
#endif
+#if defined(__cplusplus) && ((__cplusplus >= 201103L) || (defined(_MSC_VER) && _MSC_VER >= 1800))
+# define HAS_CPP11_FEATURES
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+# if defined(HAS_CPP11_FEATURES)
+/* Some magic to be sure we don't have reference in the type. */
+template<typename T> static inline T decltype_helper(T x) { return x; }
+# define TYPEOF(x) decltype(decltype_helper(x))
+# else
+# define TYPEOF(x) typeof(x)
+# endif
+#endif
+
/* Causes warning:
* incompatible types when assigning to type 'Foo' from type 'Bar'
* ... the compiler optimizes away the temp var */
#ifdef __GNUC__
#define CHECK_TYPE(var, type) { \
- typeof(var) *__tmp; \
+ TYPEOF(var) *__tmp; \
__tmp = (type *)NULL; \
(void)__tmp; \
} (void)0
#define CHECK_TYPE_PAIR(var_a, var_b) { \
- typeof(var_a) *__tmp; \
+ TYPEOF(var_a) *__tmp; \
__tmp = (typeof(var_b) *)NULL; \
(void)__tmp; \
} (void)0
diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h
index cc6e8a371ed..ee1f997721d 100644
--- a/intern/cycles/util/util_vector.h
+++ b/intern/cycles/util/util_vector.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_VECTOR_H__
@@ -19,34 +19,74 @@
/* Vector */
-#include <string.h>
+#include <cassert>
+#include <cstring>
#include <vector>
+#include "util_aligned_malloc.h"
#include "util_types.h"
-CCL_NAMESPACE_BEGIN
+#ifdef WITH_CYCLES_DEBUG
+# include "util_guarded_allocator.h"
+#endif
-using std::vector;
+CCL_NAMESPACE_BEGIN
-static inline void *malloc_aligned(size_t size, size_t alignment)
+/* Vector
+ *
+ * Own subclass-ed vestion of std::vector. Subclass is needed because:
+ *
+ * - When building with WITH_CYCLES_DEBUG we need to use own allocator which
+ * keeps track of used/peak memory.
+ *
+ * - Have method to ensure capacity is re-set to 0.
+ */
+template<typename value_type,
+#ifdef WITH_CYCLES_DEBUG
+ typename allocator_type = GuardedAllocator<value_type>
+#else
+ typename allocator_type = std::allocator<value_type>
+#endif
+ >
+class vector : public std::vector<value_type, allocator_type>
{
- void *data = (void*)malloc(size + sizeof(void*) + alignment - 1);
+public:
+ /* Default constructor. */
+ explicit vector() : std::vector<value_type, allocator_type>() { }
- union { void *ptr; size_t offset; } u;
- u.ptr = (char*)data + sizeof(void*);
- u.offset = (u.offset + alignment - 1) & ~(alignment - 1);
- *(((void**)u.ptr) - 1) = data;
+ /* Fill constructor. */
+ explicit vector(size_t n, const value_type& val = value_type())
+ : std::vector<value_type, allocator_type>(n, val) { }
- return u.ptr;
-}
+ /* Range constructor. */
+ template <class InputIterator>
+ vector(InputIterator first, InputIterator last)
+ : std::vector<value_type, allocator_type>(first, last) { }
-static inline void free_aligned(void *ptr)
-{
- if(ptr) {
- void *data = *(((void**)ptr) - 1);
- free(data);
+ /* Copy constructor. */
+ vector(const vector &x) : std::vector<value_type, allocator_type>(x) { }
+
+ void shrink_to_fit(void)
+ {
+#if __cplusplus < 201103L
+ vector<value_type>().swap(*this);
+#else
+ std::vector<value_type, allocator_type>::shrink_to_fit();
+#endif
}
-}
+
+ void free_memory(void)
+ {
+ std::vector<value_type, allocator_type>::resize(0);
+ shrink_to_fit();
+ }
+
+ /* Some external API might demand working with std::vector. */
+ operator std::vector<value_type>()
+ {
+ return std::vector<value_type>(*this);
+ }
+};
/* Array
*
@@ -65,6 +105,7 @@ public:
{
data = NULL;
datasize = 0;
+ capacity = 0;
}
array(size_t newsize)
@@ -72,10 +113,12 @@ public:
if(newsize == 0) {
data = NULL;
datasize = 0;
+ capacity = 0;
}
else {
- data = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
+ data = (T*)util_aligned_malloc(sizeof(T)*newsize, alignment);
datasize = newsize;
+ capacity = datasize;
}
}
@@ -89,11 +132,13 @@ public:
if(from.datasize == 0) {
data = NULL;
datasize = 0;
+ capacity = 0;
}
else {
- data = (T*)malloc_aligned(sizeof(T)*from.datasize, alignment);
+ data = (T*)util_aligned_malloc(sizeof(T)*from.datasize, alignment);
memcpy(data, from.data, from.datasize*sizeof(T));
datasize = from.datasize;
+ capacity = datasize;
}
return *this;
@@ -102,13 +147,11 @@ public:
array& operator=(const vector<T>& from)
{
datasize = from.size();
+ capacity = datasize;
data = NULL;
if(datasize > 0) {
- data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
- memcpy(data, &from[0], datasize*sizeof(T));
- free_aligned(data);
- data = (T*)malloc_aligned(sizeof(T)*datasize, alignment);
+ data = (T*)util_aligned_malloc(sizeof(T)*datasize, alignment);
memcpy(data, &from[0], datasize*sizeof(T));
}
@@ -117,7 +160,7 @@ public:
~array()
{
- free_aligned(data);
+ util_aligned_free(data);
}
void resize(size_t newsize)
@@ -126,22 +169,25 @@ public:
clear();
}
else if(newsize != datasize) {
- T *newdata = (T*)malloc_aligned(sizeof(T)*newsize, alignment);
- if(data) {
- memcpy(newdata, data, ((datasize < newsize)? datasize: newsize)*sizeof(T));
- free_aligned(data);
+ if(newsize > capacity) {
+ T *newdata = (T*)util_aligned_malloc(sizeof(T)*newsize, alignment);
+ if(data) {
+ memcpy(newdata, data, ((datasize < newsize)? datasize: newsize)*sizeof(T));
+ util_aligned_free(data);
+ }
+ data = newdata;
+ capacity = newsize;
}
-
- data = newdata;
datasize = newsize;
}
}
void clear()
{
- free_aligned(data);
+ util_aligned_free(data);
data = NULL;
datasize = 0;
+ capacity = 0;
}
size_t size() const
@@ -151,12 +197,26 @@ public:
T& operator[](size_t i) const
{
+ assert(i < datasize);
return data[i];
}
+ void reserve(size_t newcapacity) {
+ if(newcapacity > capacity) {
+ T *newdata = (T*)util_aligned_malloc(sizeof(T)*newcapacity, alignment);
+ if(data) {
+ memcpy(newdata, data, ((datasize < newcapacity)? datasize: newcapacity)*sizeof(T));
+ util_aligned_free(data);
+ }
+ data = newdata;
+ capacity = newcapacity;
+ }
+ }
+
protected:
T *data;
size_t datasize;
+ size_t capacity;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/util_view.cpp b/intern/cycles/util/util_view.cpp
index 6bf9c9ed8c0..9b5cd22fb4a 100644
--- a/intern/cycles/util/util_view.cpp
+++ b/intern/cycles/util/util_view.cpp
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#include <stdio.h>
@@ -98,7 +98,7 @@ void view_display_help()
glColor3f(0.8f, 0.8f, 0.8f);
view_display_text(x1+20, y2-20, "Cycles Renderer");
- view_display_text(x1+20, y2-40, "(C) 2011-2014 Blender Foundation");
+ view_display_text(x1+20, y2-40, "(C) 2011-2015 Blender Foundation");
view_display_text(x1+20, y2-80, "Controls:");
view_display_text(x1+20, y2-100, "h: Info/Help");
view_display_text(x1+20, y2-120, "r: Reset");
@@ -110,6 +110,7 @@ void view_display_help()
view_display_text(x1+20, y2-230, "Left mouse: Move camera");
view_display_text(x1+20, y2-250, "Right mouse: Rotate camera");
view_display_text(x1+20, y2-270, "W/A/S/D: Move camera");
+ view_display_text(x1+20, y2-290, "0/1/2/3: Set max bounces");
glColor3f(1.0f, 1.0f, 1.0f);
}
@@ -248,7 +249,7 @@ void view_main_loop(const char *title, int width, int height,
glutInitDisplayMode(GLUT_RGB|GLUT_DOUBLE|GLUT_DEPTH);
glutCreateWindow(title);
- glewInit();
+ mxMakeCurrentContext(mxCreateContext());
view_reshape(width, height);
diff --git a/intern/cycles/util/util_view.h b/intern/cycles/util/util_view.h
index 65d890eb6af..5def0564175 100644
--- a/intern/cycles/util/util_view.h
+++ b/intern/cycles/util/util_view.h
@@ -11,7 +11,7 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_VIEW_H__
diff --git a/intern/cycles/util/util_xml.h b/intern/cycles/util/util_xml.h
index 9d1ebc2114a..cfd0afc95f7 100644
--- a/intern/cycles/util/util_xml.h
+++ b/intern/cycles/util/util_xml.h
@@ -11,15 +11,15 @@
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
- * limitations under the License
+ * limitations under the License.
*/
#ifndef __UTIL_XML_H__
#define __UTIL_XML_H__
-/* PugiXML from OpenImageIO is used for XML parsing. */
+/* PugiXML is used for XML parsing. */
-#include <OpenImageIO/pugixml.hpp>
+#include <pugixml.hpp>
CCL_NAMESPACE_BEGIN