Cycles: Add BVH8 and packeted triangle intersection

This is an initial implementation of the BVH8 optimization structure and packeted triangle intersection. The aim is to get faster ray-to-scene intersection checks.

Scene                BVH4      BVH8
barbershop_interior  10:24.94  10:10.74
bmw27                02:41.25  02:38.83
classroom            08:16.49  07:56.15
fishy_cat            04:24.56  04:17.29
koro                 06:03.06  06:01.45
pavillon_barcelona   09:21.26  09:02.98
victor               23:39.65  22:53.71

As for memory, peak usage rises by about 4.7% in complex scenes.

Note that BVH8 is disabled when using OSL. This is because the OSL kernel does not get per-microarchitecture optimizations and hence always assumes BVH3 is used.

Original BVH8 patch from Anton Gavrikov. Batched triangles intersection from Victoria Zhislina. Extra work, tests and fixes from Maxym Dmytrychenko.
author: Sergey Sharybin <sergey.vfx@gmail.com> 2018-02-14 13:23:30 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2018-08-29 16:03:09 +0300
commit: 73f20560529457ea177cb93e8e8eaaf44a589643 (patch)
tree: 45ea2ebad9adabcedd7833629421909ede9f6fb5 /intern/cycles/util/util_types_float8_impl.h
parent: 66f8a4c07e8a5fc166579101933264b8425a7cd1 (diff)
1 files changed, 113 insertions, 0 deletions
diff --git a/intern/cycles/util/util_types_float8_impl.h b/intern/cycles/util/util_types_float8_impl.h
new file mode 100644
index 00000000000..4fac03569e9
--- /dev/null
+++ b/intern/cycles/util/util_types_float8_impl.h
@@ -0,0 +1,113 @@
+/*
+Copyright (c) 2017, Intel Corporation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of Intel Corporation nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__
+#define __UTIL_TYPES_FLOAT8_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+#  error "Do not include this file directly, include util_types.h instead."
+#endif
+
+#ifndef __KERNEL_GPU__
+#  include <cstdio>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+#ifdef __KERNEL_AVX2__
+/* Default constructor. The body is empty: nothing is explicitly
+ * initialized here. */
+__forceinline float8::float8() {}
+
+/* Copy constructor: duplicates the `m256` member of `f`. */
+__forceinline float8::float8(const float8& f)
+{
+	m256 = f.m256;
+}
+
+/* Construct from a raw __m256 value, which is stored in `m256`. */
+__forceinline float8::float8(const __m256& f)
+{
+	m256 = f;
+}
+
+/* Implicit conversion to a read-only reference to the `m256` member. */
+__forceinline float8::operator const __m256&(void) const
+{
+	return this->m256;
+}
+
+/* Implicit conversion to a mutable reference to the `m256` member. */
+__forceinline float8::operator __m256&(void)
+{
+	return this->m256;
+}
+
+/* Copy assignment: overwrites `m256` with the one from `f` and returns
+ * a reference to this object. */
+__forceinline float8& float8::operator =(const float8& f)
+{
+	this->m256 = f.m256;
+	return *this;
+}
+#endif  /* __KERNEL_AVX2__ */
+
+/* Read component `i` by value.
+ *
+ * `i` is checked with util_assert() to be in the range [0, 8). The lookup
+ * indexes from the address of member `a`, so it relies on the components
+ * being stored contiguously starting at `a`. */
+__forceinline float float8::operator[](int i) const
+{
+	util_assert(i >= 0);
+	util_assert(i < 8);
+	const float *components = &a;
+	return components[i];
+}
+
+/* Access component `i` by mutable reference.
+ *
+ * `i` is checked with util_assert() to be in the range [0, 8). The lookup
+ * indexes from the address of member `a`, so it relies on the components
+ * being stored contiguously starting at `a`. */
+__forceinline float& float8::operator[](int i)
+{
+	util_assert(i >= 0);
+	util_assert(i < 8);
+	float *components = &a;
+	return components[i];
+}
+
+/* Build a float8 with all eight components set to `f`. */
+ccl_device_inline float8 make_float8(float f)
+{
+#ifdef __KERNEL_AVX2__
+	/* Broadcast the scalar into the 256-bit value in one intrinsic. */
+	return float8(_mm256_set1_ps(f));
+#else
+	float8 splat = {f, f, f, f, f, f, f, f};
+	return splat;
+#endif
+}
+
+/* Build a float8 from eight individual components.
+ *
+ * The arguments are stored in the order given on both code paths: `a` is
+ * the first component and `h` the last, so the result does not depend on
+ * whether AVX2 is enabled. */
+ccl_device_inline float8 make_float8(float a, float b, float c, float d,
+                                     float e, float f, float g, float h)
+{
+#ifdef __KERNEL_AVX2__
+	/* _mm256_setr_ps() takes its arguments lowest element first, matching
+	 * the aggregate initializer in the scalar branch. _mm256_set_ps() takes
+	 * them highest element first and would store the components reversed. */
+	float8 r(_mm256_setr_ps(a, b, c, d, e, f, g, h));
+#else
+	float8 r = {a, b, c, d, e, f, g, h};
+#endif
+	return r;
+}
+
+#endif  /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif  /* __UTIL_TYPES_FLOAT8_IMPL_H__ */
author	Sergey Sharybin <sergey.vfx@gmail.com>	2018-02-14 13:23:30 +0300
committer	Sergey Sharybin <sergey.vfx@gmail.com>	2018-08-29 16:03:09 +0300
commit	73f20560529457ea177cb93e8e8eaaf44a589643 (patch)
tree	45ea2ebad9adabcedd7833629421909ede9f6fb5 /intern/cycles/util/util_types_float8_impl.h
parent	66f8a4c07e8a5fc166579101933264b8425a7cd1 (diff)