git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--  intern/cycles/util/CMakeLists.txt              5
-rw-r--r--  intern/cycles/util/util_avxb.h               191
-rw-r--r--  intern/cycles/util/util_avxf.h                51
-rw-r--r--  intern/cycles/util/util_debug.cpp             14
-rw-r--r--  intern/cycles/util/util_types.h                5
-rw-r--r--  intern/cycles/util/util_types_float8.h        70
-rw-r--r--  intern/cycles/util/util_types_float8_impl.h  113
7 files changed, 445 insertions(+), 4 deletions(-)
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 24043e2231b..8ffe9fd371a 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -76,6 +76,7 @@ set(SRC_HEADERS
util_sky_model.h
util_sky_model_data.h
util_avxf.h
+ util_avxb.h
util_sseb.h
util_ssef.h
util_ssei.h
@@ -96,7 +97,9 @@ set(SRC_HEADERS
util_types_float3_impl.h
util_types_float4.h
util_types_float4_impl.h
- util_types_int2.h
+ util_types_float8.h
+ util_types_float8_impl.h
+ util_types_int2.h
util_types_int2_impl.h
util_types_int3.h
util_types_int3_impl.h
diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h
new file mode 100644
index 00000000000..670a33f9ae6
--- /dev/null
+++ b/intern/cycles/util/util_avxb.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2011-2013 Intel Corporation
+ * Modifications Copyright 2014, Blender Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_AVXB_H__
+#define __UTIL_AVXB_H__
+
+CCL_NAMESPACE_BEGIN
+
+struct avxf;
+
+/*! 8-wide AVX bool type. */
+struct avxb
+{
+ typedef avxb Mask; // mask type
+ typedef avxf Float; // float type
+
+ enum { size = 8 }; // number of SIMD elements
+ union { __m256 m256; int32_t v[8]; }; // data
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constructors, Assignment & Cast Operators
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline avxb ( ) {}
+ __forceinline avxb ( const avxb& other ) { m256 = other.m256; }
+ __forceinline avxb& operator=( const avxb& other ) { m256 = other.m256; return *this; }
+
+ __forceinline avxb( const __m256 input ) : m256(input) {}
+ __forceinline operator const __m256&( void ) const { return m256; }
+ __forceinline operator const __m256i( void ) const { return _mm256_castps_si256(m256); }
+ __forceinline operator const __m256d( void ) const { return _mm256_castps_pd(m256); }
+
+ //__forceinline avxb ( bool a )
+ // : m256(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
+ //__forceinline avxb ( bool a, bool b)
+ // : m256(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) {}
+ //__forceinline avxb ( bool a, bool b, bool c, bool d)
+ // : m256(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}
+ //__forceinline avxb(int mask) {
+ // assert(mask >= 0 && mask < 16);
+ // m128 = _mm_lookupmask_ps[mask];
+ //}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Constants
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline avxb( FalseTy ) : m256(_mm256_setzero_ps()) {}
+ __forceinline avxb( TrueTy ) : m256(_mm256_castsi256_ps(_mm256_cmpeq_epi32(_mm256_setzero_si256(), _mm256_setzero_si256()))) {}
+
+ ////////////////////////////////////////////////////////////////////////////////
+ /// Array Access
+ ////////////////////////////////////////////////////////////////////////////////
+
+ __forceinline bool operator []( const size_t i ) const { assert(i < 8); return (_mm256_movemask_ps(m256) >> i) & 1; }
+ __forceinline int32_t& operator []( const size_t i ) { assert(i < 8); return v[i]; }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+/// Unary Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxb operator !( const avxb& a ) { return _mm256_xor_ps(a, avxb(True)); }
+
+////////////////////////////////////////////////////////////////////////////////
+/// Binary Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxb operator &( const avxb& a, const avxb& b ) { return _mm256_and_ps(a, b); }
+__forceinline const avxb operator |( const avxb& a, const avxb& b ) { return _mm256_or_ps (a, b); }
+__forceinline const avxb operator ^( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); }
+
+////////////////////////////////////////////////////////////////////////////////
+/// Assignment Operators
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxb operator &=( avxb& a, const avxb& b ) { return a = a & b; }
+__forceinline const avxb operator |=( avxb& a, const avxb& b ) { return a = a | b; }
+__forceinline const avxb operator ^=( avxb& a, const avxb& b ) { return a = a ^ b; }
+
+////////////////////////////////////////////////////////////////////////////////
+/// Comparison Operators + Select
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxb operator !=( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); }
+__forceinline const avxb operator ==( const avxb& a, const avxb& b ) { return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); }
+
+__forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
+#if defined(__KERNEL_SSE41__)
+ return _mm256_blendv_ps(f, t, m);
+#else
+ return _mm256_or_ps(_mm256_and_ps(m, t), _mm256_andnot_ps(m, f));
+#endif
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Movement/Shifting/Shuffling Functions
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline const avxb unpacklo( const avxb& a, const avxb& b ) { return _mm256_unpacklo_ps(a, b); }
+__forceinline const avxb unpackhi( const avxb& a, const avxb& b ) { return _mm256_unpackhi_ps(a, b); }
+
+#define _MM256_SHUFFLE(fp7,fp6,fp5,fp4,fp3,fp2,fp1,fp0) (((fp7) << 14) | ((fp6) << 12) | ((fp5) << 10) | ((fp4) << 8) | \
+ ((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
+
+template<size_t i0, size_t i1, size_t i2, size_t i3, size_t i4, size_t i5, size_t i6, size_t i7> __forceinline const avxb shuffle( const avxb& a ) {
+	/* _mm256_shuffle_epi32 (AVX2) applies one 4-field control to both 128-bit lanes,
+	 * so i4..i7 are expected to mirror i0..i3; cast back from __m256i to the float mask. */
+	return _mm256_castsi256_ps(_mm256_shuffle_epi32(a, _MM_SHUFFLE(i3, i2, i1, i0)));
+}
+
+/*
+template<> __forceinline const avxb shuffle<0, 1, 0, 1, 0, 1, 0, 1>( const avxb& a ) {
+ return _mm_movelh_ps(a, a);
+}
+
+template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a ) {
+ return _mm_movehl_ps(a, a);
+}
+
+template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a, const sseb& b ) {
+ return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
+}
+
+template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a, const sseb& b ) {
+ return _mm_movelh_ps(a, b);
+}
+
+template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a, const sseb& b ) {
+ return _mm_movehl_ps(b, a);
+}
+
+#if defined(__KERNEL_SSE3__)
+template<> __forceinline const sseb shuffle<0, 0, 2, 2>( const sseb& a ) { return _mm_moveldup_ps(a); }
+template<> __forceinline const sseb shuffle<1, 1, 3, 3>( const sseb& a ) { return _mm_movehdup_ps(a); }
+#endif
+
+#if defined(__KERNEL_SSE41__)
+template<size_t dst, size_t src, size_t clr> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); }
+template<size_t dst, size_t src> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return insert<dst, src, 0>(a, b); }
+template<size_t dst> __forceinline const sseb insert( const sseb& a, const bool b ) { return insert<dst,0>(a, sseb(b)); }
+#endif
+*/
+
+////////////////////////////////////////////////////////////////////////////////
+/// Reduction Operations
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__KERNEL_SSE41__)
+__forceinline size_t popcnt( const avxb& a ) { return __popcnt(_mm256_movemask_ps(a)); }
+#else
+__forceinline size_t popcnt( const avxb& a ) { return bool(a[0])+bool(a[1])+bool(a[2])+bool(a[3])+bool(a[4])+
+ bool(a[5])+bool(a[6])+bool(a[7]); }
+#endif
+
+__forceinline bool reduce_and( const avxb& a ) { return _mm256_movemask_ps(a) == 0xff; }
+__forceinline bool reduce_or ( const avxb& a ) { return _mm256_movemask_ps(a) != 0x0; }
+__forceinline bool all ( const avxb& b ) { return _mm256_movemask_ps(b) == 0xff; }
+__forceinline bool any ( const avxb& b ) { return _mm256_movemask_ps(b) != 0x0; }
+__forceinline bool none ( const avxb& b ) { return _mm256_movemask_ps(b) == 0x0; }
+
+__forceinline size_t movemask( const avxb& a ) { return _mm256_movemask_ps(a); }
+
+////////////////////////////////////////////////////////////////////////////////
+/// Debug Functions
+////////////////////////////////////////////////////////////////////////////////
+
+ccl_device_inline void print_avxb(const char *label, const avxb &a)
+{
+ printf("%s: %d %d %d %d %d %d %d %d\n",
+ label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
+}
+
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_AVXB_H__ */
+
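A minimal usage sketch (not part of the patch): how the new avxb mask can drive a branch-free clamp. The helper name clamp_below is hypothetical, and the snippet assumes an AVX2 build in which util_types.h has pulled in both util_avxb.h and util_avxf.h (the operator<= used here is added to util_avxf.h further down in this commit).

	/* Clamp eight lanes of x against a per-lane limit. */
	ccl_device_inline avxf clamp_below(const avxf &x, const avxf &limit)
	{
		const avxb mask = (x <= limit);                 /* 8-wide lane mask */
		if(all(mask)) {                                 /* movemask == 0xff: nothing to clamp */
			return x;
		}
		return avxf(_mm256_blendv_ps(limit, x, mask));  /* keep x where true, limit elsewhere */
	}
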
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 2451213963a..81365f96c94 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -19,7 +19,8 @@
CCL_NAMESPACE_BEGIN
-#ifdef __KERNEL_AVX__
+struct avxb;
+
struct avxf
{
typedef avxf Float;
@@ -53,6 +54,9 @@ struct avxf
__forceinline avxf(float a7, float a6, float a5, float a4, float a3, float a2, float a1, float a0) :
m256(_mm256_set_ps(a7, a6, a5, a4, a3, a2, a1, a0)) {}
+ __forceinline avxf(float3 a) :
+ m256(_mm256_set_ps(a.w, a.z, a.y, a.x, a.w, a.z, a.y, a.x)) {}
+
__forceinline avxf(int a3, int a2, int a1, int a0)
{
@@ -73,8 +77,24 @@ struct avxf
m256 = _mm256_insertf128_ps(foo, b, 1);
}
+ __forceinline const float& operator [](const size_t i) const { assert(i < 8); return f[i]; }
+ __forceinline float& operator [](const size_t i) { assert(i < 8); return f[i]; }
};
+__forceinline avxf cross(const avxf& a, const avxf& b)
+{
+ avxf r(0.0, a[4]*b[5] - a[5]*b[4], a[6]*b[4] - a[4]*b[6], a[5]*b[6] - a[6]*b[5],
+ 0.0, a[0]*b[1] - a[1]*b[0], a[2]*b[0] - a[0]*b[2], a[1]*b[2] - a[2]*b[1]);
+ return r;
+}
+
+__forceinline void dot3(const avxf& a, const avxf& b, float &den, float &den2)
+{
+ const avxf t = _mm256_mul_ps(a.m256, b.m256);
+ den = ((float*)&t)[0] + ((float*)&t)[1] + ((float*)&t)[2];
+ den2 = ((float*)&t)[4] + ((float*)&t)[5] + ((float*)&t)[6];
+}
+
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
@@ -107,6 +127,9 @@ __forceinline const avxf operator^(const avxf& a, const avxf& b) { return _mm256
__forceinline const avxf operator&(const avxf& a, const avxf& b) { return _mm256_and_ps(a.m256,b.m256); }
+__forceinline const avxf max(const avxf& a, const avxf& b) { return _mm256_max_ps(a.m256, b.m256); }
+__forceinline const avxf min(const avxf& a, const avxf& b) { return _mm256_min_ps(a.m256, b.m256); }
+
////////////////////////////////////////////////////////////////////////////////
/// Movement/Shifting/Shuffling Functions
////////////////////////////////////////////////////////////////////////////////
@@ -160,6 +183,18 @@ ccl_device_inline const avxf blend(const avxf &a, const avxf &b)
return blend<S0,S1,S2,S3,S0,S1,S2,S3>(a,b);
}
+//#if defined(__KERNEL_SSE41__)
+__forceinline avxf maxi(const avxf& a, const avxf& b) {
+ const avxf ci = _mm256_max_ps(a, b);
+ return ci;
+}
+
+__forceinline avxf mini(const avxf& a, const avxf& b) {
+ const avxf ci = _mm256_min_ps(a, b);
+ return ci;
+}
+//#endif
+
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
@@ -178,6 +213,19 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
return c-(a*b);
#endif
}
+__forceinline const avxf msub(const avxf& a, const avxf& b, const avxf& c) {
+ return _mm256_fmsub_ps(a, b, c);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Comparison Operators
+////////////////////////////////////////////////////////////////////////////////
+#ifdef __KERNEL_AVX2__
+__forceinline const avxb operator <=(const avxf& a, const avxf& b) {
+ return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS);
+}
+#endif
+
#endif
#ifndef _mm256_set_m128
@@ -190,4 +238,3 @@ __forceinline const avxf nmadd(const avxf& a, const avxf& b, const avxf& c) {
CCL_NAMESPACE_END
-#endif
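
A short sketch of how the new cross() and dot3() helpers pair up (illustrative only, not part of the patch): each avxf is assumed to pack one 3D vector in lanes 0-2 and a second one in lanes 4-6, which is the layout cross() writes and dot3() reads. The helper name two_plane_dots is hypothetical.

	/* Evaluate n.p for two normal/point pairs in one shot. */
	ccl_device_inline void two_plane_dots(const avxf &e0, const avxf &e1,
	                                      const avxf &p, float &d0, float &d1)
	{
		const avxf n = cross(e0, e1);  /* two cross products: lanes 0-2 and 4-6 */
		dot3(n, p, d0, d1);            /* d0 from the low half, d1 from the high half */
	}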
diff --git a/intern/cycles/util/util_debug.cpp b/intern/cycles/util/util_debug.cpp
index 9a66a372822..a761c9c46de 100644
--- a/intern/cycles/util/util_debug.cpp
+++ b/intern/cycles/util/util_debug.cpp
@@ -57,7 +57,19 @@ void DebugFlags::CPU::reset()
#undef STRINGIFY
#undef CHECK_CPU_FLAGS
- bvh_layout = BVH_LAYOUT_DEFAULT;
+ if (getenv("CYCLES_BVH2") != NULL) {
+ bvh_layout = BVH_LAYOUT_BVH2;
+ }
+ else if (getenv("CYCLES_BVH4") != NULL) {
+ bvh_layout = BVH_LAYOUT_BVH4;
+ }
+ else if (getenv("CYCLES_BVH8") != NULL) {
+ bvh_layout = BVH_LAYOUT_BVH8;
+ }
+ else {
+ bvh_layout = BVH_LAYOUT_DEFAULT;
+ }
+
split_kernel = false;
}
diff --git a/intern/cycles/util/util_types.h b/intern/cycles/util/util_types.h
index 84206a7ba5a..8ac075fe344 100644
--- a/intern/cycles/util/util_types.h
+++ b/intern/cycles/util/util_types.h
@@ -119,6 +119,7 @@ CCL_NAMESPACE_END
#include "util/util_types_float2.h"
#include "util/util_types_float3.h"
#include "util/util_types_float4.h"
+#include "util/util_types_float8.h"
#include "util/util_types_vector3.h"
@@ -138,6 +139,7 @@ CCL_NAMESPACE_END
#include "util/util_types_float2_impl.h"
#include "util/util_types_float3_impl.h"
#include "util/util_types_float4_impl.h"
+#include "util/util_types_float8_impl.h"
#include "util/util_types_vector3_impl.h"
@@ -146,8 +148,11 @@ CCL_NAMESPACE_END
# include "util/util_sseb.h"
# include "util/util_ssei.h"
# include "util/util_ssef.h"
+#if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
+# include "util/util_avxb.h"
# include "util/util_avxf.h"
#endif
+#endif
#endif /* __UTIL_TYPES_H__ */
diff --git a/intern/cycles/util/util_types_float8.h b/intern/cycles/util/util_types_float8.h
new file mode 100644
index 00000000000..2e693d49634
--- /dev/null
+++ b/intern/cycles/util/util_types_float8.h
@@ -0,0 +1,70 @@
+/*
+Copyright (c) 2017, Intel Corporation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of Intel Corporation nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __UTIL_TYPES_FLOAT8_H__
+#define __UTIL_TYPES_FLOAT8_H__
+
+#ifndef __UTIL_TYPES_H__
+# error "Do not include this file directly, include util_types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+
+struct ccl_try_align(32) float8 {
+#ifdef __KERNEL_AVX2__
+ union {
+ __m256 m256;
+ struct { float a, b, c, d, e, f, g, h; };
+ };
+
+ __forceinline float8();
+ __forceinline float8(const float8& a);
+ __forceinline explicit float8(const __m256& a);
+
+ __forceinline operator const __m256&(void) const;
+ __forceinline operator __m256&(void);
+
+ __forceinline float8& operator =(const float8& a);
+
+#else /* __KERNEL_AVX2__ */
+ float a, b, c, d, e, f, g, h;
+#endif /* __KERNEL_AVX2__ */
+
+ __forceinline float operator[](int i) const;
+ __forceinline float& operator[](int i);
+};
+
+ccl_device_inline float8 make_float8(float f);
+ccl_device_inline float8 make_float8(float a, float b, float c, float d,
+ float e, float f, float g, float h);
+#endif /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TYPES_FLOAT8_H__ */
diff --git a/intern/cycles/util/util_types_float8_impl.h b/intern/cycles/util/util_types_float8_impl.h
new file mode 100644
index 00000000000..4fac03569e9
--- /dev/null
+++ b/intern/cycles/util/util_types_float8_impl.h
@@ -0,0 +1,113 @@
+/*
+Copyright (c) 2017, Intel Corporation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+* Neither the name of Intel Corporation nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __UTIL_TYPES_FLOAT8_IMPL_H__
+#define __UTIL_TYPES_FLOAT8_IMPL_H__
+
+#ifndef __UTIL_TYPES_H__
+# error "Do not include this file directly, include util_types.h instead."
+#endif
+
+#ifndef __KERNEL_GPU__
+# include <cstdio>
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+#ifndef __KERNEL_GPU__
+#ifdef __KERNEL_AVX2__
+__forceinline float8::float8()
+{
+}
+
+__forceinline float8::float8(const float8& f)
+ : m256(f.m256)
+{
+}
+
+__forceinline float8::float8(const __m256& f)
+ : m256(f)
+{
+}
+
+__forceinline float8::operator const __m256&(void) const
+{
+ return m256;
+}
+
+__forceinline float8::operator __m256&(void)
+{
+ return m256;
+}
+
+__forceinline float8& float8::operator =(const float8& f)
+{
+ m256 = f.m256;
+ return *this;
+}
+#endif /* __KERNEL_AVX2__ */
+
+__forceinline float float8::operator[](int i) const
+{
+ util_assert(i >= 0);
+ util_assert(i < 8);
+ return *(&a + i);
+}
+
+__forceinline float& float8::operator[](int i)
+{
+ util_assert(i >= 0);
+ util_assert(i < 8);
+ return *(&a + i);
+}
+
+ccl_device_inline float8 make_float8(float f)
+{
+#ifdef __KERNEL_AVX2__
+ float8 r(_mm256_set1_ps(f));
+#else
+ float8 r = {f, f, f, f, f, f, f, f};
+#endif
+ return r;
+}
+
+ccl_device_inline float8 make_float8(float a, float b, float c, float d,
+ float e, float f, float g, float h)
+{
+#ifdef __KERNEL_AVX2__
+	float8 r(_mm256_set_ps(h, g, f, e, d, c, b, a)); /* 'a' lands in the low lane, matching the scalar path */
+#else
+ float8 r = {a, b, c, d, e, f, g, h};
+#endif
+ return r;
+}
+
+#endif /* __KERNEL_GPU__ */
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */
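
A small usage sketch of the new float8 type (illustrative only, not part of the patch): make_float8 and operator[] behave the same whether the AVX2 or the scalar layout is compiled, so code like the following works in either build once util_types.h is included.

	float8 ones = make_float8(1.0f);
	float8 ramp = make_float8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
	float sum = 0.0f;
	for(int i = 0; i < 8; i++) {
		sum += ramp[i] * ones[i];  /* operator[] reads lane i in either layout */
	}
	/* sum == 28.0f */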