diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-04-13 20:34:45 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2017-04-25 11:33:26 +0300 |
commit | 51ec9441b74ccd02e5555b9385c3b2531f7411a6 (patch) | |
tree | aa580862491e63e42e91b9d1d63317d3f3baf01b /intern/cycles/util/util_types_int4_impl.h | |
parent | b06cd746ce9fb8dfe5361fa9d148bde73c26f5d3 (diff) |
Cycles: Split vectorized types into separate files
The final goal is to make vectorized types much easier to maintain.
The previous design had the following issues:
- Having all types and their method implementations in one place made the
source file rather bloated and unpleasant to navigate.
- It was not possible to quickly glance at the available API for the type
you are interested in.
- Adding more vectorization types will bloat the file even more, making
things even more tricky to follow.
Diffstat (limited to 'intern/cycles/util/util_types_int4_impl.h')
-rw-r--r-- | intern/cycles/util/util_types_int4_impl.h | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/intern/cycles/util/util_types_int4_impl.h b/intern/cycles/util/util_types_int4_impl.h new file mode 100644 index 00000000000..52cda1c74b5 --- /dev/null +++ b/intern/cycles/util/util_types_int4_impl.h @@ -0,0 +1,106 @@ +/* + * Copyright 2011-2017 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_TYPES_INT4_IMPL_H__ +#define __UTIL_TYPES_INT4_IMPL_H__ + +#ifndef __UTIL_TYPES_H__ +# error "Do not include this file directly, include util_types.h instead." 
+#endif + +CCL_NAMESPACE_BEGIN + +#ifndef __KERNEL_GPU__ +#ifdef __KERNEL_SSE__ +__forceinline int4::int4() +{ +} + +__forceinline int4::int4(const int4& a) + : m128(a.m128) +{ +} + +__forceinline int4::int4(const __m128i& a) + : m128(a) +{ +} + +__forceinline int4::operator const __m128i&(void) const +{ + return m128; +} + +__forceinline int4::operator __m128i&(void) +{ + return m128; +} + +__forceinline int4& int4::operator=(const int4& a) +{ + m128 = a.m128; + return *this; +} +#endif /* __KERNEL_SSE__ */ + +__forceinline int int4::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +__forceinline int& int4::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); +} + +ccl_device_inline int4 make_int4(int i) +{ +#ifdef __KERNEL_SSE__ + int4 a(_mm_set1_epi32(i)); +#else + int4 a = {i, i, i, i}; +#endif + return a; +} + +ccl_device_inline int4 make_int4(int x, int y, int z, int w) +{ +#ifdef __KERNEL_SSE__ + int4 a(_mm_set_epi32(w, z, y, x)); +#else + int4 a = {x, y, z, w}; +#endif + return a; +} + +ccl_device_inline int4 make_int4(const float3& f) +{ +#ifdef __KERNEL_SSE__ + int4 a(_mm_cvtps_epi32(f.m128)); +#else + int4 a = {(int)f.x, (int)f.y, (int)f.z, (int)f.w}; +#endif + return a; +} +#endif /* __KERNEL_GPU__ */ + +CCL_NAMESPACE_END + +#endif /* __UTIL_TYPES_INT4_IMPL_H__ */ |