Welcome to mirror list, hosted at ThFree Co, Russian Federation.

math_int4.h « util « cycles « intern - git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: fbdada223cb74945a36db1a26f34a6a75748f031 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

#ifndef __UTIL_MATH_INT4_H__
#define __UTIL_MATH_INT4_H__

#ifndef __UTIL_MATH_H__
#  error "Do not include this file directly, include util/types.h instead."
#endif

CCL_NAMESPACE_BEGIN

/*******************************************************************************
 * Declaration.
 */

#ifndef __KERNEL_GPU__
/* Component-wise addition. */
ccl_device_inline int4 operator+(const int4 &a, const int4 &b);
ccl_device_inline int4 operator+=(int4 &a, const int4 &b);

/* Component-wise shifts; every component is shifted by the same count. */
ccl_device_inline int4 operator>>(const int4 &a, int i);
ccl_device_inline int4 operator<<(const int4 &a, int i);

/* Component-wise comparisons returning an int4 mask. With __KERNEL_SSE__ a
 * true component is all ones, in the scalar fallback it is 1. */
ccl_device_inline int4 operator<(const int4 &a, const int4 &b);
ccl_device_inline int4 operator>=(const int4 &a, const int4 &b);

/* Component-wise bitwise AND. */
ccl_device_inline int4 operator&(const int4 &a, const int4 &b);

/* Component-wise minimum, maximum and range clamp. */
ccl_device_inline int4 min(int4 a, int4 b);
ccl_device_inline int4 max(int4 a, int4 b);
ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx);

/* Mask-driven choice between a and b. */
ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b);

/* Build an int4 from four consecutive ints in memory. Declared here so the
 * list covers every function defined in this header. */
ccl_device_inline int4 load_int4(const int *v);
#endif /* __KERNEL_GPU__ */

/*******************************************************************************
 * Definition.
 */

#ifndef __KERNEL_GPU__
/* Component-wise sum of two int4 vectors. */
ccl_device_inline int4 operator+(const int4 &a, const int4 &b)
{
#  ifdef __KERNEL_SSE__
  /* A single packed 32-bit add handles all four lanes. */
  const __m128i lane_sums = _mm_add_epi32(a.m128, b.m128);
  return int4(lane_sums);
#  else
  const int sum_x = a.x + b.x;
  const int sum_y = a.y + b.y;
  const int sum_z = a.z + b.z;
  const int sum_w = a.w + b.w;
  return make_int4(sum_x, sum_y, sum_z, sum_w);
#  endif
}

/* Adds b to a in place (component-wise) and returns a copy of the updated a.
 * Note the result is returned by value, not by reference. */
ccl_device_inline int4 operator+=(int4 &a, const int4 &b)
{
  a = a + b;
  return a;
}

/* Shifts every component of a right by i bits.
 * The SSE path uses an arithmetic (sign-filling) packed shift; the scalar path
 * applies the built-in >> to each component. */
ccl_device_inline int4 operator>>(const int4 &a, int i)
{
#  ifdef __KERNEL_SSE__
  const __m128i shifted = _mm_srai_epi32(a.m128, i);
  return int4(shifted);
#  else
  const int shr_x = a.x >> i;
  const int shr_y = a.y >> i;
  const int shr_z = a.z >> i;
  const int shr_w = a.w >> i;
  return make_int4(shr_x, shr_y, shr_z, shr_w);
#  endif
}

/* Shifts every component of a left by i bits. */
ccl_device_inline int4 operator<<(const int4 &a, int i)
{
#  ifdef __KERNEL_SSE__
  const __m128i shifted = _mm_slli_epi32(a.m128, i);
  return int4(shifted);
#  else
  const int shl_x = a.x << i;
  const int shl_y = a.y << i;
  const int shl_z = a.z << i;
  const int shl_w = a.w << i;
  return make_int4(shl_x, shl_y, shl_z, shl_w);
#  endif
}

/* Component-wise "a < b", returned as an int4 mask.
 * NOTE: the encoding of "true" depends on the build: the SSE compare sets the
 * whole lane to all ones, while the scalar fallback stores the comparison
 * result, i.e. 1. Both encodings are non-zero for true and zero for false. */
ccl_device_inline int4 operator<(const int4 &a, const int4 &b)
{
#  ifdef __KERNEL_SSE__
  const __m128i less_mask = _mm_cmplt_epi32(a.m128, b.m128);
  return int4(less_mask);
#  else
  const int lt_x = a.x < b.x;
  const int lt_y = a.y < b.y;
  const int lt_z = a.z < b.z;
  const int lt_w = a.w < b.w;
  return make_int4(lt_x, lt_y, lt_z, lt_w);
#  endif
}

/* Component-wise "a >= b", returned as an int4 mask.
 * NOTE: as with the other comparison in this file, "true" is all ones per lane
 * on the SSE path and 1 on the scalar path. */
ccl_device_inline int4 operator>=(const int4 &a, const int4 &b)
{
#  ifdef __KERNEL_SSE__
  /* There is no packed >= compare used here: compute a < b and flip every bit
   * by XOR-ing with an all-ones vector. */
  const __m128i all_ones = _mm_set1_epi32(0xffffffff);
  const __m128i less_mask = _mm_cmplt_epi32(a.m128, b.m128);
  return int4(_mm_xor_si128(all_ones, less_mask));
#  else
  const int ge_x = a.x >= b.x;
  const int ge_y = a.y >= b.y;
  const int ge_z = a.z >= b.z;
  const int ge_w = a.w >= b.w;
  return make_int4(ge_x, ge_y, ge_z, ge_w);
#  endif
}

/* Component-wise bitwise AND of two int4 vectors. */
ccl_device_inline int4 operator&(const int4 &a, const int4 &b)
{
#  ifdef __KERNEL_SSE__
  const __m128i anded = _mm_and_si128(a.m128, b.m128);
  return int4(anded);
#  else
  const int and_x = a.x & b.x;
  const int and_y = a.y & b.y;
  const int and_z = a.z & b.z;
  const int and_w = a.w & b.w;
  return make_int4(and_x, and_y, and_z, and_w);
#  endif
}

/* Component-wise minimum of a and b.
 * The packed instruction is only used when both __KERNEL_SSE__ and
 * __KERNEL_SSE41__ are defined; otherwise each component goes through the
 * scalar min(). */
ccl_device_inline int4 min(int4 a, int4 b)
{
#  if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
  const __m128i lane_mins = _mm_min_epi32(a.m128, b.m128);
  return int4(lane_mins);
#  else
  const int lo_x = min(a.x, b.x);
  const int lo_y = min(a.y, b.y);
  const int lo_z = min(a.z, b.z);
  const int lo_w = min(a.w, b.w);
  return make_int4(lo_x, lo_y, lo_z, lo_w);
#  endif
}

/* Component-wise maximum of a and b.
 * The packed instruction is only used when both __KERNEL_SSE__ and
 * __KERNEL_SSE41__ are defined; otherwise each component goes through the
 * scalar max(). */
ccl_device_inline int4 max(int4 a, int4 b)
{
#  if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
  const __m128i lane_maxs = _mm_max_epi32(a.m128, b.m128);
  return int4(lane_maxs);
#  else
  const int hi_x = max(a.x, b.x);
  const int hi_y = max(a.y, b.y);
  const int hi_z = max(a.z, b.z);
  const int hi_w = max(a.w, b.w);
  return make_int4(hi_x, hi_y, hi_z, hi_w);
#  endif
}

/* Clamps each component of a to the range given by the matching components of
 * mn (lower bound) and mx (upper bound). The lower bound is applied first and
 * the upper bound second, so mx wins if a component of mn exceeds mx. */
ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx)
{
  const int4 raised = max(a, mn);
  return min(raised, mx);
}

/* Per-component choice between a and b, driven by mask.
 *
 * With __KERNEL_SSE__ this is a bitwise blend: each bit set in mask takes the
 * corresponding bit of a, each clear bit takes the bit of b. The scalar
 * fallback instead takes the whole component of a whenever the mask component
 * is non-zero, and the component of b otherwise. The two paths agree when
 * every mask component is either zero or all ones. */
ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b)
{
#  ifdef __KERNEL_SSE__
  /* Read the .m128 member explicitly, as the other SSE paths in this file do,
   * instead of depending on an implicit int4 -> __m128i conversion. */
  const __m128i from_a = _mm_and_si128(mask.m128, a.m128);
  /* andnot computes (~mask) & b. */
  const __m128i from_b = _mm_andnot_si128(mask.m128, b.m128);
  return int4(_mm_or_si128(from_a, from_b));
#  else
  return make_int4(
      (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w);
#  endif
}

/* Builds an int4 from the four consecutive ints v[0]..v[3].
 * v must point to at least four readable ints. The SSE path uses an unaligned
 * load, so no particular alignment of v is required. */
ccl_device_inline int4 load_int4(const int *v)
{
#  ifdef __KERNEL_SSE__
  /* Keep the const qualifier through the cast: the load only reads from v and
   * the intrinsic accepts a pointer-to-const. */
  return int4(_mm_loadu_si128((const __m128i *)v));
#  else
  return make_int4(v[0], v[1], v[2], v[3]);
#  endif
}
#endif /* __KERNEL_GPU__ */

CCL_NAMESPACE_END

#endif /* __UTIL_MATH_INT4_H__ */