
git.blender.org/blender.git
author    Brecht Van Lommel <brecht@blender.org>  2021-10-24 15:19:19 +0300
committer Brecht Van Lommel <brecht@blender.org>  2021-10-26 16:37:04 +0300
commit    fd25e883e2807a151f673b87c152a59701a0df80 (patch)
tree      9441933f32ba2672ca71c58842342a9c525e123e /intern/cycles/util/half.h
parent    d7d40745fa09061a3117bd3669c5a46bbf611eae (diff)
Cycles: remove prefix from source code file names
Remove the prefix from file names where it is the same as the folder name. This used to help when #includes referenced individual files, but includes are now always relative to the Cycles root directory, so the prefixes are redundant. For patches and branches, git merge and rebase should be able to detect the renames and move code over to the right file.
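For example (the old file name is inferred from the rename pattern described above), an include of this header changes as follows:

    #include "util/util_half.h"  /* before this commit */
    #include "util/half.h"       /* after this commit */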
Diffstat (limited to 'intern/cycles/util/half.h')
-rw-r--r--  intern/cycles/util/half.h | 169
1 file changed, 169 insertions(+), 0 deletions(-)
diff --git a/intern/cycles/util/half.h b/intern/cycles/util/half.h
new file mode 100644
index 00000000000..016975e3c25
--- /dev/null
+++ b/intern/cycles/util/half.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __UTIL_HALF_H__
+#define __UTIL_HALF_H__
+
+#include "util/math.h"
+#include "util/types.h"
+
+#if !defined(__KERNEL_GPU__) && defined(__KERNEL_SSE2__)
+# include "util/simd.h"
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Half Floats */
+
+/* CUDA and HIP have their own half data type, so there is no need to define one here. */
+#if !defined(__KERNEL_CUDA__) && !defined(__KERNEL_HIP__)
+/* Implementing this as a class rather than a typedef so that the compiler can tell it apart from
+ * unsigned shorts. */
+class half {
+ public:
+ half() : v(0)
+ {
+ }
+ half(const unsigned short &i) : v(i)
+ {
+ }
+ operator unsigned short()
+ {
+ return v;
+ }
+ half &operator=(const unsigned short &i)
+ {
+ v = i;
+ return *this;
+ }
+
+ private:
+ unsigned short v;
+};
+#endif
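+/* Example (illustrative, not part of this header): the class lets overload
+ * resolution tell half apart from unsigned short, which a typedef could not:
+ *
+ *   void write_pixel(unsigned short raw);
+ *   void write_pixel(half h);  // a distinct overload thanks to the class
+ */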
+
+struct half4 {
+ half x, y, z, w;
+};
+
+/* Conversion to/from half float for image textures
+ *
+ * Simplified float to half for fast sampling on processors without a native
+ * instruction, eliminating any NaN and inf values. */
+
+ccl_device_inline half float_to_half_image(float f)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
+ return __float2half(f);
+#else
+ const uint u = __float_as_uint(f);
+ /* Sign bit, shifted to its position. */
+ uint sign_bit = u & 0x80000000;
+ sign_bit >>= 16;
+ /* Exponent. */
+ uint exponent_bits = u & 0x7f800000;
+ /* Non-sign bits. */
+ uint value_bits = u & 0x7fffffff;
+ value_bits >>= 13; /* Align mantissa on MSB. */
+ value_bits -= 0x1c000; /* Adjust bias. */
+ /* Flush-to-zero. */
+ value_bits = (exponent_bits < 0x38800000) ? 0 : value_bits;
+ /* Clamp-to-max. */
+ value_bits = (exponent_bits > 0x47000000) ? 0x7bff : value_bits;
+ /* Denormals-as-zero. */
+ value_bits = (exponent_bits == 0 ? 0 : value_bits);
+ /* Re-insert sign bit and return. */
+ return (value_bits | sign_bit);
+#endif
+}
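+/* Worked example (illustrative): float_to_half_image(1.0f).
+ *
+ *   u             = 0x3F800000  (IEEE 754 bits of 1.0f)
+ *   sign_bit      = 0x00000000
+ *   exponent_bits = 0x3F800000  (within [0x38800000, 0x47000000]: no flush, no clamp)
+ *   value_bits    = 0x3F800000 >> 13 = 0x1FC00; - 0x1C000 = 0x3C00
+ *
+ * 0x3C00 is exactly 1.0 in half precision. */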
+
+ccl_device_inline float half_to_float_image(half h)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
+ return __half2float(h);
+#else
+ const int x = ((h & 0x8000) << 16) | (((h & 0x7c00) + 0x1C000) << 13) | ((h & 0x03FF) << 13);
+ return __int_as_float(x);
+#endif
+}
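+/* Worked example (illustrative): the inverse round-trips the value above.
+ *
+ *   h = 0x3C00
+ *   ((h & 0x7C00) + 0x1C000) << 13 = (0x3C00 + 0x1C000) << 13 = 0x3F800000
+ *
+ * The sign and mantissa terms are zero, so __int_as_float yields 1.0f. */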
+
+ccl_device_inline float4 half4_to_float4_image(const half4 h)
+{
+  /* Disabled because it gives different results than half_to_float_image; can we
+   * modify float_to_half_image so the conversion results are identical? */
+#if 0 /* defined(__KERNEL_AVX2__) */
+  /* CPU: F16C (vcvtph2ps). */
+ __m128i x = _mm_castpd_si128(_mm_load_sd((const double *)&h));
+ return float4(_mm_cvtph_ps(x));
+#endif
+
+ const float4 f = make_float4(half_to_float_image(h.x),
+ half_to_float_image(h.y),
+ half_to_float_image(h.z),
+ half_to_float_image(h.w));
+ return f;
+}
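+/* A brute-force comparison over all 65536 half bit patterns would quantify the
+ * difference noted above (illustrative sketch; host-only code, assumes F16C):
+ *
+ *   int mismatches = 0;
+ *   for (uint32_t bits = 0; bits <= 0xFFFF; bits++) {
+ *     const float a = half_to_float_image(half((unsigned short)bits));
+ *     const float b = _mm_cvtss_f32(_mm_cvtph_ps(_mm_set1_epi16((short)bits)));
+ *     mismatches += (__float_as_uint(a) != __float_as_uint(b));
+ *   }
+ */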
+
+/* Conversion to half float texture for display.
+ *
+ * Simplified float to half for fast display texture conversion on processors
+ * without a native instruction. Assumes no negative, no NaN, no inf, and sets
+ * denormal to 0. */
+
+ccl_device_inline half float_to_half_display(const float f)
+{
+#if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
+ return __float2half(f);
+#else
+  /* Clamp to [0, 65504], the largest finite half value. */
+  const int x = __float_as_int((f > 0.0f) ? ((f < 65504.0f) ? f : 65504.0f) : 0.0f);
+  const int absolute = x & 0x7FFFFFFF;
+  /* Adding 0xC8000000 is, modulo 2^32, the same as subtracting the exponent
+   * bias difference (127 - 15) << 23 = 0x38000000. */
+  const int Z = absolute + 0xC8000000;
+  /* Flush values below the smallest normal half to zero. */
+  const int result = (absolute < 0x38800000) ? 0 : Z;
+  /* Shift the mantissa into place and mask to 15 bits (sign is known zero). */
+  const int rshift = (result >> 13);
+  return (rshift & 0x7FFF);
+#endif
+}
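+/* Worked example (illustrative): float_to_half_display(100000.0f).
+ *
+ *   The input exceeds 65504.0f, so it clamps to 65504.0f = 0x477FE000.
+ *   absolute = 0x477FE000; Z = 0x477FE000 + 0xC8000000 = 0x0F7FE000 (mod 2^32)
+ *   result   = 0x0F7FE000 (not flushed); (result >> 13) & 0x7FFF = 0x7BFF
+ *
+ * 0x7BFF is the largest finite half, 65504. */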
+
+ccl_device_inline half4 float4_to_half4_display(const float4 f)
+{
+#ifdef __KERNEL_SSE2__
+ /* CPU: SSE and AVX. */
+  /* Clamp to [0, 65504], the largest finite half value. */
+  ssef x = min(max(load4f(f), 0.0f), 65504.0f);
+# ifdef __KERNEL_AVX2__
+  /* Native F16C conversion. */
+  ssei rpack = _mm_cvtps_ph(x, 0);
+# else
+  /* Same bit manipulation as the scalar path above, on four lanes at once. */
+  ssei absolute = cast(x) & 0x7FFFFFFF;
+  ssei Z = absolute + 0xC8000000;
+  ssei result = andnot(absolute < 0x38800000, Z);
+  ssei rshift = (result >> 13) & 0x7FFF;
+  /* Narrow the four 32-bit lanes to 16 bits each. */
+  ssei rpack = _mm_packs_epi32(rshift, rshift);
+# endif
+ half4 h;
+ _mm_storel_pi((__m64 *)&h, _mm_castsi128_ps(rpack));
+ return h;
+#else
+ /* GPU and scalar fallback. */
+ const half4 h = {float_to_half_display(f.x),
+ float_to_half_display(f.y),
+ float_to_half_display(f.z),
+ float_to_half_display(f.w)};
+ return h;
+#endif
+}
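+/* Example usage (illustrative): packing one display pixel.
+ *
+ *   const float4 rgba = make_float4(0.25f, 0.5f, 1.0f, 1.0f);
+ *   const half4 pixel = float4_to_half4_display(rgba);
+ */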
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_HALF_H__ */