Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/wolfpld/tracy.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/imgui
diff options
context:
space:
mode:
authorBartosz Taudul <wolf@nereid.pl>2021-05-01 18:11:59 +0300
committerBartosz Taudul <wolf@nereid.pl>2021-05-01 19:10:20 +0300
commit630615c6c2005f1df7d72b0d838800c0b5dbbc7e (patch)
treefb6107b61a59a32cf2141935b43bcb9784518c80 /imgui
parent0e6a25fdd8d788263b2cf8fc721c07adb9616582 (diff)
Apply IM_NORMALIZE2F_OVER_ZERO optimization.
Diffstat (limited to 'imgui')
-rw-r--r--imgui/imgui_draw.cpp2
-rw-r--r--imgui/imgui_internal.h12
2 files changed, 12 insertions, 2 deletions
diff --git a/imgui/imgui_draw.cpp b/imgui/imgui_draw.cpp
index 138abc1e..a9d3149c 100644
--- a/imgui/imgui_draw.cpp
+++ b/imgui/imgui_draw.cpp
@@ -694,7 +694,7 @@ void ImDrawList::PrimQuadUV(const ImVec2& a, const ImVec2& b, const ImVec2& c, c
// On AddPolyline() and AddConvexPolyFilled() we intentionally avoid using ImVec2 and superfluous function calls to optimize debug/non-inlined builds.
// Those macros expects l-values.
-#define IM_NORMALIZE2F_OVER_ZERO(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 > 0.0f) { float inv_len = 1.0f / ImSqrt(d2); VX *= inv_len; VY *= inv_len; } } while (0)
+#define IM_NORMALIZE2F_OVER_ZERO(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 > 0.0f) { float inv_len = ImRsqrt(d2); VX *= inv_len; VY *= inv_len; } } while (0)
#define IM_FIXNORMAL2F(VX,VY) do { float d2 = VX*VX + VY*VY; if (d2 < 0.5f) d2 = 0.5f; float inv_lensq = 1.0f / d2; VX *= inv_lensq; VY *= inv_lensq; } while (0)
// TODO: Thickness anti-aliased lines cap are missing their AA fringe.
diff --git a/imgui/imgui_internal.h b/imgui/imgui_internal.h
index 599823f3..a5503374 100644
--- a/imgui/imgui_internal.h
+++ b/imgui/imgui_internal.h
@@ -51,6 +51,10 @@ Index of this file:
#include <math.h> // sqrtf, fabsf, fmodf, powf, floorf, ceilf, cosf, sinf
#include <limits.h> // INT_MIN, INT_MAX
+#if defined __SSE__ || defined __x86_64__ || defined _M_X64
+#include <immintrin.h>
+#endif
+
// Visual Studio warnings
#ifdef _MSC_VER
#pragma warning (push)
@@ -386,6 +390,12 @@ static inline float ImAbs(float x) { return fabsf(x); }
static inline double ImAbs(double x) { return fabs(x); }
static inline float ImSign(float x) { return (x < 0.0f) ? -1.0f : ((x > 0.0f) ? 1.0f : 0.0f); } // Sign operator - returns -1, 0 or 1 based on sign of argument
static inline double ImSign(double x) { return (x < 0.0) ? -1.0 : ((x > 0.0) ? 1.0 : 0.0); }
+#if defined __SSE__ || defined __x86_64__ || defined _M_X64
+static inline float ImRsqrt(float x) { return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x))); }
+#else
+static inline float ImRsqrt(float x) { return 1.0f / sqrtf(x); }
+#endif
+static inline double ImRsqrt(double x) { return 1.0 / sqrt(x); }
#endif
// - ImMin/ImMax/ImClamp/ImLerp/ImSwap are used by widgets which support variety of types: signed/unsigned int/long long float/double
// (Exceptionally using templates here but we could also redefine them for those types)
@@ -406,7 +416,7 @@ static inline ImVec4 ImLerp(const ImVec4& a, const ImVec4& b, float t)
static inline float ImSaturate(float f) { return (f < 0.0f) ? 0.0f : (f > 1.0f) ? 1.0f : f; }
static inline float ImLengthSqr(const ImVec2& lhs) { return (lhs.x * lhs.x) + (lhs.y * lhs.y); }
static inline float ImLengthSqr(const ImVec4& lhs) { return (lhs.x * lhs.x) + (lhs.y * lhs.y) + (lhs.z * lhs.z) + (lhs.w * lhs.w); }
-static inline float ImInvLength(const ImVec2& lhs, float fail_value) { float d = (lhs.x * lhs.x) + (lhs.y * lhs.y); if (d > 0.0f) return 1.0f / ImSqrt(d); return fail_value; }
+static inline float ImInvLength(const ImVec2& lhs, float fail_value) { float d = (lhs.x * lhs.x) + (lhs.y * lhs.y); if (d > 0.0f) return ImRsqrt(d); return fail_value; }
static inline float ImFloor(float f) { return (float)(int)(f); }
static inline ImVec2 ImFloor(const ImVec2& v) { return ImVec2((float)(int)(v.x), (float)(int)(v.y)); }
static inline int ImModPositive(int a, int b) { return (a + b) % b; }