diff options
author | Henrik Gramner <gramner@twoorioles.com> | 2022-07-06 15:43:44 +0300 |
---|---|---|
committer | Henrik Gramner <henrik@gramner.com> | 2022-07-06 16:05:47 +0300 |
commit | bd0466350d20e2c6aab4c47668cd5486dc7a3d94 (patch) | |
tree | a49202fdff9fe3f560aa4eadde6f54f4d033ed21 /src/cpu.h | |
parent | 820bf5156322ea6f9d1fc180ac579743347b9c5b (diff) |
Eliminate unused C DSP functions at compile time
When compiling with asm enabled there's no point in compiling
C versions of DSP functions that have asm implementations using
instruction sets that the compiler can unconditionally use.
E.g. when compiling with -mssse3 we can remove the C version
of all functions with SSSE3 implementations.
This is accomplished using the compiler's dead code elimination
functionality.
Can be configured using the new 'trim_dsp' meson option, which
by default is enabled when compiling in release mode.
Diffstat (limited to 'src/cpu.h')
-rw-r--r-- | src/cpu.h | 51 |
1 files changed, 48 insertions, 3 deletions
```diff
@@ -1,6 +1,6 @@
 /*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
+ * Copyright © 2018-2022, VideoLAN and dav1d authors
+ * Copyright © 2018-2022, Two Orioles, LLC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -51,7 +51,52 @@ DAV1D_API void dav1d_set_cpu_flags_mask(unsigned mask);
 int dav1d_num_logical_processors(Dav1dContext *c);
 
 static ALWAYS_INLINE unsigned dav1d_get_cpu_flags(void) {
-    return dav1d_cpu_flags & dav1d_cpu_flags_mask;
+    unsigned flags = dav1d_cpu_flags & dav1d_cpu_flags_mask;
+
+#if TRIM_DSP_FUNCTIONS
+/* Since this function is inlined, unconditionally setting a flag here will
+ * enable dead code elimination in the calling function. */
+#if ARCH_AARCH64 || ARCH_ARM
+#if defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32) || ARCH_AARCH64
+    flags |= DAV1D_ARM_CPU_FLAG_NEON;
+#endif
+#elif ARCH_PPC64LE
+#if defined(__VSX__)
+    flags |= DAV1D_PPC_CPU_FLAG_VSX;
+#endif
+#elif ARCH_X86
+#if defined(__AVX512F__) && defined(__AVX512CD__) && \
+    defined(__AVX512BW__) && defined(__AVX512DQ__) && \
+    defined(__AVX512VL__) && defined(__AVX512VNNI__) && \
+    defined(__AVX512IFMA__) && defined(__AVX512VBMI__) && \
+    defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) && \
+    defined(__AVX512BITALG__) && defined(__GFNI__) && \
+    defined(__VAES__) && defined(__VPCLMULQDQ__)
+    flags |= DAV1D_X86_CPU_FLAG_AVX512ICL |
+             DAV1D_X86_CPU_FLAG_AVX2 |
+             DAV1D_X86_CPU_FLAG_SSE41 |
+             DAV1D_X86_CPU_FLAG_SSSE3 |
+             DAV1D_X86_CPU_FLAG_SSE2;
+#elif defined(__AVX2__)
+    flags |= DAV1D_X86_CPU_FLAG_AVX2 |
+             DAV1D_X86_CPU_FLAG_SSE41 |
+             DAV1D_X86_CPU_FLAG_SSSE3 |
+             DAV1D_X86_CPU_FLAG_SSE2;
+#elif defined(__SSE4_1__) || defined(__AVX__)
+    flags |= DAV1D_X86_CPU_FLAG_SSE41 |
+             DAV1D_X86_CPU_FLAG_SSSE3 |
+             DAV1D_X86_CPU_FLAG_SSE2;
+#elif defined(__SSSE3__)
+    flags |= DAV1D_X86_CPU_FLAG_SSSE3 |
+             DAV1D_X86_CPU_FLAG_SSE2;
+#elif ARCH_X86_64 || defined(__SSE2__) || \
+      (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+    flags |= DAV1D_X86_CPU_FLAG_SSE2;
+#endif
+#endif
+#endif
+
+    return flags;
 }
 
 #endif /* DAV1D_SRC_CPU_H */
```