diff options
Diffstat (limited to 'ruy/path.h')
-rw-r--r-- | ruy/path.h | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/ruy/path.h b/ruy/path.h new file mode 100644 index 0000000..7141b16 --- /dev/null +++ b/ruy/path.h @@ -0,0 +1,162 @@ +/* Copyright 2019 Google LLC. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_RUY_PATH_H_ +#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_RUY_PATH_H_ + +#include <cstdint> + +#include "ruy/platform.h" +#include "ruy/size_util.h" + +namespace ruy { + +// A Path is a choice of implementation path, e.g. between reference code +// and optimized code, or between different optimized code paths using different +// instruction sets. +// +// It's important that any symbol that depends on such implementation +// details is somehow templatized on such a Path, so that different Path values +// yield different symbols, so we never have the situation where a symbol has +// multiple inequivalent definitions based on which code paths are compiled. +// That would be a violation of the ODR (One Definition Rule) which is Undefined +// Behavior, and one of the most serious issues plaguing both Eigen and +// gemmlowp. +// +// This enum is actually a bit-field: aside from kNone, all other values are +// powers of two, thus are one bit each. We define bit-wise operators below +// for this enum. Some places in Ruy accept a Path bit-field where multiple +// Paths may be selected, while some other places require a single Path (i.e. 
+// just one of the enum values here). Typically, user-facing parts of Ruy +// accept arbitrary bit-fields, allowing the user to compile support for +// multiple paths and to inform Ruy of all the paths that are to be enabled +// at runtime; then, typically in dispatch.h, we internally pick one +// specific path and from there on, internal Ruy code deals with only one +// path. +// +// When a user selects a set of compiled paths, Ruy internally dispatches to the +// "best" one, which typically means the newest optimized instructions for a +// given base architecture (such as ARM). Higher values of this enum correspond +// to "better" code paths within a given base architecture for which Ruy has +// optimized code paths. +// +// Values are reused across architectures. +// Rationale: Scale better to N architectures, it is good to have small values +// both for the compile-time logic to select paths, and when manually spelling +// out Path values, such as when invoking a test or benchmark. +enum class Path : std::uint8_t { + // This is a special null value, representing the absence of any path. + kNone = 0, + // Reference multiplication code. + // The main purpose of this path is to have a very simple standalone Mul + // implementation to check against. + // This path bypasses almost all of Ruy's internal implementation details. + // + // This is intended for testing/development. + kReference = 0x1, + // Standard C++ implementation of Ruy's architecture-specific parts. + // Unlike Path::kReference, this path exercises most of Ruy's internal logic. + // + // This is intended for testing/development. + kStandardCpp = 0x2, + +#if RUY_PLATFORM(ARM) + // ARM architectures. + // + // Optimized path using a widely available subset of ARM NEON instructions. + kNeon = 0x4, + // Optimized path making use of ARM NEON dot product instructions that are + // available on newer ARM cores. 
+ kNeonDotprod = 0x8, +#endif // RUY_PLATFORM(ARM) + +#if RUY_PLATFORM(X86) + // x86 architectures. + // + // TODO(b/147376783): SSE 4.2 and AVX-VNNI support is incomplete / + // placeholder. + // Optimization is not finished. In particular the dimensions of the kernel + // blocks can be changed as desired. + // + // Optimized for SSE 4.2. + kSse42 = 0x4, + // Optimized for AVX2. + kAvx2 = 0x8, + // Optimized for AVX-512. + kAvx512 = 0x10, + // TODO(b/147376783): SSE 4.2 and AVX-VNNI support is incomplete / + // placeholder. + // Optimization is not finished. In particular the dimensions of the kernel + // blocks can be changed as desired. + // + // Optimized for AVX-VNNI. + kAvxVnni = 0x20, +#endif // RUY_PLATFORM(X86) +}; + +inline constexpr Path operator|(Path p, Path q) { + return static_cast<Path>(static_cast<std::uint32_t>(p) | + static_cast<std::uint32_t>(q)); +} + +inline constexpr Path operator&(Path p, Path q) { + return static_cast<Path>(static_cast<std::uint32_t>(p) & + static_cast<std::uint32_t>(q)); +} + +inline constexpr Path operator^(Path p, Path q) { + return static_cast<Path>(static_cast<std::uint32_t>(p) ^ + static_cast<std::uint32_t>(q)); +} + +inline constexpr Path operator~(Path p) { + return static_cast<Path>(~static_cast<std::uint32_t>(p)); +} + +inline Path GetMostSignificantPath(Path path_mask) { + return static_cast<Path>(round_down_pot(static_cast<int>(path_mask))); +} + +// ruy::kAllPaths represents all Path's that make sense to on a given +// base architecture. 
+#ifdef __linux__ +#if RUY_PLATFORM(NEON_64) +constexpr Path kAllPaths = + Path::kReference | Path::kStandardCpp | Path::kNeon | Path::kNeonDotprod; +#elif RUY_PLATFORM(NEON_32) +constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon; +#elif RUY_PLATFORM(X86) +constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | + Path::kSse42 | Path::kAvx2 | Path::kAvx512 | + Path::kAvxVnni; +#else +constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp; +#endif +#else // __linux__ +// We don't know how to do runtime dotprod detection outside of linux for now. +#if RUY_PLATFORM(NEON) +constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | Path::kNeon; +#elif RUY_PLATFORM(X86) +constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp | + Path::kSse42 | Path::kAvx2 | Path::kAvx512 | + Path::kAvxVnni; +#else +constexpr Path kAllPaths = Path::kReference | Path::kStandardCpp; +#endif +#endif // __linux__ + +} // namespace ruy + +#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_RUY_PATH_H_ |