Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/google/ruy.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBenoit Jacob <benoitjacob@google.com>2020-03-28 04:58:51 +0300
committerBenoit Jacob <benoitjacob@google.com>2020-03-30 23:51:39 +0300
commitf7ea583082c670103fb2cebd6035b944c71d64c4 (patch)
tree4a58b4b3a210fc78776e16cf19b46e671714ac4a /ruy/prepacked_cache.h
parent299a33a5c2affb88c75726c77be6dd4491418b17 (diff)
Move ruy's code to a ruy/ subdirectory.
The motivation is that having source files in the repository root runs into a number of corner cases with copybara setups and with external CMake build systems, so enclosing all code in ruy/ avoids that while generally making our setup much more similar to that of other related projects (TensorFlow, IREE). PiperOrigin-RevId: 303448881
Diffstat (limited to 'ruy/prepacked_cache.h')
-rw-r--r--ruy/prepacked_cache.h130
1 file changed, 130 insertions, 0 deletions
diff --git a/ruy/prepacked_cache.h b/ruy/prepacked_cache.h
new file mode 100644
index 0000000..eedd7e4
--- /dev/null
+++ b/ruy/prepacked_cache.h
@@ -0,0 +1,130 @@
+/* Copyright 2019 Google LLC. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_RUY_PREPACKED_CACHE_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_RUY_PREPACKED_CACHE_H_
+
+#include <cstddef>
+#include <iostream>
+#include <map>
+#include <queue>
+#include <vector>
+
+#include "ruy/allocator.h"
+#include "ruy/matrix.h"
+#include "ruy/time.h"
+
+namespace ruy {
+
+namespace detail {
+
+// Tracks a set of blocks allocated from the underlying system allocator.
+class SystemBlockAllocator {
+ public:
+  // Allocates `num_bytes` via the system aligned allocator and records the
+  // returned pointer so it can be individually freed later, or released in
+  // bulk by the destructor.
+  void *Alloc(std::ptrdiff_t num_bytes) {
+    void *p = detail::SystemAlignedAlloc(num_bytes);
+    blocks_.push_back(p);
+    return p;
+  }
+
+  // Frees `block`, which must have been returned by a prior Alloc call on
+  // this same object. Linear O(n) search over live blocks; presumably fine
+  // because the number of cached entries is expected to stay small (see the
+  // PrepackedCache class comment).
+  void Free(void *block) {
+    for (auto it = blocks_.begin(); it != blocks_.end(); ++it) {
+      if (*it == block) {
+        detail::SystemAlignedFree(block);
+        blocks_.erase(it);
+        return;
+      }
+    }
+    RUY_DCHECK(false); // Trying to free pointer we did not allocate.
+  }
+
+  // Releases every block that was not already freed (RAII cleanup).
+  // NOTE(review): this class owns raw pointers but is copyable/movable by
+  // default; a copy would double-free blocks_. Consider deleting the copy
+  // and move operations (Rule of Five).
+  ~SystemBlockAllocator() {
+    for (void *block : blocks_) {
+      detail::SystemAlignedFree(block);
+    }
+  }
+
+ private:
+  std::vector<void *> blocks_;  // All currently-live allocations.
+};
+
+} // namespace detail
+
+// Selects whether prepacked matrices may be cached across multiplications.
+// NOTE(review): kCacheLHSOnNarrowMul presumably enables caching of the
+// prepacked LHS only for narrow (GEMV-like) multiplications, judging by the
+// name and the RNN/GEMV use case described below -- confirm at call sites.
+enum CachePolicy { kNoCache, kCacheLHSOnNarrowMul };
+
+// "Low effort" Least Recently Used Cache for Prepacked Matrices
+// A cache mechanism for prepacked matrices that ejects oldest entries.
+// The implementation is "low effort" in the following ways:
+// - we just linearly search for the oldest entry when doing an ejection
+// - the ejection policy is very simple: if the new size would be above the
+//   threshold, we will eject entries until the size is below the threshold.
+// Current use cases (RNNs with GEMV operations) indicate that ejection is rare
+// and memory constraints are tight, so we devote no additional storage to the
+// LRU mechanism and accept O(n) search to eject oldest entry. In practice,
+// the number of total entries has not been shown to be large.
+// This class is not thread safe. In Ruy, memory allocation for packed matrices
+// is done in a single threaded context and the actual packing activity may
+// be done in a multi-threaded context.
+class PrepackedCache {
+ public:
+  // Default ejection threshold: 2^28 bytes = 256 MiB.
+  static constexpr int kDefaultEjectionThresholdBytes = 1 << 28;
+
+  // Key identifying a cached matrix. NOTE(review): a pair of opaque
+  // pointers; which two pointers the caller uses as the key is not visible
+  // here -- confirm at the call sites of Insert/FindAndUpdate.
+  using CacheKey = std::pair<void *, void *>;
+
+  // A cached prepacked matrix paired with the time it was last touched;
+  // the timestamp is what LRU ejection compares.
+  using MatrixWithTimeStamp = std::pair<PrepackedMatrix, TimePoint>;
+
+  using CacheIterator = std::map<CacheKey, MatrixWithTimeStamp>::const_iterator;
+
+  using AlignedAllocator = detail::AlignedAllocator;
+
+  // `ejection_threshold` is the total cache size in bytes above which
+  // entries are ejected, oldest first, until the size is below it again.
+  explicit PrepackedCache(
+      int32_t ejection_threshold = kDefaultEjectionThresholdBytes)
+      : ejection_threshold_(ejection_threshold), cache_size_(0) {}
+
+  // Looks for an entry with `key`. If found, update its time stamp.
+  // Returns cend() when the key is absent.
+  CacheIterator FindAndUpdate(const CacheKey &key);
+
+  // Returns end iterator for internal cache. The iterator type is appropriate
+  // to use with `FindAndUpdate`.
+  CacheIterator cend() const { return cache_.end(); }
+
+  // Returns the total size (in bytes) of data held in this cache.
+  // NOTE(review): returns int while the internal counter is size_t; a cache
+  // exceeding INT_MAX bytes would overflow here -- confirm intended limits.
+  int TotalSize() const { return cache_size_; }
+
+  // All calls to get current TimePoints go through here.
+  // TODO(b/145625614) Profile timestamps on relevant models to see if
+  // this level of granularity is sufficient. CoarseNow is cheap so
+  // it would be nice to keep it.
+  TimePoint CacheNow() const { return CoarseNow(); }
+
+  // Performs the memory allocation for the `data` and `sums` members of a
+  // PrepackedMatrix.
+  void AllocatePrepackedMatrix(PrepackedMatrix *pmatrix);
+
+  // Adds the PrepackedMatrix to the cache, possibly ejecting other values.
+  void Insert(const CacheKey &key, const PrepackedMatrix &matrix);
+
+ private:
+  // Ejects a single entry; per the class comment above, the oldest entry is
+  // found by linear search over the map.
+  void EjectOne();
+  // Implementation helper for Insert (defined out of line).
+  void DoInsert(const CacheKey &key, const PrepackedMatrix &matrix);
+  // Owns the memory backing all cached prepacked matrices.
+  detail::SystemBlockAllocator allocator_;
+  std::map<CacheKey, MatrixWithTimeStamp> cache_;
+  const int32_t ejection_threshold_;  // In bytes.
+  size_t cache_size_;  // Current total size of cached data, in bytes.
+};
+
+} // namespace ruy
+
+#endif // TENSORFLOW_LITE_EXPERIMENTAL_RUY_RUY_PREPACKED_CACHE_H_