Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJustin Tobler <jtobler@gitlab.com>2023-01-09 18:33:22 +0300
committerJustin Tobler <jtobler@gitlab.com>2023-01-09 18:33:22 +0300
commit835cf56e28370e1898fa3b2ba21d6fc6e3a82e47 (patch)
treec079a44d12f9b7f558ee53cc7b07f7a79c4ff96e
parentf177dc41e90d2d05fec6582e26361dbd3ba6c4ab (diff)
parentdd85450dd0f17e0f231835a30d3afb9d6949dcd1 (diff)
Merge branch 'toon-unarycache' into 'master'
repository: Add unarycache around FindLicense See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5080 Merged-by: Justin Tobler <jtobler@gitlab.com> Approved-by: Justin Tobler <jtobler@gitlab.com> Approved-by: John Cai <jcai@gitlab.com> Reviewed-by: Patrick Steinhardt <psteinhardt@gitlab.com> Reviewed-by: John Cai <jcai@gitlab.com> Reviewed-by: Justin Tobler <jtobler@gitlab.com> Co-authored-by: Toon Claes <toon@gitlab.com> Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r--internal/gitaly/service/repository/license.go152
-rw-r--r--internal/gitaly/service/repository/server.go5
-rw-r--r--internal/unarycache/cache.go83
-rw-r--r--internal/unarycache/cache_test.go152
-rw-r--r--internal/unarycache/testhelper_test.go11
5 files changed, 336 insertions, 67 deletions
diff --git a/internal/gitaly/service/repository/license.go b/internal/gitaly/service/repository/license.go
index 2888d690b..9140af665 100644
--- a/internal/gitaly/service/repository/license.go
+++ b/internal/gitaly/service/repository/license.go
@@ -20,6 +20,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service"
"gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/internal/structerr"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/unarycache"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
)
@@ -40,6 +41,14 @@ var nicknameByLicenseIdentifier = map[string]string{
"gpl-2.0": "GNU GPLv2",
}
+func newLicenseCache() *unarycache.Cache[git.ObjectID, *gitalypb.FindLicenseResponse] {
+ cache, err := unarycache.New(100, findLicense)
+ if err != nil {
+ panic(err)
+ }
+ return cache
+}
+
func (s *server) FindLicense(ctx context.Context, req *gitalypb.FindLicenseRequest) (*gitalypb.FindLicenseResponse, error) {
repository := req.GetRepository()
if err := service.ValidateRepository(repository); err != nil {
@@ -56,88 +65,97 @@ func (s *server) FindLicense(ctx context.Context, req *gitalypb.FindLicenseReque
return nil, structerr.NewInternal("cannot find HEAD revision: %v", err)
}
- repoFiler := &gitFiler{ctx: ctx, repo: repo, treeishID: headOID}
- detectedLicenses, err := licensedb.Detect(repoFiler)
+ response, err := s.licenseCache.GetOrCompute(ctx, repo, headOID)
if err != nil {
- if errors.Is(err, licensedb.ErrNoLicenseFound) {
- if repoFiler.foundLicense {
- // In case the license is not identified, but a file containing some
- // sort of license is found, we return a predefined response.
- return &gitalypb.FindLicenseResponse{
- LicenseName: "Other",
- LicenseShortName: "other",
- LicenseNickname: "LICENSE", // Show as LICENSE in the UI
- LicensePath: repoFiler.path,
- }, nil
- }
- return &gitalypb.FindLicenseResponse{}, nil
- }
- return nil, structerr.NewInternal("detect licenses: %w", err)
+ return nil, err
}
- // This should not happen as the error must be returned, but let's keep it safe to avoid panics.
- if len(detectedLicenses) == 0 {
- return &gitalypb.FindLicenseResponse{}, nil
- }
+ return response, nil
+ }
- type bestMatch struct {
- shortName string
- api.Match
- }
- bestMatches := make([]bestMatch, 0, len(detectedLicenses))
- for candidate, match := range detectedLicenses {
- bestMatches = append(bestMatches, bestMatch{Match: match, shortName: candidate})
- }
- sort.Slice(bestMatches, func(i, j int) bool {
- // Because there could be multiple matches with the same confidence, we need
- // to make sure the function is consistent and returns the same license on
- // each invocation. That is why we sort by the short name as well.
- if bestMatches[i].Confidence == bestMatches[j].Confidence {
- return trimDeprecatedPrefix(bestMatches[i].shortName) < trimDeprecatedPrefix(bestMatches[j].shortName)
+ client, err := s.ruby.RepositoryServiceClient(ctx)
+ if err != nil {
+ return nil, err
+ }
+ clientCtx, err := rubyserver.SetHeaders(ctx, s.locator, repository)
+ if err != nil {
+ return nil, err
+ }
+ return client.FindLicense(clientCtx, req)
+}
+
+func findLicense(ctx context.Context, repo *localrepo.Repo, commitID git.ObjectID) (*gitalypb.FindLicenseResponse, error) {
+ repoFiler := &gitFiler{ctx: ctx, repo: repo, treeishID: commitID}
+ detectedLicenses, err := licensedb.Detect(repoFiler)
+ if err != nil {
+ if errors.Is(err, licensedb.ErrNoLicenseFound) {
+ if repoFiler.foundLicense {
+ // In case the license is not identified, but a file containing some
+ // sort of license is found, we return a predefined response.
+ return &gitalypb.FindLicenseResponse{
+ LicenseName: "Other",
+ LicenseShortName: "other",
+ LicenseNickname: "LICENSE", // Show as LICENSE in the UI
+ LicensePath: repoFiler.path,
+ }, nil
}
- return bestMatches[i].Confidence > bestMatches[j].Confidence
- })
+ return &gitalypb.FindLicenseResponse{}, nil
+ }
+ return nil, structerr.NewInternal("detect licenses: %w", err)
+ }
- // We also don't want to return the prefix back to the caller if it exists.
- shortName := trimDeprecatedPrefix(bestMatches[0].shortName)
+ // This should not happen as the error must be returned, but let's keep it safe to avoid panics.
+ if len(detectedLicenses) == 0 {
+ return &gitalypb.FindLicenseResponse{}, nil
+ }
- name, err := licensedb.LicenseName(shortName)
- if err != nil {
- return nil, structerr.NewInternal("license name by id %q: %w", shortName, err)
+ type bestMatch struct {
+ shortName string
+ api.Match
+ }
+ bestMatches := make([]bestMatch, 0, len(detectedLicenses))
+ for candidate, match := range detectedLicenses {
+ bestMatches = append(bestMatches, bestMatch{Match: match, shortName: candidate})
+ }
+ sort.Slice(bestMatches, func(i, j int) bool {
+ // Because there could be multiple matches with the same confidence, we need
+ // to make sure the function is consistent and returns the same license on
+ // each invocation. That is why we sort by the short name as well.
+ if bestMatches[i].Confidence == bestMatches[j].Confidence {
+ return trimDeprecatedPrefix(bestMatches[i].shortName) < trimDeprecatedPrefix(bestMatches[j].shortName)
}
+ return bestMatches[i].Confidence > bestMatches[j].Confidence
+ })
- urls, err := licensedb.LicenseURLs(shortName)
- if err != nil {
- return nil, structerr.NewInternal("license URLs by id %q: %w", shortName, err)
- }
- var url string
- if len(urls) > 0 {
- // The URL list is returned in an ordered slice, so we just pick up the first one from the list.
- url = urls[0]
- }
+ // We also don't want to return the prefix back to the caller if it exists.
+ shortName := trimDeprecatedPrefix(bestMatches[0].shortName)
- // The license identifier used by `github.com/go-enry/go-license-detector` is
- // case-sensitive, but the API requires all license identifiers to be lower-cased.
- shortName = strings.ToLower(shortName)
- nickname := nicknameByLicenseIdentifier[shortName]
- return &gitalypb.FindLicenseResponse{
- LicenseShortName: shortName,
- LicensePath: bestMatches[0].File,
- LicenseName: name,
- LicenseUrl: url,
- LicenseNickname: nickname,
- }, nil
+ name, err := licensedb.LicenseName(shortName)
+ if err != nil {
+ return nil, structerr.NewInternal("license name by id %q: %w", shortName, err)
}
- client, err := s.ruby.RepositoryServiceClient(ctx)
+ urls, err := licensedb.LicenseURLs(shortName)
if err != nil {
- return nil, err
+ return nil, structerr.NewInternal("license URLs by id %q: %w", shortName, err)
}
- clientCtx, err := rubyserver.SetHeaders(ctx, s.locator, repository)
- if err != nil {
- return nil, err
+ var url string
+ if len(urls) > 0 {
+ // The URL list is returned in an ordered slice, so we just pick up the first one from the list.
+ url = urls[0]
}
- return client.FindLicense(clientCtx, req)
+
+ // The license identifier used by `github.com/go-enry/go-license-detector` is
+ // case-sensitive, but the API requires all license identifiers to be lower-cased.
+ shortName = strings.ToLower(shortName)
+ nickname := nicknameByLicenseIdentifier[shortName]
+ return &gitalypb.FindLicenseResponse{
+ LicenseShortName: shortName,
+ LicensePath: bestMatches[0].File,
+ LicenseName: name,
+ LicenseUrl: url,
+ LicenseNickname: nickname,
+ }, nil
}
// For the deprecated licenses, the `github.com/go-enry/go-license-detector` package
diff --git a/internal/gitaly/service/repository/server.go b/internal/gitaly/service/repository/server.go
index a6d8faa07..d63bfade7 100644
--- a/internal/gitaly/service/repository/server.go
+++ b/internal/gitaly/service/repository/server.go
@@ -12,6 +12,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/rubyserver"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/storage"
"gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/unarycache"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
)
@@ -27,6 +28,8 @@ type server struct {
catfileCache catfile.Cache
git2goExecutor *git2go.Executor
housekeepingManager housekeeping.Manager
+
+ licenseCache *unarycache.Cache[git.ObjectID, *gitalypb.FindLicenseResponse]
}
// NewServer creates a new instance of a gRPC repo server
@@ -52,6 +55,8 @@ func NewServer(
catfileCache: catfileCache,
git2goExecutor: git2goExecutor,
housekeepingManager: housekeepingManager,
+
+ licenseCache: newLicenseCache(),
}
}
diff --git a/internal/unarycache/cache.go b/internal/unarycache/cache.go
new file mode 100644
index 000000000..b330df56c
--- /dev/null
+++ b/internal/unarycache/cache.go
@@ -0,0 +1,83 @@
+// Package unarycache allows you to cache responses for unary gRPC messages.
+//
+// Some gRPC unary message take a considerable amount of compute to build the
+// response. This package will allow users to cache this response.
+//
+// For the time being it is implemented using a simple in-memory
+// least-recently-used cache.
+package unarycache
+
+import (
+ "context"
+ "fmt"
+
+ lru "github.com/hashicorp/golang-lru/v2"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+)
+
+// Generator is a function that computes a cacheable value for a repository based on the given key.
+// The key must uniquely identify the result. A valid choice would for example be an object ID.
+type Generator[Key comparable, Value any] func(context.Context, *localrepo.Repo, Key) (Value, error)
+
+// cacheKey is a repository-scoped key for the cache.
+type cacheKey[Key comparable] struct {
+ // repoPath is the path of the repository for which a cache entry is active.
+ repoPath string
+ // key is the key that uniquely identifies a result.
+ key Key
+}
+
+// Cache is a cache that stores values that can be uniquely computed for a specific key.
+type Cache[Key comparable, Value any] struct {
+ // generator is the generator function that computes the value for a given key if the key is
+ // not yet cached.
+ generator Generator[Key, Value]
+ // lru is the least-recently-used cache that stores cached values. The cache keys are scoped
+ // to the repository and the respective key that uniquely identifies the result.
+ lru *lru.Cache[cacheKey[Key], Value]
+}
+
+// New creates a new Cache. The cache will hold at maximum `maxEntries`, if more than this many
+// entries are added then the least-recently-used one will be evicted from the cache. If a
+// repository-scoped key does not exist in the cache, then the generator function will be called to
+// compute the value for the given repository and key.
+func New[Key comparable, Value any](maxEntries int, generator Generator[Key, Value]) (*Cache[Key, Value], error) {
+ lru, err := lru.New[cacheKey[Key], Value](maxEntries)
+ if err != nil {
+ return nil, fmt.Errorf("creating LRU cache: %w", err)
+ }
+
+ return &Cache[Key, Value]{
+ generator: generator,
+ lru: lru,
+ }, nil
+}
+
+// GetOrCompute either returns a cached value or computes the value for the given repository and
+// key.
+func (c *Cache[Key, Value]) GetOrCompute(ctx context.Context, repo *localrepo.Repo, key Key) (Value, error) {
+ var defaultValue Value
+
+ repoPath, err := repo.Path()
+ if err != nil {
+ return defaultValue, fmt.Errorf("getting repo path: %w", err)
+ }
+
+ repoScopedKey := cacheKey[Key]{
+ repoPath: repoPath,
+ key: key,
+ }
+
+ if value, ok := c.lru.Get(repoScopedKey); ok {
+ return value, nil
+ }
+
+ value, err := c.generator(ctx, repo, key)
+ if err != nil {
+ return defaultValue, err
+ }
+
+ c.lru.Add(repoScopedKey, value)
+
+ return value, nil
+}
diff --git a/internal/unarycache/cache_test.go b/internal/unarycache/cache_test.go
new file mode 100644
index 000000000..846e346e6
--- /dev/null
+++ b/internal/unarycache/cache_test.go
@@ -0,0 +1,152 @@
+package unarycache
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg"
+)
+
+func TestCache_GetOrCompute(t *testing.T) {
+ t.Parallel()
+
+ ctx := testhelper.Context(t)
+ cfg := testcfg.Build(t)
+
+ repoProto, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ t.Run("returns identical value", func(t *testing.T) {
+ generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) {
+ return "hello", nil
+ }
+ cacher, err := New(5, generator)
+ require.NoError(t, err)
+
+ actual, err := cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "hello", actual)
+ })
+
+ t.Run("returns identical error when generator fails", func(t *testing.T) {
+ generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) {
+ return "", errors.New("broken")
+ }
+ cacher, err := New(5, generator)
+ require.NoError(t, err)
+
+ _, err = cacher.GetOrCompute(ctx, repo, "foo")
+ require.EqualError(t, err, "broken")
+ })
+
+ t.Run("returns cached response", func(t *testing.T) {
+ first := true
+
+ generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) {
+ if first {
+ first = false
+
+ return "hello", nil
+ }
+
+ return "", errors.New("broken")
+ }
+ cacher, err := New(5, generator)
+ require.NoError(t, err)
+
+ // Populate cache
+ actual, err := cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "hello", actual)
+
+ // Sanity check on generator
+ _, err = generator(ctx, repo, "foo")
+ require.EqualError(t, err, "broken")
+
+ // Fetch from cache
+ actual, err = cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "hello", actual)
+ })
+
+ t.Run("different values for different keys", func(t *testing.T) {
+ count := 0
+ generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) {
+ count++
+ return fmt.Sprintf("You're number %d", count), nil
+ }
+ cacher, err := New(5, generator)
+ require.NoError(t, err)
+
+ actual, err := cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "You're number 1", actual)
+
+ actual, err = cacher.GetOrCompute(ctx, repo, "bar")
+ require.NoError(t, err)
+ require.Equal(t, "You're number 2", actual)
+
+ actual, err = cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "You're number 1", actual)
+ })
+
+ t.Run("only keeps given max entries", func(t *testing.T) {
+ count := 0
+ generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) {
+ count++
+ return fmt.Sprintf("You're number %d", count), nil
+ }
+ cacher, err := New(5, generator)
+ require.NoError(t, err)
+
+ actual, err := cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "You're number 1", actual)
+
+ for i, key := range []string{"a", "b", "c", "d", "e"} {
+ actual, err = cacher.GetOrCompute(ctx, repo, key)
+ require.NoError(t, err)
+ require.Equal(t, fmt.Sprintf("You're number %d", i+2), actual)
+ }
+
+ actual, err = cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "You're number 7", actual)
+ })
+
+ t.Run("same cache key on different repos", func(t *testing.T) {
+ otherProto, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{
+ SkipCreationViaService: true,
+ })
+ otherRepo := localrepo.NewTestRepo(t, cfg, otherProto)
+
+ count := 0
+ generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) {
+ count++
+ return fmt.Sprintf("You're number %d", count), nil
+ }
+ cacher, err := New(5, generator)
+ require.NoError(t, err)
+
+ actual, err := cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "You're number 1", actual)
+
+ actual, err = cacher.GetOrCompute(ctx, otherRepo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "You're number 2", actual)
+
+ actual, err = cacher.GetOrCompute(ctx, repo, "foo")
+ require.NoError(t, err)
+ require.Equal(t, "You're number 1", actual)
+ })
+}
diff --git a/internal/unarycache/testhelper_test.go b/internal/unarycache/testhelper_test.go
new file mode 100644
index 000000000..9828d2a13
--- /dev/null
+++ b/internal/unarycache/testhelper_test.go
@@ -0,0 +1,11 @@
+package unarycache
+
+import (
+ "testing"
+
+ "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
+)
+
+func TestMain(m *testing.M) {
+ testhelper.Run(m)
+}