diff options
author | Justin Tobler <jtobler@gitlab.com> | 2023-01-09 18:33:22 +0300 |
---|---|---|
committer | Justin Tobler <jtobler@gitlab.com> | 2023-01-09 18:33:22 +0300 |
commit | 835cf56e28370e1898fa3b2ba21d6fc6e3a82e47 (patch) | |
tree | c079a44d12f9b7f558ee53cc7b07f7a79c4ff96e | |
parent | f177dc41e90d2d05fec6582e26361dbd3ba6c4ab (diff) | |
parent | dd85450dd0f17e0f231835a30d3afb9d6949dcd1 (diff) |
Merge branch 'toon-unarycache' into 'master'
repository: Add unarycache around FindLicense
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5080
Merged-by: Justin Tobler <jtobler@gitlab.com>
Approved-by: Justin Tobler <jtobler@gitlab.com>
Approved-by: John Cai <jcai@gitlab.com>
Reviewed-by: Patrick Steinhardt <psteinhardt@gitlab.com>
Reviewed-by: John Cai <jcai@gitlab.com>
Reviewed-by: Justin Tobler <jtobler@gitlab.com>
Co-authored-by: Toon Claes <toon@gitlab.com>
Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r-- | internal/gitaly/service/repository/license.go | 152 | ||||
-rw-r--r-- | internal/gitaly/service/repository/server.go | 5 | ||||
-rw-r--r-- | internal/unarycache/cache.go | 83 | ||||
-rw-r--r-- | internal/unarycache/cache_test.go | 152 | ||||
-rw-r--r-- | internal/unarycache/testhelper_test.go | 11 |
5 files changed, 336 insertions, 67 deletions
diff --git a/internal/gitaly/service/repository/license.go b/internal/gitaly/service/repository/license.go index 2888d690b..9140af665 100644 --- a/internal/gitaly/service/repository/license.go +++ b/internal/gitaly/service/repository/license.go @@ -20,6 +20,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service" "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag" "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" + "gitlab.com/gitlab-org/gitaly/v15/internal/unarycache" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" ) @@ -40,6 +41,14 @@ var nicknameByLicenseIdentifier = map[string]string{ "gpl-2.0": "GNU GPLv2", } +func newLicenseCache() *unarycache.Cache[git.ObjectID, *gitalypb.FindLicenseResponse] { + cache, err := unarycache.New(100, findLicense) + if err != nil { + panic(err) + } + return cache +} + func (s *server) FindLicense(ctx context.Context, req *gitalypb.FindLicenseRequest) (*gitalypb.FindLicenseResponse, error) { repository := req.GetRepository() if err := service.ValidateRepository(repository); err != nil { @@ -56,88 +65,97 @@ func (s *server) FindLicense(ctx context.Context, req *gitalypb.FindLicenseReque return nil, structerr.NewInternal("cannot find HEAD revision: %v", err) } - repoFiler := &gitFiler{ctx: ctx, repo: repo, treeishID: headOID} - detectedLicenses, err := licensedb.Detect(repoFiler) + response, err := s.licenseCache.GetOrCompute(ctx, repo, headOID) if err != nil { - if errors.Is(err, licensedb.ErrNoLicenseFound) { - if repoFiler.foundLicense { - // In case the license is not identified, but a file containing some - // sort of license is found, we return a predefined response. - return &gitalypb.FindLicenseResponse{ - LicenseName: "Other", - LicenseShortName: "other", - LicenseNickname: "LICENSE", // Show as LICENSE in the UI - LicensePath: repoFiler.path, - }, nil - } - return &gitalypb.FindLicenseResponse{}, nil - } - return nil, structerr.NewInternal("detect licenses: %w", err) + return nil, err } - // This should not happen as the error must be returned, but let's keep it safe to avoid panics. - if len(detectedLicenses) == 0 { - return &gitalypb.FindLicenseResponse{}, nil - } + return response, nil + } - type bestMatch struct { - shortName string - api.Match - } - bestMatches := make([]bestMatch, 0, len(detectedLicenses)) - for candidate, match := range detectedLicenses { - bestMatches = append(bestMatches, bestMatch{Match: match, shortName: candidate}) - } - sort.Slice(bestMatches, func(i, j int) bool { - // Because there could be multiple matches with the same confidence, we need - // to make sure the function is consistent and returns the same license on - // each invocation. That is why we sort by the short name as well. - if bestMatches[i].Confidence == bestMatches[j].Confidence { - return trimDeprecatedPrefix(bestMatches[i].shortName) < trimDeprecatedPrefix(bestMatches[j].shortName) + client, err := s.ruby.RepositoryServiceClient(ctx) + if err != nil { + return nil, err + } + clientCtx, err := rubyserver.SetHeaders(ctx, s.locator, repository) + if err != nil { + return nil, err + } + return client.FindLicense(clientCtx, req) +} + +func findLicense(ctx context.Context, repo *localrepo.Repo, commitID git.ObjectID) (*gitalypb.FindLicenseResponse, error) { + repoFiler := &gitFiler{ctx: ctx, repo: repo, treeishID: commitID} + detectedLicenses, err := licensedb.Detect(repoFiler) + if err != nil { + if errors.Is(err, licensedb.ErrNoLicenseFound) { + if repoFiler.foundLicense { + // In case the license is not identified, but a file containing some + // sort of license is found, we return a predefined response. + return &gitalypb.FindLicenseResponse{ + LicenseName: "Other", + LicenseShortName: "other", + LicenseNickname: "LICENSE", // Show as LICENSE in the UI + LicensePath: repoFiler.path, + }, nil } - return bestMatches[i].Confidence > bestMatches[j].Confidence - }) + return &gitalypb.FindLicenseResponse{}, nil + } + return nil, structerr.NewInternal("detect licenses: %w", err) + } - // We also don't want to return the prefix back to the caller if it exists. - shortName := trimDeprecatedPrefix(bestMatches[0].shortName) + // This should not happen as the error must be returned, but let's keep it safe to avoid panics. + if len(detectedLicenses) == 0 { + return &gitalypb.FindLicenseResponse{}, nil + } - name, err := licensedb.LicenseName(shortName) - if err != nil { - return nil, structerr.NewInternal("license name by id %q: %w", shortName, err) + type bestMatch struct { + shortName string + api.Match + } + bestMatches := make([]bestMatch, 0, len(detectedLicenses)) + for candidate, match := range detectedLicenses { + bestMatches = append(bestMatches, bestMatch{Match: match, shortName: candidate}) + } + sort.Slice(bestMatches, func(i, j int) bool { + // Because there could be multiple matches with the same confidence, we need + // to make sure the function is consistent and returns the same license on + // each invocation. That is why we sort by the short name as well. + if bestMatches[i].Confidence == bestMatches[j].Confidence { + return trimDeprecatedPrefix(bestMatches[i].shortName) < trimDeprecatedPrefix(bestMatches[j].shortName) } + return bestMatches[i].Confidence > bestMatches[j].Confidence + }) - urls, err := licensedb.LicenseURLs(shortName) - if err != nil { - return nil, structerr.NewInternal("license URLs by id %q: %w", shortName, err) - } - var url string - if len(urls) > 0 { - // The URL list is returned in an ordered slice, so we just pick up the first one from the list. - url = urls[0] - } + // We also don't want to return the prefix back to the caller if it exists. + shortName := trimDeprecatedPrefix(bestMatches[0].shortName) - // The license identifier used by `github.com/go-enry/go-license-detector` is - // case-sensitive, but the API requires all license identifiers to be lower-cased. - shortName = strings.ToLower(shortName) - nickname := nicknameByLicenseIdentifier[shortName] - return &gitalypb.FindLicenseResponse{ - LicenseShortName: shortName, - LicensePath: bestMatches[0].File, - LicenseName: name, - LicenseUrl: url, - LicenseNickname: nickname, - }, nil + name, err := licensedb.LicenseName(shortName) + if err != nil { + return nil, structerr.NewInternal("license name by id %q: %w", shortName, err) } - client, err := s.ruby.RepositoryServiceClient(ctx) + urls, err := licensedb.LicenseURLs(shortName) if err != nil { - return nil, err + return nil, structerr.NewInternal("license URLs by id %q: %w", shortName, err) } - clientCtx, err := rubyserver.SetHeaders(ctx, s.locator, repository) - if err != nil { - return nil, err + var url string + if len(urls) > 0 { + // The URL list is returned in an ordered slice, so we just pick up the first one from the list. + url = urls[0] } - return client.FindLicense(clientCtx, req) + + // The license identifier used by `github.com/go-enry/go-license-detector` is + // case-sensitive, but the API requires all license identifiers to be lower-cased. + shortName = strings.ToLower(shortName) + nickname := nicknameByLicenseIdentifier[shortName] + return &gitalypb.FindLicenseResponse{ + LicenseShortName: shortName, + LicensePath: bestMatches[0].File, + LicenseName: name, + LicenseUrl: url, + LicenseNickname: nickname, + }, nil } // For the deprecated licenses, the `github.com/go-enry/go-license-detector` package diff --git a/internal/gitaly/service/repository/server.go b/internal/gitaly/service/repository/server.go index a6d8faa07..d63bfade7 100644 --- a/internal/gitaly/service/repository/server.go +++ b/internal/gitaly/service/repository/server.go @@ -12,6 +12,7 @@ import ( "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/rubyserver" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/storage" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/transaction" + "gitlab.com/gitlab-org/gitaly/v15/internal/unarycache" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" ) @@ -27,6 +28,8 @@ type server struct { catfileCache catfile.Cache git2goExecutor *git2go.Executor housekeepingManager housekeeping.Manager + + licenseCache *unarycache.Cache[git.ObjectID, *gitalypb.FindLicenseResponse] } // NewServer creates a new instance of a gRPC repo server @@ -52,6 +55,8 @@ func NewServer( catfileCache: catfileCache, git2goExecutor: git2goExecutor, housekeepingManager: housekeepingManager, + + licenseCache: newLicenseCache(), } } diff --git a/internal/unarycache/cache.go b/internal/unarycache/cache.go new file mode 100644 index 000000000..b330df56c --- /dev/null +++ b/internal/unarycache/cache.go @@ -0,0 +1,83 @@ +// Package unarycache allows you to cache responses for unary gRPC messages. +// +// Some gRPC unary message take a considerable amount of compute to build the +// response. This package will allow users to cache this response. +// +// For the time being it is implemented using a simple in-memory +// least-recently-used cache. +package unarycache + +import ( + "context" + "fmt" + + lru "github.com/hashicorp/golang-lru/v2" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" +) + +// Generator is a function that computes a cacheable value for a repository based on the given key. +// The key must uniquely identify the result. A valid choice would for example be an object ID. +type Generator[Key comparable, Value any] func(context.Context, *localrepo.Repo, Key) (Value, error) + +// cacheKey is a repository-scoped key for the cache. +type cacheKey[Key comparable] struct { + // repoPath is the path of the repository for which a cache entry is active. + repoPath string + // key is the key that uniquely identifies a result. + key Key +} + +// Cache is a cache that stores values that can be uniquely computed for a specific key. +type Cache[Key comparable, Value any] struct { + // generator is the generator function that computes the value for a given key if the key is + // not yet cached. + generator Generator[Key, Value] + // lru is the least-recently-used cache that stores cached values. The cache keys are scoped + // to the repository and the respective key that uniquely identifies the result. + lru *lru.Cache[cacheKey[Key], Value] +} + +// New creates a new Cache. The cache will hold at maximum `maxEntries`, if more than this many +// entries are added then the least-recently-used one will be evicted from the cache. If a +// repository-scoped key does not exist in the cache, then the generator function will be called to +// compute the value for the given repository and key. +func New[Key comparable, Value any](maxEntries int, generator Generator[Key, Value]) (*Cache[Key, Value], error) { + lru, err := lru.New[cacheKey[Key], Value](maxEntries) + if err != nil { + return nil, fmt.Errorf("creating LRU cache: %w", err) + } + + return &Cache[Key, Value]{ + generator: generator, + lru: lru, + }, nil +} + +// GetOrCompute either returns a cached value or computes the value for the given repository and +// key. +func (c *Cache[Key, Value]) GetOrCompute(ctx context.Context, repo *localrepo.Repo, key Key) (Value, error) { + var defaultValue Value + + repoPath, err := repo.Path() + if err != nil { + return defaultValue, fmt.Errorf("getting repo path: %w", err) + } + + repoScopedKey := cacheKey[Key]{ + repoPath: repoPath, + key: key, + } + + if value, ok := c.lru.Get(repoScopedKey); ok { + return value, nil + } + + value, err := c.generator(ctx, repo, key) + if err != nil { + return defaultValue, err + } + + c.lru.Add(repoScopedKey, value) + + return value, nil +} diff --git a/internal/unarycache/cache_test.go b/internal/unarycache/cache_test.go new file mode 100644 index 000000000..846e346e6 --- /dev/null +++ b/internal/unarycache/cache_test.go @@ -0,0 +1,152 @@ +package unarycache + +import ( + "context" + "errors" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo" + "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" + "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper/testcfg" +) + +func TestCache_GetOrCompute(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + repoProto, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + repo := localrepo.NewTestRepo(t, cfg, repoProto) + + t.Run("returns identical value", func(t *testing.T) { + generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) { + return "hello", nil + } + cacher, err := New(5, generator) + require.NoError(t, err) + + actual, err := cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "hello", actual) + }) + + t.Run("returns identical error when generator fails", func(t *testing.T) { + generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) { + return "", errors.New("broken") + } + cacher, err := New(5, generator) + require.NoError(t, err) + + _, err = cacher.GetOrCompute(ctx, repo, "foo") + require.EqualError(t, err, "broken") + }) + + t.Run("returns cached response", func(t *testing.T) { + first := true + + generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) { + if first { + first = false + + return "hello", nil + } + + return "", errors.New("broken") + } + cacher, err := New(5, generator) + require.NoError(t, err) + + // Populate cache + actual, err := cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "hello", actual) + + // Sanity check on generator + _, err = generator(ctx, repo, "foo") + require.EqualError(t, err, "broken") + + // Fetch from cache + actual, err = cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "hello", actual) + }) + + t.Run("different values for different keys", func(t *testing.T) { + count := 0 + generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) { + count++ + return fmt.Sprintf("You're number %d", count), nil + } + cacher, err := New(5, generator) + require.NoError(t, err) + + actual, err := cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "You're number 1", actual) + + actual, err = cacher.GetOrCompute(ctx, repo, "bar") + require.NoError(t, err) + require.Equal(t, "You're number 2", actual) + + actual, err = cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "You're number 1", actual) + }) + + t.Run("only keeps given max entries", func(t *testing.T) { + count := 0 + generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) { + count++ + return fmt.Sprintf("You're number %d", count), nil + } + cacher, err := New(5, generator) + require.NoError(t, err) + + actual, err := cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "You're number 1", actual) + + for i, key := range []string{"a", "b", "c", "d", "e"} { + actual, err = cacher.GetOrCompute(ctx, repo, key) + require.NoError(t, err) + require.Equal(t, fmt.Sprintf("You're number %d", i+2), actual) + } + + actual, err = cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "You're number 7", actual) + }) + + t.Run("same cache key on different repos", func(t *testing.T) { + otherProto, _ := gittest.CreateRepository(t, ctx, cfg, gittest.CreateRepositoryConfig{ + SkipCreationViaService: true, + }) + otherRepo := localrepo.NewTestRepo(t, cfg, otherProto) + + count := 0 + generator := func(ctx context.Context, repo *localrepo.Repo, key string) (string, error) { + count++ + return fmt.Sprintf("You're number %d", count), nil + } + cacher, err := New(5, generator) + require.NoError(t, err) + + actual, err := cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "You're number 1", actual) + + actual, err = cacher.GetOrCompute(ctx, otherRepo, "foo") + require.NoError(t, err) + require.Equal(t, "You're number 2", actual) + + actual, err = cacher.GetOrCompute(ctx, repo, "foo") + require.NoError(t, err) + require.Equal(t, "You're number 1", actual) + }) +} diff --git a/internal/unarycache/testhelper_test.go b/internal/unarycache/testhelper_test.go new file mode 100644 index 000000000..9828d2a13 --- /dev/null +++ b/internal/unarycache/testhelper_test.go @@ -0,0 +1,11 @@ +package unarycache + +import ( + "testing" + + "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" +) + +func TestMain(m *testing.M) { + testhelper.Run(m) +} |