diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-07-29 13:20:06 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2022-08-02 09:45:54 +0300 |
commit | 4b181168fb555ff87da239a1ba89b5b6e4499c92 (patch) | |
tree | c6b3300011978b0bc069b81e138dba7bd56c0a04 | |
parent | 936d5aab0ca1ed0558a46780f302cb5bb21e1180 (diff) |
git: Support cached detection of object hash in RepositoryExecutors
Over time many of the places in our production-facing code will require
to detect object hashes, often even multiple times per RPC call. Because
we need to spawn a Git command to detect the object hash this cost may
add up over time, so ideally we'd run this detection logic infrequently
only.
We implement a new `ObjectHash()` function for `RepositoryExecutor`s
that allows us to at cache the detected object hash on the `localrepo`
level. Assuming that `localrepo`s get constructed once only per RPC call
this at least allows us to limit the frequency to detect the object hash
to once per RPC call.
While we might eventually want to use an LRU cache to do cache recent
detections, this feels quite a lot like premature optimization. We
should first see how things behave and whether this is going to put some
strain on our command factory before we grow more complexity.
-rw-r--r-- | internal/git/catfile/testhelper_test.go | 4 | ||||
-rw-r--r-- | internal/git/localrepo/objects.go | 14 | ||||
-rw-r--r-- | internal/git/localrepo/refs.go | 7 | ||||
-rw-r--r-- | internal/git/localrepo/repo.go | 13 | ||||
-rw-r--r-- | internal/git/localrepo/repo_test.go | 41 | ||||
-rw-r--r-- | internal/git/repository.go | 1 |
6 files changed, 77 insertions, 3 deletions
diff --git a/internal/git/catfile/testhelper_test.go b/internal/git/catfile/testhelper_test.go index 4aeb140bb..e222a52c0 100644 --- a/internal/git/catfile/testhelper_test.go +++ b/internal/git/catfile/testhelper_test.go @@ -53,6 +53,10 @@ func (e *repoExecutor) GitVersion(ctx context.Context) (git.Version, error) { return git.Version{}, nil } +func (e *repoExecutor) ObjectHash(ctx context.Context) (git.ObjectHash, error) { + return gittest.DefaultObjectHash, nil +} + func setupObjectReader(t *testing.T, ctx context.Context) (config.Cfg, ObjectReader, *gitalypb.Repository) { t.Helper() diff --git a/internal/git/localrepo/objects.go b/internal/git/localrepo/objects.go index 4616df7a5..43198b0e9 100644 --- a/internal/git/localrepo/objects.go +++ b/internal/git/localrepo/objects.go @@ -47,7 +47,12 @@ func (repo *Repo) WriteBlob(ctx context.Context, path string, content io.Reader) return "", errorWithStderr(err, stderr.Bytes()) } - oid, err := git.ObjectHashSHA1.FromHex(text.ChompBytes(stdout.Bytes())) + objectHash, err := repo.ObjectHash(ctx) + if err != nil { + return "", fmt.Errorf("detecting object hash: %w", err) + } + + oid, err := objectHash.FromHex(text.ChompBytes(stdout.Bytes())) if err != nil { return "", err } @@ -159,7 +164,12 @@ func (repo *Repo) WriteTag( return "", MktagError{tagName: tagName, stderr: stderr.String()} } - tagID, err := git.ObjectHashSHA1.FromHex(text.ChompBytes(stdout.Bytes())) + objectHash, err := repo.ObjectHash(ctx) + if err != nil { + return "", fmt.Errorf("detecting object hash: %w", err) + } + + tagID, err := objectHash.FromHex(text.ChompBytes(stdout.Bytes())) if err != nil { return "", fmt.Errorf("could not parse tag ID: %w", err) } diff --git a/internal/git/localrepo/refs.go b/internal/git/localrepo/refs.go index 2ba1d988d..f76975e28 100644 --- a/internal/git/localrepo/refs.go +++ b/internal/git/localrepo/refs.go @@ -56,8 +56,13 @@ func (repo *Repo) ResolveRevision(ctx context.Context, revision git.Revision) (g return "", err } + objectHash, err := repo.ObjectHash(ctx) + if err != nil { + return "", fmt.Errorf("detecting object hash: %w", err) + } + hex := strings.TrimSpace(stdout.String()) - oid, err := git.ObjectHashSHA1.FromHex(hex) + oid, err := objectHash.FromHex(hex) if err != nil { return "", fmt.Errorf("unsupported object hash %q: %w", hex, err) } diff --git a/internal/git/localrepo/repo.go b/internal/git/localrepo/repo.go index 730176543..26e097350 100644 --- a/internal/git/localrepo/repo.go +++ b/internal/git/localrepo/repo.go @@ -9,6 +9,7 @@ import ( "os" "strconv" "strings" + "sync" "testing" "github.com/stretchr/testify/require" @@ -30,6 +31,10 @@ type Repo struct { locator storage.Locator gitCmdFactory git.CommandFactory catfileCache catfile.Cache + + detectObjectHashOnce sync.Once + objectHash git.ObjectHash + objectHashErr error } // New creates a new Repo from its protobuf representation. @@ -220,3 +225,11 @@ func (repo *Repo) StorageTempDir() (string, error) { return tempPath, nil } + +// ObjectHash detects the object hash used by this particular repository. +func (repo *Repo) ObjectHash(ctx context.Context) (git.ObjectHash, error) { + repo.detectObjectHashOnce.Do(func() { + repo.objectHash, repo.objectHashErr = git.DetectObjectHash(ctx, repo) + }) + return repo.objectHash, repo.objectHashErr +} diff --git a/internal/git/localrepo/repo_test.go b/internal/git/localrepo/repo_test.go index 26ae6336a..ffa5589d8 100644 --- a/internal/git/localrepo/repo_test.go +++ b/internal/git/localrepo/repo_test.go @@ -345,3 +345,44 @@ func TestRepo_StorageTempDir(t *testing.T) { require.DirExists(t, expected) require.Equal(t, expected, tempPath) } + +func TestRepo_ObjectHash(t *testing.T) { + t.Parallel() + + ctx := testhelper.Context(t) + cfg := testcfg.Build(t) + + catfileCache := catfile.NewCache(cfg) + t.Cleanup(catfileCache.Stop) + locator := config.NewLocator(cfg) + + outputFile := filepath.Join(testhelper.TempDir(t), "output") + + // We create an intercepting command factory that detects when we run our object hash + // detection logic and, if so, writes a sentinel value into our output file. Like this we + // can test how often the logic runs. + gitCmdFactory := gittest.NewInterceptingCommandFactory(ctx, t, cfg, func(execEnv git.ExecutionEnvironment) string { + return fmt.Sprintf(`#!/bin/sh + ( echo "$@" | grep --silent -- '--show-object-format' ) && echo detection-logic >>%q + exec %q "$@"`, outputFile, execEnv.BinaryPath) + }) + + repoProto, _ := gittest.InitRepo(t, cfg, cfg.Storages[0]) + repo := New(locator, gitCmdFactory, catfileCache, repoProto) + + objectHash, err := repo.ObjectHash(ctx) + require.NoError(t, err) + require.Equal(t, gittest.DefaultObjectHash.EmptyTreeOID, objectHash.EmptyTreeOID) + + // We should see that the detection logic has been executed once. + require.Equal(t, "detection-logic\n", string(testhelper.MustReadFile(t, outputFile))) + + // Verify that running this a second time continues to return the object hash alright + // regardless of the cache. + objectHash, err = repo.ObjectHash(ctx) + require.NoError(t, err) + require.Equal(t, gittest.DefaultObjectHash.EmptyTreeOID, objectHash.EmptyTreeOID) + + // But the detection logic should not have been executed a second time. + require.Equal(t, "detection-logic\n", string(testhelper.MustReadFile(t, outputFile))) +} diff --git a/internal/git/repository.go b/internal/git/repository.go index b8b764e6c..8f969bdf5 100644 --- a/internal/git/repository.go +++ b/internal/git/repository.go @@ -55,4 +55,5 @@ type RepositoryExecutor interface { Exec(ctx context.Context, cmd Cmd, opts ...CmdOpt) (*command.Command, error) ExecAndWait(ctx context.Context, cmd Cmd, opts ...CmdOpt) error GitVersion(ctx context.Context) (Version, error) + ObjectHash(ctx context.Context) (ObjectHash, error) } |