Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Cai <jcai@gitlab.com>2022-09-21 21:16:35 +0300
committerJohn Cai <jcai@gitlab.com>2022-10-06 23:06:30 +0300
commit45f643e019b275453a604086782140c52ef4db11 (patch)
treebc234e1d768a88ae68056466eb6ebf2a44561c77
parent16b38f034eb38253006a2e69a4b4220717b45a99 (diff)
objectpool: use rev-list for connectivity checkjc-use-rev-list-for-connectivity
git fsck --connectivity-only uses much more memory than git rev-list --all --objects, which effectively does the same connectivity check but with less than half the memory consumption. Some analysis using Valgrind's massif and a conversation on the git mailing list [1] confirmed that between git fsck and git rev-list, git rev-list is the better tool for checking for connectivity. Switch to using git rev-list --all --objects behind a feature flag. 1. https://lore.kernel.org/git/YyoljwDIn7PxRlC9@coredump.intra.peff.net/T/#t Changelog: changed
-rw-r--r--internal/gitaly/service/objectpool/alternates.go48
-rw-r--r--internal/gitaly/service/objectpool/alternates_test.go119
-rw-r--r--internal/metadata/featureflag/ff_revlist_for_connectivity.go10
3 files changed, 141 insertions, 36 deletions
diff --git a/internal/gitaly/service/objectpool/alternates.go b/internal/gitaly/service/objectpool/alternates.go
index 8f8703114..66ec153e7 100644
--- a/internal/gitaly/service/objectpool/alternates.go
+++ b/internal/gitaly/service/objectpool/alternates.go
@@ -11,10 +11,12 @@ import (
"time"
"github.com/grpc-ecosystem/go-grpc-middleware/logging/logrus/ctxlogrus"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/command"
"gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v15/internal/helper"
"gitlab.com/gitlab-org/gitaly/v15/internal/helper/text"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
)
@@ -148,9 +150,9 @@ func findObjectFiles(altDir string) ([]string, error) {
return objectFiles, nil
}
-type fsckError struct{ error }
+type connectivityError struct{ error }
-func (fe *fsckError) Error() string {
+func (fe *connectivityError) Error() string {
return fmt.Sprintf("git fsck error while disconnected: %v", fe.error)
}
@@ -200,23 +202,41 @@ func (s *server) removeAlternatesIfOk(ctx context.Context, repo *localrepo.Repo,
}
}()
- cmd, err := repo.Exec(ctx, git.SubCmd{
- Name: "fsck",
- Flags: []git.Option{git.Flag{Name: "--connectivity-only"}},
- }, git.WithConfig(git.ConfigPair{
- // Starting with Git's f30e4d854b (fsck: verify commit graph when implicitly
- // enabled, 2021-10-15), git-fsck(1) will check the commit graph for consistency
- // even if `core.commitGraph` is not enabled explicitly. We do not want to verify
- // whether the commit graph is consistent though, but only care about connectivity,
- // so we now explicitly disable usage of the commit graph.
- Key: "core.commitGraph", Value: "false",
- }))
+ var err error
+ var cmd *command.Command
+
+ if featureflag.RevlistForConnectivity.IsEnabled(ctx) {
+ // The choice here of git rev-list is for performance reasons.
+ // git fsck --connectivity-only performed badly for large
+ // repositories. The reasons are detailed in https://lore.kernel.org/git/9304B938-4A59-456B-B091-DBBCAA1823B2@gmail.com/
+ cmd, err = repo.Exec(ctx, git.SubCmd{
+ Name: "rev-list",
+ Flags: []git.Option{
+ git.Flag{Name: "--objects"},
+ git.Flag{Name: "--all"},
+ git.Flag{Name: "--quiet"},
+ },
+ })
+ } else {
+ cmd, err = repo.Exec(ctx, git.SubCmd{
+ Name: "fsck",
+ Flags: []git.Option{git.Flag{Name: "--connectivity-only"}},
+ }, git.WithConfig(git.ConfigPair{
+ // Starting with Git's f30e4d854b (fsck: verify commit graph when implicitly
+ // enabled, 2021-10-15), git-fsck(1) will check the commit graph for consistency
+ // even if `core.commitGraph` is not enabled explicitly. We do not want to verify
+ // whether the commit graph is consistent though, but only care about connectivity,
+ // so we now explicitly disable usage of the commit graph.
+ Key: "core.commitGraph", Value: "false",
+ }))
+ }
+
if err != nil {
return err
}
if err := cmd.Wait(); err != nil {
- return &fsckError{error: err}
+ return &connectivityError{error: err}
}
rollback = false
diff --git a/internal/gitaly/service/objectpool/alternates_test.go b/internal/gitaly/service/objectpool/alternates_test.go
index b1129d81c..f4b6f14fb 100644
--- a/internal/gitaly/service/objectpool/alternates_test.go
+++ b/internal/gitaly/service/objectpool/alternates_test.go
@@ -3,20 +3,31 @@
package objectpool
import (
+ "context"
"fmt"
"os"
+ "os/exec"
+ "path/filepath"
"testing"
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v15/internal/git"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/metadata/featureflag"
"gitlab.com/gitlab-org/gitaly/v15/internal/testhelper"
"gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb"
)
func TestDisconnectGitAlternates(t *testing.T) {
- ctx := testhelper.Context(t)
+ t.Parallel()
+ testhelper.NewFeatureSets(featureflag.RevlistForConnectivity).Run(
+ t,
+ testDisconnectGitAlternates,
+ )
+}
+
+func testDisconnectGitAlternates(t *testing.T, ctx context.Context) {
cfg, repoProto, repoPath, _, client := setup(ctx, t)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -55,7 +66,14 @@ func TestDisconnectGitAlternates(t *testing.T) {
}
func TestDisconnectGitAlternatesNoAlternates(t *testing.T) {
- ctx := testhelper.Context(t)
+ t.Parallel()
+ testhelper.NewFeatureSets(featureflag.RevlistForConnectivity).Run(
+ t,
+ testDisconnectGitAlternatesNoAlternates,
+ )
+}
+
+func testDisconnectGitAlternatesNoAlternates(t *testing.T, ctx context.Context) {
cfg, repoProto, repoPath, _, client := setup(ctx, t)
repo := localrepo.NewTestRepo(t, cfg, repoProto)
@@ -70,7 +88,14 @@ func TestDisconnectGitAlternatesNoAlternates(t *testing.T) {
}
func TestDisconnectGitAlternatesUnexpectedAlternates(t *testing.T) {
- ctx := testhelper.Context(t)
+ t.Parallel()
+ testhelper.NewFeatureSets(featureflag.RevlistForConnectivity).Run(
+ t,
+ testDisconnectGitAlternatesUnexpectedAlternates,
+ )
+}
+
+func testDisconnectGitAlternatesUnexpectedAlternates(t *testing.T, ctx context.Context) {
cfg, _, _, _, client := setup(ctx, t)
testCases := []struct {
@@ -104,31 +129,81 @@ func TestDisconnectGitAlternatesUnexpectedAlternates(t *testing.T) {
}
func TestRemoveAlternatesIfOk(t *testing.T) {
- ctx := testhelper.Context(t)
- cfg, repoProto, repoPath, _, _ := setup(ctx, t)
- repo := localrepo.NewTestRepo(t, cfg, repoProto)
+ t.Parallel()
+ testhelper.NewFeatureSets(featureflag.RevlistForConnectivity).Run(
+ t,
+ testRemoveAlternatesIfOk,
+ )
+}
- altPath, err := repo.InfoAlternatesPath()
- require.NoError(t, err, "find info/alternates")
- altContent := "/var/empty\n"
- require.NoError(t, os.WriteFile(altPath, []byte(altContent), 0o644), "write alternates file")
+func testRemoveAlternatesIfOk(t *testing.T, ctx context.Context) {
+ t.Run("pack files are missing", func(t *testing.T) {
+ cfg, repoProto, repoPath, _, _ := setup(ctx, t)
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ altPath, err := repo.InfoAlternatesPath()
+ require.NoError(t, err, "find info/alternates")
+ altContent := "/var/empty\n"
+ require.NoError(t, os.WriteFile(altPath, []byte(altContent), 0o644), "write alternates file")
+
+ // Intentionally break the repository, so that 'git fsck' will fail later.
+ testhelper.MustRunCommand(t, nil, "sh", "-c", fmt.Sprintf("rm %s/objects/pack/*.pack", repoPath))
+
+ altBackup := altPath + ".backup"
+
+ srv := server{gitCmdFactory: gittest.NewCommandFactory(t, cfg)}
+ err = srv.removeAlternatesIfOk(ctx, repo, altPath, altBackup)
+ require.Error(t, err, "removeAlternatesIfOk should fail")
+ require.IsType(t, &connectivityError{}, err, "error must be because of fsck")
+
+ // We expect objects/info/alternates to have been restored when
+ // removeAlternatesIfOk returned.
+ assertAlternates(t, altPath, altContent)
+
+ // We expect the backup alternates file to still exist.
+ assertAlternates(t, altBackup, altContent)
+ })
+
+ t.Run("commit graph exists but object is missing from odb", func(t *testing.T) {
+ cfg, repoProto, repoPath, _, _ := setup(ctx, t)
+ repo := localrepo.NewTestRepo(t, cfg, repoProto)
+
+ altPath, err := repo.InfoAlternatesPath()
+ require.NoError(t, err, "find info/alternates")
+
+ tmpDir := testhelper.TempDir(t)
+ altContent := tmpDir + "\n"
+ require.NoError(t, os.WriteFile(altPath, []byte(altContent), 0o644), "write alternates file")
+
+ // In order to test the scenario where a commit is in a commit
+ // graph but not in the object database, we will first write a
+ // new commit, write the commit graph, then remove that commit
+ // object from the object database.
+ headCommitOid := gittest.ResolveRevision(t, cfg, repoPath, "HEAD")
+ commitOid := gittest.WriteCommit(t, cfg, repoPath, gittest.WithParents(headCommitOid), gittest.WithBranch("master"))
+
+ gittest.Exec(t, cfg, "-C", repoPath, "commit-graph", "write")
+
+ require.NoError(t, os.Remove(filepath.Join(repoPath, "objects", string(commitOid)[0:2], string(commitOid)[2:])))
+
+ altBackup := altPath + ".backup"
- // Intentionally break the repository, so that 'git fsck' will fail later.
- testhelper.MustRunCommand(t, nil, "sh", "-c", fmt.Sprintf("rm %s/objects/pack/*.pack", repoPath))
+ srv := server{gitCmdFactory: gittest.NewCommandFactory(t, cfg)}
+ err = srv.removeAlternatesIfOk(ctx, repo, altPath, altBackup)
+ require.Error(t, err, "removeAlternatesIfOk should fail")
- altBackup := altPath + ".backup"
+ require.IsType(t, &connectivityError{}, err, "error must be because of connectivity check")
- srv := server{gitCmdFactory: gittest.NewCommandFactory(t, cfg)}
- err = srv.removeAlternatesIfOk(ctx, repo, altPath, altBackup)
- require.Error(t, err, "removeAlternatesIfOk should fail")
- require.IsType(t, &fsckError{}, err, "error must be because of fsck")
+ connectivityErr := err.(*connectivityError)
+ require.IsType(t, &exec.ExitError{}, connectivityErr.error, "error must be because of fsck")
- // We expect objects/info/alternates to have been restored when
- // removeAlternatesIfOk returned.
- assertAlternates(t, altPath, altContent)
+ // We expect objects/info/alternates to have been restored when
+ // removeAlternatesIfOk returned.
+ assertAlternates(t, altPath, altContent)
- // We expect the backup alternates file to still exist.
- assertAlternates(t, altBackup, altContent)
+ // We expect the backup alternates file to still exist.
+ assertAlternates(t, altBackup, altContent)
+ })
}
func assertAlternates(t *testing.T, altPath string, altContent string) {
diff --git a/internal/metadata/featureflag/ff_revlist_for_connectivity.go b/internal/metadata/featureflag/ff_revlist_for_connectivity.go
new file mode 100644
index 000000000..09c667fb4
--- /dev/null
+++ b/internal/metadata/featureflag/ff_revlist_for_connectivity.go
@@ -0,0 +1,10 @@
+package featureflag
+
+// RevlistForConnectivity causes the connectivity check when removing alternates
+// files to use git-rev-list instead of git-fsck
+var RevlistForConnectivity = NewFeatureFlag(
+ "revlist_for_connectivity",
+ "v15.5.0",
+ "https://gitlab.com/gitlab-org/gitaly/-/issues/4489",
+ false,
+)