diff options
author | Pavlo Strokov <pstrokov@gitlab.com> | 2023-01-03 18:56:16 +0300 |
---|---|---|
committer | Pavlo Strokov <pstrokov@gitlab.com> | 2023-01-03 18:56:16 +0300 |
commit | 2f1557b2ee2da0490c68bb70c8684a34b33a32b2 (patch) | |
tree | 1f1ab24bbb1c09bd908240fd12e51f4ef42f3f47 | |
parent | 315adae3fb023f18be1d81150b77956d41555ebd (diff) | |
parent | fb830f5f01311609c01590ddf04a22a3d33cd36d (diff) |
Merge branch 'pks-repository-fsck-dont-compute-dangling-objects' into 'master'
repository: Disable misleading dangling checks in git-fsck(1)
See merge request https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5230
Merged-by: Pavlo Strokov <pstrokov@gitlab.com>
Approved-by: karthik nayak <knayak@gitlab.com>
Approved-by: Pavlo Strokov <pstrokov@gitlab.com>
Reviewed-by: karthik nayak <knayak@gitlab.com>
Co-authored-by: Patrick Steinhardt <psteinhardt@gitlab.com>
-rw-r--r-- | internal/gitaly/service/repository/fsck.go | 22 | ||||
-rw-r--r-- | internal/gitaly/service/repository/fsck_test.go | 216 | ||||
-rw-r--r-- | proto/go/gitalypb/repository.pb.go | 8 | ||||
-rw-r--r-- | proto/go/gitalypb/repository_grpc.pb.go | 6 | ||||
-rw-r--r-- | proto/repository.proto | 11 |
5 files changed, 189 insertions, 74 deletions
diff --git a/internal/gitaly/service/repository/fsck.go b/internal/gitaly/service/repository/fsck.go index 9b5f0a9e9..9561339a7 100644 --- a/internal/gitaly/service/repository/fsck.go +++ b/internal/gitaly/service/repository/fsck.go @@ -1,8 +1,8 @@ package repository import ( - "bytes" "context" + "strings" "gitlab.com/gitlab-org/gitaly/v15/internal/git" "gitlab.com/gitlab-org/gitaly/v15/internal/gitaly/service" @@ -16,18 +16,28 @@ func (s *server) Fsck(ctx context.Context, req *gitalypb.FsckRequest) (*gitalypb return nil, structerr.NewInvalidArgument("%w", err) } - var stdout, stderr bytes.Buffer + var output strings.Builder cmd, err := s.gitCmdFactory.New(ctx, repository, - git.Command{Name: "fsck"}, - git.WithStdout(&stdout), - git.WithStderr(&stderr), + git.Command{ + Name: "fsck", + Flags: []git.Option{ + // We don't care about any progress bars. + git.Flag{Name: "--no-progress"}, + // We don't want to get warning about dangling objects. It is + // expected that repositories have these and makes the signal to + // noise ratio a lot worse. + git.Flag{Name: "--no-dangling"}, + }, + }, + git.WithStdout(&output), + git.WithStderr(&output), ) if err != nil { return nil, err } if err = cmd.Wait(); err != nil { - return &gitalypb.FsckResponse{Error: append(stdout.Bytes(), stderr.Bytes()...)}, nil + return &gitalypb.FsckResponse{Error: []byte(output.String())}, nil } return &gitalypb.FsckResponse{}, nil diff --git a/internal/gitaly/service/repository/fsck_test.go b/internal/gitaly/service/repository/fsck_test.go index 8fddbb1f5..6bf337388 100644 --- a/internal/gitaly/service/repository/fsck_test.go +++ b/internal/gitaly/service/repository/fsck_test.go @@ -3,74 +3,176 @@ package repository import ( + "fmt" "os" "path/filepath" "strings" "testing" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v15/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v15/internal/structerr" "gitlab.com/gitlab-org/gitaly/v15/internal/testhelper" "gitlab.com/gitlab-org/gitaly/v15/proto/go/gitalypb" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" ) -func TestFsckSuccess(t *testing.T) { +func TestFsck(t *testing.T) { t.Parallel() - ctx := testhelper.Context(t) - - _, repo, _, client := setupRepositoryService(t, ctx) - - c, err := client.Fsck(ctx, &gitalypb.FsckRequest{Repository: repo}) - assert.NoError(t, err) - assert.NotNil(t, c) - assert.Empty(t, c.GetError()) -} -func TestFsckFailureSeverelyBrokenRepo(t *testing.T) { - t.Parallel() ctx := testhelper.Context(t) - - _, repo, repoPath, client := setupRepositoryService(t, ctx) - - // This makes the repo severely broken so that `git` does not identify it as a - // proper repo. - require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects"))) - fd, err := os.Create(filepath.Join(repoPath, "objects")) - require.NoError(t, err) - require.NoError(t, fd.Close()) - - c, err := client.Fsck(ctx, &gitalypb.FsckRequest{Repository: repo}) - assert.NoError(t, err) - assert.NotNil(t, c) - assert.Contains(t, strings.ToLower(string(c.GetError())), "not a git repository") -} - -func TestFsckFailureSlightlyBrokenRepo(t *testing.T) { - t.Parallel() - ctx := testhelper.Context(t) - - _, repo, repoPath, client := setupRepositoryService(t, ctx) - - // This makes the repo slightly broken so that `git` still identify it as a - // proper repo, but `fsck` complains about broken refs... - require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects", "pack"))) - - c, err := client.Fsck(ctx, &gitalypb.FsckRequest{Repository: repo}) - assert.NoError(t, err) - assert.NotNil(t, c) - assert.NotEmpty(t, string(c.GetError())) - assert.Contains(t, string(c.GetError()), "error: HEAD: invalid sha1 pointer") -} - -func TestFsck_validate(t *testing.T) { - t.Parallel() - ctx := testhelper.Context(t) - - _, client := setupRepositoryServiceWithoutRepo(t) - - _, err := client.Fsck(ctx, &gitalypb.FsckRequest{Repository: nil}) - msg := testhelper.GitalyOrPraefect("empty Repository", "repo scoped: empty Repository") - testhelper.RequireGrpcError(t, status.Error(codes.InvalidArgument, msg), err) + cfg, client := setupRepositoryServiceWithoutRepo(t) + + type setupData struct { + repo *gitalypb.Repository + expectedErr error + expectedResponse *gitalypb.FsckResponse + } + + for _, tc := range []struct { + desc string + setup func(t *testing.T) setupData + }{ + { + desc: "request is missing repository", + setup: func(t *testing.T) setupData { + return setupData{ + repo: nil, + expectedErr: structerr.NewInvalidArgument( + testhelper.GitalyOrPraefect( + "empty Repository", + "repo scoped: empty Repository", + ), + ), + } + }, + }, + { + desc: "empty repository", + setup: func(t *testing.T) setupData { + repo, _ := gittest.CreateRepository(t, ctx, cfg) + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{}, + } + }, + }, + { + desc: "repository with commit", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{}, + } + }, + }, + { + desc: "invalid object directory", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + + // This makes the repo severely broken so that `git` does not + // identify it as a proper repository anymore. + require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects"))) + require.NoError(t, os.WriteFile(filepath.Join(repoPath, "objects"), nil, 0o644)) + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{ + Error: []byte(fmt.Sprintf("fatal: not a git repository: '%s'\n", repoPath)), + }, + } + }, + }, + { + desc: "missing objects", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + + // We write a commit and repack it into a packfile... + commitID := gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Ad") + // ... but then subsequently delete all the packfiles. This should + // lead to git-fsck(1) complaining about missing objects. + require.NoError(t, os.RemoveAll(filepath.Join(repoPath, "objects", "pack"))) + + expectedErr := strings.Join([]string{ + "error: refs/heads/main: invalid sha1 pointer " + commitID.String(), + "error: HEAD: invalid sha1 pointer " + commitID.String(), + "notice: No default references", + }, "\n") + "\n" + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{ + Error: []byte(expectedErr), + }, + } + }, + }, + { + desc: "dangling blob", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + + // A dangling blob should not cause the consistency check to fail as + // it is totally expected that repositories accumulate unreachable + // objects. + gittest.WriteBlob(t, cfg, repoPath, []byte("content")) + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{}, + } + }, + }, + { + desc: "invalid tree object", + setup: func(t *testing.T) setupData { + repo, repoPath := gittest.CreateRepository(t, ctx, cfg) + + // Create the default branch so that git-fsck(1) doesn't complain + // about it missing. + gittest.WriteCommit(t, cfg, repoPath, gittest.WithBranch("main")) + + // Write a tree object that has the same path twice. We use this as + // an example to verify that git-fsck(1) indeed also verifies + // objects as expected just because writing trees with two entries + // is so easy. + // + // Furthermore, the tree is also dangling on purpose to verify that + // we don't complain about it dangling. + treeID := gittest.WriteTree(t, cfg, repoPath, []gittest.TreeEntry{ + {Path: "duplicate", Mode: "100644", Content: "foo"}, + {Path: "duplicate", Mode: "100644", Content: "bar"}, + }) + + expectedErr := "error in tree " + treeID.String() + ": duplicateEntries: contains duplicate file entries\n" + + return setupData{ + repo: repo, + expectedResponse: &gitalypb.FsckResponse{ + Error: []byte(expectedErr), + }, + } + }, + }, + } { + tc := tc + + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + + setupData := tc.setup(t) + + response, err := client.Fsck(ctx, &gitalypb.FsckRequest{ + Repository: setupData.repo, + }) + testhelper.RequireGrpcError(t, setupData.expectedErr, err) + testhelper.ProtoEqual(t, setupData.expectedResponse, response) + }) + } } diff --git a/proto/go/gitalypb/repository.pb.go b/proto/go/gitalypb/repository.pb.go index 82cc54b6d..db4e1182b 100644 --- a/proto/go/gitalypb/repository.pb.go +++ b/proto/go/gitalypb/repository.pb.go @@ -1897,13 +1897,13 @@ func (x *FetchSourceBranchResponse) GetResult() bool { return false } -// This comment is left unintentionally blank. +// FsckRequest is a request for the Fsck RPC. type FsckRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // This comment is left unintentionally blank. + // Repository is the repository that shall be checked for consistency. Repository *Repository `protobuf:"bytes,1,opt,name=repository,proto3" json:"repository,omitempty"` } @@ -1946,13 +1946,13 @@ func (x *FsckRequest) GetRepository() *Repository { return nil } -// This comment is left unintentionally blank. +// FsckResponse is a response for the Fsck RPC. type FsckResponse struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // This comment is left unintentionally blank. + // Error contains both stdout and stderr of git-fsck(1) in case it returned an error. Error []byte `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"` } diff --git a/proto/go/gitalypb/repository_grpc.pb.go b/proto/go/gitalypb/repository_grpc.pb.go index f6bf2b7ae..84cd90f3f 100644 --- a/proto/go/gitalypb/repository_grpc.pb.go +++ b/proto/go/gitalypb/repository_grpc.pb.go @@ -55,7 +55,8 @@ type RepositoryServiceClient interface { // FetchSourceBranch fetches a branch from a second (potentially remote) // repository into the given repository. FetchSourceBranch(ctx context.Context, in *FetchSourceBranchRequest, opts ...grpc.CallOption) (*FetchSourceBranchResponse, error) - // This comment is left unintentionally blank. + // Fsck checks the repository for consistency via git-fsck(1). This can be used to check for + // repository corruption. Fsck(ctx context.Context, in *FsckRequest, opts ...grpc.CallOption) (*FsckResponse, error) // This comment is left unintentionally blank. WriteRef(ctx context.Context, in *WriteRefRequest, opts ...grpc.CallOption) (*WriteRefResponse, error) @@ -875,7 +876,8 @@ type RepositoryServiceServer interface { // FetchSourceBranch fetches a branch from a second (potentially remote) // repository into the given repository. FetchSourceBranch(context.Context, *FetchSourceBranchRequest) (*FetchSourceBranchResponse, error) - // This comment is left unintentionally blank. + // Fsck checks the repository for consistency via git-fsck(1). This can be used to check for + // repository corruption. Fsck(context.Context, *FsckRequest) (*FsckResponse, error) // This comment is left unintentionally blank. WriteRef(context.Context, *WriteRefRequest) (*WriteRefResponse, error) diff --git a/proto/repository.proto b/proto/repository.proto index 33c040f24..0cda33a82 100644 --- a/proto/repository.proto +++ b/proto/repository.proto @@ -108,7 +108,8 @@ service RepositoryService { }; } - // This comment is left unintentionally blank. + // Fsck checks the repository for consistency via git-fsck(1). This can be used to check for + // repository corruption. rpc Fsck(FsckRequest) returns (FsckResponse) { option (op_type) = { op: ACCESSOR @@ -605,15 +606,15 @@ message FetchSourceBranchResponse { bool result = 1; } -// This comment is left unintentionally blank. +// FsckRequest is a request for the Fsck RPC. message FsckRequest { - // This comment is left unintentionally blank. + // Repository is the repository that shall be checked for consistency. Repository repository = 1 [(target_repository)=true]; } -// This comment is left unintentionally blank. +// FsckResponse is a response for the Fsck RPC. message FsckResponse { - // This comment is left unintentionally blank. + // Error contains both stdout and stderr of git-fsck(1) in case it returned an error. bytes error = 1; } |