diff options
author | Sami Hiltunen <shiltunen@gitlab.com> | 2024-01-12 22:28:01 +0300 |
---|---|---|
committer | Sami Hiltunen <shiltunen@gitlab.com> | 2024-01-15 13:53:36 +0300 |
commit | beb6fd2d7313019c5beaabb99d158147d4a72bad (patch) | |
tree | ce65fac623536416b4b09f57ee3a251f51b50863 | |
parent | d84aee2f8e60e28a6558f36383e4f766531e571f (diff) |
Simplify checking whether packing is neededsmh-record-packfile-deps
TransactionManager is currently looking into the quarantine directory
explicitly to see if there are new objects. As we're now walking the
new reference tips in the quarantine directory, we know whether we
need to pack objects or not based on whether we get object IDs back
from the walk. Let's thus simplify and remove the additional check.
As an optimization, we only launch `pack-objects` and `index-pack` if
we receive an object to pack. This avoids launching the commands for
transactions that don't need any new objects from the quarantine to
be committed.
There are minor differences in behavior as visible in tests:
1. All required objects are now verified to exist in the same place in
`verifyObjectDependencies`. Errors change in few test cases to match
as we no longer rely on `update-ref` to error out in order to verify
the objects the references have been pointed to exist.
2. Git has special cased the empty tree OID. Git can read the object from
the object database even if it hasn't been explicitly written there.
This is visible in one of the test cases that points a tag to an empty
tree. `update-ref` previously allowed the update to go through even if
we didn't write out the object as it's considered to always exist. As
we now pack all objects returned from the object walk, we create a pack
that contains the empty tree oid. In practice, this should not have any
impact. If necessary, we can change the behavior to match later. For now,
I've let it be since it's added behavior for little benefit.
3 files changed, 73 insertions, 97 deletions
diff --git a/internal/gitaly/storage/storagemgr/transaction_manager.go b/internal/gitaly/storage/storagemgr/transaction_manager.go index bb2eaf9ff..296cbd802 100644 --- a/internal/gitaly/storage/storagemgr/transaction_manager.go +++ b/internal/gitaly/storage/storagemgr/transaction_manager.go @@ -1008,40 +1008,6 @@ func (mgr *TransactionManager) stageHooks(ctx context.Context, transaction *Tran // prints the packs prefix in the format `pack <digest>`. var packPrefixRegexp = regexp.MustCompile(`^pack\t([0-9a-f]+)\n$`) -// shouldPackObjects checks whether the quarantine directory has any non-default content in it. -// If so, this signifies objects were written into it and we should pack them. -func shouldPackObjects(quarantineDirectory string) (bool, error) { - errHasNewContent := errors.New("new content found") - - // The quarantine directory itself and the pack directory within it are created when the transaction - // begins. These don't signify new content so we ignore them. - preExistingDirs := map[string]struct{}{ - quarantineDirectory: {}, - filepath.Join(quarantineDirectory, "pack"): {}, - } - if err := filepath.Walk(quarantineDirectory, func(path string, info fs.FileInfo, err error) error { - if err != nil { - return err - } - - if _, ok := preExistingDirs[path]; ok { - // The pre-existing directories don't signal any new content to pack. - return nil - } - - // Use an error sentinel to cancel the walk as soon as new content has been found. - return errHasNewContent - }); err != nil { - if errors.Is(err, errHasNewContent) { - return true, nil - } - - return false, fmt.Errorf("check for objects: %w", err) - } - - return false, nil -} - // packObjects walks the objects in the quarantine directory starting from the new // reference tips introduced by the transaction and the explicitly included objects. All // objects in the quarantine directory that are encountered during the walk are included in @@ -1058,12 +1024,6 @@ func shouldPackObjects(quarantineDirectory string) (bool, error) { // The packed objects are not yet checked for validity. See the following issue for more // details on this: https://gitlab.com/gitlab-org/gitaly/-/issues/5779 func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Transaction) error { - if shouldPack, err := shouldPackObjects(transaction.quarantineDirectory); err != nil { - return fmt.Errorf("should pack objects: %w", err) - } else if !shouldPack { - return nil - } - quarantineOnlySnapshotRepository := transaction.snapshotRepository if transaction.snapshotRepository.GetGitObjectDirectory() != "" { // We're not configuring a quarantine on the snapshotRepository when the repository didn't @@ -1120,12 +1080,8 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra }) transaction.objectDependencies = map[git.ObjectID]struct{}{} - objectsToPackReader, objectsToPackWriter := io.Pipe() group.Go(func() (returnedErr error) { - defer func() { - objectWalkReader.CloseWithError(returnedErr) - objectsToPackWriter.CloseWithError(returnedErr) - }() + defer objectWalkReader.CloseWithError(returnedErr) // objectLine comes in two formats from the walk: // 1. '<oid> <path>\n' in case the object is found. <path> may or may not be set. @@ -1136,6 +1092,11 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra // Objects that are not found are recorded as the transaction's // dependencies since they should exist in the repository. scanner := bufio.NewScanner(objectWalkReader) + + objectsToPackReader, objectsToPackWriter := io.Pipe() + defer objectsToPackWriter.CloseWithError(returnedErr) + + packObjectsStarted := false for scanner.Scan() { objectLine := scanner.Text() if objectLine[0] == '?' { @@ -1144,60 +1105,66 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra continue } - // Write the objects to `git pack-objects`. Restore the new line that was - // trimmed by the scanner. - if _, err := objectsToPackWriter.Write([]byte(objectLine + "\n")); err != nil { - return fmt.Errorf("write object id for packing: %w", err) - } - } + // At this point we have an object that we need to pack. If `pack-objects` and `index-pack` + // haven't yet been launched, launch them. + if !packObjectsStarted { + packObjectsStarted = true - if err := scanner.Err(); err != nil { - return fmt.Errorf("scanner: %w", err) - } + packReader, packWriter := io.Pipe() + group.Go(func() (returnedErr error) { + defer func() { + objectsToPackReader.CloseWithError(returnedErr) + packWriter.CloseWithError(returnedErr) + }() - return nil - }) + if err := quarantineOnlySnapshotRepository.PackObjects(ctx, objectsToPackReader, packWriter); err != nil { + return fmt.Errorf("pack objects: %w", err) + } - packReader, packWriter := io.Pipe() - group.Go(func() (returnedErr error) { - defer func() { - objectsToPackReader.CloseWithError(returnedErr) - packWriter.CloseWithError(returnedErr) - }() + return nil + }) - if err := quarantineOnlySnapshotRepository.PackObjects(ctx, objectsToPackReader, packWriter); err != nil { - return fmt.Errorf("pack objects: %w", err) - } + group.Go(func() (returnedErr error) { + defer packReader.CloseWithError(returnedErr) - return nil - }) + // index-pack places the pack, index, and reverse index into the transaction's staging directory. + var stdout, stderr bytes.Buffer + if err := quarantineOnlySnapshotRepository.ExecAndWait(ctx, git.Command{ + Name: "index-pack", + Flags: []git.Option{git.Flag{Name: "--stdin"}, git.Flag{Name: "--rev-index"}}, + Args: []string{filepath.Join(transaction.walFilesPath(), "objects.pack")}, + }, git.WithStdin(packReader), git.WithStdout(&stdout), git.WithStderr(&stderr)); err != nil { + return structerr.New("index pack: %w", err).WithMetadata("stderr", stderr.String()) + } - group.Go(func() (returnedErr error) { - defer packReader.CloseWithError(returnedErr) + matches := packPrefixRegexp.FindStringSubmatch(stdout.String()) + if len(matches) != 2 { + return structerr.New("unexpected index-pack output").WithMetadata("stdout", stdout.String()) + } - // index-pack places the pack, index, and reverse index into the transaction's staging directory. - var stdout, stderr bytes.Buffer - if err := quarantineOnlySnapshotRepository.ExecAndWait(ctx, git.Command{ - Name: "index-pack", - Flags: []git.Option{git.Flag{Name: "--stdin"}, git.Flag{Name: "--rev-index"}}, - Args: []string{filepath.Join(transaction.walFilesPath(), "objects.pack")}, - }, git.WithStdin(packReader), git.WithStdout(&stdout), git.WithStderr(&stderr)); err != nil { - return structerr.New("index pack: %w", err).WithMetadata("stderr", stderr.String()) - } + // Sync the files and the directory entries so everything is flushed to the disk prior + // to moving on to committing the log entry. This way we only have to flush the directory + // move when we move the staged files into the log. + if err := safe.NewSyncer().SyncRecursive(transaction.walFilesPath()); err != nil { + return fmt.Errorf("sync recursive: %w", err) + } - matches := packPrefixRegexp.FindStringSubmatch(stdout.String()) - if len(matches) != 2 { - return structerr.New("unexpected index-pack output").WithMetadata("stdout", stdout.String()) - } + transaction.packPrefix = fmt.Sprintf("pack-%s", matches[1]) + + return nil + }) + } - // Sync the files and the directory entries so everything is flushed to the disk prior - // to moving on to committing the log entry. This way we only have to flush the directory - // move when we move the staged files into the log. - if err := safe.NewSyncer().SyncRecursive(transaction.walFilesPath()); err != nil { - return fmt.Errorf("sync recursive: %w", err) + // Write the objects to `git pack-objects`. Restore the new line that was + // trimmed by the scanner. + if _, err := objectsToPackWriter.Write([]byte(objectLine + "\n")); err != nil { + return fmt.Errorf("write object id for packing: %w", err) + } } - transaction.packPrefix = fmt.Sprintf("pack-%s", matches[1]) + if err := scanner.Err(); err != nil { + return fmt.Errorf("scanner: %w", err) + } return nil }) diff --git a/internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go b/internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go index 915aadc30..e1427a47d 100644 --- a/internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go +++ b/internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go @@ -2,6 +2,7 @@ package storagemgr import ( "fmt" + "io/fs" "os" "path/filepath" "testing" @@ -9,9 +10,11 @@ import ( "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v16/internal/git" "gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" "gitlab.com/gitlab-org/gitaly/v16/internal/git/updateref" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm" + "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper" ) func generateInvalidReferencesTests(t *testing.T, setup testTransactionSetup) []transactionTestCase { @@ -555,6 +558,19 @@ func generateModifyReferencesTests(t *testing.T, setup testTransactionSetup) []t Database: DatabaseState{ string(keyAppliedLSN(setup.PartitionID)): LSN(1).toProto(), }, + Directory: testhelper.DirectoryState{ + "/": {Mode: fs.ModeDir | perm.PrivateDir}, + "/wal": {Mode: fs.ModeDir | perm.PrivateDir}, + "/wal/1": {Mode: fs.ModeDir | perm.PrivateDir}, + "/wal/1/objects.idx": indexFileDirectoryEntry(setup.Config), + "/wal/1/objects.rev": reverseIndexFileDirectoryEntry(setup.Config), + "/wal/1/objects.pack": packFileDirectoryEntry( + setup.Config, + []git.ObjectID{ + setup.ObjectHash.EmptyTreeOID, + }, + ), + }, Repositories: RepositoryStates{ setup.RelativePath: { References: &ReferencesState{ @@ -577,10 +593,7 @@ func generateModifyReferencesTests(t *testing.T, setup testTransactionSetup) []t ReferenceUpdates: ReferenceUpdates{ "refs/heads/main": {OldOID: setup.ObjectHash.ZeroOID, NewOID: setup.NonExistentOID}, }, - ExpectedError: updateref.NonExistentObjectError{ - ReferenceName: "refs/heads/main", - ObjectID: setup.NonExistentOID.String(), - }, + ExpectedError: localrepo.InvalidObjectError(setup.NonExistentOID), }, }, }, diff --git a/internal/gitaly/storage/storagemgr/transaction_manager_test.go b/internal/gitaly/storage/storagemgr/transaction_manager_test.go index 1160573fa..337077d41 100644 --- a/internal/gitaly/storage/storagemgr/transaction_manager_test.go +++ b/internal/gitaly/storage/storagemgr/transaction_manager_test.go @@ -25,7 +25,6 @@ import ( "gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest" "gitlab.com/gitlab-org/gitaly/v16/internal/git/housekeeping" "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo" - "gitlab.com/gitlab-org/gitaly/v16/internal/git/updateref" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config" "gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/transaction" "gitlab.com/gitlab-org/gitaly/v16/internal/grpc/backchannel" @@ -605,10 +604,7 @@ func generateCommonTests(t *testing.T, ctx context.Context, setup testTransactio ReferenceUpdates: ReferenceUpdates{ "refs/heads/main": {OldOID: setup.ObjectHash.ZeroOID, NewOID: setup.Commits.First.OID}, }, - ExpectedError: updateref.NonExistentObjectError{ - ReferenceName: "refs/heads/main", - ObjectID: setup.Commits.First.OID.String(), - }, + ExpectedError: localrepo.InvalidObjectError(setup.Commits.First.OID), }, Begin{ TransactionID: 2, |