Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSami Hiltunen <shiltunen@gitlab.com>2024-01-12 22:28:01 +0300
committerSami Hiltunen <shiltunen@gitlab.com>2024-01-15 13:53:36 +0300
commitbeb6fd2d7313019c5beaabb99d158147d4a72bad (patch)
treece65fac623536416b4b09f57ee3a251f51b50863
parentd84aee2f8e60e28a6558f36383e4f766531e571f (diff)
Simplify checking whether packing is neededsmh-record-packfile-deps
TransactionManager is currently looking into the quarantine directory explicitly to see if there are new objects. As we're now walking the new reference tips in the quarantine directory, we know whether we need to pack objects or not based on whether we get object IDs back from the walk. Let's thus simplify and remove the additional check. As an optimization, we only launch `pack-objects` and `index-pack` if we receive an object to pack. This avoids launching the commands for transactions that don't need any new objects from the quarantine to be committed. There are minor differences in behavior as visible in tests: 1. All required objects are now verified to exist in the same place in `verifyObjectDependencies`. Errors change in few test cases to match as we no longer rely on `update-ref` to error out in order to verify the objects the references have been pointed to exist. 2. Git has special cased the empty tree OID. Git can read the object from the object database even if it hasn't been explicitly written there. This is visible in one of the test cases that points a tag to an empty tree. `update-ref` previously allowed the update to go through even if we didn't write out the object as it's considered to always exist. As we now pack all objects returned from the object walk, we create a pack that contains the empty tree oid. In practice, this should not have any impact. If necessary, we can change the behavior to match later. For now, I've let it be since it's added behavior for little benefit.
-rw-r--r--internal/gitaly/storage/storagemgr/transaction_manager.go143
-rw-r--r--internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go21
-rw-r--r--internal/gitaly/storage/storagemgr/transaction_manager_test.go6
3 files changed, 73 insertions, 97 deletions
diff --git a/internal/gitaly/storage/storagemgr/transaction_manager.go b/internal/gitaly/storage/storagemgr/transaction_manager.go
index bb2eaf9ff..296cbd802 100644
--- a/internal/gitaly/storage/storagemgr/transaction_manager.go
+++ b/internal/gitaly/storage/storagemgr/transaction_manager.go
@@ -1008,40 +1008,6 @@ func (mgr *TransactionManager) stageHooks(ctx context.Context, transaction *Tran
// prints the packs prefix in the format `pack <digest>`.
var packPrefixRegexp = regexp.MustCompile(`^pack\t([0-9a-f]+)\n$`)
-// shouldPackObjects checks whether the quarantine directory has any non-default content in it.
-// If so, this signifies objects were written into it and we should pack them.
-func shouldPackObjects(quarantineDirectory string) (bool, error) {
- errHasNewContent := errors.New("new content found")
-
- // The quarantine directory itself and the pack directory within it are created when the transaction
- // begins. These don't signify new content so we ignore them.
- preExistingDirs := map[string]struct{}{
- quarantineDirectory: {},
- filepath.Join(quarantineDirectory, "pack"): {},
- }
- if err := filepath.Walk(quarantineDirectory, func(path string, info fs.FileInfo, err error) error {
- if err != nil {
- return err
- }
-
- if _, ok := preExistingDirs[path]; ok {
- // The pre-existing directories don't signal any new content to pack.
- return nil
- }
-
- // Use an error sentinel to cancel the walk as soon as new content has been found.
- return errHasNewContent
- }); err != nil {
- if errors.Is(err, errHasNewContent) {
- return true, nil
- }
-
- return false, fmt.Errorf("check for objects: %w", err)
- }
-
- return false, nil
-}
-
// packObjects walks the objects in the quarantine directory starting from the new
// reference tips introduced by the transaction and the explicitly included objects. All
// objects in the quarantine directory that are encountered during the walk are included in
@@ -1058,12 +1024,6 @@ func shouldPackObjects(quarantineDirectory string) (bool, error) {
// The packed objects are not yet checked for validity. See the following issue for more
// details on this: https://gitlab.com/gitlab-org/gitaly/-/issues/5779
func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Transaction) error {
- if shouldPack, err := shouldPackObjects(transaction.quarantineDirectory); err != nil {
- return fmt.Errorf("should pack objects: %w", err)
- } else if !shouldPack {
- return nil
- }
-
quarantineOnlySnapshotRepository := transaction.snapshotRepository
if transaction.snapshotRepository.GetGitObjectDirectory() != "" {
// We're not configuring a quarantine on the snapshotRepository when the repository didn't
@@ -1120,12 +1080,8 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra
})
transaction.objectDependencies = map[git.ObjectID]struct{}{}
- objectsToPackReader, objectsToPackWriter := io.Pipe()
group.Go(func() (returnedErr error) {
- defer func() {
- objectWalkReader.CloseWithError(returnedErr)
- objectsToPackWriter.CloseWithError(returnedErr)
- }()
+ defer objectWalkReader.CloseWithError(returnedErr)
// objectLine comes in two formats from the walk:
// 1. '<oid> <path>\n' in case the object is found. <path> may or may not be set.
@@ -1136,6 +1092,11 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra
// Objects that are not found are recorded as the transaction's
// dependencies since they should exist in the repository.
scanner := bufio.NewScanner(objectWalkReader)
+
+ objectsToPackReader, objectsToPackWriter := io.Pipe()
+ defer objectsToPackWriter.CloseWithError(returnedErr)
+
+ packObjectsStarted := false
for scanner.Scan() {
objectLine := scanner.Text()
if objectLine[0] == '?' {
@@ -1144,60 +1105,66 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra
continue
}
- // Write the objects to `git pack-objects`. Restore the new line that was
- // trimmed by the scanner.
- if _, err := objectsToPackWriter.Write([]byte(objectLine + "\n")); err != nil {
- return fmt.Errorf("write object id for packing: %w", err)
- }
- }
+ // At this point we have an object that we need to pack. If `pack-objects` and `index-pack`
+ // haven't yet been launched, launch them.
+ if !packObjectsStarted {
+ packObjectsStarted = true
- if err := scanner.Err(); err != nil {
- return fmt.Errorf("scanner: %w", err)
- }
+ packReader, packWriter := io.Pipe()
+ group.Go(func() (returnedErr error) {
+ defer func() {
+ objectsToPackReader.CloseWithError(returnedErr)
+ packWriter.CloseWithError(returnedErr)
+ }()
- return nil
- })
+ if err := quarantineOnlySnapshotRepository.PackObjects(ctx, objectsToPackReader, packWriter); err != nil {
+ return fmt.Errorf("pack objects: %w", err)
+ }
- packReader, packWriter := io.Pipe()
- group.Go(func() (returnedErr error) {
- defer func() {
- objectsToPackReader.CloseWithError(returnedErr)
- packWriter.CloseWithError(returnedErr)
- }()
+ return nil
+ })
- if err := quarantineOnlySnapshotRepository.PackObjects(ctx, objectsToPackReader, packWriter); err != nil {
- return fmt.Errorf("pack objects: %w", err)
- }
+ group.Go(func() (returnedErr error) {
+ defer packReader.CloseWithError(returnedErr)
- return nil
- })
+ // index-pack places the pack, index, and reverse index into the transaction's staging directory.
+ var stdout, stderr bytes.Buffer
+ if err := quarantineOnlySnapshotRepository.ExecAndWait(ctx, git.Command{
+ Name: "index-pack",
+ Flags: []git.Option{git.Flag{Name: "--stdin"}, git.Flag{Name: "--rev-index"}},
+ Args: []string{filepath.Join(transaction.walFilesPath(), "objects.pack")},
+ }, git.WithStdin(packReader), git.WithStdout(&stdout), git.WithStderr(&stderr)); err != nil {
+ return structerr.New("index pack: %w", err).WithMetadata("stderr", stderr.String())
+ }
- group.Go(func() (returnedErr error) {
- defer packReader.CloseWithError(returnedErr)
+ matches := packPrefixRegexp.FindStringSubmatch(stdout.String())
+ if len(matches) != 2 {
+ return structerr.New("unexpected index-pack output").WithMetadata("stdout", stdout.String())
+ }
- // index-pack places the pack, index, and reverse index into the transaction's staging directory.
- var stdout, stderr bytes.Buffer
- if err := quarantineOnlySnapshotRepository.ExecAndWait(ctx, git.Command{
- Name: "index-pack",
- Flags: []git.Option{git.Flag{Name: "--stdin"}, git.Flag{Name: "--rev-index"}},
- Args: []string{filepath.Join(transaction.walFilesPath(), "objects.pack")},
- }, git.WithStdin(packReader), git.WithStdout(&stdout), git.WithStderr(&stderr)); err != nil {
- return structerr.New("index pack: %w", err).WithMetadata("stderr", stderr.String())
- }
+ // Sync the files and the directory entries so everything is flushed to the disk prior
+ // to moving on to committing the log entry. This way we only have to flush the directory
+ // move when we move the staged files into the log.
+ if err := safe.NewSyncer().SyncRecursive(transaction.walFilesPath()); err != nil {
+ return fmt.Errorf("sync recursive: %w", err)
+ }
- matches := packPrefixRegexp.FindStringSubmatch(stdout.String())
- if len(matches) != 2 {
- return structerr.New("unexpected index-pack output").WithMetadata("stdout", stdout.String())
- }
+ transaction.packPrefix = fmt.Sprintf("pack-%s", matches[1])
+
+ return nil
+ })
+ }
- // Sync the files and the directory entries so everything is flushed to the disk prior
- // to moving on to committing the log entry. This way we only have to flush the directory
- // move when we move the staged files into the log.
- if err := safe.NewSyncer().SyncRecursive(transaction.walFilesPath()); err != nil {
- return fmt.Errorf("sync recursive: %w", err)
+ // Write the objects to `git pack-objects`. Restore the new line that was
+ // trimmed by the scanner.
+ if _, err := objectsToPackWriter.Write([]byte(objectLine + "\n")); err != nil {
+ return fmt.Errorf("write object id for packing: %w", err)
+ }
}
- transaction.packPrefix = fmt.Sprintf("pack-%s", matches[1])
+ if err := scanner.Err(); err != nil {
+ return fmt.Errorf("scanner: %w", err)
+ }
return nil
})
diff --git a/internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go b/internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go
index 915aadc30..e1427a47d 100644
--- a/internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go
+++ b/internal/gitaly/storage/storagemgr/transaction_manager_refs_test.go
@@ -2,6 +2,7 @@ package storagemgr
import (
"fmt"
+ "io/fs"
"os"
"path/filepath"
"testing"
@@ -9,9 +10,11 @@ import (
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v16/internal/git"
"gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v16/internal/git/updateref"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
+ "gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
)
func generateInvalidReferencesTests(t *testing.T, setup testTransactionSetup) []transactionTestCase {
@@ -555,6 +558,19 @@ func generateModifyReferencesTests(t *testing.T, setup testTransactionSetup) []t
Database: DatabaseState{
string(keyAppliedLSN(setup.PartitionID)): LSN(1).toProto(),
},
+ Directory: testhelper.DirectoryState{
+ "/": {Mode: fs.ModeDir | perm.PrivateDir},
+ "/wal": {Mode: fs.ModeDir | perm.PrivateDir},
+ "/wal/1": {Mode: fs.ModeDir | perm.PrivateDir},
+ "/wal/1/objects.idx": indexFileDirectoryEntry(setup.Config),
+ "/wal/1/objects.rev": reverseIndexFileDirectoryEntry(setup.Config),
+ "/wal/1/objects.pack": packFileDirectoryEntry(
+ setup.Config,
+ []git.ObjectID{
+ setup.ObjectHash.EmptyTreeOID,
+ },
+ ),
+ },
Repositories: RepositoryStates{
setup.RelativePath: {
References: &ReferencesState{
@@ -577,10 +593,7 @@ func generateModifyReferencesTests(t *testing.T, setup testTransactionSetup) []t
ReferenceUpdates: ReferenceUpdates{
"refs/heads/main": {OldOID: setup.ObjectHash.ZeroOID, NewOID: setup.NonExistentOID},
},
- ExpectedError: updateref.NonExistentObjectError{
- ReferenceName: "refs/heads/main",
- ObjectID: setup.NonExistentOID.String(),
- },
+ ExpectedError: localrepo.InvalidObjectError(setup.NonExistentOID),
},
},
},
diff --git a/internal/gitaly/storage/storagemgr/transaction_manager_test.go b/internal/gitaly/storage/storagemgr/transaction_manager_test.go
index 1160573fa..337077d41 100644
--- a/internal/gitaly/storage/storagemgr/transaction_manager_test.go
+++ b/internal/gitaly/storage/storagemgr/transaction_manager_test.go
@@ -25,7 +25,6 @@ import (
"gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest"
"gitlab.com/gitlab-org/gitaly/v16/internal/git/housekeeping"
"gitlab.com/gitlab-org/gitaly/v16/internal/git/localrepo"
- "gitlab.com/gitlab-org/gitaly/v16/internal/git/updateref"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/transaction"
"gitlab.com/gitlab-org/gitaly/v16/internal/grpc/backchannel"
@@ -605,10 +604,7 @@ func generateCommonTests(t *testing.T, ctx context.Context, setup testTransactio
ReferenceUpdates: ReferenceUpdates{
"refs/heads/main": {OldOID: setup.ObjectHash.ZeroOID, NewOID: setup.Commits.First.OID},
},
- ExpectedError: updateref.NonExistentObjectError{
- ReferenceName: "refs/heads/main",
- ObjectID: setup.Commits.First.OID.String(),
- },
+ ExpectedError: localrepo.InvalidObjectError(setup.Commits.First.OID),
},
Begin{
TransactionID: 2,