Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSami Hiltunen <shiltunen@gitlab.com>2023-05-26 17:09:25 +0300
committerSami Hiltunen <shiltunen@gitlab.com>2023-06-01 11:01:10 +0300
commit3cee3b150adc7935d611dd98dfca4b2c2149c0f7 (patch)
tree168ac137cf286f9d4173e889d68217cedefc6cf6 /internal/gitaly/transaction_manager.go
parentceebb89a7dfd326a621519f07201286374ea8a17 (diff)
Apply logged pack files to repository without copying
TransactionManager is currently unpacking pack files in order to apply them to the repository from the log. This is inefficient: 1. The objects are copied from the pack file when they are unpacked. 2. Accessing unpacked objects is less efficient than packed ones. 3. It's more work to repack the objects later. To avoid these problems, this commit instead hard links the logged pack files from the log to the repository. This is done as follows: 1. When staging the transaction, a pack file is computed that contains objects that are newly reachable from the new reference tips. An index is also computed for the pack file. 2. The pack file and its index are logged. 3. The pack file and the index are hard linked from the log directory to the repository's 'objects/pack' directory. The pack file and the index are logged with the name Git gives them, so `pack-<digest>.{pack,idx}`. It would be simpler to log the files with static names so they are always named the same for all transactions, say `transaction.{pack,idx}`. They could then be linked to the repository's object directory under the log entry's index, so for example `objects/pack/<log_index>.{pack,idx}`. This would be simpler as we wouldn't have to pipe the pack prefix through the log. The problem is that Git doesn't seem to automatically remove these packs when it's doing a full repack with `git repack -ad`. This would lead to the packs accumulating in the repository. For that reason, we use the Git generated names. Applying pack files directly doesn't come without downsides either. With each write resulting in an additional pack file in the repository, looking up objects from the packs becomes slower. This is particularly a problem if there is a large number of small writes. We might later want to have a look at using a threshold on the object count in the pack to decide whether we apply it directly or unpack it similarly to what Git does when receiving packs.
We'll leave this for later, though, once we know this is a problem.
Diffstat (limited to 'internal/gitaly/transaction_manager.go')
-rw-r--r--internal/gitaly/transaction_manager.go82
1 files changed, 63 insertions, 19 deletions
diff --git a/internal/gitaly/transaction_manager.go b/internal/gitaly/transaction_manager.go
index e98ba7a92..cd1f9fb70 100644
--- a/internal/gitaly/transaction_manager.go
+++ b/internal/gitaly/transaction_manager.go
@@ -11,6 +11,7 @@ import (
"io/fs"
"os"
"path/filepath"
+ "regexp"
"sort"
"strconv"
"strings"
@@ -153,9 +154,9 @@ type Transaction struct {
// quarantineDirectory is the directory within the stagingDirectory where the new objects of the
// transaction are quarantined.
quarantineDirectory string
- // includesPack is set if a pack file has been computed for the transaction and should be
- // logged.
- includesPack bool
+ // packPrefix contains the prefix (`pack-<digest>`) of the transaction's pack if the transaction
+ // had objects to log.
+ packPrefix string
// stagingRepository is a repository that is used to stage the transaction. If there are quarantined
// objects, it has the quarantine applied so the objects are available for verification and packing.
stagingRepository repository
@@ -556,6 +557,10 @@ func (mgr *TransactionManager) setupStagingRepository(ctx context.Context, trans
return nil
}
+// packPrefixRegexp matches the output of `git index-pack` where it
+// prints the pack's prefix in the format `pack <digest>`.
+var packPrefixRegexp = regexp.MustCompile(`^pack\t([0-9a-f]+)\n$`)
+
// packObjects packs the objects included in the transaction into a single pack file that is ready
// for logging. The pack file includes all unreachable objects that are about to be made reachable.
func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Transaction) error {
@@ -583,8 +588,6 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra
return nil
}
- transaction.includesPack = true
-
objectsReader, objectsWriter := io.Pipe()
group, ctx := errgroup.WithContext(ctx)
@@ -622,15 +625,36 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra
return fmt.Errorf("create wal files directory: %w", err)
}
- var stderr bytes.Buffer
+ // index-pack places the pack and the index into the repository's object directory. The
+ // staging repository is configured with a quarantine so we execute it there.
+ var stdout, stderr bytes.Buffer
if err := transaction.stagingRepository.ExecAndWait(ctx, git.Command{
Name: "index-pack",
Flags: []git.Option{git.Flag{Name: "--stdin"}},
- Args: []string{packFilePath(transaction.walFilesPath())},
- }, git.WithStdin(packReader), git.WithStderr(&stderr)); err != nil {
+ }, git.WithStdin(packReader), git.WithStdout(&stdout), git.WithStderr(&stderr)); err != nil {
return structerr.New("index pack: %w", err).WithMetadata("stderr", stderr.String())
}
+ matches := packPrefixRegexp.FindStringSubmatch(stdout.String())
+ if len(matches) != 2 {
+ return structerr.New("unexpected index-pack output").WithMetadata("stdout", stdout.String())
+ }
+
+ // Move the files from the quarantine to the wal-files directory so they'll get logged as part
+ // of the directory.
+ packPrefix := fmt.Sprintf("pack-%s", matches[1])
+ for _, fileName := range []string{
+ packPrefix + ".pack",
+ packPrefix + ".idx",
+ } {
+ if err := os.Rename(
+ filepath.Join(transaction.quarantineDirectory, "pack", fileName),
+ filepath.Join(transaction.walFilesPath(), fileName),
+ ); err != nil {
+ return fmt.Errorf("move file: %w", err)
+ }
+ }
+
// Sync the files and the directory entries so everything is flushed to the disk prior
// to moving on to committing the log entry. This way we only have to flush the directory
// move when we move the staged files into the log.
@@ -638,6 +662,8 @@ func (mgr *TransactionManager) packObjects(ctx context.Context, transaction *Tra
return fmt.Errorf("sync recursive: %w", err)
}
+ transaction.packPrefix = packPrefix
+
return nil
})
@@ -755,8 +781,8 @@ func (mgr *TransactionManager) processTransaction() (returnedErr error) {
}
nextLogIndex := mgr.appendedLogIndex + 1
- if transaction.includesPack {
- logEntry.IncludesPack = true
+ if transaction.packPrefix != "" {
+ logEntry.PackPrefix = transaction.packPrefix
removeFiles, err := mgr.storeWALFiles(mgr.ctx, nextLogIndex, transaction)
cleanUps = append(cleanUps, func() error {
@@ -1238,8 +1264,8 @@ func (mgr *TransactionManager) applyLogEntry(ctx context.Context, logIndex LogIn
return fmt.Errorf("apply repository deletion: %w", err)
}
} else {
- if logEntry.IncludesPack {
- if err := mgr.applyPackFile(ctx, logIndex); err != nil {
+ if logEntry.PackPrefix != "" {
+ if err := mgr.applyPackFile(ctx, logEntry.PackPrefix, logIndex); err != nil {
return fmt.Errorf("apply pack file: %w", err)
}
}
@@ -1362,15 +1388,33 @@ func (mgr *TransactionManager) applyRepositoryDeletion(ctx context.Context, inde
}
// applyPackFile unpacks the objects from the pack file into the repository if the log entry
-// has an associated pack file.
-func (mgr *TransactionManager) applyPackFile(ctx context.Context, logIndex LogIndex) error {
- packFile, err := os.Open(packFilePath(walFilesPathForLogIndex(mgr.repositoryPath, logIndex)))
- if err != nil {
- return fmt.Errorf("open pack file: %w", err)
+// has an associated pack file. This is done by hard linking the pack and index from the
+// log into the repository's object directory.
+func (mgr *TransactionManager) applyPackFile(ctx context.Context, packPrefix string, logIndex LogIndex) error {
+ packDirectory := filepath.Join(mgr.repositoryPath, "objects", "pack")
+ for _, fileName := range []string{
+ packPrefix + ".pack",
+ packPrefix + ".idx",
+ } {
+ if err := os.Link(
+ filepath.Join(walFilesPathForLogIndex(mgr.repositoryPath, logIndex), fileName),
+ filepath.Join(packDirectory, fileName),
+ ); err != nil {
+ if !errors.Is(err, fs.ErrExist) {
+ return fmt.Errorf("link file: %w", err)
+ }
+
+ // The file already existing means that we've already linked it in place or a repack
+ // has resulted in the exact same file. No need to do anything about it.
+ }
}
- defer packFile.Close()
- return mgr.repository.UnpackObjects(ctx, packFile)
+ // Sync the new directory entries created.
+ if err := safe.NewSyncer().Sync(packDirectory); err != nil {
+ return fmt.Errorf("sync: %w", err)
+ }
+
+ return nil
}
// applyCustomHooks applies the custom hooks to the repository from the log entry. The custom hooks are stored