Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Patrick Steinhardt <psteinhardt@gitlab.com> 2023-03-17 16:32:44 +0300
committer: Patrick Steinhardt <psteinhardt@gitlab.com> 2023-03-17 17:27:03 +0300
commit: e0d3e73973f96e7e048d60ddf64cf62f14c15646 (patch)
tree: 623de68bf6d38a4c8aed1beb2c89aca2a8e10a3f
parent: 076d3b6bbfc3fac82dc9430205667f442b252c57 (diff)
housekeeping: Keep timestamp of last full repack
At the moment, we can keep track of the last time a full repack has happened in a repository by simply taking the timestamp of the oldest packfile in a repository. This only really works because we have a split between full and incremental repacks.

This is about to change soonish though once we implement support for geometric repacking of repositories. In geometric repacking, we don't control anymore whether old packfiles will get rewritten or not, but instead we shift that burden to Git. So the oldest packfile in the repository may have been written either by a full repack, or by a geometric repack that decided to rewrite the oldest packfile.

Now why do we care for this? The problem is that once we move towards geometric repacking, we still need to make sure that we perform a full repack every once in a while. This full repack will then be responsible for moving unreachable objects into a separate cruft pack so that we can still properly prune objects from repositories. But as we will have no direct control over the number of packfiles in a repository anymore, the current heuristic that uses the number of existing packfiles won't work anymore to decide whether we need to perform a full repack or not.

Instead, we'll be moving to a time-based heuristic where we decide to do a full repack every once in a while, e.g. daily. This will ensure that we perform geometric repacks most of the time but still move unreachable objects out into cruft packs on a schedule that is easy to understand and explain. As mentioned though, we are not in a position to derive the last time such a full repack has happened.

To fix this, introduce a new timestamp file ".gitaly-full-repack-timestamp" that we write into the repository every time we are about to perform a full repack.

Changelog: added
-rw-r--r-- internal/git/housekeeping/objects.go 22
-rw-r--r-- internal/git/housekeeping/objects_test.go 6
-rw-r--r-- internal/git/stats/repository_info.go 14
3 files changed, 39 insertions, 3 deletions
diff --git a/internal/git/housekeeping/objects.go b/internal/git/housekeeping/objects.go
index 414238539..4ac0d1221 100644
--- a/internal/git/housekeeping/objects.go
+++ b/internal/git/housekeeping/objects.go
@@ -3,6 +3,8 @@ package housekeeping
import (
"context"
"fmt"
+ "os"
+ "path/filepath"
"strconv"
"time"
@@ -10,6 +12,7 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/repository"
"gitlab.com/gitlab-org/gitaly/v15/internal/git/stats"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/helper/perm"
"gitlab.com/gitlab-org/gitaly/v15/internal/structerr"
)
@@ -43,6 +46,11 @@ type RepackObjectsConfig struct {
// RepackObjects repacks objects in the given repository and updates the commit-graph. The way
// objects are repacked is determined via the RepackObjectsConfig.
func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsConfig) error {
+ repoPath, err := repo.Path()
+ if err != nil {
+ return err
+ }
+
if !cfg.FullRepack && !cfg.WriteMultiPackIndex && cfg.WriteBitmap {
return structerr.NewInvalidArgument("cannot write packfile bitmap for an incremental repack")
}
@@ -74,6 +82,20 @@ func RepackObjects(ctx context.Context, repo *localrepo.Repo, cfg RepackObjectsC
Value: cfg.CruftExpireBefore.Format(rfc2822DateFormat),
})
}
+
+ // When we are about to perform a full repack we update the "full-repack-timestamp"
+ // file. This is done so that we can tell when we have last attempted a full repack
+ // in a repository. This information can be used by our heuristics to effectively
+ // rate-limit the frequency of full repacks.
+ //
+ // Note that we write the file _before_ actually writing the new pack, which means
+ // that even if the full repack fails, we would still pretend to have done it. This
+ // is done intentionally, as the likelihood for huge repositories to fail during a
+ // full repack is comparatively high. So if we didn't update the timestamp in case
+ // of a failure we'd potentially busy-spin trying to do a full repack.
+ if err := os.WriteFile(filepath.Join(repoPath, stats.FullRepackTimestampFilename), nil, perm.PrivateFile); err != nil {
+ return fmt.Errorf("updating timestamp: %w", err)
+ }
}
if cfg.WriteMultiPackIndex {
diff --git a/internal/git/housekeeping/objects_test.go b/internal/git/housekeeping/objects_test.go
index 4a91a2d1f..8bb6d158c 100644
--- a/internal/git/housekeeping/objects_test.go
+++ b/internal/git/housekeeping/objects_test.go
@@ -400,6 +400,12 @@ func TestRepackObjects(t *testing.T) {
require.Equal(t, tc.expectedErr, RepackObjects(ctx, repo, tc.repackCfg))
requireObjectsState(t, repo, tc.stateAfterRepack)
+ if tc.repackCfg.FullRepack {
+ require.FileExists(t, filepath.Join(repoPath, stats.FullRepackTimestampFilename))
+ } else {
+ require.NoFileExists(t, filepath.Join(repoPath, stats.FullRepackTimestampFilename))
+ }
+
// There should not be any server info data in the repository.
require.NoFileExists(t, filepath.Join(repoPath, "info", "refs"))
require.NoFileExists(t, filepath.Join(repoPath, "objects", "info", "packs"))
diff --git a/internal/git/stats/repository_info.go b/internal/git/stats/repository_info.go
index 6134e6d02..2f02eb22f 100644
--- a/internal/git/stats/repository_info.go
+++ b/internal/git/stats/repository_info.go
@@ -18,9 +18,17 @@ import (
"gitlab.com/gitlab-org/gitaly/v15/internal/helper/text"
)
-// StaleObjectsGracePeriod is time delta that is used to indicate cutoff wherein an object would be
-// considered old. Currently this is set to being 2 weeks (2 * 7days * 24hours).
-const StaleObjectsGracePeriod = -14 * 24 * time.Hour
+const (
+ // StaleObjectsGracePeriod is time delta that is used to indicate cutoff wherein an object
+ // would be considered old. Currently this is set to being 2 weeks (2 * 7days * 24hours).
+ StaleObjectsGracePeriod = -14 * 24 * time.Hour
+
+ // FullRepackTimestampFilename is the name of the file that is used as a timestamp for the
+ // last full repack that happened in the repository. Whenever a full repack is about to
+ // happen, Gitaly will touch this file so that its last-modified date can be used to tell
+ // how long ago the last full repack happened.
+ FullRepackTimestampFilename = ".gitaly-full-repack-timestamp"
+)
// PackfilesCount returns the number of packfiles a repository has.
func PackfilesCount(repo *localrepo.Repo) (uint64, error) {