diff options
author | Sami Hiltunen <shiltunen@gitlab.com> | 2022-02-23 18:42:53 +0300 |
---|---|---|
committer | Sami Hiltunen <shiltunen@gitlab.com> | 2022-05-09 16:30:53 +0300 |
commit | c7798d51d7f721888ded9930acc6428e735a9ef1 (patch) | |
tree | 9881a24fdd618557157a52b8cecd85900c3847fd | |
parent | 38faecba969039ea49b75193082c6f92f37cb516 (diff) |
Derive identifiable replica paths for object pools
Gitaly is relying on the @pools prefix in OptimizeRepository to
avoid pruning object pools. Pruning object pools could lead to data
loss if some pool members still need the pruned objects. To ensure
Gitaly can identify object pools from the other repositories after
their relative paths have been rewritten, this commit adds the
DerivePoolPath function that will be used in the router to derive
the replica paths for object pools. In doing so, the replica paths have
been changed to include a @cluster prefix, which allows for grouping
the cluster's repositories and pools under a common directory. Using
the existing @pools directory would also be possible as Rails hashes
the repository ids whereas Praefect doesn't. However, it's clearer
to separate them in different directories and leave ownership to each
service minting paths so they can ensure independently there are no
conflicts.
-rw-r--r-- | internal/git/housekeeping/object_pool.go | 4 | ||||
-rw-r--r-- | internal/git/housekeeping/object_pool_test.go | 10 | ||||
-rw-r--r-- | internal/praefect/praefectutil/replica_path.go | 27 | ||||
-rw-r--r-- | internal/praefect/praefectutil/replica_path_test.go | 36 |
4 files changed, 72 insertions, 5 deletions
diff --git a/internal/git/housekeeping/object_pool.go b/internal/git/housekeeping/object_pool.go index 1e4a935c8..4d0a27e4b 100644 --- a/internal/git/housekeeping/object_pool.go +++ b/internal/git/housekeeping/object_pool.go @@ -3,6 +3,8 @@ package housekeeping import ( "regexp" "strings" + + "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/praefectutil" ) // railsPoolDirRegexp is used to validate object pool directory structure and name as generated by Rails. @@ -21,5 +23,5 @@ func IsRailsPoolPath(relativePath string) bool { // IsPoolPath returns whether the relative path indicates the repository is an object // pool. func IsPoolPath(relativePath string) bool { - return IsRailsPoolPath(relativePath) + return IsRailsPoolPath(relativePath) || praefectutil.IsPoolPath(relativePath) } diff --git a/internal/git/housekeeping/object_pool_test.go b/internal/git/housekeeping/object_pool_test.go index 6fdded51f..50c3201cd 100644 --- a/internal/git/housekeeping/object_pool_test.go +++ b/internal/git/housekeeping/object_pool_test.go @@ -5,6 +5,7 @@ import ( "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" + "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/praefectutil" ) func TestIsPoolPath(t *testing.T) { @@ -19,6 +20,15 @@ func TestIsPoolPath(t *testing.T) { isPoolPath: true, }, { + desc: "praefect pool path", + relativePath: praefectutil.DerivePoolPath(1), + isPoolPath: true, + }, + { + desc: "praefect replica path", + relativePath: praefectutil.DeriveReplicaPath(1), + }, + { desc: "empty string", }, { diff --git a/internal/praefect/praefectutil/replica_path.go b/internal/praefect/praefectutil/replica_path.go index 5f35e1e81..576762d22 100644 --- a/internal/praefect/praefectutil/replica_path.go +++ b/internal/praefect/praefectutil/replica_path.go @@ -3,18 +3,41 @@ package praefectutil import ( "crypto/sha256" "fmt" + "path/filepath" "strconv" + "strings" ) +// poolPathPrefix is the prefix directory where Praefect places 
object pools. +const poolPathPrefix = "@cluster/pools/" + +// IsPoolPath returns whether the relative path indicates this is a Praefect generated object pool path. +func IsPoolPath(relativePath string) bool { + return strings.HasPrefix(relativePath, poolPathPrefix) +} + // DeriveReplicaPath derives a repository's disk storage path from its repository ID. The repository ID // is hashed with SHA256 and the first four hex digits of the hash are used as the two subdirectories to -// ensure even distribution into subdirectories. The format is @repositories/ab/cd/<repository-id>. +// ensure even distribution into subdirectories. The format is @cluster/repositories/ab/cd/<repository-id>. func DeriveReplicaPath(repositoryID int64) string { + return deriveDiskPath("@cluster/repositories", repositoryID) +} + +// DerivePoolPath derives an object pools's disk storage path from its repository ID. The repository ID +// is hashed with SHA256 and the first four hex digits of the hash are used as the two subdirectories to +// ensure even distribution into subdirectories. The format is @cluster/pools/ab/cd/<repository-id>. The pools +// have a different directory prefix from other repositories so Gitaly can identify them in OptimizeRepository +// and avoid pruning them. +func DerivePoolPath(repositoryID int64) string { + return deriveDiskPath(poolPathPrefix, repositoryID) +} + +func deriveDiskPath(prefixDir string, repositoryID int64) string { hasher := sha256.New() // String representation of the ID is used to make it easier to derive the replica paths with // external tools. The error is ignored as the hash.Hash interface is documented to never return // an error. 
hasher.Write([]byte(strconv.FormatInt(repositoryID, 10))) hash := hasher.Sum(nil) - return fmt.Sprintf("@repositories/%x/%x/%d", hash[0:1], hash[1:2], repositoryID) + return filepath.Join(prefixDir, fmt.Sprintf("%x/%x/%d", hash[0:1], hash[1:2], repositoryID)) } diff --git a/internal/praefect/praefectutil/replica_path_test.go b/internal/praefect/praefectutil/replica_path_test.go index 6084c13ae..572dfc2ed 100644 --- a/internal/praefect/praefectutil/replica_path_test.go +++ b/internal/praefect/praefectutil/replica_path_test.go @@ -4,9 +4,41 @@ import ( "testing" "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest" ) func TestDeriveReplicaPath(t *testing.T) { - require.Equal(t, "@repositories/6b/86/1", DeriveReplicaPath(1)) - require.Equal(t, "@repositories/d4/73/2", DeriveReplicaPath(2)) + require.Equal(t, "@cluster/repositories/6b/86/1", DeriveReplicaPath(1)) + require.Equal(t, "@cluster/repositories/d4/73/2", DeriveReplicaPath(2)) +} + +func TestDerivePoolPath(t *testing.T) { + require.Equal(t, "@cluster/pools/6b/86/1", DerivePoolPath(1)) + require.Equal(t, "@cluster/pools/d4/73/2", DerivePoolPath(2)) +} + +func TestIsPoolPath(t *testing.T) { + for _, tc := range []struct { + desc string + relativePath string + isPoolPath bool + }{ + { + desc: "praefect pool path", + relativePath: DerivePoolPath(1), + isPoolPath: true, + }, + { + desc: "praefect replica path", + relativePath: DeriveReplicaPath(1), + }, + { + desc: "rails pool path", + relativePath: gittest.NewObjectPoolName(t), + }, + } { + t.Run(tc.desc, func(t *testing.T) { + require.Equal(t, tc.isPoolPath, IsPoolPath(tc.relativePath)) + }) + } } |