Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSami Hiltunen <shiltunen@gitlab.com>2022-02-23 18:42:53 +0300
committerSami Hiltunen <shiltunen@gitlab.com>2022-05-09 16:30:53 +0300
commitc7798d51d7f721888ded9930acc6428e735a9ef1 (patch)
tree9881a24fdd618557157a52b8cecd85900c3847fd
parent38faecba969039ea49b75193082c6f92f37cb516 (diff)
Derive identifiable replica paths for object pools
Gitaly is relying on the @pools prefix in OptimizeRepository to avoid pruning object pools. Pruning object pools could lead to data loss if some pool members still need the pruned objects. To ensure Gitaly can identify object pools from the other repositories after their relative paths have been rewritten, this commit adds the DerivePoolPath function that will be used in the router to derive the replica paths for object pools. Doing so, the replica paths have been changed to include a @cluster prefix, which allows for grouping the cluster's repositories and pools under a common directory. Using the existing @pools directory would also be possible as Rails hashes the repository ids whereas Praefect doesn't. However, it's clearer to separate them in different directories and leave ownership to each service minting paths so they can ensure independently there are no conflicts.
-rw-r--r--internal/git/housekeeping/object_pool.go4
-rw-r--r--internal/git/housekeeping/object_pool_test.go10
-rw-r--r--internal/praefect/praefectutil/replica_path.go27
-rw-r--r--internal/praefect/praefectutil/replica_path_test.go36
4 files changed, 72 insertions, 5 deletions
diff --git a/internal/git/housekeeping/object_pool.go b/internal/git/housekeeping/object_pool.go
index 1e4a935c8..4d0a27e4b 100644
--- a/internal/git/housekeeping/object_pool.go
+++ b/internal/git/housekeeping/object_pool.go
@@ -3,6 +3,8 @@ package housekeeping
import (
"regexp"
"strings"
+
+ "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/praefectutil"
)
// railsPoolDirRegexp is used to validate object pool directory structure and name as generated by Rails.
@@ -21,5 +23,5 @@ func IsRailsPoolPath(relativePath string) bool {
// IsPoolPath returns whether the relative path indicates the repository is an object
// pool.
func IsPoolPath(relativePath string) bool {
- return IsRailsPoolPath(relativePath)
+ return IsRailsPoolPath(relativePath) || praefectutil.IsPoolPath(relativePath)
}
diff --git a/internal/git/housekeeping/object_pool_test.go b/internal/git/housekeeping/object_pool_test.go
index 6fdded51f..50c3201cd 100644
--- a/internal/git/housekeeping/object_pool_test.go
+++ b/internal/git/housekeeping/object_pool_test.go
@@ -5,6 +5,7 @@ import (
"github.com/stretchr/testify/require"
"gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/praefect/praefectutil"
)
func TestIsPoolPath(t *testing.T) {
@@ -19,6 +20,15 @@ func TestIsPoolPath(t *testing.T) {
isPoolPath: true,
},
{
+ desc: "praefect pool path",
+ relativePath: praefectutil.DerivePoolPath(1),
+ isPoolPath: true,
+ },
+ {
+ desc: "praefect replica path",
+ relativePath: praefectutil.DeriveReplicaPath(1),
+ },
+ {
desc: "empty string",
},
{
diff --git a/internal/praefect/praefectutil/replica_path.go b/internal/praefect/praefectutil/replica_path.go
index 5f35e1e81..576762d22 100644
--- a/internal/praefect/praefectutil/replica_path.go
+++ b/internal/praefect/praefectutil/replica_path.go
@@ -3,18 +3,41 @@ package praefectutil
import (
"crypto/sha256"
"fmt"
+ "path/filepath"
"strconv"
+ "strings"
)
+// poolPathPrefix is the prefix directory where Praefect places object pools.
+const poolPathPrefix = "@cluster/pools/"
+
+// IsPoolPath returns whether the relative path indicates this is a Praefect generated object pool path.
+func IsPoolPath(relativePath string) bool {
+ return strings.HasPrefix(relativePath, poolPathPrefix)
+}
+
// DeriveReplicaPath derives a repository's disk storage path from its repository ID. The repository ID
// is hashed with SHA256 and the first four hex digits of the hash are used as the two subdirectories to
-// ensure even distribution into subdirectories. The format is @repositories/ab/cd/<repository-id>.
+// ensure even distribution into subdirectories. The format is @cluster/repositories/ab/cd/<repository-id>.
func DeriveReplicaPath(repositoryID int64) string {
+ return deriveDiskPath("@cluster/repositories", repositoryID)
+}
+
+// DerivePoolPath derives an object pool's disk storage path from its repository ID. The repository ID
+// is hashed with SHA256 and the first four hex digits of the hash are used as the two subdirectories to
+// ensure even distribution into subdirectories. The format is @cluster/pools/ab/cd/<repository-id>. The pools
+// have a different directory prefix from other repositories so Gitaly can identify them in OptimizeRepository
+// and avoid pruning them.
+func DerivePoolPath(repositoryID int64) string {
+ return deriveDiskPath(poolPathPrefix, repositoryID)
+}
+
+func deriveDiskPath(prefixDir string, repositoryID int64) string {
hasher := sha256.New()
// String representation of the ID is used to make it easier to derive the replica paths with
// external tools. The error is ignored as the hash.Hash interface is documented to never return
// an error.
hasher.Write([]byte(strconv.FormatInt(repositoryID, 10)))
hash := hasher.Sum(nil)
- return fmt.Sprintf("@repositories/%x/%x/%d", hash[0:1], hash[1:2], repositoryID)
+ return filepath.Join(prefixDir, fmt.Sprintf("%x/%x/%d", hash[0:1], hash[1:2], repositoryID))
}
diff --git a/internal/praefect/praefectutil/replica_path_test.go b/internal/praefect/praefectutil/replica_path_test.go
index 6084c13ae..572dfc2ed 100644
--- a/internal/praefect/praefectutil/replica_path_test.go
+++ b/internal/praefect/praefectutil/replica_path_test.go
@@ -4,9 +4,41 @@ import (
"testing"
"github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v14/internal/git/gittest"
)
func TestDeriveReplicaPath(t *testing.T) {
- require.Equal(t, "@repositories/6b/86/1", DeriveReplicaPath(1))
- require.Equal(t, "@repositories/d4/73/2", DeriveReplicaPath(2))
+ require.Equal(t, "@cluster/repositories/6b/86/1", DeriveReplicaPath(1))
+ require.Equal(t, "@cluster/repositories/d4/73/2", DeriveReplicaPath(2))
+}
+
+func TestDerivePoolPath(t *testing.T) {
+ require.Equal(t, "@cluster/pools/6b/86/1", DerivePoolPath(1))
+ require.Equal(t, "@cluster/pools/d4/73/2", DerivePoolPath(2))
+}
+
+func TestIsPoolPath(t *testing.T) {
+ for _, tc := range []struct {
+ desc string
+ relativePath string
+ isPoolPath bool
+ }{
+ {
+ desc: "praefect pool path",
+ relativePath: DerivePoolPath(1),
+ isPoolPath: true,
+ },
+ {
+ desc: "praefect replica path",
+ relativePath: DeriveReplicaPath(1),
+ },
+ {
+ desc: "rails pool path",
+ relativePath: gittest.NewObjectPoolName(t),
+ },
+ } {
+ t.Run(tc.desc, func(t *testing.T) {
+ require.Equal(t, tc.isPoolPath, IsPoolPath(tc.relativePath))
+ })
+ }
}