Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Paul Okstad <pokstad@gitlab.com> 2020-09-08 18:01:53 +0300
committer Paul Okstad <pokstad@gitlab.com> 2020-09-08 18:01:53 +0300
commit 84ad7e51daefeba16c3e120e4e4cb42d08384b83 (patch)
tree 9f334e3df88e2f0c5422594360e424f3d3cd0836
parent f51896682ed1b30293c8fb3e54beda4c4cde5aa5 (diff)
parent 19750bfdb5dfef3e8dd8bd1ce76c6cbac7f809cd (diff)
Merge branch 'ps-read-dist-sql-boost' into 'master'
Improved SQL to get up to date storages for repository. See merge request gitlab-org/gitaly!2514
-rw-r--r-- changelogs/unreleased/ps-read-dist-sql-boost.yml 5
-rw-r--r-- internal/praefect/datastore/repository_memory.go 15
-rw-r--r-- internal/praefect/datastore/repository_postgres.go 34
-rw-r--r-- internal/praefect/datastore/repository_postgres_bm_test.go 75
4 files changed, 109 insertions, 20 deletions
diff --git a/changelogs/unreleased/ps-read-dist-sql-boost.yml b/changelogs/unreleased/ps-read-dist-sql-boost.yml
new file mode 100644
index 000000000..231ef44cd
--- /dev/null
+++ b/changelogs/unreleased/ps-read-dist-sql-boost.yml
@@ -0,0 +1,5 @@
+---
+title: Improved SQL to get up to date storages for repository.
+merge_request: 2514
+author:
+type: performance
diff --git a/internal/praefect/datastore/repository_memory.go b/internal/praefect/datastore/repository_memory.go
index 86925935d..0d175fb8f 100644
--- a/internal/praefect/datastore/repository_memory.go
+++ b/internal/praefect/datastore/repository_memory.go
@@ -19,9 +19,9 @@ type MemoryRepositoryStore struct {
// storages maps a virtual storage name to the names of the storages configured for it.
type storages map[string][]string
func (s storages) secondaries(virtualStorage, primary string) ([]string, error) {
- storages, ok := s[virtualStorage]
- if !ok {
- return nil, fmt.Errorf("unknown virtual storage: %q", virtualStorage)
+ storages, err := s.storages(virtualStorage)
+ if err != nil {
+ return nil, err
}
primaryFound := false
@@ -42,6 +42,15 @@ func (s storages) secondaries(virtualStorage, primary string) ([]string, error)
return secondaries, nil
}
+// storages looks up the storages registered under virtualStorage.
+// It returns an error when virtualStorage is not a key of s.
+func (s storages) storages(virtualStorage string) ([]string, error) {
+	if configured, found := s[virtualStorage]; found {
+		return configured, nil
+	}
+
+	return nil, fmt.Errorf("unknown virtual storage: %q", virtualStorage)
+}
+
// virtualStorageState represents the virtual storage's view of what state the repositories should be in.
// It is structured as virtual-storage->relative_path->generation.
type virtualStorageState map[string]map[string]int
diff --git a/internal/praefect/datastore/repository_postgres.go b/internal/praefect/datastore/repository_postgres.go
index a5df50920..cfceda375 100644
--- a/internal/praefect/datastore/repository_postgres.go
+++ b/internal/praefect/datastore/repository_postgres.go
@@ -148,11 +148,11 @@ WITH next_generation AS (
UPDATE SET generation = repositories.generation + 1
RETURNING virtual_storage, relative_path, generation
), base_generation AS (
- SELECT virtual_storage, relative_path, generation
+ SELECT virtual_storage, relative_path, generation
FROM storage_repositories
WHERE virtual_storage = $1
AND relative_path = $2
- AND storage = $3
+ AND storage = $3
FOR UPDATE
), eligible_secondaries AS (
SELECT storage
@@ -330,31 +330,31 @@ AND storage = $3
func (rs *PostgresRepositoryStore) GetConsistentSecondaries(ctx context.Context, virtualStorage, relativePath, primary string) (map[string]struct{}, error) {
const q = `
-WITH expected AS (
- SELECT virtual_storage, relative_path, generation
- FROM storage_repositories
- WHERE virtual_storage = $1
- AND relative_path = $2
- AND storage = $3
-)
+ WITH storage_gen AS (
+ SELECT storage, generation
+ FROM storage_repositories
+ WHERE virtual_storage = $1
+ AND relative_path = $2
+ AND storage = ANY($4::text[])
+ )
-SELECT storage
-FROM storage_repositories
-NATURAL JOIN expected
-WHERE storage = ANY($4::text[])
-`
- secondaries, err := rs.storages.secondaries(virtualStorage, primary)
+ SELECT DISTINCT sec.storage
+ FROM (SELECT generation FROM storage_gen WHERE storage = $3) AS prim
+ JOIN storage_gen AS sec ON sec.storage != $3 AND prim.generation = sec.generation`
+
+ storages, err := rs.storages.storages(virtualStorage)
if err != nil {
return nil, err
}
+ storages = append(storages, primary)
- rows, err := rs.db.QueryContext(ctx, q, virtualStorage, relativePath, primary, pq.StringArray(secondaries))
+ rows, err := rs.db.QueryContext(ctx, q, virtualStorage, relativePath, primary, pq.StringArray(storages))
if err != nil {
return nil, err
}
defer rows.Close()
- consistentSecondaries := make(map[string]struct{}, len(secondaries))
+ consistentSecondaries := make(map[string]struct{}, len(storages)-1)
for rows.Next() {
var storage string
if err := rows.Scan(&storage); err != nil {
diff --git a/internal/praefect/datastore/repository_postgres_bm_test.go b/internal/praefect/datastore/repository_postgres_bm_test.go
new file mode 100644
index 000000000..7c8647609
--- /dev/null
+++ b/internal/praefect/datastore/repository_postgres_bm_test.go
@@ -0,0 +1,75 @@
+// +build postgres
+
+package datastore
+
+import (
+ "strconv"
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/internal/testhelper"
+)
+
+// BenchmarkPostgresRepositoryStore_GetConsistentSecondaries runs the lookup against data sets of growing size.
+// Preparing the data takes a lot of time, so it is better to run a single sub-benchmark with a limit on the
+// number of repeats, for example:
+//
+//	go test -tags=postgres -test.bench=BenchmarkPostgresRepositoryStore_GetConsistentSecondaries/small -benchtime=1000x gitlab.com/gitlab-org/gitaly/internal/praefect/datastore
+func BenchmarkPostgresRepositoryStore_GetConsistentSecondaries(b *testing.B) {
+	sizes := []struct {
+		name          string
+		nstorages     int
+		nrepositories int
+	}{
+		{name: "extra-small", nstorages: 3, nrepositories: 1000}, // suggested: -benchtime=5000x
+		{name: "small", nstorages: 3, nrepositories: 10_000},     // suggested: -benchtime=1000x
+		{name: "medium", nstorages: 3, nrepositories: 100_000},   // suggested: -benchtime=50x
+		{name: "large", nstorages: 3, nrepositories: 1_000_000},  // suggested: -benchtime=10x
+		{name: "huge", nstorages: 6, nrepositories: 1_000_000},   // suggested: -benchtime=1x
+	}
+
+	for _, size := range sizes {
+		size := size // keep a per-iteration copy for the closure
+		b.Run(size.name, func(b *testing.B) {
+			benchmarkGetConsistentSecondaries(b, size.nstorages, size.nrepositories)
+		})
+	}
+}
+
+func benchmarkGetConsistentSecondaries(b *testing.B, nstorages, nrepositories int) {
+ db := getDB(b)
+
+ ctx, cancel := testhelper.Context()
+ defer cancel()
+
+ for n := 0; n < b.N; n++ {
+ b.StopTimer()
+
+ db.Truncate(b, "storage_repositories")
+
+ var storages []string
+ for i := 0; i < nstorages; i++ {
+ storages = append(storages, "gitaly-"+strconv.Itoa(i))
+ }
+
+ repoStore := NewPostgresRepositoryStore(db, map[string][]string{"vs": storages})
+
+ _, err := db.DB.ExecContext(
+ ctx,
+ `INSERT INTO storage_repositories(virtual_storage, relative_path, storage, generation)
+ SELECT 'vs', '/path/repo/' || R.I, 'gitaly-' || S.I, 1
+ FROM GENERATE_SERIES(1, $1) R(I)
+ CROSS JOIN GENERATE_SERIES(1, $2) S(I)`,
+ nrepositories, nstorages,
+ )
+ require.NoError(b, err)
+
+ b.StartTimer()
+ _, err = repoStore.GetConsistentSecondaries(ctx, "vs", "/path/repo/"+strconv.Itoa(nrepositories/2), "s1")
+ b.StopTimer()
+
+ require.NoError(b, err)
+ }
+}