diff options
author | Paul Okstad <pokstad@gitlab.com> | 2020-09-08 18:01:53 +0300 |
---|---|---|
committer | Paul Okstad <pokstad@gitlab.com> | 2020-09-08 18:01:53 +0300 |
commit | 84ad7e51daefeba16c3e120e4e4cb42d08384b83 (patch) | |
tree | 9f334e3df88e2f0c5422594360e424f3d3cd0836 | |
parent | f51896682ed1b30293c8fb3e54beda4c4cde5aa5 (diff) | |
parent | 19750bfdb5dfef3e8dd8bd1ce76c6cbac7f809cd (diff) |
Merge branch 'ps-read-dist-sql-boost' into 'master'
Improved SQL to get up to date storages for repository.
See merge request gitlab-org/gitaly!2514
4 files changed, 109 insertions, 20 deletions
diff --git a/changelogs/unreleased/ps-read-dist-sql-boost.yml b/changelogs/unreleased/ps-read-dist-sql-boost.yml new file mode 100644 index 000000000..231ef44cd --- /dev/null +++ b/changelogs/unreleased/ps-read-dist-sql-boost.yml @@ -0,0 +1,5 @@ +--- +title: Improved SQL to get up to date storages for repository. +merge_request: 2514 +author: +type: performance diff --git a/internal/praefect/datastore/repository_memory.go b/internal/praefect/datastore/repository_memory.go index 86925935d..0d175fb8f 100644 --- a/internal/praefect/datastore/repository_memory.go +++ b/internal/praefect/datastore/repository_memory.go @@ -19,9 +19,9 @@ type MemoryRepositoryStore struct { type storages map[string][]string func (s storages) secondaries(virtualStorage, primary string) ([]string, error) { - storages, ok := s[virtualStorage] - if !ok { - return nil, fmt.Errorf("unknown virtual storage: %q", virtualStorage) + storages, err := s.storages(virtualStorage) + if err != nil { + return nil, err } primaryFound := false @@ -42,6 +42,15 @@ func (s storages) secondaries(virtualStorage, primary string) ([]string, error) return secondaries, nil } +func (s storages) storages(virtualStorage string) ([]string, error) { + storages, ok := s[virtualStorage] + if !ok { + return nil, fmt.Errorf("unknown virtual storage: %q", virtualStorage) + } + + return storages, nil +} + // virtualStorageStates represents the virtual storage's view of what state the repositories should be in. // It structured as virtual-storage->relative_path->generation. type virtualStorageState map[string]map[string]int diff --git a/internal/praefect/datastore/repository_postgres.go b/internal/praefect/datastore/repository_postgres.go index a5df50920..cfceda375 100644 --- a/internal/praefect/datastore/repository_postgres.go +++ b/internal/praefect/datastore/repository_postgres.go @@ -148,11 +148,11 @@ WITH next_generation AS ( UPDATE SET generation = repositories.generation + 1 RETURNING virtual_storage, relative_path, generation ), base_generation AS ( - SELECT virtual_storage, relative_path, generation + SELECT virtual_storage, relative_path, generation FROM storage_repositories WHERE virtual_storage = $1 AND relative_path = $2 - AND storage = $3 + AND storage = $3 FOR UPDATE ), eligible_secondaries AS ( SELECT storage @@ -330,31 +330,31 @@ AND storage = $3 func (rs *PostgresRepositoryStore) GetConsistentSecondaries(ctx context.Context, virtualStorage, relativePath, primary string) (map[string]struct{}, error) { const q = ` -WITH expected AS ( - SELECT virtual_storage, relative_path, generation - FROM storage_repositories - WHERE virtual_storage = $1 - AND relative_path = $2 - AND storage = $3 -) + WITH storage_gen AS ( + SELECT storage, generation + FROM storage_repositories + WHERE virtual_storage = $1 + AND relative_path = $2 + AND storage = ANY($4::text[]) + ) -SELECT storage -FROM storage_repositories -NATURAL JOIN expected -WHERE storage = ANY($4::text[]) -` - secondaries, err := rs.storages.secondaries(virtualStorage, primary) + SELECT DISTINCT sec.storage + FROM (SELECT generation FROM storage_gen WHERE storage = $3) AS prim + JOIN storage_gen AS sec ON sec.storage != $3 AND prim.generation = sec.generation` + + storages, err := rs.storages.storages(virtualStorage) if err != nil { return nil, err } + storages = append(storages, primary) - rows, err := rs.db.QueryContext(ctx, q, virtualStorage, relativePath, primary, pq.StringArray(secondaries)) + rows, err := rs.db.QueryContext(ctx, q, virtualStorage, relativePath, primary, pq.StringArray(storages)) if err != nil { return nil, err } defer rows.Close() - consistentSecondaries := make(map[string]struct{}, len(secondaries)) + consistentSecondaries := make(map[string]struct{}, len(storages)-1) for rows.Next() { var storage string if err := rows.Scan(&storage); err != nil { diff --git a/internal/praefect/datastore/repository_postgres_bm_test.go b/internal/praefect/datastore/repository_postgres_bm_test.go new file mode 100644 index 000000000..7c8647609 --- /dev/null +++ b/internal/praefect/datastore/repository_postgres_bm_test.go @@ -0,0 +1,75 @@ +// +build postgres + +package datastore + +import ( + "strconv" + "testing" + + "github.com/stretchr/testify/require" + "gitlab.com/gitlab-org/gitaly/internal/testhelper" +) + +// The test setup takes a lot of time, so it is better to run each sub-benchmark separately with limit on number of repeats. +func BenchmarkPostgresRepositoryStore_GetConsistentSecondaries(b *testing.B) { + // go test -tags=postgres -test.bench=BenchmarkPostgresRepositoryStore_GetConsistentSecondaries/extra-small -benchtime=5000x gitlab.com/gitlab-org/gitaly/internal/praefect/datastore + b.Run("extra-small", func(b *testing.B) { + benchmarkGetConsistentSecondaries(b, 3, 1000) + }) + + // go test -tags=postgres -test.bench=BenchmarkPostgresRepositoryStore_GetConsistentSecondaries/small -benchtime=1000x gitlab.com/gitlab-org/gitaly/internal/praefect/datastore + b.Run("small", func(b *testing.B) { + benchmarkGetConsistentSecondaries(b, 3, 10_000) + }) + + // go test -tags=postgres -test.bench=BenchmarkPostgresRepositoryStore_GetConsistentSecondaries/medium -benchtime=50x gitlab.com/gitlab-org/gitaly/internal/praefect/datastore + b.Run("medium", func(b *testing.B) { + benchmarkGetConsistentSecondaries(b, 3, 100_000) + }) + + // go test -tags=postgres -test.bench=BenchmarkPostgresRepositoryStore_GetConsistentSecondaries/large -benchtime=10x gitlab.com/gitlab-org/gitaly/internal/praefect/datastore + b.Run("large", func(b *testing.B) { + benchmarkGetConsistentSecondaries(b, 3, 1_000_000) + }) + + // go test -tags=postgres -test.bench=BenchmarkPostgresRepositoryStore_GetConsistentSecondaries/huge -benchtime=1x gitlab.com/gitlab-org/gitaly/internal/praefect/datastore + b.Run("huge", func(b *testing.B) { + benchmarkGetConsistentSecondaries(b, 6, 1_000_000) + }) +} + +func benchmarkGetConsistentSecondaries(b *testing.B, nstorages, nrepositories int) { + db := getDB(b) + + ctx, cancel := testhelper.Context() + defer cancel() + + for n := 0; n < b.N; n++ { + b.StopTimer() + + db.Truncate(b, "storage_repositories") + + var storages []string + for i := 0; i < nstorages; i++ { + storages = append(storages, "gitaly-"+strconv.Itoa(i)) + } + + repoStore := NewPostgresRepositoryStore(db, map[string][]string{"vs": storages}) + + _, err := db.DB.ExecContext( + ctx, + `INSERT INTO storage_repositories(virtual_storage, relative_path, storage, generation) + SELECT 'vs', '/path/repo/' || R.I, 'gitaly-' || S.I, 1 + FROM GENERATE_SERIES(1, $1) R(I) + CROSS JOIN GENERATE_SERIES(1, $2) S(I)`, + nrepositories, nstorages, + ) + require.NoError(b, err) + + b.StartTimer() + _, err = repoStore.GetConsistentSecondaries(ctx, "vs", "/path/repo/"+strconv.Itoa(nrepositories/2), "s1") + b.StopTimer() + + require.NoError(b, err) + } +} |