Welcome to mirror list, hosted at ThFree Co, Russian Federation.

size_test.go « repository « service « gitaly « internal - gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: d955d33ad4eb16d61a4620477423451febf53b7d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
package repository

import (
	"context"
	"crypto/rand"
	"os"
	"path/filepath"
	"testing"

	"github.com/stretchr/testify/require"
	"gitlab.com/gitlab-org/gitaly/v16/internal/git"
	"gitlab.com/gitlab-org/gitaly/v16/internal/git/gittest"
	"gitlab.com/gitlab-org/gitaly/v16/internal/git/quarantine"
	"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/config"
	"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage"
	"gitlab.com/gitlab-org/gitaly/v16/internal/gitaly/storage/storagemgr"
	"gitlab.com/gitlab-org/gitaly/v16/internal/helper/perm"
	"gitlab.com/gitlab-org/gitaly/v16/internal/structerr"
	"gitlab.com/gitlab-org/gitaly/v16/internal/testhelper"
	"gitlab.com/gitlab-org/gitaly/v16/proto/go/gitalypb"
	"google.golang.org/grpc/metadata"
	"google.golang.org/protobuf/proto"
)

// TestRepositorySize_poolMember verifies that the size reported by the RepositorySize RPC shrinks once the
// repository has been linked to an object pool and repacked.
func TestRepositorySize_poolMember(t *testing.T) {
	testhelper.SkipWithWAL(t, `
Object pools are not yet supported with transaction management.`)

	t.Parallel()

	ctx := testhelper.Context(t)
	cfg, client := setupRepositoryService(t)
	repo, repoPath := gittest.CreateRepository(t, ctx, cfg)

	// repack runs the same full repack of the repository that is needed both before and after linking.
	repack := func() {
		gittest.Exec(t, cfg, "-C", repoPath, "repack", "-Adl")
	}

	// The commit carries a large, reachable blob that is supposed to end up in the object pool. Only data that
	// is part of a packfile gets pulled into the pool, which is why the repository is repacked before it is
	// linked to the pool repository.
	largeBlob := gittest.TreeEntry{Mode: "100644", Path: "16kbblob", Content: string(uncompressibleData(16 * 1000))}
	gittest.WriteCommit(t, cfg, repoPath,
		gittest.WithBranch(git.DefaultBranch),
		gittest.WithTreeEntries(largeBlob),
	)
	repack()
	requireRepositorySize(t, ctx, client, repo, 17)

	// Create the object pool and link the repository to it. The subsequent repack should deduplicate all
	// objects against the pool and thereby reduce the size of the repository.
	gittest.CreateObjectPool(t, ctx, cfg, repo, gittest.CreateObjectPoolConfig{
		LinkRepositoryToObjectPool: true,
	})
	repack()

	// With the blob deduplicated, the repository should be basically empty again.
	requireRepositorySize(t, ctx, client, repo, 0)
}

// TestRepositorySize_normalRepository verifies that the RepositorySize RPC reports a growing size as objects,
// other repository files and garbage get written into a repository.
func TestRepositorySize_normalRepository(t *testing.T) {
	t.Parallel()

	const kiB = 1024

	ctx := testhelper.Context(t)
	cfg, client := setupRepositoryService(t)
	repo, repoPath := gittest.CreateRepository(t, ctx, cfg)

	// A freshly created repository is expected to report a size of zero. Strictly speaking it does contain
	// some data structures like the gitconfig, but those stay below 1kB.
	requireRepositorySize(t, ctx, client, repo, 0)

	// A largish blob is expected to make the repository grow.
	gittest.WriteBlob(t, cfg, repoPath, uncompressibleData(16*kiB))
	requireRepositorySize(t, ctx, client, repo, 16)

	// Changes to any other file are expected to be reflected in the size, too.
	packedRefsPath := filepath.Join(repoPath, "packed-refs")
	require.NoError(t, os.WriteFile(packedRefsPath, uncompressibleData(7*kiB), perm.PrivateFile))
	requireRepositorySize(t, ctx, client, repo, 23)

	// The same goes for garbage files.
	garbagePath := filepath.Join(repoPath, "garbage")
	require.NoError(t, os.WriteFile(garbagePath, uncompressibleData(5*kiB), perm.PrivateFile))
	requireRepositorySize(t, ctx, client, repo, 28)
}

// TestRepositorySize_failure verifies the errors returned by the RepositorySize RPC for invalid requests.
func TestRepositorySize_failure(t *testing.T) {
	t.Parallel()

	ctx := testhelper.Context(t)
	_, client := setupRepositoryService(t)

	type testCase struct {
		desc        string
		repo        *gitalypb.Repository
		expectedErr error
	}

	testCases := []testCase{
		{
			desc:        "no repository provided",
			repo:        nil,
			expectedErr: structerr.NewInvalidArgument("%w", storage.ErrRepositoryNotSet),
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			request := &gitalypb.RepositorySizeRequest{Repository: tc.repo}

			_, err := client.RepositorySize(ctx, request)
			testhelper.RequireGrpcError(t, tc.expectedErr, err)
		})
	}
}

// BenchmarkRepositorySize measures the RepositorySize RPC against an empty repository and against a repository
// seeded from "benchmark.git".
func BenchmarkRepositorySize(b *testing.B) {
	ctx := testhelper.Context(b)
	cfg, client := setupRepositoryService(b)

	for _, tc := range []struct {
		desc  string
		setup func(b *testing.B) *gitalypb.Repository
	}{
		{
			desc: "empty repository",
			setup: func(b *testing.B) *gitalypb.Repository {
				repo, _ := gittest.CreateRepository(b, ctx, cfg)
				return repo
			},
		},
		{
			desc: "benchmark repository",
			setup: func(b *testing.B) *gitalypb.Repository {
				repo, _ := gittest.CreateRepository(b, ctx, cfg, gittest.CreateRepositoryConfig{
					Seed: "benchmark.git",
				})
				return repo
			},
		},
	} {
		b.Run(tc.desc, func(b *testing.B) {
			repo := tc.setup(b)

			// The benchmark timer is already running when this function is entered, so calling StartTimer
			// here would be a no-op and the repository setup above would be counted as part of the
			// measurement. Reset the timer instead so that only the RPC calls are timed.
			b.ResetTimer()

			for i := 0; i < b.N; i++ {
				_, err := client.RepositorySize(ctx, &gitalypb.RepositorySizeRequest{
					Repository: repo,
				})
				require.NoError(b, err)
			}
		})
	}
}

// TestGetObjectDirectorySize_successful verifies that the GetObjectDirectorySize RPC reports a size of zero for the
// object directory of a freshly created repository and a larger size once a blob has been written into it.
func TestGetObjectDirectorySize_successful(t *testing.T) {
	t.Parallel()

	ctx := testhelper.Context(t)
	cfg, client := setupRepositoryService(t)

	repo, repoPath := gittest.CreateRepository(t, ctx, cfg)
	repo.GitObjectDirectory = "objects/"

	// Rails sends the repository's relative path from the access checks as provided by Gitaly. With
	// transactions enabled this is the snapshot's relative path. The metadata is included here because the
	// test performs requests with a quarantine as if they were coming from access checks. The RPC is also a
	// special case as it only works with a quarantine set.
	//
	// Gitaly hands the snapshot's relative path to Rails from `pre-receive`, and Rails passes it back to Gitaly
	// when it performs requests in the access checks. By then Praefect would already have rewritten the
	// repository, so the rewritten relative path has to be used here as well.
	//
	// Related issue: https://gitlab.com/gitlab-org/gitaly/-/issues/5710
	snapshotRelativePath := gittest.RewrittenRepository(t, ctx, cfg, repo).RelativePath
	ctx = metadata.AppendToOutgoingContext(ctx, storagemgr.MetadataKeySnapshotRelativePath, snapshotRelativePath)

	// The object directory starts out empty and should thus have a size of zero.
	requireObjectDirectorySize(t, ctx, client, repo, 0)

	// Once an object has been written into the repository, the size should have increased accordingly.
	gittest.WriteBlob(t, cfg, repoPath, uncompressibleData(16*1024))
	requireObjectDirectorySize(t, ctx, client, repo, 16)
}

// TestGetObjectDirectorySize_quarantine exercises the GetObjectDirectorySize RPC with quarantined repositories: once
// with a quarantine that was created for the requested repository, and once with a quarantine object directory that
// was swapped in from a different repository.
func TestGetObjectDirectorySize_quarantine(t *testing.T) {
	t.Parallel()

	ctx := testhelper.Context(t)
	cfg, client := setupRepositoryService(t)
	locator := config.NewLocator(cfg)
	logger := testhelper.NewLogger(t)

	t.Run("quarantined repo", func(t *testing.T) {
		repo, repoPath := gittest.CreateRepository(t, ctx, cfg)
		repo.GitObjectDirectory = "objects/"
		gittest.WriteBlob(t, cfg, repoPath, uncompressibleData(16*1024))

		// Rails sends the repository's relative path from the access checks as provided by Gitaly. If transactions are enabled,
		// this is the snapshot's relative path. Include the metadata in the test as well as we're testing requests with quarantine
		// as if they were coming from access checks. The RPC is also a special case as it only works with a quarantine set.
		ctx := metadata.AppendToOutgoingContext(ctx, storagemgr.MetadataKeySnapshotRelativePath,
			// Gitaly sends the snapshot's relative path to Rails from `pre-receive` and Rails
			// sends it back to Gitaly when it performs requests in the access checks. The repository
			// would have already been rewritten by Praefect, so we have to adjust for that as well.
			gittest.RewrittenRepository(t, ctx, cfg, repo).RelativePath,
		)

		requireObjectDirectorySize(t, ctx, client, repo, 16)

		// The variable is deliberately not called `quarantine` so that it does not shadow the imported
		// quarantine package for the remainder of this subtest.
		repoQuarantine, err := quarantine.New(ctx, gittest.RewrittenRepository(t, ctx, cfg, repo), logger, locator)
		require.NoError(t, err)

		// quarantine.New in Gitaly would receive an already rewritten repository. Gitaly would then calculate
		// the quarantine directories based on the rewritten relative path. That quarantine would then be looped
		// through Rails, which would then send a request with the quarantine object directories set based on the
		// rewritten relative path but with the original relative path of the repository. Since we're using the production
		// helpers here, we need to manually substitute the rewritten relative path with the original one when sending
		// it back through the API.
		quarantinedRepo := repoQuarantine.QuarantinedRepo()
		quarantinedRepo.RelativePath = repo.RelativePath

		// The size of the quarantine directory should be zero.
		requireObjectDirectorySize(t, ctx, client, quarantinedRepo, 0)
	})

	t.Run("quarantined repo with different relative path", func(t *testing.T) {
		repo1, _ := gittest.CreateRepository(t, ctx, cfg)
		quarantine1, err := quarantine.New(ctx, gittest.RewrittenRepository(t, ctx, cfg, repo1), logger, locator)
		require.NoError(t, err)

		repo2, _ := gittest.CreateRepository(t, ctx, cfg)
		quarantine2, err := quarantine.New(ctx, gittest.RewrittenRepository(t, ctx, cfg, repo2), logger, locator)
		require.NoError(t, err)

		// We swap out the object directories of both quarantines. So while both are
		// valid, we still expect that this RPC call fails because we detect that the
		// swapped-in quarantine directory does not belong to our repository.
		repo := proto.Clone(quarantine1.QuarantinedRepo()).(*gitalypb.Repository)
		repo.GitObjectDirectory = quarantine2.QuarantinedRepo().GetGitObjectDirectory()
		// quarantine.New in Gitaly would receive an already rewritten repository. Gitaly would then calculate
		// the quarantine directories based on the rewritten relative path. That quarantine would then be looped
		// through Rails, which would then send a request with the quarantine object directories set based on the
		// rewritten relative path but with the original relative path of the repository. Since we're using the production
		// helpers here, we need to manually substitute the rewritten relative path with the original one when sending
		// it back through the API.
		repo.RelativePath = repo1.RelativePath

		// Rails sends the repository's relative path from the access checks as provided by Gitaly. If transactions are enabled,
		// this is the snapshot's relative path. Include the metadata in the test as well as we're testing requests with quarantine
		// as if they were coming from access checks. The RPC is also a special case as it only works with a quarantine set.
		ctx := metadata.AppendToOutgoingContext(ctx, storagemgr.MetadataKeySnapshotRelativePath,
			// Gitaly sends the snapshot's relative path to Rails from `pre-receive` and Rails
			// sends it back to Gitaly when it performs requests in the access checks. The repository
			// would have already been rewritten by Praefect, so we have to adjust for that as well.
			gittest.RewrittenRepository(t, ctx, cfg, repo).RelativePath,
		)

		response, err := client.GetObjectDirectorySize(ctx, &gitalypb.GetObjectDirectorySizeRequest{
			Repository: repo,
		})
		// require.Error only asserts that err is non-nil: its trailing arguments are the message printed when
		// the assertion fails, not an expectation on the error text. The error is presumably along the lines
		// of "rpc error: code = InvalidArgument desc = GetObjectDirectoryPath: relative path escapes root
		// directory".
		// NOTE(review): confirm the exact code and message before tightening this into a check on the error
		// itself.
		require.Error(t, err, "expected the swapped-in quarantine object directory to be rejected")
		require.Nil(t, response)
	})
}

// requireRepositorySize calls the RepositorySize RPC for the given repository and fails the test if the call
// errors or if the reported size differs from expectedSize.
func requireRepositorySize(tb testing.TB, ctx context.Context, client gitalypb.RepositoryServiceClient, repo *gitalypb.Repository, expectedSize int64) {
	tb.Helper()

	request := &gitalypb.RepositorySizeRequest{Repository: repo}

	resp, err := client.RepositorySize(ctx, request)
	require.NoError(tb, err)

	actualSize := resp.GetSize()
	require.Equal(tb, expectedSize, actualSize)
}

// requireObjectDirectorySize calls the GetObjectDirectorySize RPC for the given repository and fails the test if
// the call errors or if the reported size differs from expectedSize.
func requireObjectDirectorySize(tb testing.TB, ctx context.Context, client gitalypb.RepositoryServiceClient, repo *gitalypb.Repository, expectedSize int64) {
	tb.Helper()

	request := &gitalypb.GetObjectDirectorySizeRequest{Repository: repo}

	resp, err := client.GetObjectDirectorySize(ctx, request)
	require.NoError(tb, err)

	actualSize := resp.GetSize()
	require.Equal(tb, expectedSize, actualSize)
}

// uncompressibleData returns the requested number of random bytes, which will not be easily compressible by Git.
// This is required because well-compressible objects would not lead to a repository size increase due to the zlib
// compression used for Git objects.
//
// It panics if reading random data fails. Silently continuing would hand back a zero-filled and thus highly
// compressible buffer, which would surface as a confusing size mismatch in the calling test instead of pointing at
// the actual problem.
func uncompressibleData(bytes int) []byte {
	data := make([]byte, bytes)
	if _, err := rand.Read(data); err != nil {
		panic(err)
	}
	return data
}