Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sami Hiltunen <shiltunen@gitlab.com> 2022-10-14 15:29:16 +0300
committer Sami Hiltunen <shiltunen@gitlab.com> 2023-01-16 18:36:55 +0300
commit 8a29ef51192cc57770dcd7966d9af8173eb7ff4d (patch)
tree 88c53ec66ec7fc7bbe9d1ea9db8d46655976fe66 /internal/gitaly/transaction_manager_hook_test.go
parent c096774eb7c55bbbf0630ba5acf8fa845c48118d (diff)
Implement basic transaction processing with write-ahead logging
Gitaly is currently lacking in transaction control. Each write coming in launches its own Git commands which operate on the repository concurrently. This makes transaction management difficult. It's difficult to optimize the writes as they are being done from multiple locations without synchronization. The concurrent writers may step on each other's toes and surface lock conflicts to the users. Recovering from crashes is also difficult as Gitaly is not logging the modifications it is about to perform and thus loses the transaction state on crashes. There's also no clear notion of ordering, which further complicates replication-related matters. It's not easy to say which writes a repository is missing and which it is not. We've recently designed a new replication architecture for Gitaly. The new architecture relies on a replicated write-ahead log. The write-ahead log defines a clear order of writes and aids in crash recovery. A single writer will be operating on a repository, which makes further optimizations such as write batching easier. This commit implements the first steps towards the new architecture by implementing the TransactionManager. The TransactionManager will be responsible for transaction management of a single repository. It will be the single goroutine that writes into a repository and is invoked by all other locations in the code that wish to write. It will also be responsible for synchronizing reads by ensuring they see the changes they are supposed to see. The TransactionManager implementation introduced here does not contain the full implementation but aims to provide a basis for future iteration. For now, it implements basic write processing with a write-ahead log. It processes writes one-by-one by verifying references, logging the changes and finally applying the changes to the repository. It also supports recovering from the write-ahead log should the log processing be interrupted.
The reference verification behavior can be tuned on a per-transaction level to match the behavior of Git's `--atomic` or `--force` push flags. The TransactionManager stores the state related to the write-ahead log in BadgerDB, which is a key-value store that will be local to each Gitaly storage. The values are marshaled protocol buffer messages. This iteration is mostly concerned with the reference updating logic. Pack files are not handled yet, and neither are the internal references they need. Symbolic references, namely for updating the default branch, are not handled yet either. The writes are processed one by one and are acknowledged after applying them to the repository. Given that, there's no separate logic needed for read synchronization yet either. The goal here is to set the initial interface and log processing, and to lock down the reference updating logic with tests so we can later on safely start iterating on the internals of the TransactionManager and start adding support for the missing functionality.
Diffstat (limited to 'internal/gitaly/transaction_manager_hook_test.go')
-rw-r--r--internal/gitaly/transaction_manager_hook_test.go137
1 files changed, 137 insertions, 0 deletions
diff --git a/internal/gitaly/transaction_manager_hook_test.go b/internal/gitaly/transaction_manager_hook_test.go
new file mode 100644
index 000000000..618dbffb9
--- /dev/null
+++ b/internal/gitaly/transaction_manager_hook_test.go
@@ -0,0 +1,137 @@
+package gitaly
+
+import (
+ "context"
+ "regexp"
+ "runtime"
+ "strings"
+ "testing"
+
+ "github.com/dgraph-io/badger/v3"
+ "github.com/stretchr/testify/require"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git"
+ "gitlab.com/gitlab-org/gitaly/v15/internal/git/localrepo"
+)
+
+// hookFunc is a callback that is executed at a specific hook point. It receives a hookContext whose
+// controls allow the callback to influence the execution of the test.
+type hookFunc func(hookContext)
+
+// hookContext are the control toggles available in a hook.
+type hookContext struct {
+ // stopManager stops the TransactionManager when called.
+ stopManager func()
+}
+
+// hooks are functions that get invoked at specific points of the TransactionManager Run method. They allow
+// for hooking into the Run method at specific points in order to do assertions that would otherwise
+// not be possible.
+type hooks struct {
+ // beforeReadLogEntry is invoked before a log entry is read from the database.
+ beforeReadLogEntry hookFunc
+ // beforeResolveRevision is invoked before ResolveRevision is invoked.
+ beforeResolveRevision hookFunc
+ // beforeDeferredStop is invoked before the deferred Stop is invoked in Run.
+ beforeDeferredStop hookFunc
+}
+
+// installHooks wires the configured hooks into the transactionManager by swapping its stop function,
+// database and repository for hook-aware wrappers.
+func installHooks(tb testing.TB, transactionManager *TransactionManager, database *badger.DB, repository *localrepo.Repo, hooks hooks) {
+	// Capture the manager's current stop function before it is replaced below so that both the
+	// hooks and the replacement can still invoke it.
+	hookCtx := hookContext{stopManager: transactionManager.stop}
+
+	transactionManager.db = databaseHook{
+		database:    newDatabaseAdapter(database),
+		hookContext: hookCtx,
+		hooks:       hooks,
+	}
+
+	transactionManager.repository = repositoryHook{
+		repository:  repository,
+		hookContext: hookCtx,
+		hooks:       hooks,
+	}
+
+	transactionManager.stop = func() {
+		// Inspect the frame two levels above this closure to learn which function triggered the
+		// stop. The depth is relative to this closure, so this lookup must stay inline here.
+		pc, _, _, found := runtime.Caller(2)
+		require.True(tb, found)
+
+		// Only a stop that originates from the Run method triggers the beforeDeferredStop hook.
+		callerName := runtime.FuncForPC(pc).Name()
+		if hooks.beforeDeferredStop != nil && strings.HasSuffix(callerName, "gitaly.(*TransactionManager).Run") {
+			hooks.beforeDeferredStop(hookCtx)
+		}
+
+		hookCtx.stopManager()
+	}
+}
+
+// repositoryHook wraps a repository so that the configured hooks can run around its method calls.
+type repositoryHook struct {
+	repository
+	hookContext
+	hooks
+}
+
+// ResolveRevision runs the beforeResolveRevision hook, if one is configured, and then delegates to the
+// wrapped repository.
+func (h repositoryHook) ResolveRevision(ctx context.Context, revision git.Revision) (git.ObjectID, error) {
+	if before := h.hooks.beforeResolveRevision; before != nil {
+		before(h.hookContext)
+	}
+
+	return h.repository.ResolveRevision(ctx, revision)
+}
+
+// databaseHook wraps a database so that the transactions and write batches it hands out are
+// hook-aware wrappers.
+type databaseHook struct {
+	database
+	hookContext
+	hooks
+}
+
+// View delegates to the wrapped database's View, passing the handler a databaseTransactionHook that
+// wraps the real transaction and carries this hook's context and hooks.
+func (h databaseHook) View(handler func(databaseTransaction) error) error {
+	wrappedHandler := func(txn databaseTransaction) error {
+		hookedTxn := databaseTransactionHook{
+			databaseTransaction: txn,
+			hookContext:         h.hookContext,
+			hooks:               h.hooks,
+		}
+
+		return handler(hookedTxn)
+	}
+
+	return h.database.View(wrappedHandler)
+}
+
+// NewWriteBatch creates a write batch on the wrapped database and returns it wrapped in a
+// writeBatchHook.
+func (h databaseHook) NewWriteBatch() writeBatch {
+	batch := h.database.NewWriteBatch()
+
+	return writeBatchHook{writeBatch: batch}
+}
+
+// databaseTransactionHook wraps a databaseTransaction so that hooks can run before reads.
+type databaseTransactionHook struct {
+	databaseTransaction
+	hookContext
+	hooks
+}
+
+// regexLogEntry matches keys that contain "repository/", one or more characters, and then
+// "/log/entry/".
+var regexLogEntry = regexp.MustCompile("repository/.+/log/entry/")
+
+// Get runs the beforeReadLogEntry hook when one is configured and the key matches regexLogEntry, and
+// then delegates the read to the wrapped transaction.
+func (h databaseTransactionHook) Get(key []byte) (*badger.Item, error) {
+	if before := h.hooks.beforeReadLogEntry; before != nil && regexLogEntry.Match(key) {
+		before(h.hookContext)
+	}
+
+	return h.databaseTransaction.Get(key)
+}
+
+// NewIterator delegates directly to the wrapped transaction without running any hooks.
+func (h databaseTransactionHook) NewIterator(options badger.IteratorOptions) *badger.Iterator {
+	iterator := h.databaseTransaction.NewIterator(options)
+
+	return iterator
+}
+
+// writeBatchHook wraps a writeBatch. Every method currently delegates straight to the wrapped batch
+// without running any hooks.
+type writeBatchHook struct {
+	writeBatch
+}
+
+// Set forwards the key and value to the wrapped write batch.
+func (w writeBatchHook) Set(key []byte, value []byte) error {
+	err := w.writeBatch.Set(key, value)
+
+	return err
+}
+
+// Flush forwards to the wrapped write batch's Flush.
+func (w writeBatchHook) Flush() error {
+	return w.writeBatch.Flush()
+}
+
+// Cancel forwards to the wrapped write batch's Cancel.
+func (w writeBatchHook) Cancel() {
+	w.writeBatch.Cancel()
+}