Welcome to mirror list, hosted at ThFree Co, Russian Federation.

gitlab.com/gitlab-org/gitaly.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Steinhardt <psteinhardt@gitlab.com>2023-01-30 12:08:40 +0300
committerPatrick Steinhardt <psteinhardt@gitlab.com>2023-01-30 12:16:44 +0300
commitfb3277fcb0a6634639700c20688299a1708c57f1 (patch)
treebfd766ba285256162e27ec48aa1a4308505e59f5
parent9e9717d60e83bc08e6bcb3ca0e636edf1668a9b3 (diff)
cmd/praefect: Fix flake when testing removal of replication events
When deleting a repository we also try to remove all of its in-flight replication events. The test that exercises this logic is flaky though because it does not correctly synchronize execution of the loop that handles removal of the replication events and acknowledging a ready replication job. This may cause the removal logic to still be running while we're acknowledging the job, which then leads to the job being removed without us expecting that to happen. This then causes a deadlock because the loop exits, but we still expect the ticker to be called: goroutine 2169 [chan send, 19 minutes]: gitlab.com/gitlab-org/gitaly/v15/internal/helper.(*ManualTicker).Tick(...) /builds/gitlab-org/gitaly/internal/helper/ticker.go:60 gitlab.com/gitlab-org/gitaly/v15/cmd/praefect.TestRemoveRepository_removeReplicationEvents.func1() /builds/gitlab-org/gitaly/cmd/praefect/subcmd_remove_repository_test.go:367 +0x7e5 created by gitlab.com/gitlab-org/gitaly/v15/cmd/praefect.TestRemoveRepository_removeReplicationEvents /builds/gitlab-org/gitaly/cmd/praefect/subcmd_remove_repository_test.go:346 +0x10f6 goroutine 94 [semacquire, 19 minutes]: sync.runtime_Semacquire(0xc001a85fc8?) /usr/local/go/src/runtime/sema.go:62 +0x25 sync.(*WaitGroup).Wait(0xc001a85fc0) /usr/local/go/src/sync/waitgroup.go:139 +0xa6 gitlab.com/gitlab-org/gitaly/v15/cmd/praefect.TestRemoveRepository_removeReplicationEvents(0xc003c7bba0) /builds/gitlab-org/gitaly/cmd/praefect/subcmd_remove_repository_test.go:373 +0x143a testing.tRunner(0xc003c7bba0, 0x26ae808) /usr/local/go/src/testing/testing.go:1446 +0x217 created by testing.(*T).Run /usr/local/go/src/testing/testing.go:1493 +0x75e Fix this by explicitly synchronizing the loop via the `Reset()` function of the ticker.
-rw-r--r--cmd/praefect/subcmd_remove_repository_test.go21
1 files changed, 15 insertions, 6 deletions
diff --git a/cmd/praefect/subcmd_remove_repository_test.go b/cmd/praefect/subcmd_remove_repository_test.go
index 57c6d1bed..7878a965d 100644
--- a/cmd/praefect/subcmd_remove_repository_test.go
+++ b/cmd/praefect/subcmd_remove_repository_test.go
@@ -338,19 +338,21 @@ func TestRemoveRepository_removeReplicationEvents(t *testing.T) {
require.NoError(t, err)
require.Equal(t, []uint64{failedEvent.ID}, acknowledgedJobIDs)
+ resetCh := make(chan struct{})
ticker := helper.NewManualTicker()
- defer ticker.Stop()
+ ticker.ResetFunc = func() {
+ resetCh <- struct{}{}
+ }
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
- // Tick multiple times so that we know that at least one event must have been processed by
- // the command.
- ticker.Tick()
- ticker.Tick()
- ticker.Tick()
+ // Wait for the system-under-test to execute the `Reset()` function of the ticker
+ // for the first time. This means that the logic-under-test has executed once and
+ // that the processing loop is blocked until we call `Tick()`.
+ <-resetCh
// Verify that the database now only contains a single job, which is the "in_progress" one.
var jobIDs glsql.Uint64Provider
@@ -364,6 +366,13 @@ func TestRemoveRepository_removeReplicationEvents(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, []uint64{inProgressEvent.ID}, acknowledgedJobIDs)
+ // Trigger the ticker so that the processing loop becomes unblocked. This should
+ // cause us to prune the now-acknowledged job.
+ //
+ // Note that we explicitly don't close the reset channel or try to receive another
+ // message on it. This is done to ensure that we have now deterministically removed
+ // the replication event and that the loop indeed has terminated as expected without
+ // calling `Reset()` on the ticker again.
ticker.Tick()
}()