diff options
author | Sami Hiltunen <shiltunen@gitlab.com> | 2021-11-15 14:58:01 +0300 |
---|---|---|
committer | Sami Hiltunen <shiltunen@gitlab.com> | 2021-11-15 16:27:58 +0300 |
commit | 3e45d3384caa633730f3e51698d13a009b90de3b (patch) | |
tree | b597a73e32016b0206356dbb3e56fddabf6f3beb | |
parent | ab48849645ffb6738cb03d37210f73bc95a75588 (diff) |
Add a test case for concurrent deadlocking inserts in HealthManager
HealthManager doesn't perform inserts into the node_status table in an
ordered manner. This can cause a deadlock if the concurrent queries acquire
row locks in different order. This commit adds a test case for this behavior
so we can verify it's fixed in a later commit that orders the writes.
-rw-r--r-- | internal/praefect/nodes/health_manager_test.go | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/internal/praefect/nodes/health_manager_test.go b/internal/praefect/nodes/health_manager_test.go index 07b359090..98faf9410 100644 --- a/internal/praefect/nodes/health_manager_test.go +++ b/internal/praefect/nodes/health_manager_test.go @@ -568,3 +568,49 @@ func predateHealthChecks(t testing.TB, db glsql.DB, amount time.Duration) { ) require.NoError(t, err) } + +// This test case ensures the record updates are done in an ordered manner to avoid concurrent writes +// deadlocking. Issue: https://gitlab.com/gitlab-org/gitaly/-/issues/3907 +func TestHealthManager_orderedWrites(t *testing.T) { + db := glsql.NewDB(t) + + tx1 := db.Begin(t).Tx + defer func() { _ = tx1.Rollback() }() + + tx2 := db.Begin(t).Tx + defer func() { _ = tx2.Rollback() }() + + ctx, cancel := testhelper.Context() + defer cancel() + + const ( + praefectName = "praefect-1" + virtualStorage = "virtual-storage" + ) + + returnErr := func(err error) error { return err } + + hm1 := NewHealthManager(testhelper.DiscardTestLogger(t), tx1, praefectName, nil) + hm1.handleError = returnErr + require.NoError(t, hm1.updateHealthChecks(ctx, []string{virtualStorage}, []string{"gitaly-1"}, []bool{true})) + + tx2Err := make(chan error, 1) + hm2 := NewHealthManager(testhelper.DiscardTestLogger(t), tx2, praefectName, nil) + hm2.handleError = returnErr + go func() { + tx2Err <- hm2.updateHealthChecks(ctx, []string{virtualStorage, virtualStorage}, []string{"gitaly-2", "gitaly-1"}, []bool{true, true}) + }() + + // Wait for tx2 to be blocked on the gitaly-1 lock acquired by tx1 + glsql.WaitForQueries(ctx, t, db, "INSERT INTO node_status", 1) + + // This triggers a deadlock. Postgres allows tx1 to proceed and kills tx2. + require.NoError(t, hm1.updateHealthChecks(ctx, []string{virtualStorage}, []string{"gitaly-2"}, []bool{true})) + require.EqualError(t, <-tx2Err, "update checks: pq: deadlock detected") + + // Tx1 succeeds and Tx2 failed + require.NoError(t, tx1.Commit()) + require.Error(t, tx2.Commit()) + + require.Equal(t, map[string][]string{"virtual-storage": {"gitaly-1", "gitaly-2"}}, hm1.HealthConsensus()) +} |