diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-07-08 17:06:50 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-09-01 14:52:33 +0300 |
commit | abfc3f01704c0fdbda7b92777de926ec624ef64b (patch) | |
tree | ca17af6e4f3ddd747141189c4151acb7c61e6142 | |
parent | 4df1f179d4cf1e60de941fc5d513fa496f666d52 (diff) |
coordinator: Create replication jobs if the primary cast a vote
Starting with commit d87747c8 (Consider primary modified only if a
subtransaction was committed, 2021-05-14), we consider primaries to not
have been modified unless at least one subtransaction was committed. The
intent of this change is to avoid queueing replication jobs in case an
RPC returned an error without having modified any on-disk state. As it
turns out, this optimization had unintended side effects: if an RPC
fails on the first vote because of inconsistent state across all nodes,
then we wouldn't ever schedule a replication job to fix this
inconsistency. In some cases, this will keep up from making any progress
at all because we will never converge towards the same state, for
example in object pools.
The current condition is clearly insufficient: if the initial vote
fails, then we must schedule a replication job because we cannot tell
much about the reason of its failure. This commit thus tightens the
check: instead of requiring at least one committed subtransaction to
consider the primary dirty, we now consider it dirty whenever it did
cast a vote.
With this change, we can still avoid replication jobs if secondaries
created a subtransaction while the primary dropped out before casting a
vote given that secondaries couldn't have reached quorum without the
primary. But in all the other cases where the primary did cast a vote,
we'll now go through our typical updated-outdated logic and will thus
also know to replicate changes in case the first vote fails.
Changelog: fixed
(cherry picked from commit cbf4ed8ffcb9e2600bdb20da420bc694c8523d8f)
-rw-r--r-- | internal/praefect/coordinator.go | 10 | ||||
-rw-r--r-- | internal/praefect/coordinator_pg_test.go | 8 | ||||
-rw-r--r-- | internal/praefect/coordinator_test.go | 165 |
3 files changed, 115 insertions, 68 deletions
diff --git a/internal/praefect/coordinator.go b/internal/praefect/coordinator.go index 5207301bd..63afa5a55 100644 --- a/internal/praefect/coordinator.go +++ b/internal/praefect/coordinator.go @@ -792,11 +792,11 @@ func getUpdatedAndOutdatedSecondaries( return false, nil, nil } - // If there were subtransactions, we assume no changes were made if none of the - // subtransactions was committed. This can really only happen for the first subtransaction - // given that no new subtransactions are created if the first one wasn't committed. Same as - // above, we exit early in this case and notify the caller that the primary is not dirty. - if transaction.CountSubtransactions() != 0 && !transaction.DidCommitAnySubtransaction() { + // If there was a single subtransactions but the primary didn't cast a vote, then it means + // that the primary node has dropped out before secondaries were able to commit any changes + // to disk. Given that they cannot ever succeed without the primary, no change to disk + // should have happened. + if transaction.CountSubtransactions() == 1 && !transaction.DidVote(route.Primary.Storage) { return false, nil, nil } diff --git a/internal/praefect/coordinator_pg_test.go b/internal/praefect/coordinator_pg_test.go index 316b05997..d43f2f02c 100644 --- a/internal/praefect/coordinator_pg_test.go +++ b/internal/praefect/coordinator_pg_test.go @@ -69,11 +69,11 @@ func TestStreamDirectorMutator_Transaction(t *testing.T) { }, }, { - desc: "failing vote should not create replication jobs without committed subtransactions", + desc: "failing vote should create replication jobs without committed subtransactions", nodes: []node{ - {primary: true, subtransactions: subtransactions{{vote: "foo", shouldSucceed: false}}, shouldGetRepl: false, shouldParticipate: true, expectedGeneration: 0}, - {primary: false, subtransactions: subtransactions{{vote: "qux", shouldSucceed: false}}, shouldGetRepl: false, shouldParticipate: true, expectedGeneration: 0}, - {primary: false, subtransactions: subtransactions{{vote: "bar", shouldSucceed: false}}, shouldGetRepl: false, shouldParticipate: true, expectedGeneration: 0}, + {primary: true, subtransactions: subtransactions{{vote: "foo", shouldSucceed: false}}, shouldGetRepl: false, shouldParticipate: true, expectedGeneration: 1}, + {primary: false, subtransactions: subtransactions{{vote: "qux", shouldSucceed: false}}, shouldGetRepl: true, shouldParticipate: true, expectedGeneration: 0}, + {primary: false, subtransactions: subtransactions{{vote: "bar", shouldSucceed: false}}, shouldGetRepl: true, shouldParticipate: true, expectedGeneration: 0}, }, }, { diff --git a/internal/praefect/coordinator_test.go b/internal/praefect/coordinator_test.go index 8ba453211..b2365d4c4 100644 --- a/internal/praefect/coordinator_test.go +++ b/internal/praefect/coordinator_test.go @@ -1545,16 +1545,16 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { anyErr := errors.New("arbitrary error") for _, tc := range []struct { - desc string - primary node - secondaries []node - replicas []string - subtransactions int - didCommitAnySubtransaction bool - expectedPrimaryDirtied bool - expectedOutdated []string - expectedUpdated []string - expectedMetrics map[string]int + desc string + primary node + secondaries []node + replicas []string + subtransactions int + didVote map[string]bool + expectedPrimaryDirtied bool + expectedOutdated []string + expectedUpdated []string + expectedMetrics map[string]int }{ { desc: "single committed node", @@ -1562,9 +1562,11 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { name: "primary", state: transactions.VoteCommitted, }, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, }, { desc: "single failed node", @@ -1595,11 +1597,13 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { name: "primary", state: transactions.VoteCommitted, }, - replicas: []string{"replica"}, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedOutdated: []string{"replica"}, + replicas: []string{"replica"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedOutdated: []string{"replica"}, expectedMetrics: map[string]int{ "outdated": 1, }, @@ -1619,16 +1623,29 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { state: transactions.VoteCommitted, err: anyErr, }, - replicas: []string{"replica"}, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedOutdated: []string{"replica"}, + replicas: []string{"replica"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedOutdated: []string{"replica"}, expectedMetrics: map[string]int{ "outdated": 1, }, }, { + desc: "single erred node without commit with replica", + primary: node{ + name: "primary", + state: transactions.VoteCommitted, + err: anyErr, + }, + replicas: []string{"replica"}, + subtransactions: 1, + expectedPrimaryDirtied: false, + }, + { desc: "single node without transaction with replica", primary: node{ name: "primary", @@ -1651,10 +1668,12 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { {name: "s1", state: transactions.VoteCommitted}, {name: "s2", state: transactions.VoteCommitted}, }, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedUpdated: []string{"s1", "s2"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedUpdated: []string{"s1", "s2"}, expectedMetrics: map[string]int{ "updated": 2, }, @@ -1670,10 +1689,12 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { {name: "s1", state: transactions.VoteCommitted}, {name: "s2", state: transactions.VoteCommitted}, }, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedOutdated: []string{"s1", "s2"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedOutdated: []string{"s1", "s2"}, expectedMetrics: map[string]int{ "primary-failed": 2, }, @@ -1688,11 +1709,13 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { {name: "s1", state: transactions.VoteCommitted, err: anyErr}, {name: "s2", state: transactions.VoteCommitted}, }, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedUpdated: []string{"s2"}, - expectedOutdated: []string{"s1"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedUpdated: []string{"s2"}, + expectedOutdated: []string{"s1"}, expectedMetrics: map[string]int{ "node-failed": 1, "updated": 1, @@ -1708,11 +1731,13 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { {name: "s1", state: transactions.VoteFailed}, {name: "s2", state: transactions.VoteCommitted}, }, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedUpdated: []string{"s2"}, - expectedOutdated: []string{"s1"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedUpdated: []string{"s2"}, + expectedOutdated: []string{"s1"}, expectedMetrics: map[string]int{ "node-not-committed": 1, "updated": 1, @@ -1728,15 +1753,33 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { {name: "s1", state: transactions.VoteFailed}, {name: "s2", state: transactions.VoteCommitted}, }, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedOutdated: []string{"s1", "s2"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedOutdated: []string{"s1", "s2"}, expectedMetrics: map[string]int{ "primary-not-committed": 2, }, }, { + desc: "failure with no primary votes", + primary: node{ + name: "primary", + state: transactions.VoteFailed, + }, + secondaries: []node{ + {name: "s1", state: transactions.VoteFailed}, + {name: "s2", state: transactions.VoteCommitted}, + }, + didVote: map[string]bool{ + "s1": true, + "s2": true, + }, + subtransactions: 1, + }, + { desc: "multiple nodes without subtransactions", primary: node{ name: "primary", @@ -1763,12 +1806,14 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { {name: "s1", state: transactions.VoteFailed}, {name: "s2", state: transactions.VoteCommitted}, }, - replicas: []string{"r1", "r2"}, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedOutdated: []string{"s1", "r1", "r2"}, - expectedUpdated: []string{"s2"}, + replicas: []string{"r1", "r2"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedOutdated: []string{"s1", "r1", "r2"}, + expectedUpdated: []string{"s2"}, expectedMetrics: map[string]int{ "node-not-committed": 1, "outdated": 2, @@ -1785,11 +1830,13 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { {name: "s1", state: transactions.VoteFailed}, {name: "s2", state: transactions.VoteCommitted, err: anyErr}, }, - replicas: []string{"r1", "r2"}, - didCommitAnySubtransaction: true, - subtransactions: 1, - expectedPrimaryDirtied: true, - expectedOutdated: []string{"s1", "s2", "r1", "r2"}, + replicas: []string{"r1", "r2"}, + didVote: map[string]bool{ + "primary": true, + }, + subtransactions: 1, + expectedPrimaryDirtied: true, + expectedOutdated: []string{"s1", "s2", "r1", "r2"}, expectedMetrics: map[string]int{ "node-failed": 1, "node-not-committed": 1, @@ -1816,9 +1863,9 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { } transaction := mockTransaction{ - nodeStates: states, - subtransactions: tc.subtransactions, - didCommitAnySubtransaction: tc.didCommitAnySubtransaction, + nodeStates: states, + subtransactions: tc.subtransactions, + didVote: tc.didVote, } route := RepositoryMutatorRoute{ |