diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-02-17 18:28:48 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-02-23 12:01:52 +0300 |
commit | cd44d9e62a272c71c96b2057c59d6eab45b4d908 (patch) | |
tree | 98cb039fae338f90875f6488a7681466c0e619c6 /internal/praefect/coordinator_test.go | |
parent | 6d24c8e6a651799f0ea2bdfd3386d58ed401407e (diff) |
praefect: Consider RPC errors when creating replication jobs
Historically, we didn't have any access to error codes of proxying
destinations if proxying to more than one node at once. This is mostly
because the proxy code just conflated all errors into one, which made
proper error handling next to impossible. As a result, the transactional
finalizer cannot and thus doesn't take into account whether any of the
nodes fails, which may lead to spotty coverage when creating replication
jobs.
Fix this shortcoming by setting up error handlers for all proxying
destinations. Like this, we can easily grab any errors they encounter
and put it into a local map, which we can then forward to the
transaction finalizer. Now there's three cases to consider:
- The primary raised an error. In that case, we cannot help it and
need to create replication jobs to all nodes.
- A secondary raised an error. In that case, we need to consider it
as outdated and need to create a replication job for that specific
node.
- Neither of them failed, in which case the node is considered to be
up-to-date.
Note that currently we still pass the error through as-is even for the
secondaries. We should instead ignore errors on secondaries, but this is
left for a subsequent commit.
Diffstat (limited to 'internal/praefect/coordinator_test.go')
-rw-r--r-- | internal/praefect/coordinator_test.go | 70 |
1 files changed, 69 insertions, 1 deletions
diff --git a/internal/praefect/coordinator_test.go b/internal/praefect/coordinator_test.go index e991af307..4e88323af 100644 --- a/internal/praefect/coordinator_test.go +++ b/internal/praefect/coordinator_test.go @@ -1483,11 +1483,14 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { type node struct { name string state transactions.VoteResult + err error } ctx, cancel := testhelper.Context() defer cancel() + anyErr := errors.New("arbitrary error") + for _, tc := range []struct { desc string primary node @@ -1514,6 +1517,13 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { subtransactions: 1, }, { + desc: "single erred node", + primary: node{ + name: "primary", + err: anyErr, + }, + }, + { desc: "single node without subtransactions", primary: node{ name: "primary", @@ -1541,6 +1551,17 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { expectedOutdated: []string{"replica"}, }, { + desc: "single erred node with replica", + primary: node{ + name: "primary", + state: transactions.VoteCommitted, + err: anyErr, + }, + replicas: []string{"replica"}, + subtransactions: 1, + expectedOutdated: []string{"replica"}, + }, + { desc: "single node without transaction with replica", primary: node{ name: "primary", @@ -1563,6 +1584,34 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { expectedUpdated: []string{"s1", "s2"}, }, { + desc: "multiple committed nodes with primary err", + primary: node{ + name: "primary", + state: transactions.VoteCommitted, + err: anyErr, + }, + secondaries: []node{ + {name: "s1", state: transactions.VoteCommitted}, + {name: "s2", state: transactions.VoteCommitted}, + }, + subtransactions: 1, + expectedOutdated: []string{"s1", "s2"}, + }, + { + desc: "multiple committed nodes with secondary err", + primary: node{ + name: "primary", + state: transactions.VoteCommitted, + }, + secondaries: []node{ + {name: "s1", state: transactions.VoteCommitted, err: anyErr}, + {name: "s2", state: transactions.VoteCommitted}, + }, + subtransactions: 1, + expectedUpdated: []string{"s2"}, + expectedOutdated: []string{"s1"}, + }, + { desc: "partial success", primary: node{ name: "primary", @@ -1617,11 +1666,29 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { expectedOutdated: []string{"s1", "r1", "r2"}, expectedUpdated: []string{"s2"}, }, + { + desc: "multiple nodes with replica and partial err", + primary: node{ + name: "primary", + state: transactions.VoteCommitted, + }, + secondaries: []node{ + {name: "s1", state: transactions.VoteFailed}, + {name: "s2", state: transactions.VoteCommitted, err: anyErr}, + }, + replicas: []string{"r1", "r2"}, + subtransactions: 1, + expectedOutdated: []string{"s1", "s2", "r1", "r2"}, + }, } { t.Run(tc.desc, func(t *testing.T) { nodes := append(tc.secondaries, tc.primary) voters := make([]transactions.Voter, len(nodes)) + states := make(map[string]transactions.VoteResult) + nodeErrors := &nodeErrors{ + errByNode: make(map[string]error), + } for i, node := range nodes { voters[i] = transactions.Voter{ @@ -1629,6 +1696,7 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { Votes: 1, } states[node.name] = node.state + nodeErrors.errByNode[node.name] = node.err } transaction := mockTransaction{ @@ -1648,7 +1716,7 @@ func TestGetUpdatedAndOutdatedSecondaries(t *testing.T) { } route.ReplicationTargets = append(route.ReplicationTargets, tc.replicas...) - updated, outdated := getUpdatedAndOutdatedSecondaries(ctx, route, transaction) + updated, outdated := getUpdatedAndOutdatedSecondaries(ctx, route, transaction, nodeErrors) require.ElementsMatch(t, tc.expectedUpdated, updated) require.ElementsMatch(t, tc.expectedOutdated, outdated) }) |