diff options
author | Sami Hiltunen <shiltunen@gitlab.com> | 2020-05-18 15:08:42 +0300 |
---|---|---|
committer | Sami Hiltunen <shiltunen@gitlab.com> | 2020-05-18 18:23:51 +0300 |
commit | bac251d7b47018240fcfcf71fc33384c5c0edce6 (patch) | |
tree | 5439c9ffd7ed750956cddbdc600b610806250b47 | |
parent | 69493a692e4173b8cfc59b56b218efe32a2ebca4 (diff) |
fix re-electing demoted primaries
Fixes a bug where the demoted status of a primary is not considered
when checking whether the new primary candidate is already elected
as the primary. This caused a problem where Praefect was not able to
re-elect a demoted primary as it was considered already elected.
-rw-r--r-- | internal/praefect/nodes/sql_elector.go | 2 | ||||
-rw-r--r-- | internal/praefect/nodes/sql_elector_test.go | 58 |
2 files changed, 52 insertions, 8 deletions
diff --git a/internal/praefect/nodes/sql_elector.go b/internal/praefect/nodes/sql_elector.go index e84decbc8..07ae68e52 100644 --- a/internal/praefect/nodes/sql_elector.go +++ b/internal/praefect/nodes/sql_elector.go @@ -418,7 +418,7 @@ func (s *sqlElector) electNewPrimary(candidates []*sqlCandidate) error { // be switched to read-only mode. q = `INSERT INTO shard_primaries (elected_by_praefect, shard_name, node_name, elected_at) SELECT $1::VARCHAR, $2::VARCHAR, $3::VARCHAR, NOW() - WHERE $3 != COALESCE((SELECT node_name FROM shard_primaries WHERE shard_name = $2::VARCHAR), '') + WHERE $3 != COALESCE((SELECT node_name FROM shard_primaries WHERE shard_name = $2::VARCHAR AND demoted = false), '') ON CONFLICT (shard_name) DO UPDATE SET elected_by_praefect = EXCLUDED.elected_by_praefect , node_name = EXCLUDED.node_name diff --git a/internal/praefect/nodes/sql_elector_test.go b/internal/praefect/nodes/sql_elector_test.go index 2b73c46dd..b39c91eae 100644 --- a/internal/praefect/nodes/sql_elector_test.go +++ b/internal/praefect/nodes/sql_elector_test.go @@ -10,6 +10,7 @@ import ( "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitaly/internal/praefect/config" + "gitlab.com/gitlab-org/gitaly/internal/praefect/datastore/glsql" "gitlab.com/gitlab-org/gitaly/internal/praefect/models" "gitlab.com/gitlab-org/gitaly/internal/testhelper" "gitlab.com/gitlab-org/gitaly/internal/testhelper/promtest" @@ -170,6 +171,55 @@ func TestBasicFailover(t *testing.T) { "shouldn't be able to enable writes with unhealthy master") } +func TestElectDemotedPrimary(t *testing.T) { + db := getDB(t) + + node := models.Node{Storage: "gitaly-0"} + elector := newSQLElector( + shardName, + config.Config{}, + defaultFailoverTimeoutSeconds, + defaultActivePraefectSeconds, + db.DB, + testhelper.DiscardTestLogger(t), + []*nodeStatus{{Node: node}}, + ) + + candidates := []*sqlCandidate{{Node: &nodeStatus{Node: node}}} + require.NoError(t, elector.electNewPrimary(candidates)) + + primary, _, err := elector.lookupPrimary() + require.NoError(t, err) + require.Equal(t, node.Storage, primary.GetStorage()) + + require.NoError(t, elector.demotePrimary()) + + primary, _, err = elector.lookupPrimary() + require.NoError(t, err) + require.Nil(t, primary) + + shiftElection(t, db, shardName, -1*defaultFailoverTimeoutSeconds) + require.NoError(t, err) + require.NoError(t, elector.electNewPrimary(candidates)) + + primary, _, err = elector.lookupPrimary() + require.NoError(t, err) + require.Equal(t, node.Storage, primary.GetStorage()) +} + +// pretend like the last election happened in the past because the query in electNewPrimary +// to update the primary will not overwrite a previous that's within 10 seconds +func shiftElection(t testing.TB, db glsql.DB, shardName string, seconds int) { + t.Helper() + + _, err := db.Exec( + "UPDATE shard_primaries SET elected_at = elected_at + $1::INTERVAL SECOND WHERE shard_name = $2", + seconds, + shardName, + ) + require.NoError(t, err) +} + func TestElectNewPrimary(t *testing.T) { db := getDB(t) @@ -296,13 +346,7 @@ func TestElectNewPrimary(t *testing.T) { require.Equal(t, "gitaly-1", primary.GetStorage(), "since replication queue is empty the first candidate should be chosen") require.False(t, readOnly) - // pretend like the last election happened in the past because the query in electNewPrimary to update the primary will not - // overwrite a previous that's within 10 seconds - _, err = db.Exec(`UPDATE shard_primaries SET elected_at = now() - $1::INTERVAL SECOND WHERE shard_name = $2`, - 2*failoverTimeSeconds, - shardName) - require.NoError(t, err) - + shiftElection(t, db, shardName, -1*failoverTimeSeconds) _, err = db.Exec(testCase.initialReplQueueInsert) require.NoError(t, err) |