author     Zeger-Jan van de Weg <git@zjvandeweg.nl>    2020-02-07 16:14:30 +0300
committer  Zeger-Jan van de Weg <git@zjvandeweg.nl>    2020-02-07 16:14:30 +0300
commit     f4c653f110d5f25167906cb2fa1ee09519b32e87
tree       184712cfeab4d8e6c5e8bd2d97fc9b2835b38e95
parent     ba6b3866f6bedc6a959d7e8b56bc24e60de8548b
parent     29a9cee842181c28111c166d9741f06abf06a829
Merge branch 'jc-node-manager-ff' into 'master'
Enable toggling on the node manager through a config value
See merge request gitlab-org/gitaly!1803
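
In short: this change adds a `failover_enabled` boolean to Praefect's TOML config (the FailoverEnabled field on config.Config in the diff below). When it is false, the node manager skips health-check bootstrapping and monitoring, and GetShard stops rejecting shards whose primary is unhealthy.

A minimal usage sketch of the changed constructor, built only from identifiers visible in this diff. The helper name and interval values are illustrative, and the snippet assumes it sits inside the praefect package itself, since internal/ paths are not importable from outside the Gitaly module:

// Hypothetical sketch, not part of this commit.
package praefect

import (
	"time"

	"github.com/sirupsen/logrus"

	"gitlab.com/gitlab-org/gitaly/internal/praefect/config"
)

// startNodeManager is an illustrative helper, not a function in the codebase.
func startNodeManager(virtualStorages []*config.VirtualStorage) (*NodeMgr, error) {
	conf := config.Config{
		VirtualStorages: virtualStorages,
		FailoverEnabled: true, // maps to `failover_enabled` in config.toml
	}

	// NewNodeManager now takes the whole config.Config rather than just
	// the virtual storages.
	nm, err := NewNodeManager(logrus.NewEntry(logrus.New()), conf)
	if err != nil {
		return nil, err
	}

	// With FailoverEnabled set, Start health-checks the nodes and kicks off
	// background monitoring; with it unset, Start is a no-op (see the diff).
	// The intervals are arbitrary example values.
	nm.Start(100*time.Millisecond, 3*time.Second)

	return nm, nil
}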
-rw-r--r--  changelogs/unreleased/jc-node-manager-ff.yml |  5
-rw-r--r--  internal/praefect/config/config.go           |  1
-rw-r--r--  internal/praefect/node_manager.go            | 27
-rw-r--r--  internal/praefect/node_manager_test.go       | 56
4 files changed, 75 insertions, 14 deletions
diff --git a/changelogs/unreleased/jc-node-manager-ff.yml b/changelogs/unreleased/jc-node-manager-ff.yml
new file mode 100644
index 000000000..8389cf0b4
--- /dev/null
+++ b/changelogs/unreleased/jc-node-manager-ff.yml
@@ -0,0 +1,5 @@
+---
+title: Enable toggling on the node manager through a config value
+merge_request: 1803
+author:
+type: changed
diff --git a/internal/praefect/config/config.go b/internal/praefect/config/config.go
index 97be5c9e1..7b5296234 100644
--- a/internal/praefect/config/config.go
+++ b/internal/praefect/config/config.go
@@ -29,6 +29,7 @@ type Config struct {
 	Prometheus prometheus.Config `toml:"prometheus"`
 	Auth       auth.Config       `toml:"auth"`
 	DB         `toml:"database"`
+	FailoverEnabled bool `toml:"failover_enabled"`
 }
 
 // VirtualStorage represents a set of nodes for a storage
diff --git a/internal/praefect/node_manager.go b/internal/praefect/node_manager.go
index 1a3d00b7d..50880e89f 100644
--- a/internal/praefect/node_manager.go
+++ b/internal/praefect/node_manager.go
@@ -65,8 +65,9 @@ func (s *shard) GetSecondaries() ([]Node, error) {
 
 // NodeMgr is a concrete type that adheres to the NodeManager interface
 type NodeMgr struct {
-	shards map[string]*shard
-	log    *logrus.Entry
+	shards          map[string]*shard
+	log             *logrus.Entry
+	failoverEnabled bool
 }
 
 // ErrPrimaryNotHealthy indicates the primary of a shard is not in a healthy state and hence
@@ -74,10 +75,10 @@ type NodeMgr struct {
 var ErrPrimaryNotHealthy = errors.New("primary is not healthy")
 
 // NewNodeManager creates a new NodeMgr based on virtual storage configs
-func NewNodeManager(log *logrus.Entry, virtualStorages []config.VirtualStorage) (*NodeMgr, error) {
+func NewNodeManager(log *logrus.Entry, c config.Config) (*NodeMgr, error) {
 	shards := make(map[string]*shard)
 
-	for _, virtualStorage := range virtualStorages {
+	for _, virtualStorage := range c.VirtualStorages {
 		var secondaries []*nodeStatus
 		var primary *nodeStatus
 		for _, node := range virtualStorage.Nodes {
@@ -99,6 +100,7 @@ func NewNodeManager(log *logrus.Entry, virtualStorages []config.VirtualStorage)
 			secondaries = append(secondaries, ns)
 		}
 
+
 		shards[virtualStorage.Name] = &shard{
 			primary:     primary,
 			secondaries: secondaries,
@@ -106,8 +108,9 @@ func NewNodeManager(log *logrus.Entry, virtualStorages []config.VirtualStorage)
 	}
 
 	return &NodeMgr{
-		shards: shards,
-		log:    log,
+		shards:          shards,
+		log:             log,
+		failoverEnabled: c.FailoverEnabled,
 	}, nil
 }
 
@@ -145,8 +148,10 @@ func (n *NodeMgr) monitor(d time.Duration) {
 // Start will bootstrap the node manager by calling healthcheck on the nodes as well as kicking off
 // the monitoring process. Start must be called before NodeMgr can be used.
 func (n *NodeMgr) Start(bootstrapInterval, monitorInterval time.Duration) {
-	n.bootstrap(bootstrapInterval)
-	go n.monitor(monitorInterval)
+	if n.failoverEnabled {
+		n.bootstrap(bootstrapInterval)
+		go n.monitor(monitorInterval)
+	}
 }
 
 // GetShard retrieves a shard for a virtual storage name
@@ -156,8 +161,10 @@ func (n *NodeMgr) GetShard(virtualStorageName string) (Shard, error) {
 		return nil, errors.New("virtual storage does not exist")
 	}
 
-	if !shard.primary.isHealthy() {
-		return nil, ErrPrimaryNotHealthy
+	if n.failoverEnabled {
+		if !shard.primary.isHealthy() {
+			return nil, ErrPrimaryNotHealthy
+		}
 	}
 
 	return shard, nil
diff --git a/internal/praefect/node_manager_test.go b/internal/praefect/node_manager_test.go
index 76553327c..45682a8d4 100644
--- a/internal/praefect/node_manager_test.go
+++ b/internal/praefect/node_manager_test.go
@@ -38,7 +38,7 @@ func TestNodeManager(t *testing.T) {
 	internalSocket0 := testhelper.GetTemporaryGitalySocketFileName()
 	internalSocket1 := testhelper.GetTemporaryGitalySocketFileName()
 
-	virtualStorages := []config.VirtualStorage{
+	virtualStorages := []*config.VirtualStorage{
 		{
 			Name: "virtual-storage-0",
 			Nodes: []*models.Node{
@@ -55,19 +55,39 @@ func TestNodeManager(t *testing.T) {
 		},
 	}
 
+	confWithFailover := config.Config{
+		VirtualStorages: virtualStorages,
+		FailoverEnabled: true,
+	}
+	confWithoutFailover := config.Config{
+		VirtualStorages: virtualStorages,
+		FailoverEnabled: false,
+	}
+
 	_, srv0, cancel0 := newHealthServer(t, internalSocket0)
 	defer cancel0()
 	_, _, cancel1 := newHealthServer(t, internalSocket1)
 	defer cancel1()
 
-	nm, err := NewNodeManager(log.Default(), virtualStorages)
+	nm, err := NewNodeManager(log.Default(), confWithFailover)
 	require.NoError(t, err)
 
-	_, err = nm.GetShard("virtual-storage-0")
-	require.Error(t, ErrPrimaryNotHealthy, err)
+	nmWithoutFailover, err := NewNodeManager(log.Default(), confWithoutFailover)
+	require.NoError(t, err)
 
 	nm.Start(1*time.Millisecond, 5*time.Second)
+	nmWithoutFailover.Start(1*time.Millisecond, 5*time.Second)
+
+	_, err = nm.GetShard("virtual-storage-0")
+	require.NoError(t, err)
+
+	shardWithoutFailover, err := nmWithoutFailover.GetShard("virtual-storage-0")
+	require.NoError(t, err)
+	primaryWithoutFailover, err := shardWithoutFailover.GetPrimary()
+	require.NoError(t, err)
+	secondariesWithoutFailover, err := shardWithoutFailover.GetSecondaries()
+	require.NoError(t, err)
 
 	shard, err := nm.GetShard("virtual-storage-0")
 	require.NoError(t, err)
@@ -76,6 +96,13 @@ func TestNodeManager(t *testing.T) {
 	secondaries, err := shard.GetSecondaries()
 	require.NoError(t, err)
 
+	// shard without failover and shard with failover should be the same
+	require.Equal(t, primaryWithoutFailover.GetStorage(), primary.GetStorage())
+	require.Equal(t, primaryWithoutFailover.GetAddress(), primary.GetAddress())
+	require.Len(t, secondaries, 1)
+	require.Equal(t, secondariesWithoutFailover[0].GetStorage(), secondaries[0].GetStorage())
+	require.Equal(t, secondariesWithoutFailover[0].GetAddress(), secondaries[0].GetAddress())
+
 	require.Equal(t, virtualStorages[0].Nodes[0].Storage, primary.GetStorage())
 	require.Equal(t, virtualStorages[0].Nodes[0].Address, primary.GetAddress())
 	require.Len(t, secondaries, 1)
@@ -88,6 +115,13 @@ func TestNodeManager(t *testing.T) {
 
 	// since the primary is unhealthy, we expect checkShards to demote primary to secondary, and promote the healthy
 	// secondary to primary
+	shardWithoutFailover, err = nmWithoutFailover.GetShard("virtual-storage-0")
+	require.NoError(t, err)
+	primaryWithoutFailover, err = shardWithoutFailover.GetPrimary()
+	require.NoError(t, err)
+	secondariesWithoutFailover, err = shardWithoutFailover.GetSecondaries()
+	require.NoError(t, err)
+
 	shard, err = nm.GetShard("virtual-storage-0")
 	require.NoError(t, err)
 	primary, err = shard.GetPrimary()
@@ -95,6 +129,20 @@ func TestNodeManager(t *testing.T) {
 	secondaries, err = shard.GetSecondaries()
 	require.NoError(t, err)
 
+	// shard without failover and shard with failover should not be the same
+	require.NotEqual(t, primaryWithoutFailover.GetStorage(), primary.GetStorage())
+	require.NotEqual(t, primaryWithoutFailover.GetAddress(), primary.GetAddress())
+	require.NotEqual(t, secondariesWithoutFailover[0].GetStorage(), secondaries[0].GetStorage())
+	require.NotEqual(t, secondariesWithoutFailover[0].GetAddress(), secondaries[0].GetAddress())
+
+	// shard without failover should still match the config
+	require.Equal(t, virtualStorages[0].Nodes[0].Storage, primaryWithoutFailover.GetStorage())
+	require.Equal(t, virtualStorages[0].Nodes[0].Address, primaryWithoutFailover.GetAddress())
+	require.Len(t, secondaries, 1)
+	require.Equal(t, virtualStorages[0].Nodes[1].Storage, secondariesWithoutFailover[0].GetStorage())
+	require.Equal(t, virtualStorages[0].Nodes[1].Address, secondariesWithoutFailover[0].GetAddress())
+
+	// shard with failover should have promoted a secondary to primary and demoted the primary to a secondary
 	require.Equal(t, virtualStorages[0].Nodes[1].Storage, primary.GetStorage())
 	require.Equal(t, virtualStorages[0].Nodes[1].Address, primary.GetAddress())
 	require.Len(t, secondaries, 1)
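
Taken together, the behavioral difference the test pins down: with failover disabled, GetShard always returns the statically configured shard, even after the primary's health server is stopped; with failover enabled, monitoring demotes the unhealthy primary and promotes a healthy secondary. A hypothetical caller-side fragment (same in-package assumption as above; the real test asserts on storages and addresses rather than branching like this):

// Illustrative fragment only, not from this commit.
shard, err := nm.GetShard("virtual-storage-0")
if err == ErrPrimaryNotHealthy {
	// Reachable only with failover enabled: the primary failed its health
	// check and no secondary has been promoted yet.
	return nil, err
}
if err != nil {
	return nil, err // e.g. the virtual storage name does not exist
}
// With failover disabled, the shard's primary is whatever the config
// declared, healthy or not.
return shard, nil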