diff options
author | John Cai <jcai@gitlab.com> | 2020-02-07 16:14:30 +0300 |
---|---|---|
committer | Zeger-Jan van de Weg <git@zjvandeweg.nl> | 2020-02-07 16:14:30 +0300 |
commit | 29a9cee842181c28111c166d9741f06abf06a829 (patch) | |
tree | 184712cfeab4d8e6c5e8bd2d97fc9b2835b38e95 | |
parent | ba6b3866f6bedc6a959d7e8b56bc24e60de8548b (diff) |
Enable feature flag toggling in node manager
To enable feature flag toggling in the node manager, maintain a static
set of shards that is not health-monitored. When an incoming request has
the feature flag enabled, it is directed to the shard set that performs
failover. If the flag is not enabled, the request is directed to the
primary as though the node manager did nothing.
-rw-r--r-- | changelogs/unreleased/jc-node-manager-ff.yml | 5 | ||||
-rw-r--r-- | internal/praefect/config/config.go | 1 | ||||
-rw-r--r-- | internal/praefect/node_manager.go | 27 | ||||
-rw-r--r-- | internal/praefect/node_manager_test.go | 56 |
4 files changed, 75 insertions, 14 deletions
diff --git a/changelogs/unreleased/jc-node-manager-ff.yml b/changelogs/unreleased/jc-node-manager-ff.yml new file mode 100644 index 000000000..8389cf0b4 --- /dev/null +++ b/changelogs/unreleased/jc-node-manager-ff.yml @@ -0,0 +1,5 @@ +--- +title: Enable toggling on the node manager through a config value +merge_request: 1803 +author: +type: changed diff --git a/internal/praefect/config/config.go b/internal/praefect/config/config.go index 97be5c9e1..7b5296234 100644 --- a/internal/praefect/config/config.go +++ b/internal/praefect/config/config.go @@ -29,6 +29,7 @@ type Config struct { Prometheus prometheus.Config `toml:"prometheus"` Auth auth.Config `toml:"auth"` DB `toml:"database"` + FailoverEnabled bool `toml:"failover_enabled"` } // VirtualStorage represents a set of nodes for a storage diff --git a/internal/praefect/node_manager.go b/internal/praefect/node_manager.go index 1a3d00b7d..50880e89f 100644 --- a/internal/praefect/node_manager.go +++ b/internal/praefect/node_manager.go @@ -65,8 +65,9 @@ func (s *shard) GetSecondaries() ([]Node, error) { // NodeMgr is a concrete type that adheres to the NodeManager interface type NodeMgr struct { - shards map[string]*shard - log *logrus.Entry + shards map[string]*shard + log *logrus.Entry + failoverEnabled bool } // ErrPrimaryNotHealthy indicates the primary of a shard is not in a healthy state and hence @@ -74,10 +75,10 @@ type NodeMgr struct { var ErrPrimaryNotHealthy = errors.New("primary is not healthy") // NewNodeManager creates a new NodeMgr based on virtual storage configs -func NewNodeManager(log *logrus.Entry, virtualStorages []config.VirtualStorage) (*NodeMgr, error) { +func NewNodeManager(log *logrus.Entry, c config.Config) (*NodeMgr, error) { shards := make(map[string]*shard) - for _, virtualStorage := range virtualStorages { + for _, virtualStorage := range c.VirtualStorages { var secondaries []*nodeStatus var primary *nodeStatus for _, node := range virtualStorage.Nodes { @@ -99,6 +100,7 @@ func 
NewNodeManager(log *logrus.Entry, virtualStorages []config.VirtualStorage) secondaries = append(secondaries, ns) } + shards[virtualStorage.Name] = &shard{ primary: primary, secondaries: secondaries, @@ -106,8 +108,9 @@ func NewNodeManager(log *logrus.Entry, virtualStorages []config.VirtualStorage) } return &NodeMgr{ - shards: shards, - log: log, + shards: shards, + log: log, + failoverEnabled: c.FailoverEnabled, }, nil } @@ -145,8 +148,10 @@ func (n *NodeMgr) monitor(d time.Duration) { // Start will bootstrap the node manager by calling healthcheck on the nodes as well as kicking off // the monitoring process. Start must be called before NodeMgr can be used. func (n *NodeMgr) Start(bootstrapInterval, monitorInterval time.Duration) { - n.bootstrap(bootstrapInterval) - go n.monitor(monitorInterval) + if n.failoverEnabled { + n.bootstrap(bootstrapInterval) + go n.monitor(monitorInterval) + } } // GetShard retrieves a shard for a virtual storage name @@ -156,8 +161,10 @@ func (n *NodeMgr) GetShard(virtualStorageName string) (Shard, error) { return nil, errors.New("virtual storage does not exist") } - if !shard.primary.isHealthy() { - return nil, ErrPrimaryNotHealthy + if n.failoverEnabled { + if !shard.primary.isHealthy() { + return nil, ErrPrimaryNotHealthy + } } return shard, nil diff --git a/internal/praefect/node_manager_test.go b/internal/praefect/node_manager_test.go index 76553327c..45682a8d4 100644 --- a/internal/praefect/node_manager_test.go +++ b/internal/praefect/node_manager_test.go @@ -38,7 +38,7 @@ func TestNodeManager(t *testing.T) { internalSocket0 := testhelper.GetTemporaryGitalySocketFileName() internalSocket1 := testhelper.GetTemporaryGitalySocketFileName() - virtualStorages := []config.VirtualStorage{ + virtualStorages := []*config.VirtualStorage{ { Name: "virtual-storage-0", Nodes: []*models.Node{ @@ -55,19 +55,39 @@ func TestNodeManager(t *testing.T) { }, } + confWithFailover := config.Config{ + VirtualStorages: virtualStorages, + FailoverEnabled: 
true, + } + confWithoutFailover := config.Config{ + VirtualStorages: virtualStorages, + FailoverEnabled: false, + } + _, srv0, cancel0 := newHealthServer(t, internalSocket0) defer cancel0() _, _, cancel1 := newHealthServer(t, internalSocket1) defer cancel1() - nm, err := NewNodeManager(log.Default(), virtualStorages) + nm, err := NewNodeManager(log.Default(), confWithFailover) require.NoError(t, err) - _, err = nm.GetShard("virtual-storage-0") - require.Error(t, ErrPrimaryNotHealthy, err) + nmWithoutFailover, err := NewNodeManager(log.Default(), confWithoutFailover) + require.NoError(t, err) nm.Start(1*time.Millisecond, 5*time.Second) + nmWithoutFailover.Start(1*time.Millisecond, 5*time.Second) + + _, err = nm.GetShard("virtual-storage-0") + require.NoError(t, err) + + shardWithoutFailover, err := nmWithoutFailover.GetShard("virtual-storage-0") + require.NoError(t, err) + primaryWithoutFailover, err := shardWithoutFailover.GetPrimary() + require.NoError(t, err) + secondariesWithoutFailover, err := shardWithoutFailover.GetSecondaries() + require.NoError(t, err) shard, err := nm.GetShard("virtual-storage-0") require.NoError(t, err) @@ -76,6 +96,13 @@ func TestNodeManager(t *testing.T) { secondaries, err := shard.GetSecondaries() require.NoError(t, err) + // shard without failover and shard with failover should be the same + require.Equal(t, primaryWithoutFailover.GetStorage(), primary.GetStorage()) + require.Equal(t, primaryWithoutFailover.GetAddress(), primary.GetAddress()) + require.Len(t, secondaries, 1) + require.Equal(t, secondariesWithoutFailover[0].GetStorage(), secondaries[0].GetStorage()) + require.Equal(t, secondariesWithoutFailover[0].GetAddress(), secondaries[0].GetAddress()) + require.Equal(t, virtualStorages[0].Nodes[0].Storage, primary.GetStorage()) require.Equal(t, virtualStorages[0].Nodes[0].Address, primary.GetAddress()) require.Len(t, secondaries, 1) @@ -88,6 +115,13 @@ func TestNodeManager(t *testing.T) { // since the primary is unhealthy, we 
expect checkShards to demote primary to secondary, and promote the healthy // secondary to primary + shardWithoutFailover, err = nmWithoutFailover.GetShard("virtual-storage-0") + require.NoError(t, err) + primaryWithoutFailover, err = shardWithoutFailover.GetPrimary() + require.NoError(t, err) + secondariesWithoutFailover, err = shardWithoutFailover.GetSecondaries() + require.NoError(t, err) + shard, err = nm.GetShard("virtual-storage-0") require.NoError(t, err) primary, err = shard.GetPrimary() @@ -95,6 +129,20 @@ func TestNodeManager(t *testing.T) { secondaries, err = shard.GetSecondaries() require.NoError(t, err) + // shard without failover and shard with failover should not be the same + require.NotEqual(t, primaryWithoutFailover.GetStorage(), primary.GetStorage()) + require.NotEqual(t, primaryWithoutFailover.GetAddress(), primary.GetAddress()) + require.NotEqual(t, secondariesWithoutFailover[0].GetStorage(), secondaries[0].GetStorage()) + require.NotEqual(t, secondariesWithoutFailover[0].GetAddress(), secondaries[0].GetAddress()) + + // shard without failover should still match the config + require.Equal(t, virtualStorages[0].Nodes[0].Storage, primaryWithoutFailover.GetStorage()) + require.Equal(t, virtualStorages[0].Nodes[0].Address, primaryWithoutFailover.GetAddress()) + require.Len(t, secondaries, 1) + require.Equal(t, virtualStorages[0].Nodes[1].Storage, secondariesWithoutFailover[0].GetStorage()) + require.Equal(t, virtualStorages[0].Nodes[1].Address, secondariesWithoutFailover[0].GetAddress()) + + // shard with failover should have promoted a secondary to primary and demoted the primary to a secondary require.Equal(t, virtualStorages[0].Nodes[1].Storage, primary.GetStorage()) require.Equal(t, virtualStorages[0].Nodes[1].Address, primary.GetAddress()) require.Len(t, secondaries, 1) |