diff options
author | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-06-21 11:31:08 +0300 |
---|---|---|
committer | Patrick Steinhardt <psteinhardt@gitlab.com> | 2021-09-01 14:52:33 +0300 |
commit | c3c9d228477bcd333d8063b5a11fa662076871b4 (patch) | |
tree | 3f6352072732eaa1f2192a20edab296af3a003d9 | |
parent | 930d142ba43fa068893945ed3d858b7cbdfd26ce (diff) |
coordinator: Reflect primary's dirty-status when logging repl cause
With commit d87747c82 (Consider primary modified only if a
subtransaction was committed, 2021-05-14), we have changed replication
logic to only trigger in case the primary is considered to have been
dirtied. This was done such that we don't need to schedule replication
jobs to secondaries in cases where we're sure that no user-visible
change was performed anyway. The preexisting logging statements which
inform about replication reasons haven't been adapted though, making
it hard to see whether replication jobs were actually created or not
based on the dirty status.
Improve the situation by discarding log messages in case the primary has
not been dirtied.
(cherry picked from commit 3664de9fc2472c320c2be2e435e9c6c2c45eafdd)
-rw-r--r-- | internal/praefect/coordinator.go | 23 |
1 files changed, 16 insertions, 7 deletions
diff --git a/internal/praefect/coordinator.go b/internal/praefect/coordinator.go index 700026915..6a9c9b090 100644 --- a/internal/praefect/coordinator.go +++ b/internal/praefect/coordinator.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io/ioutil" "sync" "time" @@ -795,15 +796,23 @@ func getUpdatedAndOutdatedSecondaries( primaryDirtied = transaction.DidCommitAnySubtransaction() || (transaction.CountSubtransactions() == 0 && primaryErr == nil) + // If the primary wasn't dirtied, then we never replicate any changes. While this is + // duplicates logic defined elsewhere, it's probably good enough given that we only talk + // about metrics here. recordReplication := func(reason string, replicationCount int) { - // If the primary wasn't dirtied, then we never replicate any changes. While this is - // duplicates logic defined elsewhere, it's probably good enough given that we only - // talk about metrics here. if primaryDirtied && replicationCount > 0 { replicationCountMetric.WithLabelValues(reason).Add(float64(replicationCount)) } } + // Same as above, we discard log entries in case the primary wasn't dirtied. + logReplication := ctxlogrus.Extract(ctx) + if !primaryDirtied { + discardLogger := logrus.New() + discardLogger.Out = ioutil.Discard + logReplication = logrus.NewEntry(discardLogger) + } + // Replication targets were not added to the transaction, most likely because they are // either not healthy or out of date. We thus need to make sure to create replication jobs // for them. @@ -813,7 +822,7 @@ func getUpdatedAndOutdatedSecondaries( // If the primary errored, then we need to assume that it has modified on-disk state and // thus need to replicate those changes to secondaries. if primaryErr != nil { - ctxlogrus.Extract(ctx).WithError(primaryErr).Info("primary failed transaction") + logReplication.WithError(primaryErr).Info("primary failed transaction") outdated = append(outdated, routerNodesToStorages(route.Secondaries)...) recordReplication("primary-failed", len(route.Secondaries)) return @@ -824,7 +833,7 @@ func getUpdatedAndOutdatedSecondaries( // no changes were done and the nodes hit an error prior to voting. If the primary processed // the RPC successfully, we assume the RPC is not correctly voting and replicate everywhere. if transaction.CountSubtransactions() == 0 { - ctxlogrus.Extract(ctx).Info("transaction did not create subtransactions") + logReplication.Info("transaction did not create subtransactions") outdated = append(outdated, routerNodesToStorages(route.Secondaries)...) recordReplication("no-votes", len(route.Secondaries)) return @@ -834,7 +843,7 @@ func getUpdatedAndOutdatedSecondaries( // safe route and just replicate to all secondaries. nodeStates, err := transaction.State() if err != nil { - ctxlogrus.Extract(ctx).WithError(err).Error("could not get transaction state") + logReplication.WithError(err).Error("could not get transaction state") outdated = append(outdated, routerNodesToStorages(route.Secondaries)...) recordReplication("missing-tx-state", len(route.Secondaries)) return @@ -845,7 +854,7 @@ func getUpdatedAndOutdatedSecondaries( // but it's what we got. So in order to ensure a consistent state, we need to replicate. if state := nodeStates[route.Primary.Storage]; state != transactions.VoteCommitted { if state == transactions.VoteFailed { - ctxlogrus.Extract(ctx).Error("transaction: primary failed vote") + logReplication.Error("transaction: primary failed vote") } outdated = append(outdated, routerNodesToStorages(route.Secondaries)...) recordReplication("primary-not-committed", len(route.Secondaries)) |