diff options
-rw-r--r-- | app.go | 19 | ||||
-rw-r--r-- | app_test.go | 22 | ||||
-rw-r--r-- | internal/logging/logging.go | 1 | ||||
-rw-r--r-- | metrics/metrics.go | 9 |
4 files changed, 50 insertions, 1 deletions
@@ -289,6 +289,7 @@ func (a *theApp) buildHandlerPipeline() (http.Handler, error) { if a.config.General.PropagateCorrelationID { correlationOpts = append(correlationOpts, correlation.WithPropagation()) } + handler = handlePanicMiddleware(handler) handler = correlation.InjectCorrelationID(handler, correlationOpts...) // This MUST be the last handler! @@ -499,3 +500,21 @@ func (a *theApp) TLSConfig() (*cryptotls.Config, error) { return tls.Create(a.config.General.RootCertificate, a.config.General.RootKey, a.ServeTLS, a.config.General.InsecureCiphers, a.config.TLS.MinVersion, a.config.TLS.MaxVersion) } + +// handlePanicMiddleware logs and captures the recover() information from any panic +func handlePanicMiddleware(handler http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer func() { + i := recover() + if i != nil { + err := fmt.Errorf("panic trace: %v", i) + metrics.PanicRecoveredCount.Inc() + logging.LogRequest(r).WithError(err).Error("recovered from panic") + errortracking.Capture(err, errortracking.WithRequest(r), errortracking.WithContext(r.Context())) + httperrors.Serve500(w) + } + }() + + handler.ServeHTTP(w, r) + }) +} diff --git a/app_test.go b/app_test.go index 3b476bdb..db4d9d67 100644 --- a/app_test.go +++ b/app_test.go @@ -9,11 +9,13 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" "gitlab.com/gitlab-org/gitlab-pages/internal/config" "gitlab.com/gitlab-org/gitlab-pages/internal/request" "gitlab.com/gitlab-org/gitlab-pages/internal/source/gitlab" + "gitlab.com/gitlab-org/gitlab-pages/metrics" ) func Test_setRequestScheme(t *testing.T) { @@ -125,3 +127,23 @@ func TestHealthCheckMiddleware(t *testing.T) { }) } } + +func TestHandlePanicMiddleware(t *testing.T) { + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + panic("on purpose") + }) + + ww := httptest.NewRecorder() + rr := httptest.NewRequest(http.MethodGet, "https://gitlab.io", nil) + + handler := handlePanicMiddleware(next) + + handler.ServeHTTP(ww, rr) + + res := ww.Result() + res.Body.Close() + + require.Equal(t, http.StatusInternalServerError, res.StatusCode) + counterCount := testutil.ToFloat64(metrics.PanicRecoveredCount) + require.Equal(t, float64(1), counterCount, "metric not updated") +} diff --git a/internal/logging/logging.go b/internal/logging/logging.go index 4ffbeb4b..ffa42370 100644 --- a/internal/logging/logging.go +++ b/internal/logging/logging.go @@ -4,7 +4,6 @@ import ( "net/http" "github.com/sirupsen/logrus" - "gitlab.com/gitlab-org/labkit/correlation" "gitlab.com/gitlab-org/labkit/log" diff --git a/metrics/metrics.go b/metrics/metrics.go index 23962dc4..680f7a05 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -185,6 +185,14 @@ var ( }, ) + // PanicRecoveredCount measures the number of times GitLab Pages has recovered from a panic + PanicRecoveredCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "gitlab_pages_panic_recovered_count", + Help: "The number of panics the service has recovered from.", + }, + ) + // RateLimitSourceIPCacheRequests is the number of cache hits/misses RateLimitSourceIPCacheRequests = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -239,6 +247,7 @@ func MustRegister() { LimitListenerMaxConns, LimitListenerConcurrentConns, LimitListenerWaitingConns, + PanicRecoveredCount, RateLimitSourceIPCacheRequests, RateLimitSourceIPCachedEntries, RateLimitSourceIPBlockedCount, |