From 4704cedd3bce8676227858aff3ea0d1692fecec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Duffeck?= Date: Thu, 10 Jul 2025 13:55:38 +0200 Subject: [PATCH] Add metrics for the postprocessing duration --- services/postprocessing/pkg/metrics/metrics.go | 8 ++++++++ .../pkg/postprocessing/postprocessing.go | 1 + services/postprocessing/pkg/service/service.go | 18 +++++++++++++----- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/services/postprocessing/pkg/metrics/metrics.go b/services/postprocessing/pkg/metrics/metrics.go index 077a04018f..4391a27d32 100644 --- a/services/postprocessing/pkg/metrics/metrics.go +++ b/services/postprocessing/pkg/metrics/metrics.go @@ -22,6 +22,7 @@ type Metrics struct { EventsRedelivered prometheus.Gauge InProgress prometheus.Gauge Finished *prometheus.CounterVec + Duration *prometheus.HistogramVec } // New initializes the available metrics. @@ -63,6 +64,13 @@ func New() *Metrics { Name: "finished", Help: "Number of finished postprocessing events", }, []string{"status"}), + Duration: promauto.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: Namespace, + Subsystem: Subsystem, + Name: "duration_seconds", + Help: "Duration of postprocessing operations in seconds", + Buckets: []float64{0.1, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 300, 600, 1200}, + }, []string{"status"}), } return m diff --git a/services/postprocessing/pkg/postprocessing/postprocessing.go b/services/postprocessing/pkg/postprocessing/postprocessing.go index eac1a5b515..bc6db76cb2 100644 --- a/services/postprocessing/pkg/postprocessing/postprocessing.go +++ b/services/postprocessing/pkg/postprocessing/postprocessing.go @@ -24,6 +24,7 @@ type Postprocessing struct { Failures int InitiatorID string Finished bool + StartTime time.Time config config.Postprocessing } diff --git a/services/postprocessing/pkg/service/service.go b/services/postprocessing/pkg/service/service.go index c9ca40d3f9..768bece9a3 100644 --- a/services/postprocessing/pkg/service/service.go +++ b/services/postprocessing/pkg/service/service.go @@ -157,6 +157,7 @@ func (pps *PostprocessingService) processEvent(e raw.Event) error { Steps: pps.steps, InitiatorID: e.InitiatorID, ImpersonatingUser: ev.ImpersonatingUser, + StartTime: time.Now(), } pps.metrics.InProgress.Inc() next = pp.Init(ev) @@ -210,19 +211,26 @@ func (pps *PostprocessingService) processEvent(e raw.Event) error { }) case events.UploadReady: pps.metrics.InProgress.Dec() + // the upload failed - let's keep it around for a while - but mark it as finished + pp, err = pps.getPP(pps.store, ev.UploadID) + if err != nil { + pps.log.Error().Str("uploadID", ev.UploadID).Err(err).Msg("cannot get upload") + return fmt.Errorf("%w: cannot get upload", ErrEvent) + } + if ev.Failed { pps.metrics.Finished.WithLabelValues("failed", string(pp.Status.Outcome)).Inc() - // the upload failed - let's keep it around for a while - but mark it as finished - pp, err = pps.getPP(pps.store, ev.UploadID) - if err != nil { - pps.log.Error().Str("uploadID", ev.UploadID).Err(err).Msg("cannot get upload") - return fmt.Errorf("%w: cannot get upload", ErrEvent) + if !pp.StartTime.IsZero() { + pps.metrics.Duration.WithLabelValues("failed").Observe(time.Since(pp.StartTime).Seconds()) } pp.Finished = true return storePP(pps.store, pp) } pps.metrics.Finished.WithLabelValues("succeeded").Inc() + if !pp.StartTime.IsZero() { + pps.metrics.Duration.WithLabelValues("succeeded").Observe(time.Since(pp.StartTime).Seconds()) + } // the storage provider thinks the upload is done - so no need to keep it any more if err := pps.store.Delete(ev.UploadID); err != nil { pps.log.Error().Str("uploadID", ev.UploadID).Err(err).Msg("cannot delete upload")