mirror of
https://github.com/PrivateCaptcha/PrivateCaptcha.git
synced 2026-02-08 23:09:11 -06:00
323 lines
9.6 KiB
Go
323 lines
9.6 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"context"
|
|
"log/slog"
|
|
"net/http"
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/PrivateCaptcha/PrivateCaptcha/pkg/common"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
|
"github.com/rs/xid"
|
|
prometheus_metrics "github.com/slok/go-http-metrics/metrics/prometheus"
|
|
"github.com/slok/go-http-metrics/middleware"
|
|
"github.com/slok/go-http-metrics/middleware/std"
|
|
)
|
|
|
|
// Metric namespaces. NewService also passes each of them as the Service
// value of the matching HTTP metrics middleware.
const (
	MetricsNamespaceServer = "server"
	MetricsNamespaceAPI    = "api"
	MetricsNamespaceCDN    = "cdn"
	MetricsNamespacePortal = "portal"
)

// Subsystems and label names used by the custom collectors of this package.
const (
	puzzleMetricsSubsystem   = "puzzle"
	platformMetricsSubsystem = "platform"
	apiMetricsSubsystem      = "api"

	userIDLabel = "user_id"
	stubLabel   = "stub"
	resultLabel = "result"
)

// The labels below are copied from go-http-metrics' prometheus.go because
// that package does not expose them publicly.
const (
	statusCodeLabel = "code"
	methodLabel     = "method"
	handlerIDLabel  = "handler"
	serviceLabel    = "service"
	typeLabel       = "type"
)
|
|
|
|
type Service struct {
|
|
Registry *prometheus.Registry
|
|
fineAPIMiddleware middleware.Middleware
|
|
finePortalMiddleware middleware.Middleware
|
|
coarseServerMiddleware middleware.Middleware
|
|
coarseCDNMiddleware middleware.Middleware
|
|
portalErrorCounter *prometheus.CounterVec
|
|
apiErrorCounter *prometheus.CounterVec
|
|
puzzleCounter *prometheus.CounterVec
|
|
verifyCounter *prometheus.CounterVec
|
|
dropCounter *prometheus.CounterVec
|
|
hitRatioGauge *prometheus.GaugeVec
|
|
clickhouseHealthGauge *prometheus.GaugeVec
|
|
postgresHealthGauge *prometheus.GaugeVec
|
|
}
|
|
|
|
var _ common.PlatformMetrics = (*Service)(nil)
|
|
var _ common.APIMetrics = (*Service)(nil)
|
|
var _ common.PortalMetrics = (*Service)(nil)
|
|
|
|
func traceID() string {
|
|
return xid.New().String()
|
|
}
|
|
|
|
func Logged(h http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
t := time.Now()
|
|
ctx, _ := common.TraceContextFunc(r.Context(), traceID)
|
|
|
|
// NOTE: these data (path, method, time) are now available as prometheus metrics
|
|
slog.Log(ctx, common.LevelTrace, "Started request", "path", r.URL.Path, "method", r.Method)
|
|
defer func() {
|
|
slog.Log(ctx, common.LevelTrace, "Finished request", "path", r.URL.Path, "method", r.Method,
|
|
"duration", time.Since(t).Milliseconds())
|
|
}()
|
|
|
|
h.ServeHTTP(w, r.WithContext(ctx))
|
|
})
|
|
}
|
|
|
|
func Traced(h http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
ctx, tid := common.TraceContextFunc(r.Context(), traceID)
|
|
headers := w.Header()
|
|
headers[common.HeaderTraceID] = []string{tid}
|
|
h.ServeHTTP(w, r.WithContext(ctx))
|
|
})
|
|
}
|
|
|
|
func NewService() *Service {
|
|
reg := prometheus.NewRegistry()
|
|
reg.MustRegister(
|
|
collectors.NewGoCollector(),
|
|
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
|
|
)
|
|
|
|
puzzleCounter := prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: MetricsNamespaceAPI,
|
|
Subsystem: puzzleMetricsSubsystem,
|
|
Name: "create_total",
|
|
Help: "Total number of puzzles created",
|
|
},
|
|
[]string{userIDLabel},
|
|
)
|
|
reg.MustRegister(puzzleCounter)
|
|
|
|
verifyCounter := prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: MetricsNamespaceAPI,
|
|
Subsystem: puzzleMetricsSubsystem,
|
|
Name: "verify_total",
|
|
Help: "Total number of puzzle verifications",
|
|
},
|
|
[]string{stubLabel, userIDLabel, resultLabel},
|
|
)
|
|
reg.MustRegister(verifyCounter)
|
|
|
|
portalErrorCounter := prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: "fine", // this is the same as fine http metrics below to match go-http-metrics logic
|
|
Subsystem: "http",
|
|
Name: "error_total",
|
|
Help: "Total number of Portal HTTP errors",
|
|
},
|
|
[]string{handlerIDLabel, statusCodeLabel, methodLabel, serviceLabel},
|
|
)
|
|
reg.MustRegister(portalErrorCounter)
|
|
|
|
apiErrorCounter := prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: "fine", // this is the same as fine http metrics below to match go-http-metrics logic
|
|
Subsystem: apiMetricsSubsystem,
|
|
Name: "error_total",
|
|
Help: "Total number of API specific errors",
|
|
},
|
|
[]string{handlerIDLabel, statusCodeLabel, methodLabel, serviceLabel},
|
|
)
|
|
reg.MustRegister(apiErrorCounter)
|
|
|
|
eventDropCounter := prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: MetricsNamespaceServer,
|
|
Subsystem: platformMetricsSubsystem,
|
|
Name: "drop_total",
|
|
Help: "Total number of events dropped",
|
|
},
|
|
[]string{typeLabel},
|
|
)
|
|
reg.MustRegister(eventDropCounter)
|
|
|
|
clickhouseHealthGauge := prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespaceServer,
|
|
Subsystem: platformMetricsSubsystem,
|
|
Name: "health_clickhouse",
|
|
Help: "Health status of ClickHouse",
|
|
},
|
|
[]string{},
|
|
)
|
|
reg.MustRegister(clickhouseHealthGauge)
|
|
|
|
postgresHealthGauge := prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespaceServer,
|
|
Subsystem: platformMetricsSubsystem,
|
|
Name: "health_postgres",
|
|
Help: "Health status of Postgres",
|
|
},
|
|
[]string{},
|
|
)
|
|
reg.MustRegister(postgresHealthGauge)
|
|
|
|
hitRatioGauge := prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: MetricsNamespaceServer,
|
|
Subsystem: platformMetricsSubsystem,
|
|
Name: "cache_hit_ratio",
|
|
Help: "In-memory cache hit ratio",
|
|
},
|
|
[]string{},
|
|
)
|
|
reg.MustRegister(hitRatioGauge)
|
|
|
|
fineRecorder := prometheus_metrics.NewRecorder(prometheus_metrics.Config{
|
|
Prefix: "fine",
|
|
Registry: reg,
|
|
DurationBuckets: []float64{.05, .1, .25, .5, 1, 2.5},
|
|
})
|
|
|
|
coarseRecorder := prometheus_metrics.NewRecorder(prometheus_metrics.Config{
|
|
Prefix: "coarse",
|
|
Registry: reg,
|
|
DurationBuckets: []float64{.05, .1, .5, 1, 2.5},
|
|
})
|
|
|
|
return &Service{
|
|
Registry: reg,
|
|
fineAPIMiddleware: middleware.New(middleware.Config{
|
|
// this is added as Service label
|
|
Service: MetricsNamespaceAPI,
|
|
DisableMeasureSize: true,
|
|
Recorder: fineRecorder,
|
|
}),
|
|
finePortalMiddleware: middleware.New(middleware.Config{
|
|
// this is added as Service label
|
|
Service: MetricsNamespacePortal,
|
|
DisableMeasureSize: true,
|
|
Recorder: fineRecorder,
|
|
}),
|
|
coarseServerMiddleware: middleware.New(middleware.Config{
|
|
// this is added as Service label
|
|
Service: MetricsNamespaceServer,
|
|
GroupedStatus: true,
|
|
DisableMeasureSize: true,
|
|
DisableMeasureInflight: true,
|
|
Recorder: coarseRecorder,
|
|
}),
|
|
coarseCDNMiddleware: middleware.New(middleware.Config{
|
|
// this is added as Service label
|
|
Service: MetricsNamespaceCDN,
|
|
GroupedStatus: true,
|
|
DisableMeasureSize: true,
|
|
DisableMeasureInflight: true,
|
|
Recorder: coarseRecorder,
|
|
}),
|
|
puzzleCounter: puzzleCounter,
|
|
verifyCounter: verifyCounter,
|
|
hitRatioGauge: hitRatioGauge,
|
|
clickhouseHealthGauge: clickhouseHealthGauge,
|
|
postgresHealthGauge: postgresHealthGauge,
|
|
portalErrorCounter: portalErrorCounter,
|
|
apiErrorCounter: apiErrorCounter,
|
|
dropCounter: eventDropCounter,
|
|
}
|
|
}
|
|
|
|
// this belongs only to APIMetrics interface (at this time)
|
|
func (s *Service) Handler(h http.Handler) http.Handler {
|
|
// handlerID is taken from the request path in this case
|
|
return std.Handler("", s.fineAPIMiddleware, h)
|
|
}
|
|
|
|
func (s *Service) CDNHandler(h http.Handler) http.Handler {
|
|
// handlerID is taken from the request path in this case
|
|
return std.Handler("", s.coarseCDNMiddleware, h)
|
|
}
|
|
|
|
func (s *Service) IgnoredHandler(h http.Handler) http.Handler {
|
|
return std.Handler("_ignored", s.coarseServerMiddleware, h)
|
|
}
|
|
|
|
func (s *Service) HandlerIDFunc(handlerIDFunc func() string) func(http.Handler) http.Handler {
|
|
return func(h http.Handler) http.Handler {
|
|
handlerID := handlerIDFunc()
|
|
return std.Handler(handlerID, s.finePortalMiddleware, h)
|
|
}
|
|
}
|
|
|
|
func (s *Service) ObserveApiError(handlerID string, method string, code int) {
|
|
s.apiErrorCounter.With(prometheus.Labels{
|
|
handlerIDLabel: handlerID,
|
|
statusCodeLabel: strconv.Itoa(code),
|
|
methodLabel: method,
|
|
serviceLabel: MetricsNamespaceAPI,
|
|
}).Inc()
|
|
}
|
|
|
|
func (s *Service) ObserveHttpError(handlerID string, method string, code int) {
|
|
s.portalErrorCounter.With(prometheus.Labels{
|
|
handlerIDLabel: handlerID,
|
|
statusCodeLabel: strconv.Itoa(code),
|
|
methodLabel: method,
|
|
serviceLabel: MetricsNamespacePortal,
|
|
}).Inc()
|
|
}
|
|
|
|
func (s *Service) ObservePuzzleCreated(userID int32) {
|
|
s.puzzleCounter.With(prometheus.Labels{
|
|
userIDLabel: strconv.Itoa(int(userID)),
|
|
}).Inc()
|
|
}
|
|
|
|
func (s *Service) ObserveCacheHitRatio(ratio float64) {
|
|
s.hitRatioGauge.With(prometheus.Labels{}).Set(ratio)
|
|
}
|
|
|
|
func (s *Service) ObservePuzzleVerified(userID int32, result string, isStub bool) {
|
|
s.verifyCounter.With(prometheus.Labels{
|
|
stubLabel: strconv.FormatBool(isStub),
|
|
resultLabel: result,
|
|
userIDLabel: strconv.Itoa(int(userID)),
|
|
}).Inc()
|
|
}
|
|
|
|
func (s *Service) ObserveEventDropped(eventType common.MetricEventType) {
|
|
s.dropCounter.With(prometheus.Labels{
|
|
typeLabel: string(eventType),
|
|
}).Inc()
|
|
}
|
|
|
|
func (s *Service) ObserveHealth(postgres, clickhouse bool) {
|
|
var chVal, pgVal float64
|
|
|
|
if postgres {
|
|
pgVal = 1
|
|
} else {
|
|
pgVal = 0
|
|
}
|
|
|
|
if clickhouse {
|
|
chVal = 1
|
|
} else {
|
|
chVal = 0
|
|
}
|
|
|
|
s.postgresHealthGauge.With(prometheus.Labels{}).Set(pgVal)
|
|
s.clickhouseHealthGauge.With(prometheus.Labels{}).Set(chVal)
|
|
}
|
|
|
|
func (s *Service) Setup(mux *http.ServeMux) {
|
|
mux.Handle(http.MethodGet+" /metrics", common.Recovered(promhttp.HandlerFor(s.Registry, promhttp.HandlerOpts{Registry: s.Registry})))
|
|
s.setupProfiling(context.TODO(), mux)
|
|
}
|