fix: RED metrics

This commit is contained in:
Michael Barz
2023-12-15 23:28:16 +01:00
parent 8fb5864ca2
commit 06c18f3f65
5 changed files with 89 additions and 17 deletions
+11
View File
@@ -0,0 +1,11 @@
Bugfix: Fix RED metrics on the metrics endpoint
We connected some metrics to the metrics endpoint to support the RED method for monitoring microservices.
- Request Rate: The number of requests per second. The total count of requests is available under `ocis_proxy_requests_total`.
- Error Rate: The number of failed requests per second. The total count of failed requests is available under `ocis_proxy_errors_total`.
- Duration: The amount of time each request takes. The duration of all requests is available under `ocis_proxy_duration_seconds`. This is a histogram metric, so it also provides information about the distribution of request durations.
The metrics are available under the following path: `PROXY_DEBUG_ADDR/metrics` in a Prometheus-compatible format, and the endpoint may be secured by `PROXY_DEBUG_TOKEN`.
https://github.com/owncloud/ocis/pull/7994
+32
View File
@@ -116,3 +116,35 @@ When using the ocis IDP service instead of an external IDP:
- Use the environment variable `OCIS_URL` to define how ocis can be accessed, mandatory use `https` as protocol for the URL.
- If no reverse proxy is set up, the `PROXY_TLS` environment variable **must** be set to `true` because the embedded `libreConnect` shipped with the IDP service has a hard check if the connection is on TLS and uses the HTTPS protocol. If this mismatches, an error will be logged and no connection from the client can be established.
- `PROXY_TLS` **can** be set to `false` if a reverse proxy is used and the https connection is terminated at the reverse proxy. When setting to `false`, the communication between the reverse proxy and ocis is not secured. If set to `true`, you must provide certificates.
## Metrics
The proxy service in ocis can expose metrics in the Prometheus format. The metrics are exposed on the `/metrics` endpoint. There are two ways to run the ocis proxy service, and the chosen mode affects the number of metrics exposed.
### 1) Single Process Mode
In the single process mode, all ocis services are running inside a single process. This is the default mode when using the `ocis server` command to start the services. In this mode, the proxy service exposes metrics about the proxy service itself and about the ocis services it is proxying. This is due to the nature of the prometheus registry which is a singleton. The metrics exposed by the proxy service itself are prefixed with `ocis_proxy_` and the metrics exposed by other ocis services are prefixed with `ocis_<service-name>_`.
### 2) Standalone Mode
In this mode, the proxy service only exposes its own metrics. The metrics of the other ocis services are exposed on their own metrics endpoints.
### Available Metrics
The following metrics are exposed by the proxy service:
| Metric Name | Description | Labels |
|----------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------|
| `ocis_proxy_requests_total` | [Counter](https://prometheus.io/docs/tutorials/understanding_metric_types/#counter) metric which reports the total number of HTTP requests. | `method`: HTTP method of the request |
| `ocis_proxy_errors_total` | [Counter](https://prometheus.io/docs/tutorials/understanding_metric_types/#counter) metric which reports the total number of HTTP requests which have failed. That counts all response codes >= 500 | `method`: HTTP method of the request |
| `ocis_proxy_duration_seconds` | [Histogram](https://prometheus.io/docs/tutorials/understanding_metric_types/#histogram) of the time (in seconds) each request took. A histogram metric uses buckets to count the number of events that fall into each bucket. | `method`: HTTP method of the request |
| `ocis_proxy_build_info{versions}` | A metric with a constant `1` value labeled by version, exposing the version of the ocis proxy service. | `versions`: Build version of the proxy |
### Prometheus Configuration
The following is an example Prometheus configuration for the single process mode. It assumes that the debug endpoint of the proxy service is configured to bind on all interfaces (`PROXY_DEBUG_ADDR=0.0.0.0:9205`) and that the proxy is available via the `ocis` service name (typically in docker-compose). Prometheus scrapes the `/metrics` path by default, here every 15 seconds.
```yaml
global:
  scrape_interval: 15s
scrape_configs:
  - job_name: ocis_proxy
    static_configs:
      - targets: ["ocis:9205"]
```
+3 -2
View File
@@ -129,7 +129,7 @@ func Server(cfg *config.Config) *cli.Command {
}
{
middlewares := loadMiddlewares(ctx, logger, cfg, userInfoCache, traceProvider)
middlewares := loadMiddlewares(ctx, logger, cfg, userInfoCache, traceProvider, *m)
server, err := proxyHTTP.Server(
proxyHTTP.Handler(lh.handler()),
proxyHTTP.Logger(logger),
@@ -269,7 +269,7 @@ func (h *StaticRouteHandler) backchannelLogout(w http.ResponseWriter, r *http.Re
render.JSON(w, r, nil)
}
func loadMiddlewares(ctx context.Context, logger log.Logger, cfg *config.Config, userInfoCache microstore.Store, traceProvider trace.TracerProvider) alice.Chain {
func loadMiddlewares(ctx context.Context, logger log.Logger, cfg *config.Config, userInfoCache microstore.Store, traceProvider trace.TracerProvider, metrics metrics.Metrics) alice.Chain {
rolesClient := settingssvc.NewRoleService("com.owncloud.api.settings", cfg.GrpcClient)
policiesProviderClient := policiessvc.NewPoliciesProviderService("com.owncloud.api.policies", cfg.GrpcClient)
gatewaySelector, err := pool.GatewaySelector(cfg.Reva.Address, append(cfg.Reva.GetRevaOptions(), pool.WithRegistry(registry.GetRegistry()))...)
@@ -381,6 +381,7 @@ func loadMiddlewares(ctx context.Context, logger log.Logger, cfg *config.Config,
),
middleware.Tracer(traceProvider),
pkgmiddleware.TraceContext,
middleware.Instrumenter(metrics),
chimiddleware.RealIP,
chimiddleware.RequestID,
middleware.AccessLog(logger),
+15 -15
View File
@@ -14,8 +14,8 @@ var (
// Metrics defines the available metrics of this service.
// Every field holds a Prometheus collector vector; New builds them and
// registers them with the default Prometheus registry.
type Metrics struct {
// NOTE(review): Counter and Latency appear next to Requests/Errors/Duration in
// this view; presumably they are the pre-change fields rendered by the diff
// and are replaced by the three fields below — confirm against the file.
Counter *prometheus.CounterVec
Latency *prometheus.SummaryVec
// Requests counts processed requests ("requests_total"), labelled by "method".
Requests *prometheus.CounterVec
// Errors counts requests that ran into errors ("errors_total"), labelled by "method".
Errors *prometheus.CounterVec
// Duration is a histogram of request durations in seconds ("duration_seconds"), labelled by "method".
Duration *prometheus.HistogramVec
// BuildInfo is a gauge vector labelled by "versions".
BuildInfo *prometheus.GaugeVec
}
@@ -23,24 +23,24 @@ type Metrics struct {
// New initializes the available metrics.
func New() *Metrics {
m := &Metrics{
Counter: prometheus.NewCounterVec(prometheus.CounterOpts{
Requests: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "proxy_total",
Help: "How many proxy requests processed",
}, []string{}),
Latency: prometheus.NewSummaryVec(prometheus.SummaryOpts{
Name: "requests_total",
Help: "How many requests processed in total",
}, []string{"method"}),
Errors: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "proxy_latency_microseconds",
Help: "proxy request latencies in microseconds",
}, []string{}),
Name: "errors_total",
Help: "How many requests run into errors",
}, []string{"method"}),
Duration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: Namespace,
Subsystem: Subsystem,
Name: "proxy_duration_seconds",
Help: "proxy method request time in seconds",
}, []string{}),
Name: "duration_seconds",
Help: "request duration in seconds",
}, []string{"method"}),
BuildInfo: prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: Namespace,
Subsystem: Subsystem,
@@ -49,8 +49,8 @@ func New() *Metrics {
}, []string{"versions"}),
}
_ = prometheus.Register(m.Counter)
_ = prometheus.Register(m.Latency)
_ = prometheus.Register(m.Requests)
_ = prometheus.Register(m.Errors)
_ = prometheus.Register(m.Duration)
_ = prometheus.Register(m.BuildInfo)
return m
+28
View File
@@ -0,0 +1,28 @@
package middleware
import (
"net/http"
"time"
"github.com/go-chi/chi/v5/middleware"
"github.com/owncloud/ocis/v2/services/proxy/pkg/metrics"
"github.com/prometheus/client_golang/prometheus"
)
// Instrumenter provides a middleware that records RED metrics (request rate,
// errors, duration) for every request passing through it.
//
// For each request it increments m.Requests before calling next, observes the
// elapsed time in seconds on m.Duration once next returns, and increments
// m.Errors when the status recorded by the wrapped response writer is >= 500.
// All three collectors are addressed with a single "method" label value.
func Instrumenter(m metrics.Metrics) func(next http.Handler) http.Handler {
	return func(next http.Handler) http.Handler {
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			start := time.Now()
			// Wrap the writer so the status code written downstream can be read back.
			ww := middleware.NewWrapResponseWriter(w, r.ProtoMajor)
			// Resolve the label once; the raw method is client-controlled and
			// must not be used as a label value directly.
			method := instrumenterMethodLabel(r.Method)

			m.Requests.WithLabelValues(method).Inc()
			next.ServeHTTP(ww, r)
			// Duration.Seconds already returns a float64, no conversion needed.
			m.Duration.WithLabelValues(method).Observe(time.Since(start).Seconds())
			if ww.Status() >= 500 {
				m.Errors.WithLabelValues(method).Inc()
			}
		})
	}
}

// instrumenterMethodLabel maps a request method to a bounded set of label
// values. Known HTTP and WebDAV methods are returned unchanged; anything else
// is reported as "OTHER" so that clients sending arbitrary method tokens
// cannot create an unbounded number of time series. Extend the list if the
// proxy has to distinguish further methods.
func instrumenterMethodLabel(method string) string {
	switch method {
	case http.MethodGet, http.MethodHead, http.MethodPost, http.MethodPut,
		http.MethodPatch, http.MethodDelete, http.MethodConnect,
		http.MethodOptions, http.MethodTrace:
		return method
	case "PROPFIND", "PROPPATCH", "MKCOL", "COPY", "MOVE", "LOCK", "UNLOCK", "REPORT":
		return method
	default:
		return "OTHER"
	}
}