feat: add support for recovery threshold (#9428)

Abhishek Kumar Singh
2025-11-22 01:30:37 +05:30
committed by GitHub
parent 79988b448f
commit 52228bc6c4
17 changed files with 3421 additions and 793 deletions
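At a high level, the change introduces a recovery threshold: a firing alert whose latest sample no longer meets the target but still meets the recovery threshold is marked as recovering instead of being resolved, and it flips back to firing if the target is met again. Below is a minimal, self-contained sketch of that transition rule as it appears in the AnomalyRule, PromRule, and ThresholdRule Eval loops further down; the types are simplified stand-ins, not the actual model package.

```go
package main

import "fmt"

// Simplified stand-ins for the alert states in pkg/query-service/model.
type alertState string

const (
	stateFiring     alertState = "firing"
	stateRecovering alertState = "recovering"
)

// nextState mirrors the new transition added to the rule Eval loops:
// a firing alert whose latest sample only matched the recovery threshold
// (IsRecovering) moves to recovering, and a recovering alert whose sample
// matched the target threshold again (and is not missing) moves back to firing.
func nextState(current alertState, isRecovering, missing bool) alertState {
	switch {
	case current == stateFiring && isRecovering:
		return stateRecovering
	case current == stateRecovering && !isRecovering && !missing:
		return stateFiring
	default:
		return current
	}
}

func main() {
	fmt.Println(nextState(stateFiring, true, false))      // recovering
	fmt.Println(nextState(stateRecovering, false, false)) // firing
	fmt.Println(nextState(stateRecovering, true, false))  // recovering (unchanged)
}
```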

View File

@@ -86,7 +86,7 @@ go-run-enterprise: ## Runs the enterprise go backend server
SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN=tcp://127.0.0.1:9000 \
SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_CLUSTER=cluster \
go run -race \
$(GO_BUILD_CONTEXT_ENTERPRISE)/*.go
$(GO_BUILD_CONTEXT_ENTERPRISE)/*.go server
.PHONY: go-test
go-test: ## Runs go unit tests

View File

@@ -246,7 +246,9 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
continue
}
}
results, err := r.Threshold.ShouldAlert(*series, r.Unit())
results, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
ActiveAlerts: r.ActiveAlertsLabelFP(),
})
if err != nil {
return nil, err
}
@@ -296,7 +298,9 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
continue
}
}
results, err := r.Threshold.ShouldAlert(*series, r.Unit())
results, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
ActiveAlerts: r.ActiveAlertsLabelFP(),
})
if err != nil {
return nil, err
}
@@ -410,6 +414,7 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
GeneratorURL: r.GeneratorURL(),
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing,
IsRecovering: smpl.IsRecovering,
}
}
@@ -422,6 +427,9 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
alert.Value = a.Value
alert.Annotations = a.Annotations
// Update the recovering and missing state of existing alert
alert.IsRecovering = a.IsRecovering
alert.Missing = a.Missing
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
@@ -480,6 +488,30 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
Value: a.Value,
})
}
// We need to change a firing alert to recovering if the returned sample meets the recovery threshold
changeFiringToRecovering := a.State == model.StateFiring && a.IsRecovering
// We need to change a recovering alert back to firing if the returned sample meets the target threshold
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
// In either case we need to update the alert's state
if changeFiringToRecovering || changeRecoveringToFiring {
state := model.StateRecovering
if changeRecoveringToFiring {
state = model.StateFiring
}
a.State = state
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
}
currentState := r.State()

View File

@@ -12,9 +12,12 @@ import (
// AlertState denotes the state of an active alert.
type AlertState int
// The enum values are ordered by priority (lowest to highest).
// When determining overall rule state, higher numeric values take precedence.
const (
StateInactive AlertState = iota
StatePending
StateRecovering
StateFiring
StateNoData
StateDisabled
@@ -32,6 +35,8 @@ func (s AlertState) String() string {
return "nodata"
case StateDisabled:
return "disabled"
case StateRecovering:
return "recovering"
}
panic(errors.Errorf("unknown alert state: %d", s))
}
@@ -58,6 +63,8 @@ func (s *AlertState) UnmarshalJSON(b []byte) error {
*s = StateNoData
case "disabled":
*s = StateDisabled
case "recovering":
*s = StateRecovering
default:
*s = StateInactive
}
@@ -83,6 +90,8 @@ func (s *AlertState) Scan(value interface{}) error {
*s = StateNoData
case "disabled":
*s = StateDisabled
case "recovering":
*s = StateRecovering
}
return nil
}
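The enum ordering matters because, per the new comment above, the overall rule state is taken as the highest-priority state across active alerts; since StateFiring is declared after StateRecovering, a rule with one firing and one recovering alert still reports firing. A small sketch of that max-over-alerts idea, using my own simplified types rather than the actual State() implementation:

```go
package main

import "fmt"

// Simplified copy of the ordering in the diff: higher value, higher priority.
type alertState int

const (
	stateInactive alertState = iota
	statePending
	stateRecovering
	stateFiring
	stateNoData
)

func (s alertState) String() string {
	return [...]string{"inactive", "pending", "recovering", "firing", "nodata"}[s]
}

// overallState picks the highest-priority state, which is how a rule with
// several active alerts still reports "firing" while some alerts recover.
func overallState(states []alertState) alertState {
	max := stateInactive
	for _, s := range states {
		if s > max {
			max = s
		}
	}
	return max
}

func main() {
	fmt.Println(overallState([]alertState{stateRecovering, stateFiring}))  // firing
	fmt.Println(overallState([]alertState{statePending, stateRecovering})) // recovering
}
```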

View File

@@ -191,6 +191,26 @@ func (r *BaseRule) currentAlerts() []*ruletypes.Alert {
return alerts
}
// ActiveAlertsLabelFP returns a set of fingerprints for the currently active alerts.
// Each fingerprint is computed with QueryResultLables.Hash(); we use QueryResultLables
// rather than the alert labels because they are the raw labels taken from the sample.
// This is useful when we need to check whether an alert is still active based on the
// labels of an incoming sample.
func (r *BaseRule) ActiveAlertsLabelFP() map[uint64]struct{} {
r.mtx.Lock()
defer r.mtx.Unlock()
activeAlerts := make(map[uint64]struct{}, len(r.Active))
for _, alert := range r.Active {
if alert == nil || alert.QueryResultLables == nil {
continue
}
activeAlerts[alert.QueryResultLables.Hash()] = struct{}{}
}
return activeAlerts
}
func (r *BaseRule) EvalDelay() time.Duration {
return r.evalDelay
}
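ActiveAlertsLabelFP produces the map[uint64]struct{} that is later passed to Threshold.Eval as EvalData.ActiveAlerts and queried through EvalData.HasActiveAlert (shown further down). A self-contained sketch of that set pattern, with a stand-in fingerprint function in place of QueryResultLables.Hash():

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sort"
)

// fingerprint is a stand-in for labels.Labels.Hash(): it hashes the label set
// in sorted-key order so the result does not depend on map iteration order.
func fingerprint(lbls map[string]string) uint64 {
	keys := make([]string, 0, len(lbls))
	for k := range lbls {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	h := fnv.New64a()
	for _, k := range keys {
		h.Write([]byte(k))
		h.Write([]byte{0xff})
		h.Write([]byte(lbls[k]))
		h.Write([]byte{0xff})
	}
	return h.Sum64()
}

func main() {
	// Build the active-alert set the way ActiveAlertsLabelFP does:
	// one entry per active alert, keyed by the hash of its raw sample labels.
	active := map[uint64]struct{}{
		fingerprint(map[string]string{"service": "checkout", "severity": "critical"}): {},
	}

	// EvalData.HasActiveAlert is then a simple membership check.
	sample := map[string]string{"service": "checkout", "severity": "critical"}
	_, ok := active[fingerprint(sample)]
	fmt.Println(ok) // true
}
```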

View File

@@ -1,9 +1,10 @@
package rules
import (
"github.com/stretchr/testify/require"
"testing"
"github.com/stretchr/testify/require"
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
)
@@ -74,7 +75,7 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
_, err := test.rule.Threshold.ShouldAlert(*test.series, "")
_, err := test.rule.Threshold.Eval(*test.series, "", ruletypes.EvalData{})
require.NoError(t, err)
require.Equal(t, len(test.series.Points) >= test.rule.ruleCondition.RequiredNumPoints, test.shouldAlert)
})

View File

@@ -4,13 +4,14 @@ import (
"context"
"encoding/json"
"fmt"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"log/slog"
"sort"
"strings"
"sync"
"time"
"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
"go.uber.org/zap"
"github.com/go-openapi/strfmt"

View File

@@ -159,7 +159,9 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
continue
}
results, err := r.Threshold.ShouldAlert(toCommonSeries(series), r.Unit())
results, err := r.Threshold.Eval(toCommonSeries(series), r.Unit(), ruletypes.EvalData{
ActiveAlerts: r.ActiveAlertsLabelFP(),
})
if err != nil {
return nil, err
}
@@ -233,6 +235,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
Value: result.V,
GeneratorURL: r.GeneratorURL(),
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
IsRecovering: result.IsRecovering,
}
}
}
@@ -245,6 +248,9 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
alert.Value = a.Value
alert.Annotations = a.Annotations
// Update the recovering and missing state of existing alert
alert.IsRecovering = a.IsRecovering
alert.Missing = a.Missing
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
@@ -304,6 +310,29 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
})
}
// We need to change a firing alert to recovering if the returned sample meets the recovery threshold
changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
// We need to change a recovering alert back to firing if the returned sample meets the target threshold
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
// In either case we need to update the alert's state
if changeAlertingToRecovering || changeRecoveringToFiring {
state := model.StateRecovering
if changeRecoveringToFiring {
state = model.StateFiring
}
a.State = state
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
}
r.health = ruletypes.HealthGood
r.lastError = err

View File

@@ -23,7 +23,7 @@ func getVectorValues(vectors []ruletypes.Sample) []float64 {
return values
}
func TestPromRuleShouldAlert(t *testing.T) {
func TestPromRuleEval(t *testing.T) {
postableRule := ruletypes.PostableRule{
AlertName: "Test Rule",
AlertType: ruletypes.AlertTypeMetric,
@@ -696,7 +696,7 @@ func TestPromRuleShouldAlert(t *testing.T) {
assert.NoError(t, err)
}
resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values), rule.Unit())
resultVectors, err := rule.Threshold.Eval(toCommonSeries(c.values), rule.Unit(), ruletypes.EvalData{})
assert.NoError(t, err)
// Compare full result vector with expected vector

View File

@@ -24,6 +24,8 @@ type Rule interface {
HoldDuration() time.Duration
State() model.AlertState
ActiveAlerts() []*ruletypes.Alert
// ActiveAlertsLabelFP returns a set of active alert label fingerprints
ActiveAlertsLabelFP() map[uint64]struct{}
PreferredChannels() []string

View File

@@ -488,7 +488,9 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
continue
}
}
resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
resultSeries, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
ActiveAlerts: r.ActiveAlertsLabelFP(),
})
if err != nil {
return nil, err
}
@@ -565,7 +567,9 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
continue
}
}
resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
resultSeries, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
ActiveAlerts: r.ActiveAlertsLabelFP(),
})
if err != nil {
return nil, err
}
@@ -666,13 +670,14 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
// Links with timestamps should go in annotations since labels
// is used alert grouping, and we want to group alerts with the same
// label set, but different timestamps, together.
if r.typ == ruletypes.AlertTypeTraces {
switch r.typ {
case ruletypes.AlertTypeTraces:
link := r.prepareLinksToTraces(ctx, ts, smpl.Metric)
if link != "" && r.hostFromSource() != "" {
r.logger.InfoContext(ctx, "adding traces link to annotations", "link", fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link))
annotations = append(annotations, labels.Label{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link)})
}
} else if r.typ == ruletypes.AlertTypeLogs {
case ruletypes.AlertTypeLogs:
link := r.prepareLinksToLogs(ctx, ts, smpl.Metric)
if link != "" && r.hostFromSource() != "" {
r.logger.InfoContext(ctx, "adding logs link to annotations", "link", fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link))
@@ -698,6 +703,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
GeneratorURL: r.GeneratorURL(),
Receivers: ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
Missing: smpl.IsMissing,
IsRecovering: smpl.IsRecovering,
}
}
@@ -711,6 +717,9 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
alert.Value = a.Value
alert.Annotations = a.Annotations
// Update the recovering and missing state of existing alert
alert.IsRecovering = a.IsRecovering
alert.Missing = a.Missing
if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
alert.Receivers = ruleReceiverMap[v]
}
@@ -735,6 +744,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
delete(r.Active, fp)
}
if a.State != model.StateInactive {
r.logger.DebugContext(ctx, "converting firing alert to inActive", "name", r.Name())
a.State = model.StateInactive
a.ResolvedAt = ts
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
@@ -752,6 +762,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
}
if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration {
r.logger.DebugContext(ctx, "converting pending alert to firing", "name", r.Name())
a.State = model.StateFiring
a.FiredAt = ts
state := model.StateFiring
@@ -769,6 +780,30 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
Value: a.Value,
})
}
// We need to change a firing alert to recovering if the returned sample meets the recovery threshold
changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
// We need to change a recovering alert back to firing if the returned sample meets the target threshold
changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
// In either case we need to update the alert's state
if changeAlertingToRecovering || changeRecoveringToFiring {
state := model.StateRecovering
if changeRecoveringToFiring {
state = model.StateFiring
}
a.State = state
r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
RuleID: r.ID(),
RuleName: r.Name(),
State: state,
StateChanged: true,
UnixMilli: ts.UnixMilli(),
Labels: model.LabelsString(labelsJSON),
Fingerprint: a.QueryResultLables.Hash(),
Value: a.Value,
})
}
}
currentState := r.State()

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -59,7 +59,8 @@ type Alert struct {
LastSentAt time.Time
ValidUntil time.Time
Missing bool
Missing bool
IsRecovering bool
}
func (a *Alert) NeedsSending(ts time.Time, resendDelay time.Duration) bool {

View File

@@ -621,10 +621,10 @@ func TestParseIntoRuleThresholdGeneration(t *testing.T) {
}
// Test that threshold can evaluate properly
vector, err := threshold.ShouldAlert(v3.Series{
vector, err := threshold.Eval(v3.Series{
Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds
Labels: map[string]string{"test": "label"},
}, "")
}, "", EvalData{})
if err != nil {
t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
@@ -698,20 +698,20 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
}
// Test with a value that should trigger both WARNING and CRITICAL thresholds
vector, err := threshold.ShouldAlert(v3.Series{
vector, err := threshold.Eval(v3.Series{
Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage
Labels: map[string]string{"service": "test"},
}, "")
}, "", EvalData{})
if err != nil {
t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
assert.Equal(t, 2, len(vector))
vector, err = threshold.ShouldAlert(v3.Series{
vector, err = threshold.Eval(v3.Series{
Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage
Labels: map[string]string{"service": "test"},
}, "")
}, "", EvalData{})
if err != nil {
t.Fatalf("Unexpected error in shouldAlert: %v", err)
}
@@ -719,7 +719,7 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
assert.Equal(t, 1, len(vector))
}
func TestAnomalyNegationShouldAlert(t *testing.T) {
func TestAnomalyNegationEval(t *testing.T) {
tests := []struct {
name string
ruleJSON []byte
@@ -1046,9 +1046,9 @@ func TestAnomalyNegationShouldAlert(t *testing.T) {
t.Fatalf("unexpected error from GetRuleThreshold: %v", err)
}
resultVector, err := ruleThreshold.ShouldAlert(tt.series, "")
resultVector, err := ruleThreshold.Eval(tt.series, "", EvalData{})
if err != nil {
t.Fatalf("unexpected error from ShouldAlert: %v", err)
t.Fatalf("unexpected error from Eval: %v", err)
}
shouldAlert := len(resultVector) > 0

View File

@@ -19,7 +19,11 @@ type Sample struct {
IsMissing bool
Target float64
// IsRecovering is true if the sample is part of a recovering alert.
IsRecovering bool
Target float64
RecoveryTarget *float64
TargetUnit string
}

View File

@@ -57,8 +57,28 @@ type RuleReceivers struct {
Name string `json:"name"`
}
// EvalData carries the additional inputs needed to evaluate the threshold rules.
type EvalData struct {
// ActiveAlerts is a map of active alert fingerprints
// used to check if a sample is part of an active alert
// when evaluating the recovery threshold.
ActiveAlerts map[uint64]struct{}
}
// HasActiveAlert reports whether the given sample fingerprint corresponds
// to an active alert.
func (eval EvalData) HasActiveAlert(sampleLabelFp uint64) bool {
if len(eval.ActiveAlerts) == 0 {
return false
}
_, ok := eval.ActiveAlerts[sampleLabelFp]
return ok
}
type RuleThreshold interface {
ShouldAlert(series v3.Series, unit string) (Vector, error)
// Eval runs the given series through the threshold rules
// using the given EvalData and returns the matching samples as a Vector.
Eval(series v3.Series, unit string, evalData EvalData) (Vector, error)
GetRuleReceivers() []RuleReceivers
}
@@ -97,7 +117,7 @@ func (r BasicRuleThresholds) Validate() error {
return errors.Join(errs...)
}
func (r BasicRuleThresholds) ShouldAlert(series v3.Series, unit string) (Vector, error) {
func (r BasicRuleThresholds) Eval(series v3.Series, unit string, evalData EvalData) (Vector, error) {
var resultVector Vector
thresholds := []BasicRuleThreshold(r)
sortThresholds(thresholds)
@@ -105,8 +125,31 @@ func (r BasicRuleThresholds) ShouldAlert(series v3.Series, unit string) (Vector,
smpl, shouldAlert := threshold.shouldAlert(series, unit)
if shouldAlert {
smpl.Target = *threshold.TargetValue
if threshold.RecoveryTarget != nil {
smpl.RecoveryTarget = threshold.RecoveryTarget
}
smpl.TargetUnit = threshold.TargetUnit
resultVector = append(resultVector, smpl)
continue
}
// If a recovery target was provided, prepare the alert hash from the series labels and the threshold name
if threshold.RecoveryTarget == nil {
continue
}
sampleLabels := PrepareSampleLabelsForRule(series.Labels, threshold.Name)
alertHash := sampleLabels.Hash()
// If the alert is active, check whether the sample matches the recovery threshold
if evalData.HasActiveAlert(alertHash) {
smpl, matchesRecoveryThreshold := threshold.matchesRecoveryThreshold(series, unit)
if matchesRecoveryThreshold {
smpl.Target = *threshold.TargetValue
smpl.RecoveryTarget = threshold.RecoveryTarget
smpl.TargetUnit = threshold.TargetUnit
// Mark IsRecovering to signal that the metric is in the recovery stage
smpl.IsRecovering = true
resultVector = append(resultVector, smpl)
}
}
}
return resultVector, nil
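Putting the branch above into one place: a sample alerts outright when it crosses the target; otherwise, if a recovery target is configured and the alert is already active, the sample is returned with IsRecovering set when it still crosses the recovery target. A self-contained sketch of that per-point decision, assuming a "value is above" comparison and simplified types in place of v3.Series and Sample:

```go
package main

import "fmt"

type evalResult struct {
	matched      bool
	isRecovering bool
}

// evalPoint sketches the per-threshold decision in BasicRuleThresholds.Eval
// for an "above" comparator: crossing the target yields an alerting sample;
// otherwise, for an already-active alert with a recovery target, still being
// above the recovery target marks the sample as recovering.
func evalPoint(value, target float64, recoveryTarget *float64, alertActive bool) evalResult {
	if value > target {
		return evalResult{matched: true}
	}
	if recoveryTarget != nil && alertActive && value > *recoveryTarget {
		return evalResult{matched: true, isRecovering: true}
	}
	return evalResult{} // neither threshold matched; the alert resolves as before
}

func main() {
	recovery := 60.0
	fmt.Println(evalPoint(95, 80, &recovery, true))  // {true false}  -> firing
	fmt.Println(evalPoint(70, 80, &recovery, true))  // {true true}   -> recovering
	fmt.Println(evalPoint(70, 80, &recovery, false)) // {false false} -> no active alert, dropped
	fmt.Println(evalPoint(50, 80, &recovery, true))  // {false false} -> fully recovered
}
```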
@@ -133,16 +176,27 @@ func sortThresholds(thresholds []BasicRuleThreshold) {
})
}
func (b BasicRuleThreshold) target(ruleUnit string) float64 {
// convertToRuleUnit converts the given value from the target unit to the rule unit
func (b BasicRuleThreshold) convertToRuleUnit(val float64, ruleUnit string) float64 {
unitConverter := converter.FromUnit(converter.Unit(b.TargetUnit))
// convert the given value from the target unit to the rule (y-axis) unit
value := unitConverter.Convert(converter.Value{
F: *b.TargetValue,
F: val,
U: converter.Unit(b.TargetUnit),
}, converter.Unit(ruleUnit))
return value.F
}
// target returns the target value in the rule unit
func (b BasicRuleThreshold) target(ruleUnit string) float64 {
return b.convertToRuleUnit(*b.TargetValue, ruleUnit)
}
// recoveryTarget returns the recovery target value in the rule unit
func (b BasicRuleThreshold) recoveryTarget(ruleUnit string) float64 {
return b.convertToRuleUnit(*b.RecoveryTarget, ruleUnit)
}
func (b BasicRuleThreshold) getCompareOp() CompareOp {
return b.CompareOp
}
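Both target and recoveryTarget now go through convertToRuleUnit, so the two thresholds are compared in the rule's unit. A self-contained sketch of that conversion for one concrete case (milliseconds to seconds); the real code delegates to the converter package for all supported units:

```go
package main

import "fmt"

// convertToRuleUnit sketch: only the ms<->s cases are modeled here;
// the real implementation uses converter.FromUnit for all supported units.
func convertToRuleUnit(val float64, fromUnit, ruleUnit string) float64 {
	switch {
	case fromUnit == "ms" && ruleUnit == "s":
		return val / 1000
	case fromUnit == "s" && ruleUnit == "ms":
		return val * 1000
	default:
		return val // same or unknown units: no conversion
	}
}

func main() {
	// A 500 ms target and a 200 ms recovery target, with the rule unit in seconds:
	target := convertToRuleUnit(500, "ms", "s")
	recovery := convertToRuleUnit(200, "ms", "s")
	fmt.Println(target, recovery) // 0.5 0.2
}
```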
@@ -178,6 +232,13 @@ func (b BasicRuleThreshold) Validate() error {
return errors.Join(errs...)
}
func (b BasicRuleThreshold) matchesRecoveryThreshold(series v3.Series, ruleUnit string) (Sample, bool) {
return b.shouldAlertWithTarget(series, b.recoveryTarget(ruleUnit))
}
func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
return b.shouldAlertWithTarget(series, b.target(ruleUnit))
}
func removeGroupinSetPoints(series v3.Series) []v3.Point {
var result []v3.Point
for _, s := range series.Points {
@@ -188,21 +249,22 @@ func removeGroupinSetPoints(series v3.Series) []v3.Point {
return result
}
func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
// PrepareSampleLabelsForRule builds the label set used for alerting from a sample.
// It takes the series labels and the threshold name and returns the labels with the
// threshold name and severity labels added.
func PrepareSampleLabelsForRule(seriesLabels map[string]string, thresholdName string) (lbls labels.Labels) {
lb := labels.NewBuilder(labels.Labels{})
for name, value := range seriesLabels {
lb.Set(name, value)
}
lb.Set(LabelThresholdName, thresholdName)
lb.Set(LabelSeverityName, strings.ToLower(thresholdName))
return lb.Labels()
}
func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float64) (Sample, bool) {
var shouldAlert bool
var alertSmpl Sample
var lbls labels.Labels
for name, value := range series.Labels {
lbls = append(lbls, labels.Label{Name: name, Value: value})
}
target := b.target(ruleUnit)
// TODO(srikanthccv): is it better to move the logic to notifier instead of
// adding two labels?
lbls = append(lbls, labels.Label{Name: LabelThresholdName, Value: b.Name})
lbls = append(lbls, labels.Label{Name: LabelSeverityName, Value: strings.ToLower(b.Name)})
lbls := PrepareSampleLabelsForRule(series.Labels, b.Name)
series.Points = removeGroupinSetPoints(series)

View File

@@ -9,7 +9,7 @@ import (
v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
)
func TestBasicRuleThresholdShouldAlert_UnitConversion(t *testing.T) {
func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
target := 100.0
tests := []struct {
@@ -270,7 +270,7 @@ func TestBasicRuleThresholdShouldAlert_UnitConversion(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
thresholds := BasicRuleThresholds{tt.threshold}
vector, err := thresholds.ShouldAlert(tt.series, tt.ruleUnit)
vector, err := thresholds.Eval(tt.series, tt.ruleUnit, EvalData{})
assert.NoError(t, err)
alert := len(vector) > 0
@@ -301,3 +301,31 @@ func TestBasicRuleThresholdShouldAlert_UnitConversion(t *testing.T) {
})
}
}
func TestPrepareSampleLabelsForRule(t *testing.T) {
alertAllHashes := make(map[uint64]struct{})
thresholdName := "test"
for range 50_000 {
sampleLabels := map[string]string{
"service": "test",
"env": "prod",
"tier": "backend",
"namespace": "default",
"pod": "test-pod",
"container": "test-container",
"node": "test-node",
"cluster": "test-cluster",
"region": "test-region",
"az": "test-az",
"hostname": "test-hostname",
"ip": "192.168.1.1",
"port": "8080",
}
lbls := PrepareSampleLabelsForRule(sampleLabels, thresholdName)
assert.True(t, lbls.Has(LabelThresholdName), "LabelThresholdName not found in labels")
alertAllHashes[lbls.Hash()] = struct{}{}
}
t.Logf("Total hashes: %d", len(alertAllHashes))
// there should be only one hash for all the samples
assert.Equal(t, 1, len(alertAllHashes), "Expected only one hash for all the samples")
}