feat: add support for recovery threshold (#9428)

2025-12-16 18:44:36 -06:00 · 2025-11-22 01:30:37 +05:30
parent 79988b448f
commit 52228bc6c4
17 changed files with 3421 additions and 793 deletions
--- a/2
+++ b/2
@@ -86,7 +86,7 @@ go-run-enterprise: ## Runs the enterprise go backend server
 	SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN=tcp://127.0.0.1:9000 \
 	SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_CLUSTER=cluster \
 	go run -race \
-		$(GO_BUILD_CONTEXT_ENTERPRISE)/*.go
+		$(GO_BUILD_CONTEXT_ENTERPRISE)/*.go server

 .PHONY: go-test
 go-test: ## Runs go unit tests
--- a/ee/query-service/rules/anomaly.go
+++ b/ee/query-service/rules/anomaly.go
@@ -246,7 +246,9 @@ func (r *AnomalyRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID, t
 				continue
 			}
 		}
-		results, err := r.Threshold.ShouldAlert(*series, r.Unit())
+		results, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
+			ActiveAlerts: r.ActiveAlertsLabelFP(),
+		})
 		if err != nil {
 			return nil, err
 		}
@@ -296,7 +298,9 @@ func (r *AnomalyRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUID,
 				continue
 			}
 		}
-		results, err := r.Threshold.ShouldAlert(*series, r.Unit())
+		results, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
+			ActiveAlerts: r.ActiveAlertsLabelFP(),
+		})
 		if err != nil {
 			return nil, err
 		}
@@ -410,6 +414,7 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
 			GeneratorURL:      r.GeneratorURL(),
 			Receivers:         ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
 			Missing:           smpl.IsMissing,
+			IsRecovering:      smpl.IsRecovering,
 		}
 	}

@@ -422,6 +427,9 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro

 			alert.Value = a.Value
 			alert.Annotations = a.Annotations
+			// Update the recovering and missing state of existing alert
+			alert.IsRecovering = a.IsRecovering
+			alert.Missing = a.Missing
 			if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
 				alert.Receivers = ruleReceiverMap[v]
 			}
@@ -480,6 +488,30 @@ func (r *AnomalyRule) Eval(ctx context.Context, ts time.Time) (interface{}, erro
 				Value:        a.Value,
 			})
 		}
+
+		// We need to change firing alert to recovering if the returned sample meets recovery threshold
+		changeFiringToRecovering := a.State == model.StateFiring && a.IsRecovering
+		// We need to change recovering alerts to firing if the returned sample meets target threshold
+		changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
+		// in any of the above case we need to update the status of alert
+		if changeFiringToRecovering || changeRecoveringToFiring {
+			state := model.StateRecovering
+			if changeRecoveringToFiring {
+				state = model.StateFiring
+			}
+			a.State = state
+			r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
+			itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
+				RuleID:       r.ID(),
+				RuleName:     r.Name(),
+				State:        state,
+				StateChanged: true,
+				UnixMilli:    ts.UnixMilli(),
+				Labels:       model.LabelsString(labelsJSON),
+				Fingerprint:  a.QueryResultLables.Hash(),
+				Value:        a.Value,
+			})
+		}
 	}

 	currentState := r.State()
--- a/pkg/query-service/model/alerting.go
+++ b/pkg/query-service/model/alerting.go
@@ -12,9 +12,12 @@ import (
 // AlertState denotes the state of an active alert.
 type AlertState int

+// The enum values are ordered by priority (lowest to highest).
+// When determining overall rule state, higher numeric values take precedence.
 const (
 	StateInactive AlertState = iota
 	StatePending
+	StateRecovering
 	StateFiring
 	StateNoData
 	StateDisabled
@@ -32,6 +35,8 @@ func (s AlertState) String() string {
 		return "nodata"
 	case StateDisabled:
 		return "disabled"
+	case StateRecovering:
+		return "recovering"
 	}
 	panic(errors.Errorf("unknown alert state: %d", s))
 }
@@ -58,6 +63,8 @@ func (s *AlertState) UnmarshalJSON(b []byte) error {
 			*s = StateNoData
 		case "disabled":
 			*s = StateDisabled
+		case "recovering":
+			*s = StateRecovering
 		default:
 			*s = StateInactive
 		}
@@ -83,6 +90,8 @@ func (s *AlertState) Scan(value interface{}) error {
 		*s = StateNoData
 	case "disabled":
 		*s = StateDisabled
+	case "recovering":
+		*s = StateRecovering
 	}
 	return nil
 }
--- a/pkg/query-service/rules/base_rule.go
+++ b/pkg/query-service/rules/base_rule.go
@@ -191,6 +191,26 @@ func (r *BaseRule) currentAlerts() []*ruletypes.Alert {
 	return alerts
 }

+// ActiveAlertsLabelFP returns a map of active alert labels fingerprint and
+// the fingerprint is computed using the QueryResultLables.Hash() method.
+// We use the QueryResultLables instead of labels as these labels are raw labels
+// that we get from the sample.
+// This is useful in cases where we want to check if an alert is still active
+// based on the labels we have.
+func (r *BaseRule) ActiveAlertsLabelFP() map[uint64]struct{} {
+	r.mtx.Lock()
+	defer r.mtx.Unlock()
+
+	activeAlerts := make(map[uint64]struct{}, len(r.Active))
+	for _, alert := range r.Active {
+		if alert == nil || alert.QueryResultLables == nil {
+			continue
+		}
+		activeAlerts[alert.QueryResultLables.Hash()] = struct{}{}
+	}
+	return activeAlerts
+}
+
 func (r *BaseRule) EvalDelay() time.Duration {
 	return r.evalDelay
 }
--- a/pkg/query-service/rules/base_rule_test.go
+++ b/pkg/query-service/rules/base_rule_test.go
@@ -1,9 +1,10 @@
 package rules

 import (
-	"github.com/stretchr/testify/require"
 	"testing"

+	"github.com/stretchr/testify/require"
+
 	v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
 	ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
 )
@@ -74,7 +75,7 @@ func TestBaseRule_RequireMinPoints(t *testing.T) {

 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
-			_, err := test.rule.Threshold.ShouldAlert(*test.series, "")
+			_, err := test.rule.Threshold.Eval(*test.series, "", ruletypes.EvalData{})
 			require.NoError(t, err)
 			require.Equal(t, len(test.series.Points) >= test.rule.ruleCondition.RequiredNumPoints, test.shouldAlert)
 		})
--- a/pkg/query-service/rules/manager.go
+++ b/pkg/query-service/rules/manager.go
@@ -4,13 +4,14 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
 	"log/slog"
 	"sort"
 	"strings"
 	"sync"
 	"time"

+	"github.com/SigNoz/signoz/pkg/query-service/utils/labels"
+
 	"go.uber.org/zap"

 	"github.com/go-openapi/strfmt"
--- a/pkg/query-service/rules/prom_rule.go
+++ b/pkg/query-service/rules/prom_rule.go
@@ -159,7 +159,9 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
 			continue
 		}

-		results, err := r.Threshold.ShouldAlert(toCommonSeries(series), r.Unit())
+		results, err := r.Threshold.Eval(toCommonSeries(series), r.Unit(), ruletypes.EvalData{
+			ActiveAlerts: r.ActiveAlertsLabelFP(),
+		})
 		if err != nil {
 			return nil, err
 		}
@@ -233,6 +235,7 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
 				Value:             result.V,
 				GeneratorURL:      r.GeneratorURL(),
 				Receivers:         ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
+				IsRecovering:      result.IsRecovering,
 			}
 		}
 	}
@@ -245,6 +248,9 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
 		if alert, ok := r.Active[h]; ok && alert.State != model.StateInactive {
 			alert.Value = a.Value
 			alert.Annotations = a.Annotations
+			// Update the recovering and missing state of existing alert
+			alert.IsRecovering = a.IsRecovering
+			alert.Missing = a.Missing
 			if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
 				alert.Receivers = ruleReceiverMap[v]
 			}
@@ -304,6 +310,29 @@ func (r *PromRule) Eval(ctx context.Context, ts time.Time) (interface{}, error)
 			})
 		}

+		// We need to change firing alert to recovering if the returned sample meets recovery threshold
+		changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
+		// We need to change recovering alerts to firing if the returned sample meets target threshold
+		changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
+		// in any of the above case we need to update the status of alert
+		if changeAlertingToRecovering || changeRecoveringToFiring {
+			state := model.StateRecovering
+			if changeRecoveringToFiring {
+				state = model.StateFiring
+			}
+			a.State = state
+			r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
+			itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
+				RuleID:       r.ID(),
+				RuleName:     r.Name(),
+				State:        state,
+				StateChanged: true,
+				UnixMilli:    ts.UnixMilli(),
+				Labels:       model.LabelsString(labelsJSON),
+				Fingerprint:  a.QueryResultLables.Hash(),
+				Value:        a.Value,
+			})
+		}
 	}
 	r.health = ruletypes.HealthGood
 	r.lastError = err
--- a/pkg/query-service/rules/promrule_test.go
+++ b/pkg/query-service/rules/promrule_test.go
@@ -23,7 +23,7 @@ func getVectorValues(vectors []ruletypes.Sample) []float64 {
 	return values
 }

-func TestPromRuleShouldAlert(t *testing.T) {
+func TestPromRuleEval(t *testing.T) {
 	postableRule := ruletypes.PostableRule{
 		AlertName: "Test Rule",
 		AlertType: ruletypes.AlertTypeMetric,
@@ -696,7 +696,7 @@ func TestPromRuleShouldAlert(t *testing.T) {
 			assert.NoError(t, err)
 		}

-		resultVectors, err := rule.Threshold.ShouldAlert(toCommonSeries(c.values), rule.Unit())
+		resultVectors, err := rule.Threshold.Eval(toCommonSeries(c.values), rule.Unit(), ruletypes.EvalData{})
 		assert.NoError(t, err)

 		// Compare full result vector with expected vector
--- a/pkg/query-service/rules/rule.go
+++ b/pkg/query-service/rules/rule.go
@@ -24,6 +24,8 @@ type Rule interface {
 	HoldDuration() time.Duration
 	State() model.AlertState
 	ActiveAlerts() []*ruletypes.Alert
+	// ActiveAlertsLabelFP returns a map of active alert labels fingerprint
+	ActiveAlertsLabelFP() map[uint64]struct{}

 	PreferredChannels() []string

--- a/pkg/query-service/rules/threshold_rule.go
+++ b/pkg/query-service/rules/threshold_rule.go
@@ -488,7 +488,9 @@ func (r *ThresholdRule) buildAndRunQuery(ctx context.Context, orgID valuer.UUID,
 				continue
 			}
 		}
-		resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
+		resultSeries, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
+			ActiveAlerts: r.ActiveAlertsLabelFP(),
+		})
 		if err != nil {
 			return nil, err
 		}
@@ -565,7 +567,9 @@ func (r *ThresholdRule) buildAndRunQueryV5(ctx context.Context, orgID valuer.UUI
 				continue
 			}
 		}
-		resultSeries, err := r.Threshold.ShouldAlert(*series, r.Unit())
+		resultSeries, err := r.Threshold.Eval(*series, r.Unit(), ruletypes.EvalData{
+			ActiveAlerts: r.ActiveAlertsLabelFP(),
+		})
 		if err != nil {
 			return nil, err
 		}
@@ -666,13 +670,14 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
 		// Links with timestamps should go in annotations since labels
 		// is used alert grouping, and we want to group alerts with the same
 		// label set, but different timestamps, together.
-		if r.typ == ruletypes.AlertTypeTraces {
+		switch r.typ {
+		case ruletypes.AlertTypeTraces:
 			link := r.prepareLinksToTraces(ctx, ts, smpl.Metric)
 			if link != "" && r.hostFromSource() != "" {
 				r.logger.InfoContext(ctx, "adding traces link to annotations", "link", fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link))
 				annotations = append(annotations, labels.Label{Name: "related_traces", Value: fmt.Sprintf("%s/traces-explorer?%s", r.hostFromSource(), link)})
 			}
-		} else if r.typ == ruletypes.AlertTypeLogs {
+		case ruletypes.AlertTypeLogs:
 			link := r.prepareLinksToLogs(ctx, ts, smpl.Metric)
 			if link != "" && r.hostFromSource() != "" {
 				r.logger.InfoContext(ctx, "adding logs link to annotations", "link", fmt.Sprintf("%s/logs/logs-explorer?%s", r.hostFromSource(), link))
@@ -698,6 +703,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
 			GeneratorURL:      r.GeneratorURL(),
 			Receivers:         ruleReceiverMap[lbs.Map()[ruletypes.LabelThresholdName]],
 			Missing:           smpl.IsMissing,
+			IsRecovering:      smpl.IsRecovering,
 		}
 	}

@@ -711,6 +717,9 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er

 			alert.Value = a.Value
 			alert.Annotations = a.Annotations
+			// Update the recovering and missing state of existing alert
+			alert.IsRecovering = a.IsRecovering
+			alert.Missing = a.Missing
 			if v, ok := alert.Labels.Map()[ruletypes.LabelThresholdName]; ok {
 				alert.Receivers = ruleReceiverMap[v]
 			}
@@ -735,6 +744,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
 				delete(r.Active, fp)
 			}
 			if a.State != model.StateInactive {
+				r.logger.DebugContext(ctx, "converting firing alert to inActive", "name", r.Name())
 				a.State = model.StateInactive
 				a.ResolvedAt = ts
 				itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
@@ -752,6 +762,7 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
 		}

 		if a.State == model.StatePending && ts.Sub(a.ActiveAt) >= r.holdDuration {
+			r.logger.DebugContext(ctx, "converting pending alert to firing", "name", r.Name())
 			a.State = model.StateFiring
 			a.FiredAt = ts
 			state := model.StateFiring
@@ -769,6 +780,30 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (interface{}, er
 				Value:        a.Value,
 			})
 		}
+
+		// We need to change firing alert to recovering if the returned sample meets recovery threshold
+		changeAlertingToRecovering := a.State == model.StateFiring && a.IsRecovering
+		// We need to change recovering alerts to firing if the returned sample meets target threshold
+		changeRecoveringToFiring := a.State == model.StateRecovering && !a.IsRecovering && !a.Missing
+		// in any of the above case we need to update the status of alert
+		if changeAlertingToRecovering || changeRecoveringToFiring {
+			state := model.StateRecovering
+			if changeRecoveringToFiring {
+				state = model.StateFiring
+			}
+			a.State = state
+			r.logger.DebugContext(ctx, "converting alert state", "name", r.Name(), "state", state)
+			itemsToAdd = append(itemsToAdd, model.RuleStateHistory{
+				RuleID:       r.ID(),
+				RuleName:     r.Name(),
+				State:        state,
+				StateChanged: true,
+				UnixMilli:    ts.UnixMilli(),
+				Labels:       model.LabelsString(labelsJSON),
+				Fingerprint:  a.QueryResultLables.Hash(),
+				Value:        a.Value,
+			})
+		}
 	}

 	currentState := r.State()
--- a/pkg/query-service/rules/threshold_rule_test.go
+++ b/pkg/query-service/rules/threshold_rule_test.go
--- a/pkg/query-service/rules/threshold_rule_test_data.go
+++ b/pkg/query-service/rules/threshold_rule_test_data.go
--- a/pkg/types/ruletypes/alerting.go
+++ b/pkg/types/ruletypes/alerting.go
@@ -59,7 +59,8 @@ type Alert struct {
 	LastSentAt time.Time
 	ValidUntil time.Time

-	Missing bool
+	Missing      bool
+	IsRecovering bool
 }

 func (a *Alert) NeedsSending(ts time.Time, resendDelay time.Duration) bool {
--- a/pkg/types/ruletypes/api_params_test.go
+++ b/pkg/types/ruletypes/api_params_test.go
@@ -621,10 +621,10 @@ func TestParseIntoRuleThresholdGeneration(t *testing.T) {
 	}

 	// Test that threshold can evaluate properly
-	vector, err := threshold.ShouldAlert(v3.Series{
+	vector, err := threshold.Eval(v3.Series{
 		Points: []v3.Point{{Value: 0.15, Timestamp: 1000}}, // 150ms in seconds
 		Labels: map[string]string{"test": "label"},
-	}, "")
+	}, "", EvalData{})
 	if err != nil {
 		t.Fatalf("Unexpected error in shouldAlert: %v", err)
 	}
@@ -698,20 +698,20 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
 	}

 	// Test with a value that should trigger both WARNING and CRITICAL thresholds
-	vector, err := threshold.ShouldAlert(v3.Series{
+	vector, err := threshold.Eval(v3.Series{
 		Points: []v3.Point{{Value: 95.0, Timestamp: 1000}}, // 95% CPU usage
 		Labels: map[string]string{"service": "test"},
-	}, "")
+	}, "", EvalData{})
 	if err != nil {
 		t.Fatalf("Unexpected error in shouldAlert: %v", err)
 	}

 	assert.Equal(t, 2, len(vector))

-	vector, err = threshold.ShouldAlert(v3.Series{
+	vector, err = threshold.Eval(v3.Series{
 		Points: []v3.Point{{Value: 75.0, Timestamp: 1000}}, // 75% CPU usage
 		Labels: map[string]string{"service": "test"},
-	}, "")
+	}, "", EvalData{})
 	if err != nil {
 		t.Fatalf("Unexpected error in shouldAlert: %v", err)
 	}
@@ -719,7 +719,7 @@ func TestParseIntoRuleMultipleThresholds(t *testing.T) {
 	assert.Equal(t, 1, len(vector))
 }

-func TestAnomalyNegationShouldAlert(t *testing.T) {
+func TestAnomalyNegationEval(t *testing.T) {
 	tests := []struct {
 		name          string
 		ruleJSON      []byte
@@ -1046,9 +1046,9 @@ func TestAnomalyNegationShouldAlert(t *testing.T) {
 				t.Fatalf("unexpected error from GetRuleThreshold: %v", err)
 			}

-			resultVector, err := ruleThreshold.ShouldAlert(tt.series, "")
+			resultVector, err := ruleThreshold.Eval(tt.series, "", EvalData{})
 			if err != nil {
-				t.Fatalf("unexpected error from ShouldAlert: %v", err)
+				t.Fatalf("unexpected error from Eval: %v", err)
 			}

 			shouldAlert := len(resultVector) > 0
--- a/pkg/types/ruletypes/result_types.go
+++ b/pkg/types/ruletypes/result_types.go
@@ -19,7 +19,11 @@ type Sample struct {

 	IsMissing bool

-	Target float64
+	// IsRecovering is true if the sample is part of a recovering alert.
+	IsRecovering bool
+
+	Target         float64
+	RecoveryTarget *float64

 	TargetUnit string
 }
--- a/pkg/types/ruletypes/threshold.go
+++ b/pkg/types/ruletypes/threshold.go
@@ -57,8 +57,28 @@ type RuleReceivers struct {
 	Name     string   `json:"name"`
 }

+// EvalData are other dependent values used to evaluate the threshold rules.
+type EvalData struct {
+	// ActiveAlerts is a map of active alert fingerprints
+	// used to check if a sample is part of an active alert
+	// when evaluating the recovery threshold.
+	ActiveAlerts map[uint64]struct{}
+}
+
+// HasActiveAlert checks if the given sample figerprint is active
+// as an alert.
+func (eval EvalData) HasActiveAlert(sampleLabelFp uint64) bool {
+	if len(eval.ActiveAlerts) == 0 {
+		return false
+	}
+	_, ok := eval.ActiveAlerts[sampleLabelFp]
+	return ok
+}
+
 type RuleThreshold interface {
-	ShouldAlert(series v3.Series, unit string) (Vector, error)
+	// Eval runs the given series through the threshold rules
+	// using the given EvalData and returns the matching series
+	Eval(series v3.Series, unit string, evalData EvalData) (Vector, error)
 	GetRuleReceivers() []RuleReceivers
 }

@@ -97,7 +117,7 @@ func (r BasicRuleThresholds) Validate() error {
 	return errors.Join(errs...)
 }

-func (r BasicRuleThresholds) ShouldAlert(series v3.Series, unit string) (Vector, error) {
+func (r BasicRuleThresholds) Eval(series v3.Series, unit string, evalData EvalData) (Vector, error) {
 	var resultVector Vector
 	thresholds := []BasicRuleThreshold(r)
 	sortThresholds(thresholds)
@@ -105,8 +125,31 @@ func (r BasicRuleThresholds) ShouldAlert(series v3.Series, unit string) (Vector,
 		smpl, shouldAlert := threshold.shouldAlert(series, unit)
 		if shouldAlert {
 			smpl.Target = *threshold.TargetValue
+			if threshold.RecoveryTarget != nil {
+				smpl.RecoveryTarget = threshold.RecoveryTarget
+			}
 			smpl.TargetUnit = threshold.TargetUnit
 			resultVector = append(resultVector, smpl)
+			continue
+		}
+
+		// Prepare alert hash from series labels and threshold name if recovery target option was provided
+		if threshold.RecoveryTarget == nil {
+			continue
+		}
+		sampleLabels := PrepareSampleLabelsForRule(series.Labels, threshold.Name)
+		alertHash := sampleLabels.Hash()
+		// check if alert is active and then check if recovery threshold matches
+		if evalData.HasActiveAlert(alertHash) {
+			smpl, matchesRecoveryThrehold := threshold.matchesRecoveryThreshold(series, unit)
+			if matchesRecoveryThrehold {
+				smpl.Target = *threshold.TargetValue
+				smpl.RecoveryTarget = threshold.RecoveryTarget
+				smpl.TargetUnit = threshold.TargetUnit
+				// IsRecovering to notify that metrics is in recovery stage
+				smpl.IsRecovering = true
+				resultVector = append(resultVector, smpl)
+			}
 		}
 	}
 	return resultVector, nil
@@ -133,16 +176,27 @@ func sortThresholds(thresholds []BasicRuleThreshold) {
 	})
 }

-func (b BasicRuleThreshold) target(ruleUnit string) float64 {
+// convertToRuleUnit converts the given value from the target unit to the rule unit
+func (b BasicRuleThreshold) convertToRuleUnit(val float64, ruleUnit string) float64 {
 	unitConverter := converter.FromUnit(converter.Unit(b.TargetUnit))
 	// convert the target value to the y-axis unit
 	value := unitConverter.Convert(converter.Value{
-		F: *b.TargetValue,
+		F: val,
 		U: converter.Unit(b.TargetUnit),
 	}, converter.Unit(ruleUnit))
 	return value.F
 }

+// target returns the target value in the rule unit
+func (b BasicRuleThreshold) target(ruleUnit string) float64 {
+	return b.convertToRuleUnit(*b.TargetValue, ruleUnit)
+}
+
+// recoveryTarget returns the recovery target value in the rule unit
+func (b BasicRuleThreshold) recoveryTarget(ruleUnit string) float64 {
+	return b.convertToRuleUnit(*b.RecoveryTarget, ruleUnit)
+}
+
 func (b BasicRuleThreshold) getCompareOp() CompareOp {
 	return b.CompareOp
 }
@@ -178,6 +232,13 @@ func (b BasicRuleThreshold) Validate() error {
 	return errors.Join(errs...)
 }

+func (b BasicRuleThreshold) matchesRecoveryThreshold(series v3.Series, ruleUnit string) (Sample, bool) {
+	return b.shouldAlertWithTarget(series, b.recoveryTarget(ruleUnit))
+}
+func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
+	return b.shouldAlertWithTarget(series, b.target(ruleUnit))
+}
+
 func removeGroupinSetPoints(series v3.Series) []v3.Point {
 	var result []v3.Point
 	for _, s := range series.Points {
@@ -188,21 +249,22 @@ func removeGroupinSetPoints(series v3.Series) []v3.Point {
 	return result
 }

-func (b BasicRuleThreshold) shouldAlert(series v3.Series, ruleUnit string) (Sample, bool) {
+// PrepareSampleLabelsForRule prepares the labels for the sample to be used in the alerting.
+// It accepts seriesLabels and thresholdName as input and returns the labels with the threshold name label added.
+func PrepareSampleLabelsForRule(seriesLabels map[string]string, thresholdName string) (lbls labels.Labels) {
+	lb := labels.NewBuilder(labels.Labels{})
+	for name, value := range seriesLabels {
+		lb.Set(name, value)
+	}
+	lb.Set(LabelThresholdName, thresholdName)
+	lb.Set(LabelSeverityName, strings.ToLower(thresholdName))
+	return lb.Labels()
+}
+
+func (b BasicRuleThreshold) shouldAlertWithTarget(series v3.Series, target float64) (Sample, bool) {
 	var shouldAlert bool
 	var alertSmpl Sample
-	var lbls labels.Labels
-
-	for name, value := range series.Labels {
-		lbls = append(lbls, labels.Label{Name: name, Value: value})
-	}
-
-	target := b.target(ruleUnit)
-
-	// TODO(srikanthccv): is it better to move the logic to notifier instead of
-	// adding two labels?
-	lbls = append(lbls, labels.Label{Name: LabelThresholdName, Value: b.Name})
-	lbls = append(lbls, labels.Label{Name: LabelSeverityName, Value: strings.ToLower(b.Name)})
+	lbls := PrepareSampleLabelsForRule(series.Labels, b.Name)

 	series.Points = removeGroupinSetPoints(series)

--- a/pkg/types/ruletypes/threshold_test.go
+++ b/pkg/types/ruletypes/threshold_test.go
@@ -9,7 +9,7 @@ import (
 	v3 "github.com/SigNoz/signoz/pkg/query-service/model/v3"
 )

-func TestBasicRuleThresholdShouldAlert_UnitConversion(t *testing.T) {
+func TestBasicRuleThresholdEval_UnitConversion(t *testing.T) {
 	target := 100.0

 	tests := []struct {
@@ -270,7 +270,7 @@ func TestBasicRuleThresholdShouldAlert_UnitConversion(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			thresholds := BasicRuleThresholds{tt.threshold}
-			vector, err := thresholds.ShouldAlert(tt.series, tt.ruleUnit)
+			vector, err := thresholds.Eval(tt.series, tt.ruleUnit, EvalData{})
 			assert.NoError(t, err)

 			alert := len(vector) > 0
@@ -301,3 +301,31 @@ func TestBasicRuleThresholdShouldAlert_UnitConversion(t *testing.T) {
 		})
 	}
 }
+
+func TestPrepareSampleLabelsForRule(t *testing.T) {
+	alertAllHashes := make(map[uint64]struct{})
+	thresholdName := "test"
+	for range 50_000 {
+		sampleLabels := map[string]string{
+			"service":   "test",
+			"env":       "prod",
+			"tier":      "backend",
+			"namespace": "default",
+			"pod":       "test-pod",
+			"container": "test-container",
+			"node":      "test-node",
+			"cluster":   "test-cluster",
+			"region":    "test-region",
+			"az":        "test-az",
+			"hostname":  "test-hostname",
+			"ip":        "192.168.1.1",
+			"port":      "8080",
+		}
+		lbls := PrepareSampleLabelsForRule(sampleLabels, thresholdName)
+		assert.True(t, lbls.Has(LabelThresholdName), "LabelThresholdName not found in labels")
+		alertAllHashes[lbls.Hash()] = struct{}{}
+	}
+	t.Logf("Total hashes: %d", len(alertAllHashes))
+	// there should be only one hash for all the samples
+	assert.Equal(t, 1, len(alertAllHashes), "Expected only one hash for all the samples")
+}