no volume-based alerts (only volume-based muting)

Klaas van Schelven
2024-01-05 20:31:51 +01:00
parent 89cab4f3c8
commit 6bc00c366a
6 changed files with 49 additions and 57 deletions


@@ -4,28 +4,46 @@
Default setting, may be adapted at organisational level.
You should get alerts for any state change for the worse, such as:

* New issues
* Regressions
* Unmuting (could be due to volume)
The basic belief underlying this is: "you should care about the errors on your project". And therefore, you'll be
alerted when they occur.
### Volume-Based Rules for unmuting

Volume-based rules may be configured per-issue for unmuting, e.g.:

- First time more than 5 events occur in the last 3 hours
- First time more than 10 events occur in a day
- First time the total number of events exceeds 100

The idea is: "I know about this issue, I've determined it's not important, and I only want to get notified when it
starts occurring a lot (for some definition of 'a lot')".

The tie-in with alerts is: you can send a notification on-unmute. (By the definition of unmuting, the alert occurs only
the _first time_ the condition occurs.)
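As a concrete illustration, this is what such a per-issue unmute condition looks like with the `VolumeBasedCondition`
class as changed in this commit; the import path is an assumption, but the constructor and JSON shape follow the
updated test below.

```python
# Sketch based on the updated VolumeBasedCondition; the module path is an assumption.
from volume_based_condition import VolumeBasedCondition

# "unmute once 100 or more events are seen in a single day"
vbc = VolumeBasedCondition("day", 1, 100)

json_str = vbc.to_json_str()
assert json_str == '{"period": "day", "nr_of_periods": 1, "volume": 100}'
assert VolumeBasedCondition.from_json_str(json_str) == vbc  # round-trips, per the test
```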
## "Later": auto-mute w/ default unmute rules
At some point we may make it so that you can configure per-project that issues are initially muted (on create), but
with some predefined set of volume-based unmute conditions, which is set on the muted issues on-create.

(This is in place of project-level volume-based alert settings. The main reason for this is that I want to keep a
symmetry/traceability between what's going on in the UI and the alerts that are going out.)
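A minimal sketch of what such a per-project default could look like, reusing the serialization format shown above; the
setting name and the period values are assumptions, not part of this commit.

```python
import json

# Hypothetical project-level default: the unmute conditions to stamp onto newly
# created (auto-muted) issues. Name and period values are assumptions.
DEFAULT_UNMUTE_CONDITIONS = json.dumps([
    {"period": "day", "nr_of_periods": 1, "volume": 100},  # 100+ events in a day
    {"period": "hour", "nr_of_periods": 3, "volume": 5},   # 5+ events in 3 hours
])
```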
## "Later": more than first-time-only
(The below only works if you let go of the idea that auto-mute is the only way you get volume-based alerts.)

Rather than just having first-time-only rules, you could have alerting rules that are triggered when a condition
persists, e.g.:

* Any time more than 5 events occur in the past 3 hours (ignoring any events that occurred before the last time this
  was triggered).
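A standalone sketch of the bookkeeping such a repeating rule would need; this is not the `PeriodCounter` API, the
threshold and window are just the example values from the bullet above, and events are assumed to arrive in timestamp
order.

```python
from collections import deque
from datetime import datetime, timedelta


class RepeatingVolumeRule:
    """Trigger every time more than `threshold` events are seen within `window`,
    counting only events that arrived after the previous trigger."""

    def __init__(self, threshold=5, window=timedelta(hours=3)):
        self.threshold = threshold
        self.window = window
        self.events = deque()

    def observe(self, timestamp: datetime) -> bool:
        self.events.append(timestamp)
        while self.events and self.events[0] < timestamp - self.window:
            self.events.popleft()  # fell outside the window
        if len(self.events) > self.threshold:
            self.events.clear()  # ignore anything that occurred before this trigger
            return True
        return False
```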
## Personal Notification Settings


@@ -98,22 +98,28 @@ class PeriodCounter(object):
is_new_period = _inc(self.counts[tl], tup[:tl], n, mx)
event_listeners_for_tl = self.event_listeners[tl]
-for ((nr_of_periods, gte_threshold), (wbt, wbf, is_true)) in list(event_listeners_for_tl.items()):
+for ((nr_of_periods, gte_threshold), (wbt, wbf, ar, is_true)) in list(event_listeners_for_tl.items()):
if is_true:
if not is_new_period:
continue # no new period means: never becomes false, because no old period becomes irrelevant
if not self._get_event_state(tup[:tl], tl, nr_of_periods, gte_threshold):
-event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, False)
+if ar:
+del event_listeners_for_tl[(nr_of_periods, gte_threshold)]
+else:
+event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, ar, False)
wbf()
else:
if self._get_event_state(tup[:tl], tl, nr_of_periods, gte_threshold):
-event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, True)
+if ar:
+del event_listeners_for_tl[(nr_of_periods, gte_threshold)]
+else:
+event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, ar, True)
wbt()
def add_event_listener(self, period_name, nr_of_periods, gte_threshold, when_becomes_true=noop,
-when_becomes_false=noop, initial_event_state=None, tup=None):
+when_becomes_false=noop, auto_remove=False, initial_event_state=None, tup=None):
# note: the 'events' here are not bugsink-events; but the more general concept of 'an event'; we may consider a
# different name for this in the future because of that.
@@ -128,7 +134,7 @@ class PeriodCounter(object):
initial_event_state = self._get_event_state(tup, tl, nr_of_periods, gte_threshold)
self.event_listeners[tl][(nr_of_periods, gte_threshold)] = \
-(when_becomes_true, when_becomes_false, initial_event_state)
+(when_becomes_true, when_becomes_false, auto_remove, initial_event_state)
def _tl_for_period(self, period_name):
return {
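For context, a usage sketch of the extended listener API with the new `auto_remove` flag; the period name, the
callback, and `now_tup` are illustrative assumptions, not taken from this commit.

```python
pc = PeriodCounter()

def on_unmute_threshold():
    # e.g. unmute the issue here and send the on-unmute notification
    print("threshold reached")

pc.add_event_listener(
    period_name="day",        # assumption: "day" is a supported period name
    nr_of_periods=1,
    gte_threshold=100,
    when_becomes_true=on_unmute_threshold,
    auto_remove=True,         # new: the listener is dropped after its first state change
    tup=now_tup,              # placeholder: the current time-tuple, as used by inc()
)
```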


@@ -23,16 +23,12 @@ class PeriodCounterRegistry(object):
def load_from_scratch(self, projects, issues, ordered_events, now_tup):
by_project = {}
by_issue = {}
-issue_pcs_by_project = {}
for project in projects:
by_project[project.id] = PeriodCounter()
for issue in issues:
by_issue[issue.id] = PeriodCounter()
-if issue.project_id not in issue_pcs_by_project[issue.project_id]:
-issue_pcs_by_project[issue.project_id] = []
-issue_pcs_by_project[issue.project_id].append(by_issue[issue.id])
for event in ordered_events:
project_pc = by_project[event.project_id]
@@ -41,30 +37,6 @@ class PeriodCounterRegistry(object):
issue_pc = by_issue[event.issue_id]
issue_pc.inc(event.timestamp)
-for project in projects:
-project_pc = by_project[project.id]
-volume_based_conditions = [
-VolumeBasedCondition.from_dict(vbc_s)
-for vbc_s in json.loads(project.alert_on_volume_based_conditions)
-]
-for issue_pc in issue_pcs_by_project[project.id]:
-for vbc in volume_based_conditions:
-issue_pc.add_event_listener(
-period_name=vbc.period_name,
-nr_of_periods=vbc.nr_of_periods,
-gte_threshold=vbc.volume,
-when_becomes_true=..., # do the alert. and stop monitoring, at least when it's 'first time'
-# however, questions again arise around "shouldn't you just use (un)muting as the mechanism?"
-# the question also arises: how useful is that "any time" really? because: once exceeded,
-# you often keep exceeding the same condition...
-# answer: I would say "ignore any information pre-tuple-x"... but that's all rather
-# advanced
-when_becomes_false=..., # what... really? 'stop monitoring' could be the answer.
-tup=now_tup,
-)
for issue in issues.filter(is_muted=True):
issue_pc = by_issue[issue.id]
@@ -89,4 +61,3 @@ class PeriodCounterRegistry(object):
# some TODOs:
#
# * quotas (per project, per issue)
-# * alerting (settings live on project, but alerts and therefore the listeners are per issue)


@@ -123,9 +123,9 @@ class PeriodCounterTestCase(TestCase):
class VolumeBasedConditionTestCase(TestCase):
def test_serialization(self):
vbc = VolumeBasedCondition("any", "day", 1, 100)
vbc = VolumeBasedCondition("day", 1, 100)
json_str = vbc.to_json_str()
self.assertEquals('{"any_or_first": "any", "period": "day", "nr_of_periods": 1, "volume": 100}', json_str)
self.assertEquals('{"period": "day", "nr_of_periods": 1, "volume": 100}', json_str)
vbc2 = VolumeBasedCondition.from_json_str(json_str)
self.assertEquals(vbc, vbc2)


@@ -3,8 +3,7 @@ import json
class VolumeBasedCondition(object):
-def __init__(self, any_or_first, period, nr_of_periods, volume):
-self.any_or_first = any_or_first
+def __init__(self, period, nr_of_periods, volume):
self.period = period
self.nr_of_periods = nr_of_periods
self.volume = volume
@@ -13,7 +12,6 @@ class VolumeBasedCondition(object):
def from_json_str(cls, json_str): # TODO this should really just have been a dict
json_dict = json.loads(json_str)
return VolumeBasedCondition(
-json_dict['any_or_first'],
json_dict['period'],
json_dict['nr_of_periods'],
json_dict['volume'],


@@ -48,7 +48,6 @@ class Project(models.Model):
alert_on_new_issue = models.BooleanField(default=True)
alert_on_regression = models.BooleanField(default=True)
alert_on_unmute = models.BooleanField(default=True)
-alert_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string
def get_latest_release(self):
# TODO performance considerations... this can be denormalized/cached at the project level