Retention/quotas: something that 'seems to work' (doesn't immediately crash)

This commit is contained in:
Klaas van Schelven
2024-06-21 11:50:13 +02:00
parent c2b821589d
commit ea6aa9bbca
6 changed files with 100 additions and 41 deletions

View File

@@ -0,0 +1,19 @@
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the per-event 'irrelevance_for_retention' score that the retention/eviction
    # algorithm uses to decide which events to evict first (events with irrelevance
    # above the computed maximum are evicted).

    dependencies = [
        ('events', '0003_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='event',
            name='irrelevance_for_retention',
            # Given the (very) small group of test users, we can get away with just setting a default of 0 here (no
            # datamigration)
            field=models.PositiveIntegerField(default=0),
            # preserve_default=False: the default of 0 is only used to backfill existing
            # rows during this migration; the model field itself declares no default.
            preserve_default=False,
        ),
    ]

View File

@@ -179,8 +179,7 @@ class Event(models.Model):
# below at least puts the parsed_data in the right place, and does some of the basic object set up (FKs to other
# objects etc).
# +1 because we're about to add a new event and want the number for _that_ event
irrelevance_for_retention = get_random_irrelevance(stored_event_count + 1)
irrelevance_for_retention = get_random_irrelevance(stored_event_count)
try:
event = cls.objects.create(

View File

@@ -22,7 +22,7 @@ def get_epoch_bounds(lower, upper=None):
return Q()
if lower is None:
return Q(timestamp__gte=datetime_for_epoch(lower))
return Q(timestamp__lt=datetime_for_epoch(upper))
if upper is None:
return Q(timestamp__gte=datetime_for_epoch(lower))
@@ -39,7 +39,6 @@ def nonzero_leading_bits(n):
101000 -> 3
110001 -> 6
"""
s = format(n, 'b')
return len(s.rstrip('0'))
@@ -58,14 +57,12 @@ def get_random_irrelevance(event_count):
def should_evict(project, timestamp, stored_event_count):
if stored_event_count is None:
return False # no events, nothing to evict
# if/when we implement 'just drop' this might go somewhere (maybe not here)
# if (project.retention_last_eviction is not None and
# get_epoch(project.retention_last_eviction) != get_epoch(timestamp)):
# return True
if (project.retention_last_eviction is not None and
get_epoch(project.retention_last_eviction) != get_epoch(timestamp)):
return True
if stored_event_count >= project.retention_max_event_count: # >= because this function is called right before add
if stored_event_count > project.retention_max_event_count: # > because: do something when _over_ the max
return True
return False
@@ -101,15 +98,17 @@ def get_epoch_bounds_with_irrelevance(project, current_timestamp):
from .models import Event
# TODO 'first_seen' is cheaper? I don't think it exists at project-level though.
first_epoch = get_epoch(Event.objects.filter(project=project).aggregate(Min('timestamp')))
# We can safely assume some Event exists when this point is reached because of the conditions in `should_evict`
first_epoch = get_epoch(Event.objects.filter(project=project).aggregate(val=Min('server_side_timestamp'))['val'])
current_epoch = get_epoch(current_timestamp)
difference = current_epoch - first_epoch
bounds = pairwise(
# because we construct in reverse order (from the most recent to the oldest) we end up with the pairs swapped
swapped_bounds = pairwise(
[None] + [current_epoch - n for n in list(map_N_until(get_age_for_irrelevance, difference))] + [None])
return [(bound, age_based_irrelevance) for age_based_irrelevance, bound in enumerate(bounds)]
return [((lb, ub), age_based_irrelevance) for age_based_irrelevance, (ub, lb) in enumerate(swapped_bounds)]
def get_irrelevance_pairs(project, epoch_bounds_with_irrelevance):
@@ -117,9 +116,10 @@ def get_irrelevance_pairs(project, epoch_bounds_with_irrelevance):
from .models import Event
for (lower_bound, upper_bound), age_based_irrelevance in epoch_bounds_with_irrelevance:
max_event_irrelevance = \
Event.objects.filter(get_epoch_bounds(lower_bound, upper_bound)).aggregate(Max('irrelevance_for_retention'))
yield age_based_irrelevance, max_event_irrelevance
d = Event.objects.filter(get_epoch_bounds(lower_bound, upper_bound)).aggregate(Max('irrelevance_for_retention'))
max_event_irrelevance = d["irrelevance_for_retention__max"] or 0
yield (age_based_irrelevance, max_event_irrelevance)
def map_N_until(f, until, onemore=False):
@@ -144,21 +144,36 @@ def pairwise(it):
prev = current
def filter_for_work(epoch_bounds_with_irrelevance, pairs, max_total_irrelevance):
    """Yield only those epoch bounds for which eviction may actually delete something.

    We compare the (already gathered) irrelevance pair of each epoch-bounds entry against
    the given total; entries whose summed irrelevance does not exceed the total are
    skipped, because no eviction work can result for that set of epochs.

    This relies solely on information collected (via queries) at the start of the
    algorithm, so it is not a complete filter against useless deletions — but it squeezes
    the maximum use out of the data we already have.
    """
    for irrelevance_pair, bounds_entry in zip(pairs, epoch_bounds_with_irrelevance):
        # Strictly-greater test: eviction only removes items whose irrelevance lies
        # _above_ the maximum, so equality means there is nothing to do.
        if sum(irrelevance_pair) <= max_total_irrelevance:
            continue
        yield bounds_entry
def evict_for_max_events(project, timestamp, stored_event_count=None):
from .models import Event
if stored_event_count is None:
# allowed as a pass-in to save a query (we generally start off knowing this)
stored_event_count = Event.objects.filter(project=project).count()
stored_event_count = Event.objects.filter(project=project).count() + 1
epoch_bounds_with_irrelevance = get_epoch_bounds_with_irrelevance(project, timestamp)
# we start off with the currently observed max total irrelevance
# +1 to correct for -= 1 at the beginning of the loop
max_total_irrelevance = max(sum(pair) for pair in get_irrelevance_pairs(project, epoch_bounds_with_irrelevance)) + 1
pairs = get_irrelevance_pairs(project, epoch_bounds_with_irrelevance)
max_total_irrelevance = max(sum(pair) for pair in pairs)
while stored_event_count >= project.retention_max_event_count: # >= as elsewhere: eviction is before add-new
while stored_event_count > project.retention_max_event_count: # > as in `should_evict`
# -1 at the beginning of the loop; this means the actually observed max value is precisely the first thing that
# will be evicted (since `evict_for_irrelevance` will evict anything above (but not including) the given value)
max_total_irrelevance -= 1
evict_for_irrelevance(max_total_irrelevance, epoch_bounds_with_irrelevance)
stored_event_count = Event.objects.filter(project=project).count()
@@ -173,27 +188,25 @@ def evict_for_max_events(project, timestamp, stored_event_count=None):
def evict_for_irrelevance(max_total_irrelevance, epoch_bounds_with_irrelevance):
# print("evict_for_irrelevance(%d, %s)" % (max_total_irrelevance, epoch_bounds_with_irrelevance))
# max_total_irrelevance, i.e. the total may not exceed this (but it may equal it)
for (_, epoch_ub_exclusive), irrelevance_for_age in epoch_bounds_with_irrelevance:
max_item_irrelevance = max_total_irrelevance - irrelevance_for_age
if max_item_irrelevance < -1:
evict_for_epoch_and_irrelevance(epoch_ub_exclusive, max_item_irrelevance)
if max_item_irrelevance <= -1:
# in the actual eviction, the test on max_item_irrelevance is done exclusively, i.e. only items of greater
# irrelevance are evicted. The minimal actually occurring value is 0. Such items can be evicted with a call
# with max_item_irrelevance = -1. Once the value is smaller than that, there's nothing to evict left.
# (the above is true under conditions, which we meet, namely):
# * the actual eviction takes only an UB for the epochs, so older epochs are always cleaned as part of the
# newer ones (see picture below)
# * we always have at least a single pass through the loop for the current epoch (which has 0 irrelevance)
# and we always have at least one call with at least 0 max_total_irrelevance
# with max_item_irrelevance = -1. This means that if we just did such an eviction, we're done for all epochs
break
evict_for_epoch_and_irrelevance(epoch_ub_exclusive, max_item_irrelevance)
def evict_for_epoch_and_irrelevance(max_epoch, max_irrelevance):
# print("evict_for_epoch_and_irrelevance(%s, %s)" % (max_epoch, max_irrelevance))
from .models import Event
# evicting "at", based on the total irrelevance split out into 2 parts: max item irrelevance, and an epoch as
# implied by the age-based irrelevance.

View File

@@ -28,6 +28,7 @@ from bugsink.streams import content_encoding_reader, MaxDataReader, MaxDataWrite
from bugsink.app_settings import get_settings
from events.models import Event
from events.retention import evict_for_max_events, should_evict
from releases.models import create_release_if_needed
from alerts.tasks import send_new_issue_alert, send_regression_alert
from compat.timestamp import format_timestamp, parse_timestamp
@@ -194,20 +195,30 @@ class BaseIngestAPIView(View):
# NOTE: possibly expensive. "in theory" we can just do some bookkeeping for a denormalized value, but that may
# be hard to keep in-sync in practice. Let's check the actual cost first.
stored_event_count = issue.event_set.count()
# +1 because we're about to add one event.
project_stored_event_count = (project.event_set.count() or 0) + 1
issue_stored_event_count = (issue.event_set.count() or 0) + 1
if should_evict(project, timestamp, stored_event_count):
project.retention_last_eviction = timestamp
project.retention_max_total_irrelevance = evict_for_max_events(project, timestamp, stored_event_count)
# TODO: actually save the project? or use an update call?
# TODO: the idea of cooling off the max_total_irrelevance
if should_evict(project, timestamp, project_stored_event_count):
# Note: I considered pushing this into some async process, but it makes reasoning much harder, and it's
# doubtful whether it would help, because in the end there's just a single pipeline of ingested-related
# stuff to do, might as well do the work straight away. Similar thoughts about pushing this into something
# cron-like. (not exactly the same, because for cron-like time savings are possible if the cron-likeness
# causes the work to be outside of the 'rush hour' -- OTOH this also introduces a lot of complexity about
# "what is a limit anyway, if you can go either over it, or work is done before the limit is reached")
evict_for_max_events(project, timestamp, project_stored_event_count)
# project.retention_last_eviction = timestamp
# project.retention_max_total_irrelevance
# TODO-if-the-above: actually save the project? or use an update call?
# TODO-if-the-above: the idea of cooling off the max_total_irrelevance
# NOTE: an event always has a single (automatically calculated) Grouping associated with it. Since we have that
# information available here, we could add it to the Event model.
event, event_created = Event.from_ingested(
event_metadata,
issue.event_count,
stored_event_count,
issue_stored_event_count,
issue,
event_data,
denormalized_fields,

View File

@@ -0,0 +1,16 @@
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the per-project cap on stored events; the eviction code compares the stored
    # event count against this field to decide when to evict.

    dependencies = [
        ('projects', '0002_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='project',
            name='retention_max_event_count',
            # Matches the model-level default of 10_000.
            field=models.PositiveIntegerField(default=10000),
        ),
    ]

View File

@@ -61,11 +61,12 @@ class Project(models.Model):
visibility = models.IntegerField(choices=ProjectVisibility.choices, default=ProjectVisibility.TEAM_MEMBERS)
# retention quota
retention_event_count_quota = models.PositiveIntegerField(default=10_000)
retention_max_event_count = models.PositiveIntegerField(default=10_000)
# bookkeeping of the eviction algorithm
retention_last_eviction = models.DateTimeField(null=True, blank=True)
retention_max_total_irrelevance = models.PositiveIntegerField(null=True, blank=True)
# this will be needed if/when we take an approach of 'drop immediately'
# retention_last_eviction = models.DateTimeField(null=True, blank=True)
# retention_max_total_irrelevance = models.PositiveIntegerField(null=True, blank=True)
def __str__(self):
return self.name