mirror of
https://github.com/bugsink/bugsink.git
synced 2025-12-20 20:41:01 -06:00
Cache stored_event_count (on Issue and Project)
"possibly expensive" turned out to be "actually expensive". On 'emu', with 1.5M events, the counts take 85 and 154 ms for Project and Issue respectively; bottlenecking our digestion to ~3 events/s. Note: this is single-issue, single-project (presumably, the cost would be lower for more spread-out cases) Note on indexes: Event already has indexes for both Project & Issue (though as the first item in a multi-column index). Without checking further: that appears to not "magically solve counting". This commit also optimizes the .count() on the issue-detail event list (via Paginator). This commit also slightly changes the value passed as `stored_event_count` to be used for `get_random_irrelevance` to be the post-evication value. That won't matter much in practice, but is slightly more correct IMHO.
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management import call_command
|
||||
|
||||
from issues.models import Issue
|
||||
from events.models import Event
|
||||
@@ -17,3 +18,5 @@ class Command(BaseCommand):
|
||||
print("nuking")
|
||||
Issue.objects.all().delete()
|
||||
Event.objects.all().delete()
|
||||
|
||||
call_command("make_consistent")
|
||||
|
||||
@@ -4,6 +4,7 @@ from django.db.models import Count
|
||||
from releases.models import Release
|
||||
from issues.models import Issue, Grouping, TurningPoint
|
||||
from events.models import Event
|
||||
from projects.models import Project
|
||||
|
||||
|
||||
def make_consistent():
|
||||
@@ -46,6 +47,21 @@ def make_consistent():
|
||||
event.never_evict = True
|
||||
event.save()
|
||||
|
||||
# counter reset: do it last, because the above deletions may have changed the counts
|
||||
for issue in Issue.objects.all():
|
||||
if issue.stored_event_count != issue.event_set.count():
|
||||
print("Updating event count for issue %s from %d to %d" % (
|
||||
issue, issue.stored_event_count, issue.event_set.count()))
|
||||
issue.stored_event_count = issue.event_set.count()
|
||||
issue.save()
|
||||
|
||||
for project in Project.objects.all():
|
||||
if project.stored_event_count != project.event_set.count():
|
||||
print("Updating event count for project %s from %d to %d" % (
|
||||
project, project.stored_event_count, project.event_set.count()))
|
||||
project.stored_event_count = project.event_set.count()
|
||||
project.save()
|
||||
|
||||
|
||||
class Command(BaseCommand):
|
||||
help = """In 'normal operation', this command should not be used, because normal operation leaves the DB in a
|
||||
|
||||
@@ -227,7 +227,7 @@ def evict_for_max_events(project, timestamp, stored_event_count=None, include_ne
|
||||
if max_total_irrelevance < -1: # < -1: as in `evict_for_irrelevance`
|
||||
if not include_never_evict:
|
||||
# everything that remains is 'never_evict'. 'never say never' and evict those as a last measure
|
||||
return evict_for_max_events(project, timestamp, stored_event_count - evicted, True)
|
||||
return evicted + evict_for_max_events(project, timestamp, stored_event_count - evicted, True)
|
||||
|
||||
raise Exception("No more effective eviction possible but target not reached") # "should not happen"
|
||||
|
||||
@@ -239,7 +239,7 @@ def evict_for_max_events(project, timestamp, stored_event_count=None, include_ne
|
||||
stored_event_count - evicted - 1, # down to: -1, because the +1 happens post-eviction
|
||||
orig_max_total_irrelevance, max_total_irrelevance, phase0.took, phase1.took, phase0.count, phase1.count)
|
||||
|
||||
return max_total_irrelevance
|
||||
return evicted
|
||||
|
||||
|
||||
def evict_for_irrelevance(
|
||||
|
||||
@@ -521,6 +521,19 @@ class IngestViewTestCase(TransactionTestCase):
|
||||
# but at least this closes the door for the next event
|
||||
self.assertEqual(now + relativedelta(minutes=5), project.quota_exceeded_until)
|
||||
|
||||
def test_ingest_updates_stored_event_counts(self):
    """Each digested event must bump the denormalized stored_event_count
    on both the Issue and the Project."""
    req = self.request_factory.post("/api/1/store/")

    BaseIngestAPIView().digest_event(**_digest_params(create_event_data(), self.quiet_project, req))

    # first event: exactly one issue exists, and both counters read 1
    self.assertEqual(1, Issue.objects.count())
    self.assertEqual(1, Issue.objects.get().stored_event_count)
    self.assertEqual(1, Project.objects.get(id=self.quiet_project.id).stored_event_count)

    # second event: same issue, both counters advance to 2
    BaseIngestAPIView().digest_event(**_digest_params(create_event_data(), self.quiet_project, req))
    self.assertEqual(2, Issue.objects.get().stored_event_count)
    self.assertEqual(2, Project.objects.get(id=self.quiet_project.id).stored_event_count)
|
||||
|
||||
|
||||
class TestParser(RegularTestCase):
|
||||
|
||||
|
||||
@@ -236,6 +236,7 @@ class BaseIngestAPIView(View):
|
||||
first_seen=ingested_at,
|
||||
last_seen=ingested_at,
|
||||
digested_event_count=1,
|
||||
stored_event_count=0, # we increment this below
|
||||
**denormalized_fields,
|
||||
)
|
||||
issue_created = True
|
||||
@@ -255,11 +256,8 @@ class BaseIngestAPIView(View):
|
||||
issue.last_seen = ingested_at
|
||||
issue.digested_event_count += 1
|
||||
|
||||
# NOTE: possibly expensive. "in theory" we can just do some bookkeeping for a denormalized value, but that may
|
||||
# be hard to keep in-sync in practice. Let's check the actual cost first.
|
||||
# +1 because we're about to add one event.
|
||||
project_stored_event_count = (project.event_set.count() or 0) + 1
|
||||
issue_stored_event_count = (issue.event_set.count() or 0) + 1
|
||||
project_stored_event_count = project.stored_event_count + 1
|
||||
|
||||
if should_evict(project, digested_at, project_stored_event_count):
|
||||
# Note: I considered pushing this into some async process, but it makes reasoning much harder, and it's
|
||||
@@ -268,13 +266,19 @@ class BaseIngestAPIView(View):
|
||||
# cron-like. (not exactly the same, because for cron-like time savings are possible if the cron-likeness
|
||||
# causes the work to be outside of the 'rush hour' -- OTOH this also introduces a lot of complexity about
|
||||
# "what is a limit anyway, if you can go either over it, or work is done before the limit is reached")
|
||||
evict_for_max_events(project, digested_at, project_stored_event_count)
|
||||
evicted = evict_for_max_events(project, digested_at, project_stored_event_count)
|
||||
else:
|
||||
evicted = 0
|
||||
|
||||
issue.stored_event_count = issue.stored_event_count + 1 - evicted # +1 because we're about to add one event
|
||||
project.stored_event_count = project_stored_event_count - evicted
|
||||
project.save(update_fields=["stored_event_count"])
|
||||
|
||||
event, event_created = Event.from_ingested(
|
||||
event_metadata,
|
||||
digested_at,
|
||||
issue.digested_event_count,
|
||||
issue_stored_event_count,
|
||||
issue.stored_event_count,
|
||||
issue,
|
||||
grouping,
|
||||
event_data,
|
||||
|
||||
@@ -51,6 +51,7 @@ class IssueAdmin(admin.ModelAdmin):
|
||||
'unmute_on_volume_based_conditions',
|
||||
'unmute_after',
|
||||
'digested_event_count',
|
||||
'stored_event_count',
|
||||
]
|
||||
|
||||
inlines = [
|
||||
@@ -62,6 +63,7 @@ class IssueAdmin(admin.ModelAdmin):
|
||||
"title",
|
||||
"project",
|
||||
"digested_event_count",
|
||||
"stored_event_count",
|
||||
]
|
||||
list_filter = [
|
||||
"project",
|
||||
@@ -75,4 +77,5 @@ class IssueAdmin(admin.ModelAdmin):
|
||||
'calculated_type',
|
||||
'calculated_value',
|
||||
'digested_event_count',
|
||||
'stored_event_count',
|
||||
]
|
||||
|
||||
18
issues/migrations/0008_issue_stored_event_count.py
Normal file
18
issues/migrations/0008_issue_stored_event_count.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# Generated by Django 4.2.18 on 2025-02-06 10:04

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds the denormalized Issue.stored_event_count counter; the field is
    # non-editable because it is maintained by digestion/eviction code, not
    # by admins.

    dependencies = [
        ("issues", "0004_b_squashed"),
    ]

    operations = [
        migrations.AddField(
            model_name="issue",
            name="stored_event_count",
            field=models.IntegerField(default=0, editable=False),
        ),
    ]
|
||||
24
issues/migrations/0009_fill_stored_event_count.py
Normal file
24
issues/migrations/0009_fill_stored_event_count.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from django.db import migrations


def fill_stored_event_count(apps, schema_editor):
    # One-time backfill: set each issue's counter to its current number of
    # stored events. Done row-by-row via the historical model.
    Issue = apps.get_model("issues", "Issue")
    for issue in Issue.objects.all():
        issue.stored_event_count = issue.event_set.count()
        issue.save()


def unfill_stored_event_count(apps, schema_editor):
    # Reverse operation: reset the counters to the field default so the
    # migration can be unapplied cleanly.
    Issue = apps.get_model("issues", "Issue")
    Issue.objects.all().update(stored_event_count=0)


class Migration(migrations.Migration):

    dependencies = [
        ("issues", "0008_issue_stored_event_count"),
    ]

    operations = [
        migrations.RunPython(fill_stored_event_count, unfill_stored_event_count),
    ]
|
||||
@@ -39,6 +39,7 @@ class Issue(models.Model):
|
||||
last_seen = models.DateTimeField(blank=False, null=False) # based on event.ingested_at
|
||||
first_seen = models.DateTimeField(blank=False, null=False) # based on event.ingested_at
|
||||
digested_event_count = models.IntegerField(blank=False, null=False)
|
||||
stored_event_count = models.IntegerField(blank=False, null=False, default=0, editable=False)
|
||||
calculated_type = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
calculated_value = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
transaction = models.CharField(max_length=200, blank=True, null=False, default="")
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
<div class="overflow-hidden">
|
||||
<div class="italic">
|
||||
Showing {{ page_obj.start_index }} - {{ page_obj.end_index }} of {{ page_obj.paginator.count }}
|
||||
{% if project.digested_event_count != project.event_count %}
|
||||
{% if project.digested_event_count != project.stored_event_count %}
|
||||
available events ({{ project.digested_event_count }} total observed).
|
||||
{% else %}
|
||||
total events.
|
||||
|
||||
@@ -46,6 +46,18 @@ GLOBAL_MUTE_OPTIONS = [
|
||||
]
|
||||
|
||||
|
||||
class KnownCountPaginator(Paginator):
    """Paginator that skips the (possibly expensive) COUNT(*) query.

    The caller supplies the total number of objects via the `count` keyword
    argument; the `count` property then simply returns that precomputed value.
    """

    def __init__(self, *args, **kwargs):
        # `count` must be removed from kwargs before delegating, because the
        # base Paginator's __init__ does not accept it.
        self._count = kwargs.pop("count")
        super().__init__(*args, **kwargs)

    @property
    def count(self):
        # Override of Paginator.count: return the caller-provided total
        # instead of issuing a COUNT(*) query.
        return self._count
|
||||
|
||||
|
||||
def _is_valid_action(action, issue):
|
||||
"""We take the 'strict' approach of complaining even when the action is simply a no-op, because you're already in
|
||||
the desired state."""
|
||||
@@ -510,7 +522,7 @@ def issue_event_list(request, issue):
|
||||
event_list = issue.event_set.order_by("digest_order")
|
||||
|
||||
# re 250: in general "big is good" because it allows a lot "at a glance".
|
||||
paginator = Paginator(event_list, 250)
|
||||
paginator = KnownCountPaginator(event_list, 250, count=issue.stored_event_count)
|
||||
|
||||
page_number = request.GET.get("page")
|
||||
page_obj = paginator.get_page(page_number)
|
||||
|
||||
@@ -103,8 +103,10 @@ def _make_message_body():
|
||||
"project_count": Project.objects.count(),
|
||||
"team_count": Team.objects.count(),
|
||||
|
||||
# event-counts [per some interval (e.g. 24 hours)] is a possible future enhancement. If that turns out to be
|
||||
# expensive, one thing to consider is pulling _make_message_body() out of the immediate_atomic() block.
|
||||
# event-counts [per some interval (e.g. 24 hours)] is a possible future enhancement. We've already seen that
|
||||
# such counts are expensive though, but if _make_message_body() is pulled out of the immediate_atomic()
|
||||
# block (which is OK, since there is no need for some kind of 'transactional consistency' to register this
|
||||
# simple metadata fact), then it might be OK to add some more expensive queries here.
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
18
projects/migrations/0010_project_stored_event_count.py
Normal file
18
projects/migrations/0010_project_stored_event_count.py
Normal file
@@ -0,0 +1,18 @@
|
||||
# Generated by Django 4.2.18 on 2025-02-06 10:04

from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds the denormalized Project.stored_event_count counter; the field is
    # non-editable because it is maintained by digestion/eviction code, not
    # by admins.

    dependencies = [
        ("projects", "0003_project_projects_pr_name_11d782_idx"),
    ]

    operations = [
        migrations.AddField(
            model_name="project",
            name="stored_event_count",
            field=models.IntegerField(default=0, editable=False),
        ),
    ]
|
||||
23
projects/migrations/0011_fill_stored_event_count.py
Normal file
23
projects/migrations/0011_fill_stored_event_count.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from django.db import migrations


def fill_stored_event_count(apps, schema_editor):
    # One-time backfill: set each project's counter to its current number of
    # stored events. Done row-by-row via the historical model.
    Project = apps.get_model("projects", "Project")
    for project in Project.objects.all():
        project.stored_event_count = project.event_set.count()
        project.save()


def unfill_stored_event_count(apps, schema_editor):
    # Reverse operation: reset the counters to the field default so the
    # migration can be unapplied cleanly.
    Project = apps.get_model("projects", "Project")
    Project.objects.all().update(stored_event_count=0)


class Migration(migrations.Migration):

    dependencies = [
        ("projects", "0010_project_stored_event_count"),
    ]

    operations = [
        migrations.RunPython(fill_stored_event_count, unfill_stored_event_count),
    ]
|
||||
@@ -52,6 +52,7 @@ class Project(models.Model):
|
||||
# denormalized/cached/counted fields below
|
||||
has_releases = models.BooleanField(editable=False, default=False)
|
||||
digested_event_count = models.PositiveIntegerField(null=False, blank=False, default=0, editable=False)
|
||||
stored_event_count = models.IntegerField(blank=False, null=False, default=0, editable=False)
|
||||
|
||||
# alerting conditions
|
||||
alert_on_new_issue = models.BooleanField(default=True)
|
||||
|
||||
Reference in New Issue
Block a user