From 6a27a248af31e7c42478c676b4b9cfbb10faa5bb Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sun, 7 Jan 2024 22:09:38 +0100 Subject: [PATCH] Split up ingest/digest (and have a test for process_event) --- bugsink/settings.py | 5 +++ bugsink/tests.py | 14 ++----- events/factories.py | 35 ++++++++++++++++ events/migrations/0008_alter_event_issue.py | 18 +++++++++ events/models.py | 5 ++- .../0003_decompressedevent_debug_info.py | 16 ++++++++ ingest/models.py | 9 ++++- ingest/tests.py | 21 ++++++++++ ingest/views.py | 40 ++++++++++++++----- 9 files changed, 138 insertions(+), 25 deletions(-) create mode 100644 events/factories.py create mode 100644 events/migrations/0008_alter_event_issue.py create mode 100644 ingest/migrations/0003_decompressedevent_debug_info.py diff --git a/bugsink/settings.py b/bugsink/settings.py index ab5fade..74c420e 100644 --- a/bugsink/settings.py +++ b/bugsink/settings.py @@ -136,6 +136,11 @@ STATICFILES_DIRS = [ # no support for uuid in this setting yet (https://code.djangoproject.com/ticket/32577) so we leave it as-is DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' + +# ###################### SERVER-MODE SETTINGS ################# + +BUGSINK_DIGEST_IMMEDIATELY = True + # ###################### MOST PER-SITE CONFIG BELOW THIS LINE ################### diff --git a/bugsink/tests.py b/bugsink/tests.py index cf3b114..70cf35f 100644 --- a/bugsink/tests.py +++ b/bugsink/tests.py @@ -1,4 +1,3 @@ -import uuid from datetime import datetime, timezone from unittest import TestCase @@ -7,6 +6,7 @@ from django.test import TestCase as DjangoTestCase from projects.models import Project from issues.models import Issue from events.models import Event +from events.factories import create_event from .period_counter import PeriodCounter, _prev_tup, TL_DAY, TL_YEAR from .volume_based_condition import VolumeBasedCondition @@ -170,16 +170,8 @@ class PCRegistryTestCase(DjangoTestCase): is_muted=True, unmute_on_volume_based_conditions='[{"period": 
"day", "nr_of_periods": 1, "volume": 100}]', ) - Event.objects.create( - project=project, - issue=issue, - timestamp=datetime.now(timezone.utc), - server_side_timestamp=datetime.now(timezone.utc), - event_id=uuid.uuid4().hex, - has_exception=True, - has_logentry=True, - data="{}", - ) + + create_event(project, issue) by_project, by_issue = PeriodCounterRegistry().load_from_scratch( Project.objects.all(), diff --git a/events/factories.py b/events/factories.py new file mode 100644 index 0000000..2e1f5bc --- /dev/null +++ b/events/factories.py @@ -0,0 +1,35 @@ +import json +import uuid + +from django.utils import timezone + +from events.models import Event + + +def create_event(project, issue, timestamp=None, event_data=None): + if timestamp is None: + timestamp = timezone.now() + + if event_data is None: + event_data = create_event_data() + + return Event.objects.create( + project=project, + issue=issue, + server_side_timestamp=timestamp, + timestamp=timestamp, + event_id=uuid.uuid4().hex, + has_exception=True, + has_logentry=True, + data=json.dumps(event_data), + ) + + +def create_event_data(): + # create minimal event data that is valid as per from_json() + + return { + "event_id": uuid.uuid4().hex, + "timestamp": timezone.now().isoformat(), + "platform": "python", + } diff --git a/events/migrations/0008_alter_event_issue.py b/events/migrations/0008_alter_event_issue.py new file mode 100644 index 0000000..2cd2de2 --- /dev/null +++ b/events/migrations/0008_alter_event_issue.py @@ -0,0 +1,18 @@ +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('issues', '0007_remove_issue_events'), + ('events', '0007_alter_event_issue'), + ] + + operations = [ + migrations.AlterField( + model_name='event', + name='issue', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='issues.issue'), + ), + ] diff --git a/events/models.py b/events/models.py index 
1f581eb..ca5c55a 100644 --- a/events/models.py +++ b/events/models.py @@ -130,7 +130,7 @@ class Event(models.Model): has_exception = models.BooleanField(null=False) has_logentry = models.BooleanField(null=False) - # this is a temporary, bugsink-specific value; + # this is a temporary(?), bugsink-specific value; debug_info = models.CharField(max_length=255, blank=True, null=False, default="") class Meta: @@ -149,11 +149,12 @@ class Event(models.Model): return "/events/event/%s/download/" % self.id @classmethod - def from_json(cls, project, parsed_data, server_side_timestamp, debug_info): + def from_json(cls, project, issue, parsed_data, server_side_timestamp, debug_info): event, created = cls.objects.get_or_create( # NOTE immediate creation... is this what we want? event_id=parsed_data["event_id"], project=project, defaults={ + 'issue': issue, 'server_side_timestamp': server_side_timestamp, 'data': json.dumps(parsed_data), diff --git a/ingest/migrations/0003_decompressedevent_debug_info.py b/ingest/migrations/0003_decompressedevent_debug_info.py new file mode 100644 index 0000000..c4799b9 --- /dev/null +++ b/ingest/migrations/0003_decompressedevent_debug_info.py @@ -0,0 +1,16 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('ingest', '0002_alter_decompressedevent_timestamp'), + ] + + operations = [ + migrations.AddField( + model_name='decompressedevent', + name='debug_info', + field=models.CharField(blank=True, default='', max_length=255), + ), + ] diff --git a/ingest/models.py b/ingest/models.py index 876b800..e543440 100644 --- a/ingest/models.py +++ b/ingest/models.py @@ -6,9 +6,16 @@ from django.utils import timezone from projects.models import Project -class DecompressedEvent(models.Model): # or... DecompressedRawEvent +class DecompressedEvent(models.Model): # or... DecompressedRawEvent... 
or just IngestedEvent """Ingested Event, no processing""" id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' data = models.TextField(blank=False, null=False) timestamp = models.DateTimeField(null=False, default=timezone.now, help_text="Server-side timestamp") + + # filled with values from the http header X-Bugsink-DebugInfo; for now this is duplicated across + # Event/DecompressedEvent; we'll have to figure out which of the 2 models is the right one to put it in (this + # relates to the question of whether we want to keep the raw data around) + # (at the very least I want it here, because in the setup with split up ingest/digest, we need to store it here to + # be able to have it available when we digest) + debug_info = models.CharField(max_length=255, blank=True, null=False, default="") diff --git a/ingest/tests.py b/ingest/tests.py index 0693a12..9e55b34 100644 --- a/ingest/tests.py +++ b/ingest/tests.py @@ -3,8 +3,29 @@ import datetime from django.conf import settings from django.test import TestCase from django.utils import timezone +from django.test.client import RequestFactory + +from projects.models import Project +from events.factories import create_event_data from .models import DecompressedEvent +from .views import BaseIngestAPIView + + +class IngestViewTestCase(TestCase): + + def setUp(self): + self.factory = RequestFactory() + + def test_ingest_view(self): + project = Project.objects.create(name="test") + request = self.factory.post("/api/1/store/") + + BaseIngestAPIView().process_event( + create_event_data(), + project, + request, + ) class TimeZoneTesCase(TestCase): diff --git a/ingest/views.py b/ingest/views.py index f414e04..6aa0267 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -2,6 +2,7 @@ from datetime import datetime, timezone import json # TODO consider faster APIs from django.shortcuts import
get_object_or_404 +from django.conf import settings from rest_framework import permissions, status from rest_framework.response import Response @@ -19,6 +20,7 @@ from issues.regressions import issue_is_regression from events.models import Event from releases.models import create_release_if_needed from bugsink.registry import get_pc_registry +from bugsink.period_counter import PeriodCounter from .negotiation import IgnoreClientContentNegotiation from .parsers import EnvelopeParser @@ -56,30 +58,43 @@ class BaseIngestAPIView(APIView): sentry_key = cls.get_sentry_key_for_request(request) return get_object_or_404(Project, pk=project_id, sentry_key=sentry_key) - def process_event(self, event_data, request, project): + @classmethod + def process_event(cls, event_data, project, request): # because we want to count events before having created event objects (quota may block the latter) we cannot # depend on event.timestamp; instead, we look on the clock once here, and then use that for both the project # and issue period counters. now = datetime.now(timezone.utc) + # note: we may want to skip saving the raw data in a setup where we have integrated ingest/digest, but for now + # we just always save it; note that even for the integrated setup a case can be made for saving the raw data + # before proceeding because it may be useful for debugging errors in the digest process. 
+ ingested_event = cls.ingest_event(now, event_data, request, project) + if settings.BUGSINK_DIGEST_IMMEDIATELY: + cls.digest_event(now, event_data, project, ingested_event.debug_info) + + @classmethod + def ingest_event(cls, now, event_data, request, project): project_pc = get_pc_registry().by_project[project.id] project_pc.inc(now) - DecompressedEvent.objects.create( + debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "") + + return DecompressedEvent.objects.create( project=project, data=json.dumps(event_data), # TODO don't parse-then-print for BaseIngestion timestamp=now, + debug_info=debug_info, ) - debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "") - + @classmethod + def digest_event(cls, now, event_data, project, debug_info): hash_ = get_hash_for_data(event_data) issue, issue_created = Issue.objects.get_or_create( project=project, hash=hash_, ) - event, event_created = Event.from_json(project, event_data, issue, now, debug_info) + event, event_created = Event.from_json(project, issue, event_data, now, debug_info) if not event_created: # note: previously we created the event before the issue, which allowed for one less query. 
I don't see # straight away how we can reproduce that now that we create issue-before-event (since creating the issue @@ -88,19 +103,22 @@ class BaseIngestAPIView(APIView): create_release_if_needed(project, event.release) - issue_pc = get_pc_registry().by_issue[issue.id] - issue_pc.inc(now) - if issue_created: if project.alert_on_new_issue: - alert_for_new_issue.delay(issue) + pass # alert_for_new_issue.delay(issue) elif issue_is_regression(issue, event.release): # new issues cannot be regressions by definition, hence 'else' if project.alert_on_regression: - alert_for_regression.delay(issue) + pass # alert_for_regression.delay(issue) IssueResolver.reopen(issue) + if issue.id not in get_pc_registry().by_issue: + get_pc_registry().by_issue[issue.id] = PeriodCounter() + + issue_pc = get_pc_registry().by_issue[issue.id] + issue_pc.inc(now) + # TODO bookkeeping of events_at goes here. issue.save() @@ -110,7 +128,7 @@ class IngestEventAPIView(BaseIngestAPIView): def post(self, request, project_id=None): project = self.get_project(request, project_id) - self.process_event(request.data, request, project) + self.process_event(request.data, project, request) return Response()