Split up ingest/digest (and have a test for process_event)

Klaas van Schelven
2024-01-07 22:09:38 +01:00
parent 5cf1445eeb
commit 6a27a248af
9 changed files with 138 additions and 25 deletions

View File

@@ -136,6 +136,11 @@ STATICFILES_DIRS = [
# no support for uuid in this setting yet (https://code.djangoproject.com/ticket/32577) so we leave it as-is
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
+# ###################### SERVER-MODE SETTINGS #################
+BUGSINK_DIGEST_IMMEDIATELY = True
# ###################### MOST PER-SITE CONFIG BELOW THIS LINE ###################

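In rough terms, the new setting toggles whether an event is digested synchronously, inside the ingest request, or left for a later digest step. A minimal sketch of the intended control flow, assuming the ingest_event/digest_event methods introduced in views.py further down (the deferred branch is an assumption; this commit only wires up the immediate path):

from datetime import datetime, timezone

from django.conf import settings

from ingest.views import BaseIngestAPIView


def on_ingest(event_data, project, request):
    # sketch, not part of the commit: what BUGSINK_DIGEST_IMMEDIATELY is meant to toggle
    now = datetime.now(timezone.utc)
    ingested = BaseIngestAPIView.ingest_event(now, event_data, request, project)  # always store the raw event
    if settings.BUGSINK_DIGEST_IMMEDIATELY:
        # digest in the same request/response cycle
        BaseIngestAPIView.digest_event(now, event_data, project, ingested.debug_info)
    # else: a separate digest process would pick up the stored DecompressedEvents later (not in this commit)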
View File

@@ -1,4 +1,3 @@
-import uuid
from datetime import datetime, timezone
from unittest import TestCase
@@ -7,6 +6,7 @@ from django.test import TestCase as DjangoTestCase
from projects.models import Project
from issues.models import Issue
from events.models import Event
+from events.factories import create_event
from .period_counter import PeriodCounter, _prev_tup, TL_DAY, TL_YEAR
from .volume_based_condition import VolumeBasedCondition
@@ -170,16 +170,8 @@ class PCRegistryTestCase(DjangoTestCase):
            is_muted=True,
            unmute_on_volume_based_conditions='[{"period": "day", "nr_of_periods": 1, "volume": 100}]',
        )
-        Event.objects.create(
-            project=project,
-            issue=issue,
-            timestamp=datetime.now(timezone.utc),
-            server_side_timestamp=datetime.now(timezone.utc),
-            event_id=uuid.uuid4().hex,
-            has_exception=True,
-            has_logentry=True,
-            data="{}",
-        )
+        create_event(project, issue)

        by_project, by_issue = PeriodCounterRegistry().load_from_scratch(
            Project.objects.all(),

events/factories.py Normal file
View File

@@ -0,0 +1,35 @@
import json
import uuid

from django.utils import timezone

from events.models import Event


def create_event(project, issue, timestamp=None, event_data=None):
    if timestamp is None:
        timestamp = timezone.now()

    if event_data is None:
        event_data = create_event_data()

    return Event.objects.create(
        project=project,
        issue=issue,
        server_side_timestamp=timestamp,
        timestamp=timestamp,
        event_id=uuid.uuid4().hex,
        has_exception=True,
        has_logentry=True,
        data=json.dumps(event_data),
    )


def create_event_data():
    # create minimal event data that is valid as per from_json()
    return {
        "event_id": uuid.uuid4().hex,
        "timestamp": timezone.now().isoformat(),
        "platform": "python",
    }
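
For illustration, a hypothetical test using the new factory, along the lines of the PCRegistryTestCase change above (the Issue fields passed here are an assumption; real tests may need more):

from django.test import TestCase

from projects.models import Project
from issues.models import Issue
from events.factories import create_event, create_event_data


class FactoryUsageExample(TestCase):
    def test_create_event(self):
        project = Project.objects.create(name="test")
        issue = Issue.objects.create(project=project, hash="d41d8cd98f00b204e9800998ecf8427e")  # assumed fields
        event = create_event(project, issue, event_data=create_event_data())
        self.assertEqual(event.issue, issue)
        self.assertTrue(event.has_exception)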

View File

@@ -0,0 +1,18 @@
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

    dependencies = [
        ('issues', '0007_remove_issue_events'),
        ('events', '0007_alter_event_issue'),
    ]

    operations = [
        migrations.AlterField(
            model_name='event',
            name='issue',
            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='issues.issue'),
        ),
    ]

View File

@@ -130,7 +130,7 @@ class Event(models.Model):
    has_exception = models.BooleanField(null=False)
    has_logentry = models.BooleanField(null=False)

-    # this is a temporary, bugsink-specific value;
+    # this is a temporary(?), bugsink-specific value;
    debug_info = models.CharField(max_length=255, blank=True, null=False, default="")

    class Meta:
@@ -149,11 +149,12 @@ class Event(models.Model):
return "/events/event/%s/download/" % self.id
@classmethod
def from_json(cls, project, parsed_data, server_side_timestamp, debug_info):
def from_json(cls, project, issue, parsed_data, server_side_timestamp, debug_info):
event, created = cls.objects.get_or_create( # NOTE immediate creation... is this what we want?
event_id=parsed_data["event_id"],
project=project,
defaults={
'issue': issue,
'server_side_timestamp': server_side_timestamp,
'data': json.dumps(parsed_data),

View File

@@ -0,0 +1,16 @@
from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ('ingest', '0002_alter_decompressedevent_timestamp'),
    ]

    operations = [
        migrations.AddField(
            model_name='decompressedevent',
            name='debug_info',
            field=models.CharField(blank=True, default='', max_length=255),
        ),
    ]

View File

@@ -6,9 +6,16 @@ from django.utils import timezone
from projects.models import Project


-class DecompressedEvent(models.Model):  # or... DecompressedRawEvent
+class DecompressedEvent(models.Model):  # or... DecompressedRawEvent... or just IngestedEvent
+    """Ingested Event, no processing"""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL)  # SET_NULL: cleanup 'later'
    data = models.TextField(blank=False, null=False)
    timestamp = models.DateTimeField(null=False, default=timezone.now, help_text="Server-side timestamp")

+    # filled with the value of the X-Bugsink-DebugInfo HTTP header; for now this is duplicated across
+    # Event/DecompressedEvent; we'll have to figure out which of the 2 models is the right one to put it in (this
+    # relates to the question of whether we want to keep the raw data around)
+    # (at the very least I want it here, because in the split-up ingest/digest setup we need to store it at ingest
+    # time to have it available when we digest)
+    debug_info = models.CharField(max_length=255, blank=True, null=False, default="")
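
As a sketch of how this field gets filled: the header name below is inferred from the HTTP_X_BUGSINK_DEBUGINFO META key used in views.py further down, and the store path is taken from the ingest test; both are assumptions about the exact wire format:

from django.test import Client

client = Client()
client.post(
    "/api/1/store/",  # path as used in the ingest test below
    data="{}",  # minimal body for illustration; a real event payload would go here
    content_type="application/json",
    HTTP_X_BUGSINK_DEBUGINFO="local-dev-box",  # arrives as request.META["HTTP_X_BUGSINK_DEBUGINFO"]
)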

View File

@@ -3,8 +3,29 @@ import datetime
from django.conf import settings
from django.test import TestCase
from django.utils import timezone
+from django.test.client import RequestFactory

from projects.models import Project
+from events.factories import create_event_data

from .models import DecompressedEvent
+from .views import BaseIngestAPIView


+class IngestViewTestCase(TestCase):
+    def setUp(self):
+        self.factory = RequestFactory()
+
+    def test_ingest_view(self):
+        project = Project.objects.create(name="test")
+        request = self.factory.post("/api/1/store/")
+
+        BaseIngestAPIView().process_event(
+            create_event_data(),
+            project,
+            request,
+        )
class TimeZoneTesCase(TestCase):

View File

@@ -2,6 +2,7 @@ from datetime import datetime, timezone
import json # TODO consider faster APIs
from django.shortcuts import get_object_or_404
+from django.conf import settings
from rest_framework import permissions, status
from rest_framework.response import Response
@@ -19,6 +20,7 @@ from issues.regressions import issue_is_regression
from events.models import Event
from releases.models import create_release_if_needed
from bugsink.registry import get_pc_registry
+from bugsink.period_counter import PeriodCounter
from .negotiation import IgnoreClientContentNegotiation
from .parsers import EnvelopeParser
@@ -56,30 +58,43 @@ class BaseIngestAPIView(APIView):
        sentry_key = cls.get_sentry_key_for_request(request)
        return get_object_or_404(Project, pk=project_id, sentry_key=sentry_key)

-    def process_event(self, event_data, request, project):
+    @classmethod
+    def process_event(cls, event_data, project, request):
        # because we want to count events before having created event objects (quota may block the latter) we cannot
        # depend on event.timestamp; instead, we look at the clock once here, and then use that for both the project
        # and issue period counters.
        now = datetime.now(timezone.utc)

+        # note: we may want to skip saving the raw data in a setup with integrated ingest/digest, but for now we
+        # just always save it; even in the integrated setup a case can be made for saving the raw data before
+        # proceeding, because it may be useful for debugging errors in the digest process.
+        ingested_event = cls.ingest_event(now, event_data, request, project)
+
+        if settings.BUGSINK_DIGEST_IMMEDIATELY:
+            cls.digest_event(now, event_data, project, ingested_event.debug_info)
+
+    @classmethod
+    def ingest_event(cls, now, event_data, request, project):
        project_pc = get_pc_registry().by_project[project.id]
        project_pc.inc(now)

-        DecompressedEvent.objects.create(
+        debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "")
+        return DecompressedEvent.objects.create(
            project=project,
            data=json.dumps(event_data),  # TODO don't parse-then-print for BaseIngestion
            timestamp=now,
+            debug_info=debug_info,
        )
-        debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "")
+    @classmethod
+    def digest_event(cls, now, event_data, project, debug_info):
        hash_ = get_hash_for_data(event_data)

        issue, issue_created = Issue.objects.get_or_create(
            project=project,
            hash=hash_,
        )

-        event, event_created = Event.from_json(project, event_data, issue, now, debug_info)
+        event, event_created = Event.from_json(project, issue, event_data, now, debug_info)

        if not event_created:
            # note: previously we created the event before the issue, which allowed for one less query. I don't see
            # straight away how we can reproduce that now that we create issue-before-event (since creating the issue
@@ -88,19 +103,22 @@ class BaseIngestAPIView(APIView):
        create_release_if_needed(project, event.release)

-        issue_pc = get_pc_registry().by_issue[issue.id]
-        issue_pc.inc(now)

        if issue_created:
            if project.alert_on_new_issue:
-                alert_for_new_issue.delay(issue)
+                pass  # alert_for_new_issue.delay(issue)
        elif issue_is_regression(issue, event.release):  # new issues cannot be regressions by definition, hence 'else'
            if project.alert_on_regression:
-                alert_for_regression.delay(issue)
+                pass  # alert_for_regression.delay(issue)
            IssueResolver.reopen(issue)

+        if issue.id not in get_pc_registry().by_issue:
+            get_pc_registry().by_issue[issue.id] = PeriodCounter()
+
+        issue_pc = get_pc_registry().by_issue[issue.id]
+        issue_pc.inc(now)

        # TODO bookkeeping of events_at goes here.
        issue.save()
@@ -110,7 +128,7 @@ class IngestEventAPIView(BaseIngestAPIView):
    def post(self, request, project_id=None):
        project = self.get_project(request, project_id)
-        self.process_event(request.data, request, project)
+        self.process_event(request.data, project, request)
        return Response()
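
Taken together, ingestion now only has to persist a DecompressedEvent, so digestion can in principle run as a separate step. A minimal sketch of what such a deferred digest loop might look like, under the assumption that stored events are digested and then cleaned up (no such loop exists in this commit; only the immediate path is wired up):

import json
from datetime import datetime, timezone

from ingest.models import DecompressedEvent
from ingest.views import BaseIngestAPIView


def digest_pending_events():
    # hypothetical deferred-digest sketch, not part of the commit
    for ingested in DecompressedEvent.objects.all():
        BaseIngestAPIView.digest_event(
            datetime.now(timezone.utc),  # or ingested.timestamp, depending on desired counting semantics
            json.loads(ingested.data),
            ingested.project,
            ingested.debug_info,
        )
        ingested.delete()  # assumption: raw events are cleaned up once digested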