From 725822ce3d92b1c40069f369d48f002a1078a194 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Sat, 11 Nov 2023 21:13:15 +0100 Subject: [PATCH] Events: some modelling and a command to ingest JSONs from other projects as examples --- compat/tests.py | 35 +++++++++++++ compat/timestamp.py | 22 +++++++++ events/admin.py | 44 ++++++++++++++++- events/management/__init__.py | 0 events/management/commands/__init__.py | 0 events/management/commands/cleanup_events.py | 21 ++++++++ events/migrations/0001_initial.py | 38 ++++++++++++++ events/migrations/0002_event_debug_info.py | 16 ++++++ ..._event_has_exception_event_has_logentry.py | 23 +++++++++ events/models.py | 49 +++++++++++++++++++ events/urls.py | 6 +-- events/views.py | 12 ++--- ingest/management/commands/send_json.py | 26 ++++++++-- ingest/models.py | 6 +-- ingest/views.py | 9 +++- issues/migrations/0003_alter_issue_events.py | 17 +++++++ issues/models.py | 2 +- .../0002_project_name_project_sentry_key.py | 7 ++- 18 files changed, 309 insertions(+), 24 deletions(-) create mode 100644 compat/timestamp.py create mode 100644 events/management/__init__.py create mode 100644 events/management/commands/__init__.py create mode 100644 events/management/commands/cleanup_events.py create mode 100644 events/migrations/0001_initial.py create mode 100644 events/migrations/0002_event_debug_info.py create mode 100644 events/migrations/0003_event_has_exception_event_has_logentry.py create mode 100644 issues/migrations/0003_alter_issue_events.py diff --git a/compat/tests.py b/compat/tests.py index 2182e15..90009e3 100644 --- a/compat/tests.py +++ b/compat/tests.py @@ -1,7 +1,10 @@ from unittest import TestCase +import datetime +from django.test import override_settings from .dsn import get_store_url, get_envelope_url, get_header_value from .auth import parse_auth_header_value +from .timestamp import parse_timestamp class DsnTestCase(TestCase): @@ -29,7 +32,39 @@ class DsnTestCase(TestCase): "Sentry sentry_key=public_key, 
sentry_version=7, sentry_client=bugsink/0.0.1", get_header_value("https://public_key@hosted.bugsink/1")) + +class AuthTestCase(TestCase): def test_parse_header_value(self): self.assertEquals( {"sentry_key": "foo", "sentry_version": "bar"}, parse_auth_header_value('Sentry sentry_key=foo,sentry_version=bar')) + + +class TimestampTestCase(TestCase): + def test_numeric_values(self): + self.assertEquals( + datetime.datetime(2023, 11, 11, 17, 32, 24, tzinfo=datetime.timezone.utc), + parse_timestamp(1699723944)) + + self.assertEquals( + datetime.datetime(2023, 11, 11, 17, 32, 24, 500_000, tzinfo=datetime.timezone.utc), + parse_timestamp(1699723944.5)) + + def test_string(self): + self.assertEquals( + datetime.datetime(2022, 9, 1, 9, 45, 0, tzinfo=datetime.timezone.utc), + parse_timestamp("2022-09-01T09:45:00.000Z")) + + self.assertEquals( + datetime.datetime(2018, 1, 1, 5, 6, 7, tzinfo=datetime.timezone.utc), + parse_timestamp("2018-01-01T05:06:07+00:00")) + + @override_settings(TIME_ZONE='Europe/Istanbul') + def test_non_utc_settings_dont_influence_parsing(self): + self.assertEquals( + datetime.datetime(2023, 11, 11, 17, 32, 24, tzinfo=datetime.timezone.utc), + parse_timestamp(1699723944)) + + self.assertEquals( + datetime.datetime(2022, 9, 1, 9, 45, 0, tzinfo=datetime.timezone.utc), + parse_timestamp("2022-09-01T09:45:00.000Z")) diff --git a/compat/timestamp.py b/compat/timestamp.py new file mode 100644 index 0000000..4f173e8 --- /dev/null +++ b/compat/timestamp.py @@ -0,0 +1,22 @@ +import datetime + +from django.utils.dateparse import parse_datetime + + +def parse_timestamp(value): + """ + > Indicates when the event was created in the Sentry SDK. The format is either a string as defined in RFC 3339 or a + > numeric (integer or float) value representing the number of seconds that have elapsed since the Unix epoch + + > Timezone is assumed to be UTC if missing. 
+ + > Sub-microsecond precision is not preserved with numeric values due to precision limitations with floats (at least + > in our systems). With that caveat in mind, just send whatever is easiest to produce. + + > All timestamps in the event protocol are formatted this way. + """ + if isinstance(value, (int, float)): + return datetime.datetime.fromtimestamp(value, tz=datetime.timezone.utc) + + parsed = parse_datetime(value) # naive when the string carries no UTC offset; None when malformed + return parsed if parsed is None or parsed.tzinfo is not None else parsed.replace(tzinfo=datetime.timezone.utc) diff --git a/events/admin.py b/events/admin.py index 8c38f3f..76be547 100644 --- a/events/admin.py +++ b/events/admin.py @@ -1,3 +1,45 @@ +from django.utils.html import escape, mark_safe from django.contrib import admin -# Register your models here. +import json + +from .models import Event + + +@admin.register(Event) +class EventAdmin(admin.ModelAdmin): + list_display = [ + 'timestamp', + # 'project', + 'platform', + 'level', + 'sdk_name', + 'sdk_version', + 'has_exception', + 'has_logentry', + 'debug_info', + 'on_site', + ] + + list_filter = [ + 'project', + 'platform', + 'level', + 'sdk_name', + 'sdk_version', + 'has_exception', + 'has_logentry', + ] + + exclude = ["data"] + + readonly_fields = [ + 'pretty_data', + ] + + def pretty_data(self, obj): + return mark_safe("
" + escape(json.dumps(json.loads(obj.data), indent=2)) + "
") + pretty_data.short_description = "Data" + + def on_site(self, obj): + return mark_safe('View') diff --git a/events/management/__init__.py b/events/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/events/management/commands/__init__.py b/events/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/events/management/commands/cleanup_events.py b/events/management/commands/cleanup_events.py new file mode 100644 index 0000000..f89be01 --- /dev/null +++ b/events/management/commands/cleanup_events.py @@ -0,0 +1,21 @@ +from django.core.management.base import BaseCommand + +from issues.models import Issue +from events.models import Event +from ingest.models import DecompressedEvent + + +class Command(BaseCommand): + help = "..." + + def add_arguments(self, parser): + pass + + def handle(self, *args, **options): + if input("Clean slate (ingestion and its effect)? [y/n] ") != "y": + return + + print("nuking") + Issue.objects.all().delete() + Event.objects.all().delete() + DecompressedEvent.objects.all().delete() diff --git a/events/migrations/0001_initial.py b/events/migrations/0001_initial.py new file mode 100644 index 0000000..caf6d6c --- /dev/null +++ b/events/migrations/0001_initial.py @@ -0,0 +1,38 @@ +from django.db import migrations, models +import django.db.models.deletion +import uuid + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('projects', '0002_project_name_project_sentry_key'), + ] + + operations = [ + migrations.CreateModel( + name='Event', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('event_id', models.UUIDField(editable=False)), + ('data', models.TextField()), + ('timestamp', models.DateTimeField(db_index=True)), + ('platform', models.CharField(choices=[('as3', 'As3'), ('c', 'C'), ('cfml', 'Cfml'), ('cocoa', 'Cocoa'), ('csharp', 'Csharp'), ('elixir', 'Elixir'), ('haskell', 'Haskell'), 
('go', 'Go'), ('groovy', 'Groovy'), ('java', 'Java'), ('javascript', 'Javascript'), ('native', 'Native'), ('node', 'Node'), ('objc', 'Objc'), ('other', 'Other'), ('perl', 'Perl'), ('php', 'Php'), ('python', 'Python'), ('ruby', 'Ruby')], max_length=64)), + ('level', models.CharField(blank=True, choices=[('fatal', 'Fatal'), ('error', 'Error'), ('warning', 'Warning'), ('info', 'Info'), ('debug', 'Debug')], max_length=7)), + ('logger', models.CharField(blank=True, default='', max_length=64)), + ('transaction', models.CharField(blank=True, default='', max_length=200)), + ('server_name', models.CharField(blank=True, default='', max_length=255)), + ('release', models.CharField(blank=True, default='', max_length=250)), + ('dist', models.CharField(blank=True, default='', max_length=64)), + ('environment', models.CharField(blank=True, default='', max_length=64)), + ('sdk_name', models.CharField(blank=True, default='', max_length=255)), + ('sdk_version', models.CharField(blank=True, default='', max_length=255)), + ('project', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='projects.project')), + ], + options={ + 'unique_together': {('project', 'event_id')}, + }, + ), + ] diff --git a/events/migrations/0002_event_debug_info.py b/events/migrations/0002_event_debug_info.py new file mode 100644 index 0000000..fe25c21 --- /dev/null +++ b/events/migrations/0002_event_debug_info.py @@ -0,0 +1,16 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('events', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='event', + name='debug_info', + field=models.CharField(blank=True, default='', max_length=255), + ), + ] diff --git a/events/migrations/0003_event_has_exception_event_has_logentry.py b/events/migrations/0003_event_has_exception_event_has_logentry.py new file mode 100644 index 0000000..3d5a52f --- /dev/null +++ 
b/events/migrations/0003_event_has_exception_event_has_logentry.py @@ -0,0 +1,23 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('events', '0002_event_debug_info'), + ] + + operations = [ + migrations.AddField( + model_name='event', + name='has_exception', + field=models.BooleanField(default=False), + preserve_default=False, + ), + migrations.AddField( + model_name='event', + name='has_logentry', + field=models.BooleanField(default=False), + preserve_default=False, + ), + ] diff --git a/events/models.py b/events/models.py index 1036352..ca4055e 100644 --- a/events/models.py +++ b/events/models.py @@ -1,7 +1,10 @@ +import json import uuid + from django.db import models from projects.models import Project +from compat.timestamp import parse_timestamp class Platform(models.TextChoices): @@ -34,6 +37,10 @@ class Level(models.TextChoices): DEBUG = "debug" +def maybe_empty(s): + return "" if not s else s + + class Event(models.Model): # Lines quotes with ">" are from the following to resources: # https://develop.sentry.dev/sdk/event-payloads/ (supposedly more human-readable) @@ -115,6 +122,48 @@ class Event(models.Model): sdk_name = models.CharField(max_length=255, blank=True, null=False, default="") sdk_version = models.CharField(max_length=255, blank=True, null=False, default="") + # these 2 are perhaps temporary, I made them up myself. 
Idea: ability to get a sense of the shape of the data quickly + has_exception = models.BooleanField(null=False) + has_logentry = models.BooleanField(null=False) + + # this is a temporary, bugsink-specific value; + debug_info = models.CharField(max_length=255, blank=True, null=False, default="") + class Meta: unique_together = (("project", "event_id"),) # index_together = (("group_id", "datetime"),) TODO seriously think about indexes + + def get_absolute_url(self): + return "/events/event/%s/" % self.id + + @classmethod + def from_json(cls, project, parsed_data, debug_info): + event, created = cls.objects.get_or_create( # NOTE immediate creation... is this what we want? + event_id=parsed_data["event_id"], + project=project, + defaults={ + 'data': json.dumps(parsed_data), + + 'timestamp': parse_timestamp(parsed_data["timestamp"]), + 'platform': parsed_data["platform"], + + 'level': maybe_empty(parsed_data.get("level", "")), + 'logger': maybe_empty(parsed_data.get("logger", "")), + 'transaction': maybe_empty(parsed_data.get("transaction", "")), + + 'server_name': maybe_empty(parsed_data.get("server_name", "")), + 'release': maybe_empty(parsed_data.get("release", "")), + 'dist': maybe_empty(parsed_data.get("dist", "")), + + 'environment': maybe_empty(parsed_data.get("environment", "")), + + 'sdk_name': maybe_empty((parsed_data.get("sdk") or {}).get("name", "")), # "sdk" may be absent or null + 'sdk_version': maybe_empty((parsed_data.get("sdk") or {}).get("version", "")), + + 'has_exception': "exception" in parsed_data, + 'has_logentry': "logentry" in parsed_data, + + 'debug_info': debug_info, + } + ) + return event, created diff --git a/events/urls.py b/events/urls.py index 42b0b38..a7cea59 100644 --- a/events/urls.py +++ b/events/urls.py @@ -1,9 +1,9 @@ from django.urls import path -from .views import decompressed_event_detail, debug_get_hash +from .views import event_detail, debug_get_hash urlpatterns = [ - path('event//', decompressed_event_detail), - path('debug_get_hash//', debug_get_hash), + path('event//', 
event_detail), + path('debug_get_hash//', debug_get_hash), ] diff --git a/events/views.py b/events/views.py index 2517a67..1e61b02 100644 --- a/events/views.py +++ b/events/views.py @@ -2,13 +2,13 @@ import json from django.shortcuts import render, get_object_or_404 -from ingest.models import DecompressedEvent from issues.utils import get_hash_for_data, get_issue_grouper_for_data +from .models import Event -def decompressed_event_detail(request, pk): - # this view is misplaced "by nature" (it mixes ingested stuff and rendering); until we create a pipeline for that. - obj = get_object_or_404(DecompressedEvent, pk=pk) + +def event_detail(request, pk): + obj = get_object_or_404(Event, pk=pk) parsed_data = json.loads(obj.data) @@ -23,10 +23,10 @@ def decompressed_event_detail(request, pk): }) -def debug_get_hash(request, decompressed_event_pk): +def debug_get_hash(request, event_pk): # debug view; not for eternity - obj = get_object_or_404(DecompressedEvent, pk=decompressed_event_pk) + obj = get_object_or_404(Event, pk=event_pk) parsed_data = json.loads(obj.data) diff --git a/ingest/management/commands/send_json.py b/ingest/management/commands/send_json.py index 395c6af..757074a 100644 --- a/ingest/management/commands/send_json.py +++ b/ingest/management/commands/send_json.py @@ -7,7 +7,7 @@ from compat.dsn import get_store_url, get_header_value class Command(BaseCommand): - help = "..." + help = "Quick and dirty command to load a bunch of events from e.g. 
the sentry test codebase" def add_arguments(self, parser): parser.add_argument("--dsn") @@ -18,16 +18,36 @@ class Command(BaseCommand): for json_filename in options["json_files"]: with open(json_filename) as f: - print("HIER", json_filename) + print("considering", json_filename) try: data = json.loads(f.read()) except Exception as e: self.stderr.write("%s %s %s" % ("Not JSON", json_filename, str(e))) + continue + + if "event_id" not in data: + self.stderr.write("%s %s" % ("Probably not a (single) event", json_filename)) + continue + + if "timestamp" not in data: + # weirdly enough a large number of sentry test data don't actually have this required attribute set. + # thus, we set it to something arbitrary on the sending side rather than have our server be robust + # for it. + data["timestamp"] = 0 + + if "platform" not in data: + # in a few cases this value isn't set either in the sentry test data but I'd rather ignore those... + # because 'platform' is such a valuable piece of info while getting a sense of the shape of the data + self.stderr.write("%s %s" % ("Platform not set", json_filename)) + continue try: response = requests.post( get_store_url(dsn), - headers={"X-Sentry-Auth": get_header_value(dsn)}, + headers={ + "X-Sentry-Auth": get_header_value(dsn), + "X-BugSink-DebugInfo": json_filename, + }, json=data, ) response.raise_for_status() diff --git a/ingest/models.py b/ingest/models.py index 5cb600c..e81c60c 100644 --- a/ingest/models.py +++ b/ingest/models.py @@ -5,13 +5,9 @@ from django.db import models from projects.models import Project -class DecompressedEvent(models.Model): +class DecompressedEvent(models.Model): # or... 
DecompressedRawEvent """Ingested Event, no processing""" id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' data = models.TextField(blank=False, null=False) timestamp = models.DateTimeField(null=False, auto_now_add=True, help_text="Server-side timestamp") - - def get_absolute_url(self): - # same note about misplacement as the view this is pointing to - return "/events/event/%s/" % self.id diff --git a/ingest/views.py b/ingest/views.py index 567d6f0..7b085d4 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -14,6 +14,7 @@ from projects.models import Project from issues.models import Issue from issues.utils import get_hash_for_data +from events.models import Event from .negotiation import IgnoreClientContentNegotiation from .parsers import EnvelopeParser @@ -52,11 +53,17 @@ class BaseIngestAPIView(APIView): return get_object_or_404(Project, pk=project_id, sentry_key=sentry_key) def process_event(self, event_data, request, project): - event = DecompressedEvent.objects.create( + DecompressedEvent.objects.create( project=project, data=json.dumps(event_data), # TODO don't parse-then-print for BaseIngestion ) + debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "") + + event, created = Event.from_json(project, event_data, debug_info) + if not created: + return + hash_ = get_hash_for_data(event_data) issue, _ = Issue.objects.get_or_create( diff --git a/issues/migrations/0003_alter_issue_events.py b/issues/migrations/0003_alter_issue_events.py new file mode 100644 index 0000000..717f9e0 --- /dev/null +++ b/issues/migrations/0003_alter_issue_events.py @@ -0,0 +1,17 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('events', '0001_initial'), + ('issues', '0002_issue_project'), + ] + + operations = [ + migrations.AlterField( + model_name='issue', + name='events', + 
field=models.ManyToManyField(to='events.event'), + ), + ] diff --git a/issues/models.py b/issues/models.py index 12614a6..6e25ddc 100644 --- a/issues/models.py +++ b/issues/models.py @@ -10,7 +10,7 @@ class Issue(models.Model): project = models.ForeignKey( "projects.Project", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' hash = models.CharField(max_length=32, blank=False, null=False) - events = models.ManyToManyField("ingest.DecompressedEvent") + events = models.ManyToManyField("events.Event") def get_absolute_url(self): return f"/issues/issue/{ self.id }/events/" diff --git a/projects/migrations/0002_project_name_project_sentry_key.py b/projects/migrations/0002_project_name_project_sentry_key.py index c86a53b..220798d 100644 --- a/projects/migrations/0002_project_name_project_sentry_key.py +++ b/projects/migrations/0002_project_name_project_sentry_key.py @@ -1,6 +1,5 @@ -from random import random from django.db import migrations, models -import projects.models +import uuid class Migration(migrations.Migration): @@ -13,12 +12,12 @@ class Migration(migrations.Migration): migrations.AddField( model_name='project', name='name', - field=models.CharField(default=lambda: str(random()), max_length=255), + field=models.CharField(default='asdf', max_length=255), preserve_default=False, ), migrations.AddField( model_name='project', name='sentry_key', - field=models.CharField(default=projects.models.uuid4_hex, max_length=32, unique=True), + field=models.UUIDField(default=uuid.uuid4, editable=False), ), ]