Events: some modelling and a command to ingest JSONs from other projects as examples

This commit is contained in:
Klaas van Schelven
2023-11-11 21:13:15 +01:00
parent 238fb6dda7
commit 725822ce3d
18 changed files with 309 additions and 24 deletions

View File

@@ -1,7 +1,10 @@
from unittest import TestCase
import datetime
from django.test import override_settings
from .dsn import get_store_url, get_envelope_url, get_header_value
from .auth import parse_auth_header_value
from .timestamp import parse_timestamp
class DsnTestCase(TestCase):
@@ -29,7 +32,39 @@ class DsnTestCase(TestCase):
"Sentry sentry_key=public_key, sentry_version=7, sentry_client=bugsink/0.0.1",
get_header_value("https://public_key@hosted.bugsink/1"))
class AuthTestCase(TestCase):
    """Checks that parse_auth_header_value turns a 'Sentry k=v,k=v' header value into a dict."""

    def test_parse_header_value(self):
        # assertEqual instead of assertEquals: the latter is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(
            {"sentry_key": "foo", "sentry_version": "bar"},
            parse_auth_header_value('Sentry sentry_key=foo,sentry_version=bar'))
class TimestampTestCase(TestCase):
    """Tests for parse_timestamp with numeric and string inputs; all expected values are UTC-aware datetimes."""

    # NOTE: assertEqual is used throughout; assertEquals is a deprecated alias that was removed in Python 3.12.

    def test_numeric_values(self):
        # integer seconds since the Unix epoch
        self.assertEqual(
            datetime.datetime(2023, 11, 11, 17, 32, 24, tzinfo=datetime.timezone.utc),
            parse_timestamp(1699723944))

        # float seconds: the fractional part ends up in the microseconds
        self.assertEqual(
            datetime.datetime(2023, 11, 11, 17, 32, 24, 500_000, tzinfo=datetime.timezone.utc),
            parse_timestamp(1699723944.5))

    def test_string(self):
        # "Z" suffix
        self.assertEqual(
            datetime.datetime(2022, 9, 1, 9, 45, 0, tzinfo=datetime.timezone.utc),
            parse_timestamp("2022-09-01T09:45:00.000Z"))

        # explicit "+00:00" offset
        self.assertEqual(
            datetime.datetime(2018, 1, 1, 5, 6, 7, tzinfo=datetime.timezone.utc),
            parse_timestamp("2018-01-01T05:06:07+00:00"))

    @override_settings(TIME_ZONE='Europe/Istanbul')
    def test_non_utc_settings_dont_influence_parsing(self):
        # the configured TIME_ZONE must not leak into the parsed result, for numeric and string input alike
        self.assertEqual(
            datetime.datetime(2023, 11, 11, 17, 32, 24, tzinfo=datetime.timezone.utc),
            parse_timestamp(1699723944))

        self.assertEqual(
            datetime.datetime(2022, 9, 1, 9, 45, 0, tzinfo=datetime.timezone.utc),
            parse_timestamp("2022-09-01T09:45:00.000Z"))

22
compat/timestamp.py Normal file
View File

@@ -0,0 +1,22 @@
import datetime
from django.utils.dateparse import parse_datetime
def parse_timestamp(value):
    """
    Convert a timestamp from the event protocol into a timezone-aware datetime.

    > Indicates when the event was created in the Sentry SDK. The format is either a string as defined in RFC 3339 or a
    > numeric (integer or float) value representing the number of seconds that have elapsed since the Unix epoch
    > Timezone is assumed to be UTC if missing.
    > Sub-microsecond precision is not preserved with numeric values due to precision limitations with floats (at least
    > in our systems). With that caveat in mind, just send whatever is easiest to produce.
    > All timestamps in the event protocol are formatted this way.

    `value` is either a number (seconds since the Unix epoch) or a date/time string. Numbers are interpreted as UTC.
    Strings are handed to Django's `parse_datetime`; when the string carries no UTC offset the result is marked as
    UTC, as per the quoted spec. Returns None when `parse_datetime` does not recognize the string's format.
    """
    if isinstance(value, (int, float)):
        return datetime.datetime.fromtimestamp(value, tz=datetime.timezone.utc)

    parsed = parse_datetime(value)
    if parsed is not None and parsed.tzinfo is None:
        # parse_datetime yields a naive datetime for strings without an offset; "assumed to be UTC if missing".
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed

View File

@@ -1,3 +1,45 @@
from django.utils.html import escape, mark_safe
from django.contrib import admin
# Register your models here.
import json
from .models import Event
@admin.register(Event)
class EventAdmin(admin.ModelAdmin):
    """Admin for Event; the raw `data` field is hidden and shown as read-only, pretty-printed JSON instead."""

    list_display = [
        'timestamp',
        # 'project',
        'platform',
        'level',
        'sdk_name',
        'sdk_version',
        'has_exception',
        'has_logentry',
        'debug_info',
        'on_site',
    ]

    list_filter = [
        'project',
        'platform',
        'level',
        'sdk_name',
        'sdk_version',
        'has_exception',
        'has_logentry',
    ]

    exclude = ["data"]

    readonly_fields = [
        'pretty_data',
    ]

    def pretty_data(self, obj):
        """Return the event's JSON, re-indented and HTML-escaped, wrapped in a <pre> element."""
        reindented = json.dumps(json.loads(obj.data), indent=2)
        return mark_safe(f"<pre>{escape(reindented)}</pre>")

    pretty_data.short_description = "Data"

    def on_site(self, obj):
        """Return a "View" link to the event's own page (its get_absolute_url)."""
        href = escape(obj.get_absolute_url())
        return mark_safe(f'<a href="{href}">View</a>')

View File

View File

View File

@@ -0,0 +1,21 @@
from django.core.management.base import BaseCommand
from issues.models import Issue
from events.models import Event
from ingest.models import DecompressedEvent
class Command(BaseCommand):
    """Management command that, after an interactive confirmation, deletes all issues, events and ingested events."""

    help = "..."

    def add_arguments(self, parser):
        # this command takes no arguments
        pass

    def handle(self, *args, **options):
        """Ask for confirmation on stdin; only the exact answer "y" triggers the deletion."""
        answer = input("Clean slate (ingestion and its effect)? [y/n] ")
        if answer == "y":
            print("nuking")

            # same order as always: issues first, then events, then the raw ingested events
            for model in (Issue, Event, DecompressedEvent):
                model.objects.all().delete()

View File

@@ -0,0 +1,38 @@
from django.db import migrations, models
import django.db.models.deletion
import uuid
class Migration(migrations.Migration):
    # Initial migration: creates the Event model.

    initial = True

    # The 'project' foreign key below points at projects.project, hence the dependency on the projects app.
    dependencies = [
        ('projects', '0002_project_name_project_sentry_key'),
    ]

    operations = [
        migrations.CreateModel(
            name='Event',
            fields=[
                # primary key: a UUID, uuid4 by default
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                # separate from 'id'; unique per project (see unique_together below)
                ('event_id', models.UUIDField(editable=False)),
                ('data', models.TextField()),
                ('timestamp', models.DateTimeField(db_index=True)),
                ('platform', models.CharField(choices=[('as3', 'As3'), ('c', 'C'), ('cfml', 'Cfml'), ('cocoa', 'Cocoa'), ('csharp', 'Csharp'), ('elixir', 'Elixir'), ('haskell', 'Haskell'), ('go', 'Go'), ('groovy', 'Groovy'), ('java', 'Java'), ('javascript', 'Javascript'), ('native', 'Native'), ('node', 'Node'), ('objc', 'Objc'), ('other', 'Other'), ('perl', 'Perl'), ('php', 'Php'), ('python', 'Python'), ('ruby', 'Ruby')], max_length=64)),
                ('level', models.CharField(blank=True, choices=[('fatal', 'Fatal'), ('error', 'Error'), ('warning', 'Warning'), ('info', 'Info'), ('debug', 'Debug')], max_length=7)),
                # the remaining character fields are all optional and default to the empty string
                ('logger', models.CharField(blank=True, default='', max_length=64)),
                ('transaction', models.CharField(blank=True, default='', max_length=200)),
                ('server_name', models.CharField(blank=True, default='', max_length=255)),
                ('release', models.CharField(blank=True, default='', max_length=250)),
                ('dist', models.CharField(blank=True, default='', max_length=64)),
                ('environment', models.CharField(blank=True, default='', max_length=64)),
                ('sdk_name', models.CharField(blank=True, default='', max_length=255)),
                ('sdk_version', models.CharField(blank=True, default='', max_length=255)),
                # SET_NULL: deleting a project leaves its events in place, with project set to NULL
                ('project', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='projects.project')),
            ],
            options={
                # an event_id may occur only once within a project
                'unique_together': {('project', 'event_id')},
            },
        ),
    ]

View File

@@ -0,0 +1,16 @@
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the optional 'debug_info' text column to Event.

    dependencies = [
        ('events', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='event',
            name='debug_info',
            # optional; defaults to the empty string
            field=models.CharField(blank=True, default='', max_length=255),
        ),
    ]

View File

@@ -0,0 +1,23 @@
from django.db import migrations, models
class Migration(migrations.Migration):
    # Adds the boolean columns 'has_exception' and 'has_logentry' to Event.

    dependencies = [
        ('events', '0002_event_debug_info'),
    ]

    operations = [
        migrations.AddField(
            model_name='event',
            name='has_exception',
            field=models.BooleanField(default=False),
            # the default is a one-off value for this migration only; it is not kept on the field afterwards
            preserve_default=False,
        ),
        migrations.AddField(
            model_name='event',
            name='has_logentry',
            field=models.BooleanField(default=False),
            # same one-off default as for has_exception
            preserve_default=False,
        ),
    ]

View File

@@ -1,7 +1,10 @@
import json
import uuid
from django.db import models
from projects.models import Project
from compat.timestamp import parse_timestamp
class Platform(models.TextChoices):
@@ -34,6 +37,10 @@ class Level(models.TextChoices):
DEBUG = "debug"
def maybe_empty(s):
    """Return `s` unchanged when it is truthy; for None (or any other falsy value) return the empty string."""
    return s or ""
class Event(models.Model):
# Lines quoted with ">" are from the following two resources:
# https://develop.sentry.dev/sdk/event-payloads/ (supposedly more human-readable)
@@ -115,6 +122,48 @@ class Event(models.Model):
sdk_name = models.CharField(max_length=255, blank=True, null=False, default="")
sdk_version = models.CharField(max_length=255, blank=True, null=False, default="")
# these 2 are perhaps temporary, I made them up myself. Idea: ability to get a sense of the shape of the data quickly
has_exception = models.BooleanField(null=False)
has_logentry = models.BooleanField(null=False)
# this is a temporary, bugsink-specific value;
debug_info = models.CharField(max_length=255, blank=True, null=False, default="")
class Meta:
unique_together = (("project", "event_id"),)
# index_together = (("group_id", "datetime"),) TODO seriously think about indexes
def get_absolute_url(self):
    """Return the site-relative URL of this event's detail page, built from its id."""
    return f"/events/event/{self.id}/"
@classmethod
def from_json(cls, project, parsed_data, debug_info):
    """
    Get or create the Event for `parsed_data` (an already-parsed event payload, i.e. a dict) within `project`.

    Lookup is by (project, event_id); the remaining columns are only filled in when a new row is created.
    "event_id", "timestamp" and "platform" must be present in `parsed_data` (KeyError otherwise); all other
    attributes are optional and stored as the empty string when missing or null.

    Returns the tuple (event, created) as given by get_or_create.
    """
    # The SDK's name and version live under the "sdk" key; that key may be absent or explicitly null.
    sdk = parsed_data.get("sdk") or {}

    event, created = cls.objects.get_or_create(  # NOTE immediate creation... is this what we want?
        event_id=parsed_data["event_id"],
        project=project,
        defaults={
            # the full payload is stored alongside the extracted columns
            'data': json.dumps(parsed_data),

            'timestamp': parse_timestamp(parsed_data["timestamp"]),
            'platform': parsed_data["platform"],

            'level': maybe_empty(parsed_data.get("level", "")),
            'logger': maybe_empty(parsed_data.get("logger", "")),
            'transaction': maybe_empty(parsed_data.get("transaction", "")),

            'server_name': maybe_empty(parsed_data.get("server_name", "")),
            'release': maybe_empty(parsed_data.get("release", "")),
            'dist': maybe_empty(parsed_data.get("dist", "")),

            'environment': maybe_empty(parsed_data.get("environment", "")),

            'sdk_name': maybe_empty(sdk.get("name", "")),
            'sdk_version': maybe_empty(sdk.get("version", "")),

            'has_exception': "exception" in parsed_data,
            'has_logentry': "logentry" in parsed_data,

            'debug_info': debug_info,
        }
    )
    return event, created

View File

@@ -1,9 +1,9 @@
from django.urls import path
from .views import decompressed_event_detail, debug_get_hash
from .views import event_detail, debug_get_hash
urlpatterns = [
path('event/<uuid:pk>/', decompressed_event_detail),
path('debug_get_hash/<uuid:decompressed_event_pk>/', debug_get_hash),
path('event/<uuid:pk>/', event_detail),
path('debug_get_hash/<uuid:event_pk>/', debug_get_hash),
]

View File

@@ -2,13 +2,13 @@ import json
from django.shortcuts import render, get_object_or_404
from ingest.models import DecompressedEvent
from issues.utils import get_hash_for_data, get_issue_grouper_for_data
from .models import Event
def decompressed_event_detail(request, pk):
# this view is misplaced "by nature" (it mixes ingested stuff and rendering); until we create a pipeline for that.
obj = get_object_or_404(DecompressedEvent, pk=pk)
def event_detail(request, pk):
obj = get_object_or_404(Event, pk=pk)
parsed_data = json.loads(obj.data)
@@ -23,10 +23,10 @@ def decompressed_event_detail(request, pk):
})
def debug_get_hash(request, decompressed_event_pk):
def debug_get_hash(request, event_pk):
# debug view; not for eternity
obj = get_object_or_404(DecompressedEvent, pk=decompressed_event_pk)
obj = get_object_or_404(Event, pk=event_pk)
parsed_data = json.loads(obj.data)

View File

@@ -7,7 +7,7 @@ from compat.dsn import get_store_url, get_header_value
class Command(BaseCommand):
help = "..."
help = "Quick and dirty command to load a bunch of events from e.g. the sentry test codebase"
def add_arguments(self, parser):
parser.add_argument("--dsn")
@@ -18,16 +18,36 @@ class Command(BaseCommand):
for json_filename in options["json_files"]:
with open(json_filename) as f:
print("HIER", json_filename)
print("considering", json_filename)
try:
data = json.loads(f.read())
except Exception as e:
self.stderr.write("%s %s %s" % ("Not JSON", json_filename, str(e)))
continue
if "event_id" not in data:
self.stderr.write("%s %s" % ("Probably not a (single) event", json_filename))
continue
if "timestamp" not in data:
# weirdly enough a large number of sentry test data don't actually have this required attribute set.
# thus, we set it to something arbitrary on the sending side rather than have our server be robust
# for it.
data["timestamp"] = 0
if "platform" not in data:
# in a few cases this value isn't set either in the sentry test data but I'd rather ignore those...
# because 'platform' is such a valuable piece of info while getting a sense of the shape of the data
self.stderr.write("%s %s" % ("Platform not set", json_filename))
continue
try:
response = requests.post(
get_store_url(dsn),
headers={"X-Sentry-Auth": get_header_value(dsn)},
headers={
"X-Sentry-Auth": get_header_value(dsn),
"X-BugSink-DebugInfo": json_filename,
},
json=data,
)
response.raise_for_status()

View File

@@ -5,13 +5,9 @@ from django.db import models
from projects.models import Project
class DecompressedEvent(models.Model):
class DecompressedEvent(models.Model): # or... DecompressedRawEvent
"""Ingested Event, no processing"""
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
data = models.TextField(blank=False, null=False)
timestamp = models.DateTimeField(null=False, auto_now_add=True, help_text="Server-side timestamp")
def get_absolute_url(self):
# same note about misplacement as the view this is pointing to
return "/events/event/%s/" % self.id

View File

@@ -14,6 +14,7 @@ from projects.models import Project
from issues.models import Issue
from issues.utils import get_hash_for_data
from events.models import Event
from .negotiation import IgnoreClientContentNegotiation
from .parsers import EnvelopeParser
@@ -52,11 +53,17 @@ class BaseIngestAPIView(APIView):
return get_object_or_404(Project, pk=project_id, sentry_key=sentry_key)
def process_event(self, event_data, request, project):
event = DecompressedEvent.objects.create(
DecompressedEvent.objects.create(
project=project,
data=json.dumps(event_data), # TODO don't parse-then-print for BaseIngestion
)
debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "")
event, created = Event.from_json(project, event_data, debug_info)
if not created:
return
hash_ = get_hash_for_data(event_data)
issue, _ = Issue.objects.get_or_create(

View File

@@ -0,0 +1,17 @@
from django.db import migrations, models
class Migration(migrations.Migration):
    # Alters Issue.events so that the many-to-many relation targets events.Event.

    dependencies = [
        # the new target model, events.Event, must exist first
        ('events', '0001_initial'),
        ('issues', '0002_issue_project'),
    ]

    operations = [
        migrations.AlterField(
            model_name='issue',
            name='events',
            field=models.ManyToManyField(to='events.event'),
        ),
    ]

View File

@@ -10,7 +10,7 @@ class Issue(models.Model):
project = models.ForeignKey(
"projects.Project", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
hash = models.CharField(max_length=32, blank=False, null=False)
events = models.ManyToManyField("ingest.DecompressedEvent")
events = models.ManyToManyField("events.Event")
def get_absolute_url(self):
    """Return the site-relative URL of the page listing this issue's events, built from its id."""
    return "/issues/issue/{}/events/".format(self.id)

View File

@@ -1,6 +1,5 @@
from random import random
from django.db import migrations, models
import projects.models
import uuid
class Migration(migrations.Migration):
@@ -13,12 +12,12 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name='project',
name='name',
field=models.CharField(default=lambda: str(random()), max_length=255),
field=models.CharField(default='asdf', max_length=255),
preserve_default=False,
),
migrations.AddField(
model_name='project',
name='sentry_key',
field=models.CharField(default=projects.models.uuid4_hex, max_length=32, unique=True),
field=models.UUIDField(default=uuid.uuid4, editable=False),
),
]