Introduce 'Grouping' data-modeling
@@ -13,8 +13,8 @@ from rest_framework import exceptions
 from compat.auth import parse_auth_header_value
 
 from projects.models import Project
-from issues.models import Issue, IssueStateManager
-from issues.utils import get_hash_for_data
+from issues.models import Issue, IssueStateManager, Grouping
+from issues.utils import get_issue_grouper_for_data
 from issues.regressions import issue_is_regression
 
 import sentry_sdk_extensions
@@ -103,16 +103,30 @@ class BaseIngestAPIView(APIView):
         # leave this at the top -- it may involve reading from the DB which should come before any DB writing
         pc_registry = get_pc_registry()
 
-        hash_ = get_hash_for_data(event_data)
-        issue, issue_created = Issue.objects.get_or_create(
-            project=ingested_event.project,
-            hash=hash_,
-            defaults={
-                "first_seen": ingested_event.timestamp,
-                "last_seen": ingested_event.timestamp,
-                "event_count": 1,
-            },
-        )
+        grouping_key = get_issue_grouper_for_data(event_data)
+
+        if not Grouping.objects.filter(project=ingested_event.project, grouping_key=grouping_key).exists():
+            issue = Issue.objects.create(
+                project=ingested_event.project,
+                first_seen=ingested_event.timestamp,
+                last_seen=ingested_event.timestamp,
+                event_count=1,
+            )
+            # even though in our data-model a given grouping does not imply a single Issue (in fact, that's the
+            # whole point of groupings as a data-model), at creation such an implication does exist: manual
+            # information ("this grouper is actually part of some other issue") cannot yet have been specified.
+            issue_created = True
+
+            grouping = Grouping.objects.create(
+                project=ingested_event.project,
+                grouping_key=grouping_key,
+                issue=issue,
+            )
+
+        else:
+            grouping = Grouping.objects.get(project=ingested_event.project, grouping_key=grouping_key)
+            issue = grouping.issue
+            issue_created = False
 
         event, event_created = Event.from_ingested(ingested_event, issue, event_data)
         if not event_created:
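Side note on the comment above: the manual information it anticipates would, in this data-model, amount to repointing a Grouping at some other Issue. A minimal sketch under that assumption (the helper name merge_grouping_into is hypothetical, not part of this commit):

def merge_grouping_into(grouping, target_issue):
    # hypothetical sketch: "this grouper is actually part of some other issue"
    # becomes a single FK update; later events that resolve to this
    # grouping.grouping_key will then be filed under target_issue.
    grouping.issue = target_issue
    grouping.save()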
@@ -1,13 +1,21 @@
 from django.contrib import admin
 
-from .models import Issue
+from .models import Issue, Grouping
+
+
+class GroupingInline(admin.TabularInline):
+    model = Grouping
+    extra = 0
+    exclude = ['project']
+    readonly_fields = [
+        'grouping_key',
+    ]
 
 
 @admin.register(Issue)
 class IssueAdmin(admin.ModelAdmin):
     fields = [
         'project',
         'hash',
         'last_seen',
         'first_seen',
         'is_resolved',
@@ -19,9 +27,12 @@ class IssueAdmin(admin.ModelAdmin):
         'event_count',
     ]
 
+    inlines = [
+        GroupingInline,
+    ]
+
     list_display = [
         "title",
         "hash",
         "project",
         "event_count",  # expensive operation as written now (query in loop)
     ]
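The query-in-loop cost flagged in list_display has a standard Django remedy: annotate the count once in the admin's queryset. A hedged sketch, not part of this commit (it assumes the default related name "event" on Issue, which the event_set usage elsewhere in this diff suggests):

from django.db.models import Count

class IssueAdmin(admin.ModelAdmin):
    def get_queryset(self, request):
        # one aggregated query for the whole changelist instead of one COUNT per row
        return super().get_queryset(request).annotate(event_count_agg=Count("event"))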
@@ -4,8 +4,8 @@ from django.utils import timezone
 
 from projects.models import Project
 
-from .models import Issue
-from .utils import get_hash_for_data
+from .models import Issue, Grouping
+from .utils import get_issue_grouper_for_data
 
 
 def get_or_create_issue(project=None, event_data=None):
@@ -15,12 +15,26 @@ def get_or_create_issue(project=None, event_data=None):
     if project is None:
         project = Project.objects.create(name="Test project")
 
-    hash_ = get_hash_for_data(event_data)
-    issue, issue_created = Issue.objects.get_or_create(
-        project=project,
-        hash=hash_,
-        defaults=denormalized_issue_fields(),
-    )
+    grouping_key = get_issue_grouper_for_data(event_data)
+
+    if not Grouping.objects.filter(project=project, grouping_key=grouping_key).exists():
+        issue = Issue.objects.create(
+            project=project,
+            **denormalized_issue_fields(),
+        )
+        issue_created = True
+
+        grouping = Grouping.objects.create(
+            project=project,
+            grouping_key=grouping_key,
+            issue=issue,
+        )
+
+    else:
+        grouping = Grouping.objects.get(project=project, grouping_key=grouping_key)
+        issue = grouping.issue
+        issue_created = False
 
     return issue, issue_created
issues/migrations/0015_remove_issue_hash_grouping.py (new file, 28 lines)
@@ -0,0 +1,28 @@
+# Generated by Django 4.2.11 on 2024-04-08 08:59
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('projects', '0006_alter_projectmembership_unique_together'),
+        ('issues', '0014_alter_issue_events_at_alter_issue_fixed_at'),
+    ]
+
+    operations = [
+        migrations.RemoveField(
+            model_name='issue',
+            name='hash',
+        ),
+        migrations.CreateModel(
+            name='Grouping',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('grouping_key', models.TextField()),
+                ('issue', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='issues.issue')),
+                ('project', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='projects.project')),
+            ],
+        ),
+    ]
issues/migrations/0016_create_grouping_objects.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+import json
+from django.db import migrations
+
+from issues.utils import get_issue_grouper_for_data
+
+
+def create_grouping_objects(apps, schema_editor):
+    Issue = apps.get_model('issues', 'Issue')
+    Grouping = apps.get_model('issues', 'Grouping')
+
+    for issue in Issue.objects.all():
+        # we can do this because we know that there is at least one event, and because the events are already grouped
+        # per issue (we just need to reconstruct the grouping key as implied by the hash)
+        some_random_event = issue.event_set.first()
+        event_data = json.loads(some_random_event.data)
+
+        # we have _not_ inlined get_issue_grouper_for_data (inlining is standard good practice in data migrations);
+        # reason: this data migration is just here for my own development process, we don't need it to be perfect.
+        grouping_key = get_issue_grouper_for_data(event_data)
+
+        Grouping.objects.create(
+            project=issue.project,
+            issue=issue,
+            grouping_key=grouping_key,
+        )
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('issues', '0015_remove_issue_hash_grouping'),
+    ]
+
+    operations = [
+        migrations.RunPython(create_grouping_objects),
+    ]
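A cautionary aside on the at-least-one-event assumption in create_grouping_objects: if it ever fails to hold, event_set.first() returns None and the .data access raises. A more defensive variant of the loop, sketched under the same apps.get_model setup, would simply skip such issues:

for issue in Issue.objects.all():
    some_random_event = issue.event_set.first()
    if some_random_event is None:
        continue  # no event to reconstruct a grouping key from; skip this issue
    event_data = json.loads(some_random_event.data)
    Grouping.objects.create(
        project=issue.project,
        issue=issue,
        grouping_key=get_issue_grouper_for_data(event_data),
    )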
@@ -17,11 +17,15 @@ class IncongruentStateException(Exception):
 
 
 class Issue(models.Model):
+    """
+    An Issue models a group of similar events. In particular: it models the result of both automatic (client-side and
+    server-side) and manual ("merge") grouping.
+    """
+
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
 
     project = models.ForeignKey(
         "projects.Project", blank=False, null=True, on_delete=models.SET_NULL)  # SET_NULL: cleanup 'later'
-    hash = models.CharField(max_length=32, blank=False, null=False)
 
     # denormalized fields:
     last_seen = models.DateTimeField(blank=False, null=False)  # based on event.server_side_timestamp
@@ -110,6 +114,24 @@ class Issue(models.Model):
     ]
 
 
+class Grouping(models.Model):
+    """A Grouping models an automatically calculated grouping key (from the event data, with a key role for the SDK-side
+    fingerprint).
+    """
+    project = models.ForeignKey(
+        "projects.Project", blank=False, null=True, on_delete=models.SET_NULL)  # SET_NULL: cleanup 'later'
+
+    # NOTE: I don't want to have any principled maximum on the grouping key, nor do I want to prematurely optimize the
+    # lookup. If lookups are slow, we _could_ examine whether manually hashing these values and matching on the hash
+    # helps.
+    grouping_key = models.TextField(blank=False, null=False)
+
+    issue = models.ForeignKey("Issue", blank=False, null=True, on_delete=models.SET_NULL)  # SET_NULL: cleanup 'later'
+
+    def __str__(self):
+        return self.grouping_key
+
+
 def add_periods_to_datetime(dt, nr_of_periods, period_name):
     dateutil_kwargs_map = {
         "year": "years",
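The NOTE on grouping_key leaves the lookup question open. A minimal sketch of the hashing escape hatch it mentions, assuming a hypothetical indexed grouping_key_hash column kept alongside grouping_key (no such column exists in this commit):

import hashlib

def grouping_lookup_kwargs(project, grouping_key):
    # match on a fixed-width, easily indexed digest rather than the unbounded TextField
    key_hash = hashlib.md5(grouping_key.encode()).hexdigest()
    return {"project": project, "grouping_key_hash": key_hash}

# usage would then be: Grouping.objects.filter(**grouping_lookup_kwargs(project, grouping_key))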
@@ -1,5 +1,3 @@
-import hashlib
-
 from sentry.eventtypes.base import DefaultEvent
 from sentry.eventtypes.error import ErrorEvent
 
@@ -30,12 +28,6 @@ def get_issue_grouper_for_data(data):
     return default_issue_grouper(title, transaction, event_type_name)
 
 
-def get_hash_for_data(data):
-    """Generate hash used for grouping issues (note: not a cryptographically secure hash)"""
-    issue_grouper = get_issue_grouper_for_data(data)
-    return hashlib.md5(issue_grouper.encode()).hexdigest()
-
-
 # utilities related to storing and retrieving release-versions; we use the fact that sentry (and we've adopted their
 # limitation) disallows the use of newlines in release-versions, so we can use newlines as a separator
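For orientation: the removed get_hash_for_data hashed exactly the issue-grouper string that Grouping rows now store verbatim, so the md5 step simply became unnecessary. A self-contained reminder of the equivalence (the example key is made up):

import hashlib

def hash_for_grouping_key(grouping_key):
    # what the removed helper computed, after the event-data -> grouper step
    return hashlib.md5(grouping_key.encode()).hexdigest()

print(hash_for_grouping_key("ValueError example-transaction error"))  # 32-char hex digest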
@@ -8,7 +8,7 @@ from django.conf import settings
 
 from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time
 from projects.models import Project
-from issues.models import Issue
+from issues.models import Issue, Grouping
 from events.models import Event
 
 
@@ -20,6 +20,7 @@ class Command(BaseCommand):
             raise ValueError("This command should only be run on the performance-test database")
 
         Project.objects.all().delete()
+        Grouping.objects.all().delete()
         Issue.objects.all().delete()
         Event.objects.all().delete()
 
@@ -31,7 +32,14 @@ class Command(BaseCommand):
         issues_by_project = {}
 
         for p in projects:
-            issues_by_project[p.id] = [Issue.objects.create(project=p, hash="hash %d" % i) for i in range(100)]
+            issues_by_project[p.id] = []
+            for i in range(100):
+                issues_by_project[p.id].append(Issue.objects.create(project=p))
+                Grouping.objects.create(
+                    project=p,
+                    grouping_key="grouping key %d" % i,
+                    issue=issues_by_project[p.id][i],
+                )
 
         # now we have 10 projects, each with 100 issues. let's create 10k events for each project.
         for p in projects: