mirror of
https://github.com/bugsink/bugsink.git
synced 2026-02-19 12:58:46 -06:00
Tags; deducing tags; search on tags; WIP
This commit is contained in:
@@ -39,6 +39,19 @@ def pairwise(it):
|
||||
prev = current
|
||||
|
||||
|
||||
def tuplewise(iterable):
|
||||
"""
|
||||
>>> list(tuplewise(range(4)))
|
||||
[(0, 1), (2, 3)]
|
||||
"""
|
||||
i = iter(iterable)
|
||||
while True:
|
||||
try:
|
||||
yield next(i), next(i)
|
||||
except StopIteration:
|
||||
return
|
||||
|
||||
|
||||
def batched(iterable, n):
|
||||
# itertools.batched was introduced in Python 3.12, but it's "roughly equivalent" to this:
|
||||
|
||||
|
||||
@@ -78,6 +78,7 @@ BUGSINK_APPS = [
|
||||
'ingest',
|
||||
'issues',
|
||||
'events',
|
||||
'tags',
|
||||
'alerts',
|
||||
|
||||
'performance',
|
||||
|
||||
@@ -132,6 +132,7 @@ def eat_your_own_dogfood(sentry_dsn, **kwargs):
|
||||
"sentry",
|
||||
"sentry_sdk_extensions",
|
||||
"snappea",
|
||||
"tags",
|
||||
"teams",
|
||||
"theme",
|
||||
"users",
|
||||
|
||||
@@ -59,6 +59,10 @@ def write_to_storage(event_id, parsed_data):
|
||||
|
||||
|
||||
class Event(models.Model):
|
||||
# TODO now that the Tag models are introduced, an number of the below fields are actually already stored as tags.
|
||||
# At some point we should decide whether to proceed with "just as tags" or "just in the event table". Will depend on
|
||||
# findings about performance (and how generic our solution with tags really is).
|
||||
|
||||
# Lines quotes with ">" are from the following to resources:
|
||||
# https://develop.sentry.dev/sdk/event-payloads/ (supposedly more human-readable)
|
||||
# https://develop.sentry.dev/sdk/event-payloads/types/ (more up-to-date and complete)
|
||||
@@ -155,7 +159,7 @@ class Event(models.Model):
|
||||
# them to be [yet]):
|
||||
#
|
||||
# > Optional. A map or list of tags for this event. Each tag must be less than 200 characters.
|
||||
# tags
|
||||
# tags = # implemented in tags/models.py
|
||||
#
|
||||
# > A list of relevant modules and their versions.
|
||||
# modules =
|
||||
|
||||
@@ -35,6 +35,7 @@ from events.retention import evict_for_max_events, should_evict
|
||||
from releases.models import create_release_if_needed
|
||||
from alerts.tasks import send_new_issue_alert, send_regression_alert
|
||||
from compat.timestamp import format_timestamp, parse_timestamp
|
||||
from tags.models import digest_tags
|
||||
|
||||
from .parsers import StreamingEnvelopeParser, ParseError
|
||||
from .filestore import get_filename_for_event_id
|
||||
@@ -389,6 +390,10 @@ class BaseIngestAPIView(View):
|
||||
|
||||
issue.save()
|
||||
|
||||
# intentionally at the end: possible future optimization is to push this out of the transaction (or even use
|
||||
# a separate DB for this)
|
||||
digest_tags(event_data, event, issue)
|
||||
|
||||
@classmethod
|
||||
def count_project_periods_and_act_on_it(cls, project, now):
|
||||
# returns: True if any further processing should be done.
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from collections import namedtuple
|
||||
import json
|
||||
import sentry_sdk
|
||||
import re
|
||||
|
||||
from django.utils import timezone
|
||||
from django.shortcuts import render, get_object_or_404, redirect
|
||||
from django.db.models import Q
|
||||
from django.db.models import Q, Subquery
|
||||
from django.http import HttpResponseRedirect, HttpResponseNotAllowed
|
||||
from django.utils.safestring import mark_safe
|
||||
from django.template.defaultfilters import date
|
||||
@@ -13,6 +14,9 @@ from django.core.exceptions import PermissionDenied
|
||||
from django.http import Http404
|
||||
from django.core.paginator import Paginator
|
||||
|
||||
from sentry.utils.safe import get_path
|
||||
|
||||
from bugsink.moreiterutils import tuplewise
|
||||
from bugsink.decorators import project_membership_required, issue_membership_required, atomic_for_request_method
|
||||
from bugsink.transaction import durable_atomic
|
||||
from bugsink.period_utils import add_periods_to_datetime
|
||||
@@ -22,7 +26,7 @@ from events.ua_stuff import get_contexts_enriched_with_ua
|
||||
|
||||
from compat.timestamp import format_timestamp
|
||||
from projects.models import ProjectMembership
|
||||
from sentry.utils.safe import get_path
|
||||
from tags.models import TagValue, IssueTag
|
||||
|
||||
from .models import Issue, IssueQuerysetStateManager, IssueStateManager, TurningPoint, TurningPointKind
|
||||
from .forms import CommentForm
|
||||
@@ -216,6 +220,60 @@ def _issue_list_pt_1(request, project, state_filter="open"):
|
||||
return project, unapplied_issue_ids
|
||||
|
||||
|
||||
def remove_slices(s, slices_to_remove):
|
||||
"""Returns s with the slices removed."""
|
||||
items = [item for tup in slices_to_remove for item in tup]
|
||||
slices_to_preserve = tuplewise([0] + items + [None])
|
||||
return "".join(s[start:stop] for start, stop in slices_to_preserve)
|
||||
|
||||
|
||||
def _and_join(q_objects):
|
||||
if len(q_objects) == 0:
|
||||
# we _could_ just return Q(), but I'd force the calling location to not do this
|
||||
raise ValueError("empty list of Q objects")
|
||||
|
||||
result = q_objects[0]
|
||||
for q in q_objects[1:]:
|
||||
result &= q
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _search(project, issue_list, q):
|
||||
# The simplest possible query-language that could have any value: key:value is recognized as such; the rest is "free
|
||||
# text"; no support for quoting of spaces.
|
||||
slices_to_remove = []
|
||||
clauses = []
|
||||
for match in re.finditer(r"(\S+):(\S+)", q):
|
||||
slices_to_remove.append(match.span())
|
||||
key, value = match.groups()
|
||||
try:
|
||||
tag_value_obj = TagValue.objects.get(project=project, key__key=key, value=value)
|
||||
except TagValue.DoesNotExist:
|
||||
# if the tag doesn't exist, we can't have any issues with it; the below short-circuit is fine, I think (I
|
||||
# mean: we _could_ say "tag x is to blame" but that's not what one does generally in search, is it?
|
||||
return issue_list.none()
|
||||
|
||||
# TODO: Extensive performance testing of various choices here is necessary; in particular the choice of Subquery
|
||||
# vs. joins; and the choice of a separate query to get TagValue v.s. doing everything in a single big query will
|
||||
# have different trade-offs _in practice_.
|
||||
clauses.append(
|
||||
Q(id__in=Subquery(IssueTag.objects.filter(value=tag_value_obj).values_list("issue_id", flat=True))))
|
||||
|
||||
# this is really TSTTCPW (or more like a "fake it till you make it" thing); but I'd rather "have something" and then
|
||||
# have really-good-search than to have either nothing at all, or half-baked search. Note that we didn't even bother
|
||||
# to set indexes on the fields we search on (nor create a single searchable field for the whole of 'title').
|
||||
plain_text_q = remove_slices(q, slices_to_remove).strip()
|
||||
if plain_text_q:
|
||||
clauses.append(Q(Q(calculated_type__icontains=plain_text_q) | Q(calculated_value__icontains=plain_text_q)))
|
||||
|
||||
# if we reach this point, there's always either a plain_text_q or some key/value pair (this is a condition for
|
||||
# _and_join)
|
||||
issue_list = issue_list.filter(_and_join(clauses))
|
||||
|
||||
return issue_list
|
||||
|
||||
|
||||
def _issue_list_pt_2(request, project, state_filter, unapplied_issue_ids):
|
||||
d_state_filter = {
|
||||
"open": lambda qs: qs.filter(is_resolved=False, is_muted=False),
|
||||
@@ -229,12 +287,8 @@ def _issue_list_pt_2(request, project, state_filter, unapplied_issue_ids):
|
||||
Issue.objects.filter(project=project)
|
||||
).order_by("-last_seen")
|
||||
|
||||
# this is really TSTTCPW (or more like a "fake it till you make it" thing); but I'd rather "have something" and then
|
||||
# have really-good-search than to have either nothing at all, or half-baked search. Note that we didn't even bother
|
||||
# to set indexes on the fields we search on (nor create a single searchable field for the whole of 'title').
|
||||
if request.GET.get("q"):
|
||||
issue_list = issue_list.filter(
|
||||
Q(calculated_type__icontains=request.GET["q"]) | Q(calculated_value__icontains=request.GET["q"]))
|
||||
issue_list = _search(project, issue_list, request.GET["q"])
|
||||
|
||||
paginator = Paginator(issue_list, 250)
|
||||
page_number = request.GET.get("page")
|
||||
|
||||
@@ -54,6 +54,7 @@ include = [
|
||||
"sentry_sdk_extensions*",
|
||||
"snappea*",
|
||||
"static*",
|
||||
"tags*",
|
||||
"teams*",
|
||||
"templates*",
|
||||
"theme*",
|
||||
|
||||
0
tags/__init__.py
Normal file
0
tags/__init__.py
Normal file
0
tags/admin.py
Normal file
0
tags/admin.py
Normal file
6
tags/apps.py
Normal file
6
tags/apps.py
Normal file
@@ -0,0 +1,6 @@
|
||||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class TagsConfig(AppConfig):
|
||||
default_auto_field = "django.db.models.BigAutoField"
|
||||
name = "tags"
|
||||
169
tags/migrations/0001_initial.py
Normal file
169
tags/migrations/0001_initial.py
Normal file
@@ -0,0 +1,169 @@
|
||||
# Generated by Django 4.2.19 on 2025-02-27 12:04
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
initial = True
|
||||
|
||||
dependencies = [
|
||||
("events", "0019_event_storage_backend"),
|
||||
("issues", "0010_issue_list_indexes"),
|
||||
("projects", "0011_fill_stored_event_count"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.CreateModel(
|
||||
name="TagKey",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.BigAutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("key", models.CharField(max_length=32)),
|
||||
(
|
||||
"project",
|
||||
models.ForeignKey(
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
to="projects.project",
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="TagValue",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.BigAutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
("value", models.CharField(db_index=True, max_length=200)),
|
||||
(
|
||||
"key",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE, to="tags.tagkey"
|
||||
),
|
||||
),
|
||||
(
|
||||
"project",
|
||||
models.ForeignKey(
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
to="projects.project",
|
||||
),
|
||||
),
|
||||
],
|
||||
options={
|
||||
"unique_together": {("project", "key", "value")},
|
||||
},
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="IssueTag",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.BigAutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"issue",
|
||||
models.ForeignKey(
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="tags",
|
||||
to="issues.issue",
|
||||
),
|
||||
),
|
||||
(
|
||||
"project",
|
||||
models.ForeignKey(
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
to="projects.project",
|
||||
),
|
||||
),
|
||||
(
|
||||
"value",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE, to="tags.tagvalue"
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
migrations.CreateModel(
|
||||
name="EventTag",
|
||||
fields=[
|
||||
(
|
||||
"id",
|
||||
models.BigAutoField(
|
||||
auto_created=True,
|
||||
primary_key=True,
|
||||
serialize=False,
|
||||
verbose_name="ID",
|
||||
),
|
||||
),
|
||||
(
|
||||
"event",
|
||||
models.ForeignKey(
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
related_name="tags",
|
||||
to="events.event",
|
||||
),
|
||||
),
|
||||
(
|
||||
"project",
|
||||
models.ForeignKey(
|
||||
null=True,
|
||||
on_delete=django.db.models.deletion.SET_NULL,
|
||||
to="projects.project",
|
||||
),
|
||||
),
|
||||
(
|
||||
"value",
|
||||
models.ForeignKey(
|
||||
on_delete=django.db.models.deletion.CASCADE, to="tags.tagvalue"
|
||||
),
|
||||
),
|
||||
],
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="tagkey",
|
||||
index=models.Index(fields=["key"], name="tags_tagkey_key_5e8a5a_idx"),
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name="tagkey",
|
||||
unique_together={("project", "key")},
|
||||
),
|
||||
migrations.AddIndex(
|
||||
model_name="issuetag",
|
||||
index=models.Index(
|
||||
fields=["issue", "value"], name="tags_issuet_issue_i_f06f20_idx"
|
||||
),
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name="issuetag",
|
||||
unique_together={("value", "issue")},
|
||||
),
|
||||
migrations.AlterUniqueTogether(
|
||||
name="eventtag",
|
||||
unique_together={("value", "event")},
|
||||
),
|
||||
]
|
||||
0
tags/migrations/__init__.py
Normal file
0
tags/migrations/__init__.py
Normal file
103
tags/models.py
Normal file
103
tags/models.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""
|
||||
Tags provide support for arbitrary key/value pairs (both strings) on Events and Issues, allowing for searching &
|
||||
counting. Some notes:
|
||||
|
||||
* Arbitrary Tags can be set programatically in the SDKs, which we need to support (Sentry API Compatability).
|
||||
* Some "synthetic" Tags are introduced by Bugsink itself: attributes of an Event are deduced and stored explicitly as a
|
||||
Tag. The main reason to do this: stay flexible in terms of DB design and allow for generic code for searching and
|
||||
counting. _However_, we don't make a commitment to any particular implementation, and if the deduce-and-store approach
|
||||
turns out to be a performance bottleneck, it may be replaced. Particular notes on what we deduce are in `deduce_tags`.
|
||||
|
||||
https://docs.sentry.io/platforms/python/enriching-events/tags/
|
||||
|
||||
> Tag keys have a maximum length of 32 characters and can contain only letters (a-zA-Z), numbers (0-9), underscores (_),
|
||||
> periods (.), colons (:), and dashes (-).
|
||||
>
|
||||
> Tag values have a maximum length of 200 characters and they cannot contain the newline (\n) character.
|
||||
"""
|
||||
|
||||
|
||||
from django.db import models
|
||||
|
||||
from projects.models import Project
|
||||
from tags.utils import deduce_tags
|
||||
|
||||
|
||||
class TagKey(models.Model):
|
||||
project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
|
||||
key = models.CharField(max_length=32, blank=False, null=False)
|
||||
|
||||
# I briefly considered being explicit about is_deduced; but it's annoying to store this info on the TagKey, and it's
|
||||
# probably redundant if we just come up with a list of "reserved" tags or similar.
|
||||
# is_deduced = models.BooleanField(default=False)
|
||||
|
||||
class Meta:
|
||||
unique_together = ('project', 'key')
|
||||
indexes = [
|
||||
# Untested assumption: when searching, we search by TagValue[..]key__key=key, so we need an
|
||||
# index-not-prefixed-by-project too
|
||||
models.Index(fields=['key']),
|
||||
]
|
||||
|
||||
|
||||
class TagValue(models.Model):
|
||||
project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
|
||||
# CASCADE: TBD; one argument might be: decouple deletions from events ingestion, but at least have them internally
|
||||
# consistent
|
||||
key = models.ForeignKey(TagKey, blank=False, null=False, on_delete=models.CASCADE)
|
||||
value = models.CharField(max_length=200, blank=False, null=False, db_index=True)
|
||||
|
||||
class Meta:
|
||||
# matches what we do in search
|
||||
unique_together = ('project', 'key', 'value')
|
||||
|
||||
def __str__(self):
|
||||
return f"{self.key.key}:{self.value}"
|
||||
|
||||
|
||||
class EventTag(models.Model):
|
||||
project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
|
||||
|
||||
# value already implies key in our current setup.
|
||||
value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.CASCADE)
|
||||
|
||||
event = models.ForeignKey('events.Event', blank=False, null=True, on_delete=models.SET_NULL, related_name='tags')
|
||||
|
||||
class Meta:
|
||||
unique_together = ('value', 'event')
|
||||
|
||||
|
||||
# class GroupingTag is not needed (not even for future-proofing); it would only be needed if you'd want to "unmerge"
|
||||
# manually merged issues while preserving the tags/counts. I think that's in "not worth it" territory.
|
||||
|
||||
|
||||
class IssueTag(models.Model):
|
||||
project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
|
||||
|
||||
# value already implies key in our current setup.
|
||||
value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.CASCADE)
|
||||
|
||||
issue = models.ForeignKey('issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name='tags')
|
||||
|
||||
class Meta:
|
||||
# searching: by value, then Issue (is this so though? .qs is already on the other!)
|
||||
unique_together = ('value', 'issue')
|
||||
indexes = [
|
||||
models.Index(fields=['issue', 'value']), # make lookups by issue (for detail pages) faster
|
||||
]
|
||||
|
||||
|
||||
def digest_tags(event_data, event, issue):
|
||||
tags = deduce_tags(event_data)
|
||||
|
||||
for key, value in tags.items():
|
||||
# The max length of 200 is from TFM for user-provided tags. Still, we just apply it on synthetic tags as well;
|
||||
# It's a reasonably safe guess that this will not terribly confuse people, and avoids triggering errors on-save.
|
||||
value = value[:200]
|
||||
|
||||
# TODO just use bulk_create for each of the types of objects
|
||||
# TODO check: event.project won't trigger a query, right? it's already loaded, right?
|
||||
tag_key, _ = TagKey.objects.get_or_create(project=event.project, key=key)
|
||||
tag_value, _ = TagValue.objects.get_or_create(project=event.project, key=tag_key, value=value)
|
||||
EventTag.objects.get_or_create(project=event.project, value=tag_value, event=event)
|
||||
IssueTag.objects.get_or_create(project=event.project, value=tag_value, issue=issue)
|
||||
1
tags/tests.py
Normal file
1
tags/tests.py
Normal file
@@ -0,0 +1 @@
|
||||
# from django.test import TestCase
|
||||
81
tags/utils.py
Normal file
81
tags/utils.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from events.ua_stuff import get_contexts_enriched_with_ua
|
||||
from sentry.utils.safe import get_path
|
||||
|
||||
|
||||
EVENT_DATA_CONVERSION_TABLE = {
|
||||
# NOTE that "level" is not included here; Sentry puts this front and center, and although we may give it _some_
|
||||
# place in the UI, Bugsink's take is that "level: Error" in an Error Tracker is an open door/waste of space.
|
||||
"server_name": "server_name",
|
||||
"release": "release",
|
||||
"environment": "environment",
|
||||
"transaction": "transaction",
|
||||
}
|
||||
|
||||
|
||||
CONTEXT_CONVERSION_TABLE = {
|
||||
"trace": ("trace", "trace_id"),
|
||||
"trace.span": ("trace", "span_id"),
|
||||
"browser.name": ("browser", "name"),
|
||||
"os.name": ("os", "name"),
|
||||
|
||||
# TODO probably useful, simply not done yet:
|
||||
# runtime
|
||||
# runtime.name
|
||||
# device.something
|
||||
}
|
||||
|
||||
|
||||
def deduce_user_tag(contexts):
|
||||
# quick & dirty / barebones implementation; we don't try to mimick Sentry's full behavior, instead just pick the
|
||||
# most relevant piece of the user context that we can find. For reference, Sentry has the concept of an "EventUser"
|
||||
# (`src/sentry/models/eventuser.py`)
|
||||
|
||||
if "user" not in contexts:
|
||||
return None
|
||||
|
||||
for key in ["id", "username", "email", "ip_address"]:
|
||||
if contexts["user"].get(key):
|
||||
return contexts["user"][key]
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def deduce_tags(event_data):
|
||||
"""
|
||||
Deduce tags for `event_data`. Used as an "opportunistic" (generic) way to implement counting and searching. Although
|
||||
Sentry does something similar, we're not striving to replicate Sentry's behavior (tag names etc). In particular, we
|
||||
feel that Sentry's choices are poorly documented, the separation of concerns between events/contexts/tags is
|
||||
unclear, and some choices are straight up not so great. We'd rather think about what information matters ourselves.
|
||||
"""
|
||||
|
||||
# we start with the explicitly provided tags
|
||||
tags = event_data.get('tags', {})
|
||||
|
||||
for tag_key, lookup_path in EVENT_DATA_CONVERSION_TABLE.items():
|
||||
value = get_path(event_data, lookup_path)
|
||||
if value not in [None, ""]:
|
||||
tags[tag_key] = value
|
||||
|
||||
# deduce from contexts
|
||||
contexts = get_contexts_enriched_with_ua(event_data)
|
||||
|
||||
for tag_key, path in CONTEXT_CONVERSION_TABLE.items():
|
||||
value = get_path(contexts, *path)
|
||||
if value not in [None, ""]:
|
||||
tags[tag_key] = value
|
||||
|
||||
if "trace" in tags and "trace.span" in tags:
|
||||
tags["trace.ctx"] = f"{tags['trace']}.{tags['trace.span']}"
|
||||
|
||||
if "browser.name" in tags and (browser_version := tags.get("browser.version")):
|
||||
tags["browser"] = f"{tags['browser.name']} {browser_version}"
|
||||
|
||||
if user_tag := deduce_user_tag(contexts):
|
||||
tags["user"] = user_tag
|
||||
|
||||
# TODO further tags that are probably useful:
|
||||
# url
|
||||
# logger
|
||||
# mechanism
|
||||
|
||||
return tags
|
||||
Reference in New Issue
Block a user