Optimization: Search on EvenTag without involving Event if possible

When searching by tag, there is no need to join with Event; especially when
just counting results or determining first/last digest_order (for navigation).

(For the above "no need" to be actually true, digest_order was denormalized
into EventTag).

The above is implemented in `search_events_optimized`.

Further improvements:

* the bounds of `digest_order` are fetched only once; for first/last this info
  is reused.

* explicitly pass `event_qs_count` to the templates

* non-event pages used to calculate a "last event" to generate a tab with a
  correct event.id; since we simply have the "last" idiom, better use that.
  this also makes clear the "none" idiom was never needed, we remove it again.

Results:

Locally (60K event DB, 30K events on largest issue) my testbatch now
runs in 25% of time (overall).

* The effect on the AND-ing are in fact very large (13% runtime remaining)
* The event details page is not noticably improved.
This commit is contained in:
Klaas van Schelven
2025-03-12 20:38:07 +01:00
parent aa341c7437
commit 1eea9268a5
12 changed files with 192 additions and 85 deletions

View File

@@ -107,9 +107,9 @@
{# overflow-x-auto is needed at the level of the flex item such that it works at the level where we need it (the code listings)#}
<div class="ml-4 mb-4 mr-4 border-2 overflow-x-auto flex-[2_1_96rem]"><!-- the whole of the big tabbed view--> {# 96rem is 1536px, which matches the 2xl class; this is no "must" but eyeballing revealed: good result #}
<div class="flex bg-slate-50 border-b-2"><!-- container for the actual tab buttons -->
<a href="/issues/issue/{{ issue.id }}/event/{% if event %}{{ event.id }}{% else %}none{% endif %}/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "stacktrace" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Stacktrace</div></a>
<a href="/issues/issue/{{ issue.id }}/event/{% if event %}{{ event.id }}{% else %}none{% endif %}/details/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "event-details" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Event&nbsp;Details</div></a>
<a href="/issues/issue/{{ issue.id }}/event/{% if event %}{{ event.id }}{% else %}none{% endif %}/breadcrumbs/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "breadcrumbs" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Breadcrumbs</div></a>
<a href="/issues/issue/{{ issue.id }}/event/{% if event %}{{ event.id }}{% else %}last{% endif %}/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "stacktrace" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Stacktrace</div></a>
<a href="/issues/issue/{{ issue.id }}/event/{% if event %}{{ event.id }}{% else %}last{% endif %}/details/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "event-details" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Event&nbsp;Details</div></a>
<a href="/issues/issue/{{ issue.id }}/event/{% if event %}{{ event.id }}{% else %}last{% endif %}/breadcrumbs/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "breadcrumbs" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Breadcrumbs</div></a>
<a href="/issues/issue/{{ issue.id }}/events/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "event-list" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Event&nbsp;List</div></a>
<a href="/issues/issue/{{ issue.id }}/tags/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "tags" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Tags</div></a>
<a href="/issues/issue/{{ issue.id }}/grouping/{% current_qs %}"><div class="p-4 font-bold hover:bg-slate-200 {% if tab == "grouping" %}text-cyan-500 border-cyan-500 border-b-4{% else %}text-slate-500 border-slate-400 hover:border-b-4{% endif %}">Grouping</div></a>

View File

@@ -8,7 +8,7 @@
<div class="flex">
<div class="overflow-hidden">
<div class="italic">{{ event.ingested_at|date:"j M G:i T" }} (Event {{ event.digest_order|intcomma }} of {{ issue.digested_event_count|intcomma }} total{% if q %} — {{ event_qs.count }} found by search{% endif %})</div>
<div class="italic">{{ event.ingested_at|date:"j M G:i T" }} (Event {{ event.digest_order|intcomma }} of {{ issue.digested_event_count|intcomma }} total{% if q %} — {{ event_qs_count }} found by search{% endif %})</div>
</div>
<div class="ml-auto flex-none">

View File

@@ -7,7 +7,7 @@
<div class="flex">
<div class="overflow-hidden">
<div class="italic">{{ event.ingested_at|date:"j M G:i T" }} (Event {{ event.digest_order|intcomma }} of {{ issue.digested_event_count|intcomma }} total{% if q %} — {{ event_qs.count }} found by search{% endif %})</div>
<div class="italic">{{ event.ingested_at|date:"j M G:i T" }} (Event {{ event.digest_order|intcomma }} of {{ issue.digested_event_count|intcomma }} total{% if q %} — {{ event_qs_count }} found by search{% endif %})</div>
</div>
<div class="ml-auto flex-none">

View File

@@ -10,7 +10,7 @@
{# event-nav only #}
<div class="flex">
<div class="overflow-hidden">
<div class="italic">{{ event.ingested_at|date:"j M G:i T" }} (Event {{ event.digest_order|intcomma }} of {{ issue.digested_event_count|intcomma }} total{% if q %} — {{ event_qs.count }} found by search{% endif %})</div>
<div class="italic">{{ event.ingested_at|date:"j M G:i T" }} (Event {{ event.digest_order|intcomma }} of {{ issue.digested_event_count|intcomma }} total{% if q %} — {{ event_qs_count }} found by search{% endif %})</div>
</div>
<div class="ml-auto flex-none">
@@ -31,7 +31,7 @@
<div class="flex">
<div class="overflow-hidden">
{% if forloop.counter0 == 0 %}
<div class="italic">{{ event.ingested_at|date:"j M G:i T" }} (Event {{ event.digest_order|intcomma }} of {{ issue.digested_event_count|intcomma }} total{% if q %} — {{ event_qs.count }} found by search{% endif %})</div>
<div class="italic">{{ event.ingested_at|date:"j M G:i T" }} (Event {{ event.digest_order|intcomma }} of {{ issue.digested_event_count|intcomma }} total{% if q %} — {{ event_qs_count }} found by search{% endif %})</div>
{% endif %}
<h1 class="text-2xl font-bold {% if forloop.counter0 > 0 %}mt-4{% endif %} text-ellipsis whitespace-nowrap overflow-hidden">{{ exception.type }}</h1>
<div class="text-lg mb-4 text-ellipsis whitespace-nowrap overflow-hidden">{{ exception.value }}</div>

View File

@@ -22,7 +22,6 @@ def regex_converter(passed_regex):
register_converter(regex_converter("(first|last)"), "first-last")
register_converter(regex_converter("(prev|next)"), "prev-next")
register_converter(regex_converter("(none)"), "str-none")
urlpatterns = [
@@ -36,11 +35,6 @@ urlpatterns = [
path('issue/<uuid:issue_pk>/event/<uuid:event_pk>/details/', issue_event_details, name="event_details"),
path('issue/<uuid:issue_pk>/event/<uuid:event_pk>/breadcrumbs/', issue_event_breadcrumbs, name="event_breadcrumbs"),
path('issue/<uuid:issue_pk>/event/<str-none:event_pk>/', issue_event_stacktrace, name="event_stacktrace"),
path('issue/<uuid:issue_pk>/event/<str-none:event_pk>/details/', issue_event_details, name="event_details"),
path('issue/<uuid:issue_pk>/event/<str-none:event_pk>/breadcrumbs/',
issue_event_breadcrumbs, name="event_breadcrumbs"),
path('issue/<uuid:issue_pk>/event/<int:digest_order>/', issue_event_stacktrace, name="event_stacktrace"),
path('issue/<uuid:issue_pk>/event/<int:digest_order>/details/', issue_event_details, name="event_details"),
path('issue/<uuid:issue_pk>/event/<int:digest_order>/breadcrumbs/', issue_event_breadcrumbs,

View File

@@ -24,7 +24,7 @@ from events.ua_stuff import get_contexts_enriched_with_ua
from compat.timestamp import format_timestamp
from projects.models import ProjectMembership
from tags.search import search_issues, search_events
from tags.search import search_issues, search_events, search_events_optimized
from .models import Issue, IssueQuerysetStateManager, IssueStateManager, TurningPoint, TurningPointKind
from .forms import CommentForm
@@ -287,42 +287,49 @@ def _handle_post(request, issue):
return HttpResponseRedirect(request.path_info)
def _get_event(qs, event_pk, digest_order, nav):
"""Returns the event using the "url lookup"."""
def _get_event(qs, issue, event_pk, digest_order, nav, bounds):
"""
Returns the event using the "url lookup".
The passed qs is "something you can use to deduce digest_order (for next/prev)."
When a direct (non-nav) method is used, we do _not_ check against existence in qs; this is more performant, and it's
not clear that being pedantic in this case is actually more valuable from a UX perspective.
"""
if nav is not None:
if nav not in ["first", "last", "prev", "next"]:
raise Http404("Cannot look up with '%s'" % nav)
if nav == "first":
result = qs.order_by("digest_order").first()
# basically, the below. But because first/last are calculated anyway for "_has_next_prev", we pass these
# digest_order = qs.order_by("digest_order").values_list("digest_order", flat=True).first()
digest_order = bounds[0]
elif nav == "last":
result = qs.order_by("digest_order").last()
# digest_order = qs.order_by("digest_order").values_list("digest_order", flat=True).last()
digest_order = bounds[1]
elif nav in ["prev", "next"]:
if digest_order is None:
raise Http404("Cannot look up with '%s' without digest_order" % nav)
if nav == "prev":
result = qs.filter(digest_order__lt=digest_order).order_by("-digest_order").first()
digest_order = qs.filter(digest_order__lt=digest_order).values_list("digest_order", flat=True)\
.order_by("-digest_order").first()
elif nav == "next":
result = qs.filter(digest_order__gt=digest_order).order_by("digest_order").first()
digest_order = qs.filter(digest_order__gt=digest_order).values_list("digest_order", flat=True)\
.order_by("digest_order").first()
if result is None:
if digest_order is None:
raise Event.DoesNotExist
return result
return Event.objects.get(issue=issue, digest_order=digest_order)
elif event_pk is not None:
# we match on both internal and external id, trying internal first
if event_pk == "none":
raise Event.DoesNotExist()
try:
return Event.objects.get(pk=event_pk)
except Event.DoesNotExist:
return qs.get(event_id=event_pk)
return Event.objects.get(event_id=event_pk)
elif digest_order is not None:
return qs.get(digest_order=digest_order)
return Event.objects.get(digest_order=digest_order)
else:
raise Http404("Either event_pk, nav, or digest_order must be provided")
@@ -333,12 +340,13 @@ def issue_event_stacktrace(request, issue, event_pk=None, digest_order=None, nav
if request.method == "POST":
return _handle_post(request, issue)
event_qs = search_events(issue.project, issue, request.GET.get("q", ""))
event_x_qs = search_events_optimized(issue.project, issue, request.GET.get("q", ""))
first_do, last_do = _first_last(event_x_qs)
try:
event = _get_event(event_qs, event_pk, digest_order, nav)
event = _get_event(event_x_qs, issue, event_pk, digest_order, nav, (first_do, last_do))
except Event.DoesNotExist:
return issue_event_404(request, issue, event_qs, "stacktrace", "event_stacktrace")
return issue_event_404(request, issue, event_x_qs, "stacktrace", "event_stacktrace")
parsed_data = event.get_parsed_data()
@@ -387,26 +395,24 @@ def issue_event_stacktrace(request, issue, event_pk=None, digest_order=None, nav
"stack_of_plates": stack_of_plates,
"mute_options": GLOBAL_MUTE_OPTIONS,
"q": request.GET.get("q", ""),
"event_qs": event_qs,
**_has_next_prev(event, event_qs),
"event_qs_count": event_x_qs.count(),
"has_prev": event.digest_order > first_do,
"has_next": event.digest_order < last_do,
})
def issue_event_404(request, issue, event_qs, tab, this_view):
def issue_event_404(request, issue, event_x_qs, tab, this_view):
"""If the Event is 404, but the issue is not, we can still show the issue page; we show a message for the event"""
last_event = event_qs.order_by("digest_order").last() # used for switching to an event-page (using tabs)
return render(request, "issues/event_404.html", {
"tab": tab,
"this_view": this_view,
"project": issue.project,
"issue": issue,
"event": last_event,
"is_event_page": False, # this variable is used to denote "we have event-related info", which we don't
"mute_options": GLOBAL_MUTE_OPTIONS,
"event_qs": event_qs,
"q": request.GET.get("q", ""),
"event_qs_count": event_qs.count(), # avoids repeating the count() query
"event_qs_count": event_x_qs.count(),
})
@@ -416,12 +422,13 @@ def issue_event_breadcrumbs(request, issue, event_pk=None, digest_order=None, na
if request.method == "POST":
return _handle_post(request, issue)
event_qs = search_events(issue.project, issue, request.GET.get("q", ""))
event_x_qs = search_events_optimized(issue.project, issue, request.GET.get("q", ""))
first_do, last_do = _first_last(event_x_qs)
try:
event = _get_event(event_qs, event_pk, digest_order, nav)
event = _get_event(event_x_qs, issue, event_pk, digest_order, nav, (first_do, last_do))
except Event.DoesNotExist:
return issue_event_404(request, issue, event_qs, "breadcrumbs", "event_breadcrumbs")
return issue_event_404(request, issue, event_x_qs, "breadcrumbs", "event_breadcrumbs")
parsed_data = event.get_parsed_data()
@@ -436,8 +443,9 @@ def issue_event_breadcrumbs(request, issue, event_pk=None, digest_order=None, na
"breadcrumbs": get_values(parsed_data.get("breadcrumbs")),
"mute_options": GLOBAL_MUTE_OPTIONS,
"q": request.GET.get("q", ""),
"event_qs": event_qs,
**_has_next_prev(event, event_qs),
"event_qs_count": event_x_qs.count(),
"has_prev": event.digest_order > first_do,
"has_next": event.digest_order < last_do,
})
@@ -447,18 +455,11 @@ def _date_with_milis_html(timestamp):
'<span class="text-xs">' + date(timestamp, "u")[:3] + '</span>')
def _has_next_prev(event, event_qs):
# guarding against empty event_qs is not necessary, because we only get here if any event exists (because otherwise
# event would be None too)
# this was once implemented with Min/Max, but just doing 2 queries is (on sqlite at least) ~100× faster.
first = event_qs.order_by("digest_order").values("digest_order").first()
last = event_qs.order_by("-digest_order").values("digest_order").first()
return {
"has_prev": event.digest_order > first["digest_order"],
"has_next": event.digest_order < last["digest_order"],
}
def _first_last(qs_with_digest_order):
# this was once implemented with Min/Max, but just doing 2 queries is (on sqlite at least) much faster.
first = qs_with_digest_order.order_by("digest_order").values_list("digest_order", flat=True).first()
last = qs_with_digest_order.order_by("-digest_order").values_list("digest_order", flat=True).first()
return first, last
@atomic_for_request_method
@@ -467,12 +468,13 @@ def issue_event_details(request, issue, event_pk=None, digest_order=None, nav=No
if request.method == "POST":
return _handle_post(request, issue)
event_qs = search_events(issue.project, issue, request.GET.get("q", ""))
event_x_qs = search_events_optimized(issue.project, issue, request.GET.get("q", ""))
first_do, last_do = _first_last(event_x_qs)
try:
event = _get_event(event_qs, event_pk, digest_order, nav)
event = _get_event(event_x_qs, issue, event_pk, digest_order, nav, (first_do, last_do))
except Event.DoesNotExist:
return issue_event_404(request, issue, event_qs, "event-details", "event_details")
return issue_event_404(request, issue, event_x_qs, "event-details", "event_details")
parsed_data = event.get_parsed_data()
key_info = [
@@ -560,8 +562,9 @@ def issue_event_details(request, issue, event_pk=None, digest_order=None, nav=No
"contexts": contexts,
"mute_options": GLOBAL_MUTE_OPTIONS,
"q": request.GET.get("q", ""),
"event_qs": event_qs,
**_has_next_prev(event, event_qs),
"event_qs_count": event_x_qs.count(),
"has_prev": event.digest_order > first_do,
"has_next": event.digest_order < last_do,
})
@@ -572,12 +575,11 @@ def issue_history(request, issue):
return _handle_post(request, issue)
event_qs = search_events(issue.project, issue, request.GET.get("q", ""))
last_event = event_qs.order_by("digest_order").last() # used for switching to an event-page (using tabs)
last_event = event_qs.order_by("digest_order").last()
return render(request, "issues/history.html", {
"tab": "history",
"project": issue.project,
"issue": issue,
"event": last_event,
"is_event_page": False,
"request_repr": _request_repr(last_event.get_parsed_data()) if last_event is not None else "",
"mute_options": GLOBAL_MUTE_OPTIONS,
@@ -591,12 +593,11 @@ def issue_tags(request, issue):
return _handle_post(request, issue)
event_qs = search_events(issue.project, issue, request.GET.get("q", ""))
last_event = event_qs.order_by("digest_order").last() # used for switching to an event-page (using tabs)
last_event = event_qs.order_by("digest_order").last()
return render(request, "issues/tags.html", {
"tab": "tags",
"project": issue.project,
"issue": issue,
"event": last_event,
"is_event_page": False,
"request_repr": _request_repr(last_event.get_parsed_data()) if last_event is not None else "",
"mute_options": GLOBAL_MUTE_OPTIONS,
@@ -610,12 +611,11 @@ def issue_grouping(request, issue):
return _handle_post(request, issue)
event_qs = search_events(issue.project, issue, request.GET.get("q", ""))
last_event = event_qs.order_by("digest_order").last() # used for switching to an event-page (using tabs)
last_event = event_qs.order_by("digest_order").last()
return render(request, "issues/grouping.html", {
"tab": "grouping",
"project": issue.project,
"issue": issue,
"event": last_event,
"is_event_page": False,
"request_repr": _request_repr(last_event.get_parsed_data()) if last_event is not None else "",
"mute_options": GLOBAL_MUTE_OPTIONS,
@@ -628,9 +628,12 @@ def issue_event_list(request, issue):
if request.method == "POST":
return _handle_post(request, issue)
# because we we need _actual events_ for display, and we don't have the regular has_prev/has_next (paginator
# instead), we don't try to optimize using search_events_optimized in this view (except for counting)
if "q" in request.GET:
event_list = search_events(issue.project, issue, request.GET["q"]).order_by("digest_order")
paginator = Paginator(event_list, 250) # might as well use Paginator; the cost of .count() is incurred anyway
event_x_qs = search_events_optimized(issue.project, issue, request.GET.get("q", ""))
paginator = KnownCountPaginator(event_list, 250, count=event_x_qs.count())
else:
event_list = issue.event_set.order_by("digest_order")
# re 250: in general "big is good" because it allows a lot "at a glance".
@@ -639,13 +642,12 @@ def issue_event_list(request, issue):
page_number = request.GET.get("page")
page_obj = paginator.get_page(page_number)
last_event = event_list.last() # used for switching to an event-page (using tabs)
last_event = event_list.last()
return render(request, "issues/event_list.html", {
"tab": "event-list",
"project": issue.project,
"issue": issue,
"event": last_event,
"event_list": event_list,
"is_event_page": False,
"request_repr": _request_repr(last_event.get_parsed_data()) if last_event is not None else "",

View File

@@ -61,7 +61,7 @@ def _format_query_plan(rows):
@contextmanager
def query_debugger():
def query_debugger(print_all):
d = {}
queries_i = len(connection.queries)
yield d
@@ -70,7 +70,10 @@ def query_debugger():
print('Queries executed:', len(connection.queries) - queries_i)
print('Total query time:', sum(float(query['time']) for query in connection.queries[queries_i:]))
interesting_queries = [query for query in connection.queries[queries_i:] if float(query['time']) > 0.005]
if print_all:
interesting_queries = connection.queries[queries_i:]
else:
interesting_queries = [query for query in connection.queries[queries_i:] if float(query['time']) > 0.005]
for query in interesting_queries:
print()
@@ -89,7 +92,7 @@ def query_debugger():
class Command(BaseCommand):
"""Internal (debugging) command to test the performance of search queries."""
def _test_url(self, url, title, description):
def _test_url(self, url, title, description, print_all=False):
"""
Runs the view code that matches the given URL with a fake request; prints relevant stats
"""
@@ -105,7 +108,7 @@ class Command(BaseCommand):
print(description)
print()
with query_debugger() as d:
with query_debugger(print_all) as d:
view(request, *args, **kwargs)
self.total_time += d['total_time']

View File

@@ -0,0 +1,30 @@
# Generated by Django 4.2.19 on 2025-03-12 13:46
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("tags", "0001_initial"),
]
operations = [
migrations.RemoveIndex(
model_name="eventtag",
name="tags_eventt_value_i_255b9c_idx",
),
migrations.AddField(
model_name="eventtag",
name="digest_order",
field=models.PositiveIntegerField(default=0),
preserve_default=False,
),
migrations.AddIndex(
model_name="eventtag",
index=models.Index(
fields=["value", "issue", "digest_order"],
name="tags_eventt_value_i_6f1823_idx",
),
),
]

View File

@@ -0,0 +1,20 @@
from django.db import migrations
from django.db.models import OuterRef, Subquery
def set_eventtag_digest_order(apps, schema_editor):
EventTag = apps.get_model("tags", "EventTag")
EventTag.objects.update(
digest_order=Subquery(EventTag.objects.filter(pk=OuterRef('pk')).values('event__digest_order')[:1])
)
class Migration(migrations.Migration):
dependencies = [
("tags", "0002_remove_eventtag_tags_eventt_value_i_255b9c_idx_and_more"),
]
operations = [
migrations.RunPython(set_eventtag_digest_order, migrations.RunPython.noop),
]

View File

@@ -79,6 +79,9 @@ class EventTag(models.Model):
issue = models.ForeignKey(
'issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name="event_tags")
# digest_order is a denormalization that allows for a single-table-index for efficient search.
digest_order = models.PositiveIntegerField(blank=False, null=False)
# DO_NOTHING: we manually implement CASCADE (i.e. when an event is cleaned up, clean up associated tags) in the
# eviction process. Why CASCADE? [1] you'll have to do it "at some point", so you might as well do it right when
# evicting (async in the 'most resilient setup' anyway, b/c that happens when ingesting) [2] the order of magnitude
@@ -93,9 +96,9 @@ class EventTag(models.Model):
models.Index(fields=['event']), # for lookups by event (for event-details page, event-deletions)
# for search, which filters a list of EventTag down to those matching certain values and a given issue.
# (both orderings of the index would work for the current search query; if we ever introduce "search across
# issues" the below would work for that too (but the reverse wouldn't))
models.Index(fields=['value', 'issue']),
# (both orderings of the (value, issue) would work for the current search query; if we ever introduce
# "search across issues" the below would work for that too (but the reverse wouldn't))
models.Index(fields=['value', 'issue', 'digest_order']),
]
@@ -175,7 +178,7 @@ def store_tags(event, issue, tags):
# project_id=event.project_id, key=key, mostly_unique=is_mostly_unique(key))
# tag_value, _ = TagValue.objects.get_or_create(project_id=event.project_id, key=tag_key, value=value)
# EventTag.objects.get_or_create(project_id=event.project_id, value=tag_value, event=event,
# defaults={'issue': issue})
# defaults={'issue': issue, 'digest_order': event.digest_order})
# IssueTag.objects.get_or_create(
# project_id=event.project_id, key_id=tag_value.key_id, value=tag_value, issue=issue)
#
@@ -212,7 +215,8 @@ def store_tags(event, issue, tags):
project_id=event.project_id,
value=tag_value,
event=event,
issue=issue
issue=issue,
digest_order=event.digest_order,
) for tag_value in tag_value_objects
], ignore_conflicts=True)

View File

@@ -5,13 +5,13 @@ least it means we have all of this together in a separate file this way.
"""
import re
from django.db.models import Q, Subquery
from django.db.models import Q, Subquery, Count
from collections import namedtuple
from bugsink.moreiterutils import tuplewise
from events.models import Event
from .models import TagValue, IssueTag, EventTag
from .models import TagValue, IssueTag, EventTag, _or_join
ParsedQuery = namedtuple("ParsedQ", ["tags", "plain_text"])
@@ -68,8 +68,6 @@ def _search(m2m_qs, fk_fieldname, project, obj_list_all, obj_list_filtered, q):
parsed = parse_query(q)
# The simplest possible query-language that could have any value: key:value is recognized as such; the rest is "free
# text"; no support for quoting of spaces.
clauses = []
for key, value in parsed.tags.items():
try:
@@ -81,9 +79,6 @@ def _search(m2m_qs, fk_fieldname, project, obj_list_all, obj_list_filtered, q):
# mean: we _could_ say "tag x is to blame" but that's not what one does generally in search, is it?
return obj_list_all.none()
# TODO: Extensive performance testing of various choices here is necessary; in particular the choice of Subquery
# vs. joins; and the choice of a separate query to get TagValue v.s. doing everything in a single big query will
# have different trade-offs _in practice_.
clauses.append(
Q(id__in=Subquery(m2m_qs.filter(value=tag_value_obj).values_list(fk_fieldname, flat=True))))
@@ -128,3 +123,55 @@ def search_events(project, issue, q):
Event.objects.filter(issue=issue),
q,
)
def search_event_tags(project, issue, parsed):
if parsed.plain_text or not parsed.tags:
raise ValueError("Only works for at least one tag and no plain text")
clauses = []
for key, value in parsed.tags.items():
try:
# Since we have project as a field on TagValue, we _could_ filter on project directly; with our current set
# of indexes the below formulation is a nice way to reuse the index on TagKey (project, key) though.
tag_value_obj = TagValue.objects.get(key__project=project, key__key=key, value=value)
except TagValue.DoesNotExist:
# if the tag doesn't exist, we can't have any issues with it; the below short-circuit is fine, I think (I
# mean: we _could_ say "tag x is to blame" but that's not what one does generally in search, is it?
return EventTag.objects.none()
clauses.append(Q(issue=issue, value=tag_value_obj))
if len(clauses) == 1:
return EventTag.objects.filter(clauses[0])
# We have multiple clauses; we need to find the events that have all of these tags; we can't just do a filter() with
# and _and_join of the clauses, because ANDing 2 disjoint sets of tags will never match anything. So we need to do a
# count of the number of events that have each of the tags, and then filter on the ones that have all of them.
# Note that grouping-by/counting on the digest_order (rather than just event_id) works because the digest_order is
# unique per event (in the context of a single issue); its use allows for a covering index on the OR-ed parts.
# Note that pulling the match on issue out of the OR-ed parts to a single AND was tried, but found to be 2x slower.
return EventTag.objects.filter(
_or_join(clauses)).values("digest_order").annotate(count=Count("digest_order")).filter(count=len(clauses))
def search_events_optimized(project, issue, q):
"""
Search events or event tags (which will have digest_order efficiently), based on what you can do given "q".
"""
if not q:
return Event.objects.filter(issue=issue)
parsed = parse_query(q)
if parsed.plain_text or not parsed.tags:
# if-clause is a bit redundant given the `not q` guard above, but this fully covers the cases search_event_tags
# can't handle;
# in this case we just fall back to "whatever we did pre-optimization"; in other words, you're now entering the
# "not so optimized" path. The reason is simply: we had to stop somewhere. One thing we don't try to do in
# particular is use the result of `search_event_tags` as an inner query to construct Events (whether that's even
# faster wasn't checked)
return search_events(project, issue, q)
return search_event_tags(project, issue, parsed)

View File

@@ -8,7 +8,7 @@ from issues.models import Issue
from .models import store_tags, EventTag
from .utils import deduce_tags
from .search import search_events, search_issues, parse_query
from .search import search_events, search_issues, parse_query, search_events_optimized
class DeduceTagsTestCase(RegularTestCase):
@@ -199,8 +199,12 @@ class SearchTestCase(DjangoTestCase):
# in the above, we create 2 items with tags
self.assertEqual(search_x("k-0:v-0").count(), 2)
# an "AND" query should yield the same 2
self.assertEqual(search_x("k-0:v-0 k-1:v-1").count(), 2)
# non-matching tag: no results
self.assertEqual(search_x("k-0:nosuchthing").count(), 0)
self.assertEqual(search_x("k-0:nosuchthing k-1:v-1").count(), 0)
# findable-by-text: 2 such items
self.assertEqual(search_x("findable value").count(), 2)
@@ -216,6 +220,9 @@ class SearchTestCase(DjangoTestCase):
def test_search_events(self):
self._test_search(lambda query: search_events(self.project, self.global_issue, query))
def test_search_events_optimized(self):
self._test_search(lambda query: search_events_optimized(self.project, self.global_issue, query))
def test_search_events_wrong_issue(self):
issue_without_events = Issue.objects.create(project=self.project, **denormalized_issue_fields())