Search optimization: when counting the results takes too long, don't

2026-01-30 01:20:08 -06:00 · 2025-03-12 21:26:47 +01:00
parent c3ed995ecc
commit 175e103d23
2 changed files with 41 additions and 15 deletions
--- a/bugsink/timed_sqlite_backend/base.py
+++ b/bugsink/timed_sqlite_backend/base.py
@@ -7,26 +7,35 @@ from django.db.backends.sqlite3.base import (
 )


-_allow_long_running_queries = False
+_runtime_limit = 5.0


 def allow_long_running_queries():
    """Set a global flag to allow long-running queries. Useful for one-off commands, where slowness is expected."""
-    global _allow_long_running_queries
-    _allow_long_running_queries = True
+    global _runtime_limit
+    _runtime_limit = float("inf")


@contextmanager
-def limit_runtime(conn, seconds):
-    global _allow_long_running_queries
+def different_runtime_limit(seconds):
+    global _runtime_limit
+    old = _runtime_limit
+    _runtime_limit = seconds
+
+    try:
+        yield
+    finally:
+        _runtime_limit = old
+
+
+@contextmanager
+def limit_runtime(conn):
+    global _runtime_limit

    start = time.time()

    def check_time():
-        if _allow_long_running_queries:  # check in-the-callback, to avoid
-            return 0
-
-        if time.time() > start + seconds:
+        if time.time() > start + _runtime_limit:
            return 1

        return 0
@@ -91,7 +100,7 @@ class SQLiteCursorWrapper(UnpatchedSQLiteCursorWrapper):
            # migrations in Sqlite are often slow (drop/recreate tables, etc); so we don't want to limit them
            return super().execute(query, params)

-        with limit_runtime(self.connection, 5.0):
+        with limit_runtime(self.connection):
            return super().execute(query, params)

    def executemany(self, query, param_list):
@@ -99,5 +108,5 @@ class SQLiteCursorWrapper(UnpatchedSQLiteCursorWrapper):
            # migrations in Sqlite are often slow (drop/recreate tables, etc); so we don't want to limit them
            return super().executemany(query, param_list)

-        with limit_runtime(self.connection, 5.0):
+        with limit_runtime(self.connection):
            return super().executemany(query, param_list)
--- a/issues/views.py
+++ b/issues/views.py
@@ -12,12 +12,14 @@ from django.urls import reverse
 from django.core.exceptions import PermissionDenied
 from django.http import Http404
 from django.core.paginator import Paginator
+from django.db.utils import OperationalError

 from sentry.utils.safe import get_path

 from bugsink.decorators import project_membership_required, issue_membership_required, atomic_for_request_method
 from bugsink.transaction import durable_atomic
 from bugsink.period_utils import add_periods_to_datetime
+from bugsink.timed_sqlite_backend.base import different_runtime_limit

 from events.models import Event
 from events.ua_stuff import get_contexts_enriched_with_ua
@@ -334,6 +336,20 @@ def _get_event(qs, issue, event_pk, digest_order, nav, bounds):
        raise Http404("Either event_pk, nav, or digest_order must be provided")


+def _event_count(request, issue, event_x_qs):
+    # We want to be able to show the number of matching events for some query in the UI, but counting is potentially
+    # expensive, because it's a full scan over all matching events. We just show "many" if this takes too long.
+    # different_runtime_limit is sqlite-only, it doesn't affect other backends.
+
+    with different_runtime_limit(0.1):
+        try:
+            return event_x_qs.count() if request.GET.get("q") else issue.stored_event_count
+        except OperationalError as e:
+            if e.args[0] != "interrupted":
+                raise
+            return "many"
+
+
@atomic_for_request_method
@issue_membership_required
 def issue_event_stacktrace(request, issue, event_pk=None, digest_order=None, nav=None):
@@ -395,7 +411,7 @@ def issue_event_stacktrace(request, issue, event_pk=None, digest_order=None, nav
        "stack_of_plates": stack_of_plates,
        "mute_options": GLOBAL_MUTE_OPTIONS,
        "q": request.GET.get("q", ""),
-        "event_qs_count": event_x_qs.count() if request.GET.get("q") else issue.stored_event_count,
+        "event_qs_count": _event_count(request, issue, event_x_qs),
        "has_prev": event.digest_order > first_do,
        "has_next": event.digest_order < last_do,
    })
@@ -412,7 +428,7 @@ def issue_event_404(request, issue, event_x_qs, tab, this_view):
        "is_event_page": False,  # this variable is used to denote "we have event-related info", which we don't
        "mute_options": GLOBAL_MUTE_OPTIONS,
        "q": request.GET.get("q", ""),
-        "event_qs_count": event_x_qs.count() if request.GET.get("q") else issue.stored_event_count,
+        "event_qs_count": _event_count(request, issue, event_x_qs),
    })


@@ -443,7 +459,7 @@ def issue_event_breadcrumbs(request, issue, event_pk=None, digest_order=None, na
        "breadcrumbs": get_values(parsed_data.get("breadcrumbs")),
        "mute_options": GLOBAL_MUTE_OPTIONS,
        "q": request.GET.get("q", ""),
-        "event_qs_count": event_x_qs.count() if request.GET.get("q") else issue.stored_event_count,
+        "event_qs_count": _event_count(request, issue, event_x_qs),
        "has_prev": event.digest_order > first_do,
        "has_next": event.digest_order < last_do,
    })
@@ -562,7 +578,7 @@ def issue_event_details(request, issue, event_pk=None, digest_order=None, nav=No
        "contexts": contexts,
        "mute_options": GLOBAL_MUTE_OPTIONS,
        "q": request.GET.get("q", ""),
-        "event_qs_count": event_x_qs.count() if request.GET.get("q") else issue.stored_event_count,
+        "event_qs_count": _event_count(request, issue, event_x_qs),
        "has_prev": event.digest_order > first_do,
        "has_next": event.digest_order < last_do,
    })
@@ -633,6 +649,7 @@ def issue_event_list(request, issue):
    if "q" in request.GET:
        event_list = search_events(issue.project, issue, request.GET["q"]).order_by("digest_order")
        event_x_qs = search_events_optimized(issue.project, issue, request.GET.get("q", ""))
+        # we don't do the `_event_count` optimization here, because we need the correct number for pagination
        paginator = KnownCountPaginator(event_list, 250, count=event_x_qs.count())
    else:
        event_list = issue.event_set.order_by("digest_order")