Visualize trimmed data ('x items trimmed')

Fix #18

Similar to [the request for the same feature in Sentry](https://github.com/getsentry/sentry/issues/68426).

SDK-side complaints:

* https://github.com/getsentry/sentry-python/issues/377
* https://github.com/getsentry/sentry-python/issues/805
* https://github.com/getsentry/sentry-python/issues/1041
* https://github.com/getsentry/sentry-python/issues/1105
* https://github.com/getsentry/sentry-python/issues/2121
* https://github.com/getsentry/sentry-python/issues/2682
* https://github.com/getsentry/sentry-python/issues/3209
* https://github.com/getsentry/sentry-python/issues/3634
* https://github.com/getsentry/sentry-python/issues/3740
2026-02-15 02:48:45 -06:00 · 2024-12-18 16:49:23 +01:00
parent c3d6fdce1c
commit a5bc27032a
6 changed files with 372 additions and 4 deletions
--- a/events/tests.py
+++ b/events/tests.py
@@ -1,3 +1,4 @@
+import json
 import datetime

 from django.test import TestCase as DjangoTestCase
@@ -13,6 +14,7 @@ from issues.factories import denormalized_issue_fields

 from .factories import create_event
 from .retention import eviction_target
+from .utils import annotate_with_meta

 User = get_user_model()

@@ -96,3 +98,118 @@ class RetentionTestCase(RegularTestCase):
        # Note that we have no special-casing for under-target (yet); not needed because should_evict (which does a
        # simple comparison) is always called first.
        # self.assertEqual(0, eviction_target(10_000, 9_999))
+
+
+class AnnotateWithMetaTestCase(RegularTestCase):
+    def test_annotate_with_meta(self):
+        parsed_data = json.loads(EXAMPLE_META)
+
+        exception_values = parsed_data["exception"]["values"]
+        frames = exception_values[0]["stacktrace"]["frames"]
+        meta_frames = parsed_data["_meta"]["exception"]["values"]["0"]["stacktrace"]["frames"]
+
+        annotate_with_meta(exception_values, parsed_data["_meta"]["exception"]["values"])
+
+        # length of the vars in a frame
+        self.assertTrue(hasattr(frames[0]["vars"], "incomplete"))
+        self.assertEqual(
+            meta_frames["0"]["vars"][""]["len"] - len(frames[0]["vars"]),
+            frames[0]["vars"].incomplete)
+
+        # a var itself
+        self.assertTrue(hasattr(frames[1]["vars"]["installed_apps"], "incomplete"))
+        self.assertEqual(
+            meta_frames["1"]["vars"]["installed_apps"][""]["len"] - len(frames[1]["vars"]["installed_apps"]),
+            frames[1]["vars"]["installed_apps"].incomplete)
+
+        # a var which is a list, containing a dict
+        self.assertTrue(hasattr(frames[2]["vars"]["args"][1]["__builtins__"], "incomplete"))
+        self.assertEqual(
+            (meta_frames["2"]["vars"]["args"]["1"]["__builtins__"][""]["len"] -
+             len(frames[2]["vars"]["args"][1]["__builtins__"])),
+            frames[2]["vars"]["args"][1]["__builtins__"].incomplete)
+
+
+EXAMPLE_META = r'''{
+  "exception": {
+    "values": [
+      {
+        "stacktrace": {
+          "frames": [
+            {
+              "vars": {
+                "os": "<module 'os' from '/usr/lib/python3.10/os.py'>"
+              }
+            },
+            {
+              "vars": {
+                "self": "<django.apps.registry.Apps object at 0x7f65d4bdfeb0>",
+                "installed_apps": [
+                  "'projects'"
+                ]
+              }
+            },
+            {
+              "vars": {
+                "f": "<built-in function exec>",
+                "args": [
+                  "<code object <module> at 0x7f65d33e92c0, file \"...\", line 1>",
+                  {
+                    "__name__": "'releases.models'",
+                    "__builtins__": {
+                      "any": "<built-in function any>"
+                    }
+                  }
+                ]
+              }
+            }
+          ]
+        }
+      }
+    ]
+  },
+  "_meta": {
+    "exception": {
+      "values": {
+        "0": {
+          "stacktrace": {
+            "frames": {
+              "0": {
+                "vars": {
+                  "": {
+                    "len": 12
+                  }
+                }
+              },
+              "1": {
+                "vars": {
+                  "installed_apps": {
+                    "": {
+                      "len": 16
+                    }
+                  }
+                }
+              },
+              "2": {
+                "vars": {
+                  "args": {
+                    "1": {
+                      "__builtins__": {
+                        "": {
+                          "len": 155
+                        }
+                      },
+                      "": {
+                        "len": 13
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}'''  # extracted from a real event; limited to the parts that are needed for annotate_with_meta
--- a/events/utils.py
+++ b/events/utils.py
@@ -0,0 +1,70 @@
+class IncompleteList(list):
+    def __init__(self, lst, cnt):
+        super().__init__(lst)
+        self.incomplete = cnt
+
+
+class IncompleteDict(dict):
+    def __init__(self, dct, cnt):
+        super().__init__(dct)
+        self.incomplete = cnt
+
+
+def annotate_with_meta(values, meta_values):
+    """
+    Use the meta_values (values attr of a "_meta" key) to annotate the values, in particular to add information about
+    which lists/dicts have been trimmed.
+
+    This depends on an undocumented API of the Python Sentry SDK; we've just reverse-engineered the format of the
+    "_meta" values.
+
+    From the Sentry SDK source code, one could conclude that there are various pieces of info (I've seen "rem", "len",
+    "val", and "err" mentioned as keys and "!limit" as a value) but I've not actually been able to get the Sentry SDK
+    to emit records with the "!limit" value, and there are no tests for it, so I'm not sure how it's supposed to work.
+    For now, I'm basing myself on what I've actually seen in the wild. (Also: I'm less worried about pruning in depth
+    than in breadth, because in the case of in-depth pruning the fallback is still to repr() the remaining stuff, so
+    you don't end up with silently trimmed data).
+
+    See also:
+    https://github.com/getsentry/relay/blob/b3ecbb980c63be542547cf346f433061f69c4bba/relay-protocol/src/meta.rs#L417
+
+    The values are modified in-place.
+    """
+
+    for str_i, meta_value in meta_values.items():
+        annotate_exception_with_meta(values[int(str_i)], meta_value)
+
+
+def annotate_exception_with_meta(exception, meta_value):
+    frames = exception.get("stacktrace", {}).get("frames", {})
+    meta_frames = meta_value.get("stacktrace", {}).get("frames", {})
+
+    for str_i, meta_frame in meta_frames.items():
+        annotate_frame_with_meta(frames[int(str_i)], meta_frame)
+
+
+def annotate_frame_with_meta(frame, meta_frame):
+    frame["vars"] = annotate_var_with_meta(frame["vars"], meta_frame["vars"])
+
+
+def annotate_var_with_meta(var, meta_var):
+    """
+    'var' is a (potentially trimmed) list or dict, 'meta_var' is a dict describing the trimming.
+    """
+    assert isinstance(var, (list, dict))
+
+    if isinstance(var, list):
+        Incomplete = IncompleteList
+        at = lambda k: int(k)  # noqa; (for some reason the meta_k for list lookups is stored as a string)
+
+    else:  # isinstance(var, dict):
+        Incomplete = IncompleteDict
+        at = lambda k: k  # noqa
+
+    for meta_k, meta_v in meta_var.items():
+        if meta_k == "":
+            var = Incomplete(var, meta_v["len"] - len(var))
+        else:
+            var[at(meta_k)] = annotate_var_with_meta(var[at(meta_k)], meta_v)
+
+    return var
--- a/issues/templates/issues/stacktrace.html
+++ b/issues/templates/issues/stacktrace.html
@@ -127,10 +127,17 @@
                        </div>
                        {% for var, value in frame.vars|items %}
                        <div class="flex">
-                            <div class="w-1/3 pl-4 {% if not forloop.last %}border-b-2 border-dotted border-slate-300{% endif %}">{{ var }}</div>
-                            <div class="w-2/3 pr-4 {% if not forloop.last %} border-b-2 border-dotted border-slate-300{% endif %}">{{ value }}</div>
+                            <div class="w-1/3 pl-4 {% if not forloop.last or frame.vars|incomplete %}border-b-2 border-dotted border-slate-300{% endif %}">{{ var }}</div>
+                            <div class="w-2/3 pr-4 {% if not forloop.last or frame.vars|incomplete %} border-b-2 border-dotted border-slate-300{% endif %}">{{ value|format_var }}</div>
                        </div>
                        {% endfor %}
+                        {% if frame.vars|incomplete %}
+                        <div class="flex">
+                            <div class="w-1/3 pl-4 {# last by default #}italic">&lt;{{ frame.vars.incomplete }} items trimmed…&gt;</div>
+                            <div class="w-2/3 pr-4 {# last by default #}"></div>
+                        </div>
+                        {% endif %}
+
                    </div>
                    {% endif %}

--- a/issues/views.py
+++ b/issues/views.py
@@ -1,5 +1,6 @@
 from collections import namedtuple
 import json
+import sentry_sdk

 from django.utils import timezone
 from django.shortcuts import render, get_object_or_404, redirect
@@ -25,6 +26,7 @@ from projects.models import ProjectMembership
 from .models import Issue, IssueQuerysetStateManager, IssueStateManager, TurningPoint, TurningPointKind
 from .forms import CommentForm
 from .utils import get_values
+from events.utils import annotate_with_meta


 MuteOption = namedtuple("MuteOption", ["for_or_until", "period_name", "nr_of_periods", "gte_threshold"])
@@ -310,6 +312,17 @@ def issue_event_stacktrace(request, issue, event_pk=None, digest_order=None, nav

    exceptions = get_values(parsed_data["exception"]) if "exception" in parsed_data else None

+    try:
+        # get_values for consistency (whether it's needed: unclear, since _meta is not actually in the specs)
+        meta_values = get_values(parsed_data.get("_meta", {}).get("exception", {"values": {}}))
+        annotate_with_meta(exceptions, meta_values)
+    except Exception as e:
+        # broad Exception handling: "_meta" is completely undocumented, and though we have some example of event-data
+        # with "_meta" in it, we're not quite sure what the full structure could be in the wild. Because the
+        # 'incomplete' annotations are not absolutely necessary (Sentry itself went without it for years) we silently
+        # swallow the error in that case.
+        sentry_sdk.capture_exception(e)
+
    # NOTE: I considered making this a clickable button of some sort, but decided against it in the end. Getting the UI
    # right is quite hard (https://ux.stackexchange.com/questions/1318) but more generally I would assume that having
    # your whole screen turned upside down is not something you do willy-nilly. Better to just have good defaults and
--- a/theme/templatetags/issues.py
+++ b/theme/templatetags/issues.py
@@ -3,7 +3,7 @@ from django import template
 from pygments import highlight
 from pygments.formatters import HtmlFormatter

-
+from django.utils.html import escape
 from django.utils.safestring import mark_safe


@@ -135,3 +135,103 @@ def shortsha(value):
        return value

    return value[:12]
+
+
+@register.filter()
+def format_var(value):
+    """Formats a variable for display in the template; deals with 'marked as incomplete'."""
+    # this is a non-recursive version of the function below, which is faster and allows for arbitrary nesting.
+    # implementation: `todo` is a generator object that yields [1] parts of the result, and [2] instructions to recurse,
+    # which we interpret manually using a python-list "stack"
+
+    def storevalue(v):
+        # sentinel function to store the value for later retrieval; because JSON contains no callables this allows us
+        # to distinguish between `None` meaning no recurse and `None`, a value that needs to be displayed.
+        def get():
+            return v
+        return get
+
+    def gen_base(obj):
+        yield escape(repr(obj)), None
+
+    def bracket_wrap(gen, b_open, sep, b_close):
+        yield b_open, None
+        fst = True
+        for part, recurse in gen:
+            if not fst:
+                yield sep, None
+            yield part, recurse
+            fst = False
+        yield b_close, None
+
+    def gen_list(lst):
+        for value in lst:
+            yield "", storevalue(value)
+
+        if hasattr(lst, "incomplete"):
+            yield f"<i>&lt;{lst.incomplete} items trimmed…&gt;</i>", None
+
+    def gen_dict(d):
+        for (k, v) in d.items():
+            yield escape(repr(k)) + ": ", storevalue(v)
+
+        if hasattr(d, "incomplete"):
+            yield f"<i>&lt;{d.incomplete} items trimmed…&gt;</i>", None
+
+    def gen_switch(obj):
+        if isinstance(obj, list):
+            return bracket_wrap(gen_list(obj), "[", ", ", "]")
+        if isinstance(obj, dict):
+            return bracket_wrap(gen_dict(obj), "{", ", ", "}")
+        return gen_base(obj)
+
+    result = []
+    stack = []
+    todo = gen_switch(value)
+    done = False
+
+    while not done:
+        try:
+            part, recurse = next(todo)
+            result.append(part)
+        except StopIteration:
+            recurse = None
+            if stack:
+                todo = stack.pop()
+            else:
+                done = True
+
+        if callable(recurse):
+            stack.append(todo)
+            todo = gen_switch(recurse())
+
+    # mark_safe is OK because the only non-escaped parts are the brackets, commas, colons and our own <i>…</i> markers.
+    return mark_safe("".join(result))
+
+
+# recursive equivalent:
+# @register.filter()
+# def format_var(value):
+#     """Formats a variable for display in the template; deals with 'marked as incomplete'.
+#     """
+#     # mark_safe is OK because the only non-escaped characters are the brackets, commas, and colons.
+#
+#     if isinstance(value, dict):
+#         parts = [(escape(repr(k)) + ": " + format_var(v)) for (k, v) in value.items()]
+#         if hasattr(value, "incomplete"):
+#             parts.append(mark_safe(f"<i>&lt;{value.incomplete} items trimmed…&gt;</i>"))
+#         return mark_safe("{" + ", ".join(parts) + "}")
+#
+#     if isinstance(value, list):
+#         parts = [format_var(v) for v in value]
+#         if hasattr(value, "incomplete"):
+#             parts.append(mark_safe(f"<i>&lt;{value.incomplete} items trimmed…&gt;</i>"))
+#         return mark_safe("[" + ", ".join(parts) + "]")
+#
+#     return escape(value)
+
+
+@register.filter()
+def incomplete(value):
+    # needed to distinguish between 'has an incomplete' attr (set by us) and 'contains an incomplete key' (event-data)
+    return hasattr(value, "incomplete")
--- a/theme/tests.py
+++ b/theme/tests.py
@@ -1,7 +1,10 @@
 from unittest import TestCase as RegularTestCase

 from bugsink.pygments_extensions import choose_lexer_for_pattern, get_all_lexers
-from .templatetags.issues import _pygmentize_lines as actual_pygmentize_lines
+
+from events.utils import IncompleteList, IncompleteDict
+
+from .templatetags.issues import _pygmentize_lines as actual_pygmentize_lines, format_var


 def _pygmentize_lines(lines):
@@ -68,3 +71,61 @@ class TestChooseLexerForPatter(RegularTestCase):

        for pattern, lexers in get_all_lexers()._list:
            choose_lexer_for_pattern(pattern, lexers, "", "", "python")
+
+
+class TestFormatVar(RegularTestCase):
+
+    def _format_var(self, var):
+        # small helper for readable tests
+        return format_var(var).replace("&#x27;", "'")
+
+    def test_format_var_none(self):
+        self.assertEqual(
+            "None",
+            self._format_var(None),
+        )
+
+    def test_format_var_nested(self):
+        var = {
+            "a": 1,
+            "b": [2, 3],
+            "c": {"d": 4},
+            "d": [],
+            "e": {},
+            "f": None,
+        }
+
+        self.assertEqual(
+            "{'a': 1, 'b': [2, 3], 'c': {'d': 4}, 'd': [], 'e': {}, 'f': None}",
+            self._format_var(var),
+        )
+
+    def test_format_var_deep(self):
+        def _deep(level):
+            result = None
+            for i in range(level):
+                result = [result]
+            return result
+
+        var = _deep(10_000)
+
+        self.assertEqual(
+            '[' * 10_000 + 'None' + ']' * 10_000,
+            self._format_var(var),
+        )
+
+    def test_format_var_incomplete_list(self):
+        var = IncompleteList([1, 2, 3], 9)
+
+        self.assertEqual(
+            "[1, 2, 3, <i>&lt;9 items trimmed…&gt;</i>]",
+            self._format_var(var),
+        )
+
+    def test_format_var_incomplete_dict(self):
+        var = IncompleteDict({"a": 1, "b": 2, "c": 3}, 9)
+
+        self.assertEqual(
+            "{'a': 1, 'b': 2, 'c': 3, <i>&lt;9 items trimmed…&gt;</i>}",
+            self._format_var(var),
+        )