Implement (and document) 'unrepr'

This commit is contained in:
Klaas van Schelven
2023-11-24 20:20:43 +01:00
parent 97b83a7d16
commit a197efb888
2 changed files with 45 additions and 0 deletions

View File

@@ -1,3 +1,4 @@
import json
from unittest import TestCase
import datetime
from django.test import override_settings
@@ -5,6 +6,7 @@ from django.test import override_settings
from .dsn import build_dsn, get_store_url, get_envelope_url, get_header_value
from .auth import parse_auth_header_value
from .timestamp import parse_timestamp
from .vars import unrepr
class DsnTestCase(TestCase):
@@ -82,3 +84,19 @@ class TimestampTestCase(TestCase):
self.assertEquals(
datetime.datetime(2022, 9, 1, 9, 45, 0, tzinfo=datetime.timezone.utc),
parse_timestamp("2022-09-01T09:45:00.000Z"))
class VarsTestCase(TestCase):
    """Checks that unrepr() renders JSON-decoded containers without an extra layer of string quotes."""

    def test_dicts(self):
        # The string values mimic data on which repr() has already been applied before JSON-serialization;
        # the nested "recurse" dict checks that the rendering is applied recursively.
        d = json.loads('''{"baz":"1","foo":"'bar'","snu":"None","recurse":{"foo": "'bar'"}}''')

        # assertEqual rather than assertEquals: the latter is a deprecated alias, removed in Python 3.12.
        self.assertEqual(
            '''{baz: 1, foo: 'bar', snu: None, recurse: {foo: 'bar'}}''',
            unrepr(d))

    def test_lists(self):
        # Same idea as test_dicts, for a list that contains a nested list.
        d = json.loads('''["'bar'","1","None",["'bar'","1","None"]]''')

        self.assertEqual(
            '''['bar', 1, None, ['bar', 1, None]]''',
            unrepr(d))

27
compat/vars.py Normal file
View File

@@ -0,0 +1,27 @@
def unrepr(value):
    """Render dicts and lists as text without adding a second layer of quotes around their string contents.

    The Sentry Client (at least the Python one) makes particular choices when serializing the data as JSON. In
    general, not everything can be serialized, so they call repr(). However, they also call repr() when this is not
    strictly necessary, with the note "For example, it's useful to see the difference between a unicode-string and a
    bytestring when viewing a stacktrace." (see `_should_repr_strings`)

    When receiving such data, especially when nested inside e.g. a dict or list, we must take care not to render both
    the quote for "string data in a json dict" and the quote for "repr has been called on a string", like so:

        {"foo": "'bar'", ...}  <= WRONG

    This would potentially put human debuggers on the path of trying to figure out where the spurious quotes come
    from in the application that's being debugged.

    The following code at least tackles that particular problem.

    Notes on compat (as of late 2023):

    * GlitchTip has this wrong; sentry suffered from this in the past: https://github.com/getsentry/sentry/issues/15912
    * Sentry (and we) renders the _keys_ in dicts wrong, because for strings repr() isn't called client side. However,
      "naked" (non-string) symbols cannot occur in Python dicts, so this can never cause confusion as mentioned above.

    Returns a string for dicts and lists (contents rendered recursively); any other value is returned unchanged.
    """
    if isinstance(value, list):
        # Elements are interpolated as-is, so an already-repr()'d string keeps exactly its own quotes.
        rendered_items = [f"{unrepr(item)}" for item in value]
        return "[" + ", ".join(rendered_items) + "]"

    if isinstance(value, dict):
        # Keys are interpolated as-is too (see the compat note about keys above).
        rendered_pairs = [f"{key}: {unrepr(item)}" for key, item in value.items()]
        return "{" + ", ".join(rendered_pairs) + "}"

    # Not a container: hand the value back untouched.
    return value