From a197efb888d2acce729af96e805b989317f99014 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Fri, 24 Nov 2023 20:20:43 +0100 Subject: [PATCH] Implement (and document) 'unrepr' --- compat/tests.py | 18 ++++++++++++++++++ compat/vars.py | 27 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 compat/vars.py diff --git a/compat/tests.py b/compat/tests.py index 7bb4128..155cc32 100644 --- a/compat/tests.py +++ b/compat/tests.py @@ -1,3 +1,4 @@ +import json from unittest import TestCase import datetime from django.test import override_settings @@ -5,6 +6,7 @@ from django.test import override_settings from .dsn import build_dsn, get_store_url, get_envelope_url, get_header_value from .auth import parse_auth_header_value from .timestamp import parse_timestamp +from .vars import unrepr class DsnTestCase(TestCase): @@ -82,3 +84,19 @@ class TimestampTestCase(TestCase): self.assertEquals( datetime.datetime(2022, 9, 1, 9, 45, 0, tzinfo=datetime.timezone.utc), parse_timestamp("2022-09-01T09:45:00.000Z")) + + +class VarsTestCase(TestCase): + def test_dicts(self): + d = json.loads('''{"baz":"1","foo":"'bar'","snu":"None","recurse":{"foo": "'bar'"}}''') + + self.assertEquals( + '''{baz: 1, foo: 'bar', snu: None, recurse: {foo: 'bar'}}''', + unrepr(d)) + + def test_lists(self): + d = json.loads('''["'bar'","1","None",["'bar'","1","None"]]''') + + self.assertEquals( + '''['bar', 1, None, ['bar', 1, None]]''', + unrepr(d)) diff --git a/compat/vars.py b/compat/vars.py new file mode 100644 index 0000000..8f9b5cd --- /dev/null +++ b/compat/vars.py @@ -0,0 +1,27 @@ +def unrepr(value): + """The Sentry Client (at least the Python one) makes particular choices when serializing the data as JSON. In + general, not everything can be serialized, so they call repr(). However, they also call repr when this is not + strictly necessary, with the note "For example, it's useful to see the difference between a unicode-string and a + bytestring when viewing a stacktrace." (see `_should_repr_strings`) + + When receiving such data, especially when nested inside e.g. a dict or list, we must take care to not render both + both the quote for "string data in a json dict" and the quote for "repr has been called on a string", like so: + + {"foo": "'bar'", ...} <= WRONG + + This would put potentially put human debuggers on the path of trying to figure out where the spurious quotes would + come from in the application that's being debugged. + + The following code at least tackles that particular problem. + + Notes on compat (as of late 2023): + + * GlitchTip has this wrong; sentry suffered from this in the past: https://github.com/getsentry/sentry/issues/15912 + * Sentry (and we) renders the _keys_ in dicts wrong, because for strings repr() isn't called client side. However, + "naked" (non-string) symbols cannot occur in Python dicts, so this can never cause confusion as mentioned above. + """ + if isinstance(value, dict): + return "{" + (", ".join(f"{k}: {unrepr(v)}" for k, v in value.items())) + "}" + if isinstance(value, list): + return "[" + (", ".join(f"{unrepr(v)}" for v in value)) + "]" + return value