From a197efb888d2acce729af96e805b989317f99014 Mon Sep 17 00:00:00 2001
From: Klaas van Schelven <klaas@vanschelven.com>
Date: Fri, 24 Nov 2023 20:20:43 +0100
Subject: [PATCH] Implement (and document) 'unrepr'

---
 compat/tests.py | 18 ++++++++++++++++++
 compat/vars.py  | 27 +++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 compat/vars.py

diff --git a/compat/tests.py b/compat/tests.py
index 7bb4128..155cc32 100644
--- a/compat/tests.py
+++ b/compat/tests.py
@@ -1,3 +1,4 @@
+import json
 from unittest import TestCase
 import datetime
 from django.test import override_settings
@@ -5,6 +6,7 @@ from django.test import override_settings
 from .dsn import build_dsn, get_store_url, get_envelope_url, get_header_value
 from .auth import parse_auth_header_value
 from .timestamp import parse_timestamp
+from .vars import unrepr
 
 
 class DsnTestCase(TestCase):
@@ -82,3 +84,19 @@ class TimestampTestCase(TestCase):
         self.assertEquals(
             datetime.datetime(2022, 9, 1, 9, 45, 0, tzinfo=datetime.timezone.utc),
             parse_timestamp("2022-09-01T09:45:00.000Z"))
+
+
+class VarsTestCase(TestCase):
+    def test_dicts(self):
+        d = json.loads('''{"baz":"1","foo":"'bar'","snu":"None","recurse":{"foo": "'bar'"}}''')
+
+        self.assertEquals(
+            '''{baz: 1, foo: 'bar', snu: None, recurse: {foo: 'bar'}}''',
+            unrepr(d))
+
+    def test_lists(self):
+        d = json.loads('''["'bar'","1","None",["'bar'","1","None"]]''')
+
+        self.assertEquals(
+            '''['bar', 1, None, ['bar', 1, None]]''',
+            unrepr(d))
diff --git a/compat/vars.py b/compat/vars.py
new file mode 100644
index 0000000..8f9b5cd
--- /dev/null
+++ b/compat/vars.py
@@ -0,0 +1,27 @@
+def unrepr(value):
+    """The Sentry Client (at least the Python one) makes particular choices when serializing the data as JSON. In
+    general, not everything can be serialized, so they call repr(). However, they also call repr when this is not
+    strictly necessary, with the note "For example, it's useful to see the difference between a unicode-string and a
+    bytestring when viewing a stacktrace." (see `_should_repr_strings`)
+
+    When receiving such data, especially when nested inside e.g. a dict or list, we must take care to not render both
+    both the quote for "string data in a json dict" and the quote for "repr has been called on a string", like so:
+
+    {"foo": "'bar'", ...}  <= WRONG
+
+    This would put potentially put human debuggers on the path of trying to figure out where the spurious quotes would
+    come from in the application that's being debugged.
+
+    The following code at least tackles that particular problem.
+
+    Notes on compat (as of late 2023):
+
+    * GlitchTip has this wrong; sentry suffered from this in the past: https://github.com/getsentry/sentry/issues/15912
+    * Sentry (and we) renders the _keys_ in dicts wrong, because for strings repr() isn't called client side. However,
+      "naked" (non-string) symbols cannot occur in Python dicts, so this can never cause confusion as mentioned above.
+    """
+    if isinstance(value, dict):
+        return "{" + (", ".join(f"{k}: {unrepr(v)}" for k, v in value.items())) + "}"
+    if isinstance(value, list):
+        return "[" + (", ".join(f"{unrepr(v)}" for v in value)) + "]"
+    return value