From f2a78fed9d2414a4c028ba26249c22848c510f85 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Wed, 18 Sep 2024 11:36:47 +0200 Subject: [PATCH] Use the envelope's event_id when using the envelope endpoint * As per the spec 'takes precedence' * Also fixes the reported bug on Laravel, which apparently doesn't send event_id as part of the event payload. * Fixes the envelope tests (they were doing nothing when I moved the data samples around recently) * Adds a 'no event_id in data, but yes in envelope' test to that test. * Adds handling to send_json such that we can send envelopes when the event_id is missing from the event data. --- events/models.py | 5 +- ingest/management/commands/send_json.py | 4 +- ingest/tests.py | 65 ++++++++++++++++--------- ingest/views.py | 16 +++--- issues/tests.py | 3 ++ 5 files changed, 60 insertions(+), 33 deletions(-) diff --git a/events/models.py b/events/models.py index 3297290..46625a1 100644 --- a/events/models.py +++ b/events/models.py @@ -61,8 +61,11 @@ class Event(models.Model): # not actually expected to be null, but we want to be able to delete issues without deleting events (cleanup later) issue = models.ForeignKey("issues.Issue", blank=False, null=True, on_delete=models.SET_NULL) + # The docs say: # > Required. Hexadecimal string representing a uuid4 value. The length is exactly 32 characters. Dashes are not # > allowed. Has to be lowercase. + # But event.schema.json has this anyOf [..] null and only speaks of "it is strongly recommended to generate that + # uuid4 clientside". In any case, we just rely on the envelope's event_id (required per the envelope spec). 
# Not a primary key: events may be duplicated across projects event_id = models.UUIDField(primary_key=False, null=False, editable=False, help_text="As per the sent data") project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' @@ -191,7 +194,7 @@ class Event(models.Model): try: event = cls.objects.create( - event_id=parsed_data["event_id"], + event_id=event_metadata["event_id"], # the metadata is the envelope's event_id, which takes precedence project_id=event_metadata["project_id"], issue=issue, ingested_at=event_metadata["ingested_at"], diff --git a/ingest/management/commands/send_json.py b/ingest/management/commands/send_json.py index b23a817..5e69b03 100644 --- a/ingest/management/commands/send_json.py +++ b/ingest/management/commands/send_json.py @@ -129,7 +129,9 @@ class Command(BaseCommand): data_bytes = json.dumps(data).encode("utf-8") if use_envelope: # the smallest possible envelope: - data_bytes = (b'{"event_id": "%s"}\n{"type": "event"}\n' % (data["event_id"]).encode("utf-8") + + event_id = data.get("event_id", uuid.uuid4().hex) + + data_bytes = (b'{"event_id": "%s"}\n{"type": "event"}\n' % event_id.encode("utf-8") + data_bytes) if compress in ["gzip", "deflate"]: diff --git a/ingest/tests.py b/ingest/tests.py index 2d7f927..2f82211 100644 --- a/ingest/tests.py +++ b/ingest/tests.py @@ -1,3 +1,4 @@ +import os from glob import glob import json import io @@ -35,7 +36,12 @@ def _digest_params(event_data, project, request, now=None): # adapter to quickly reuse existing tests on refactored code. 
let's see where the code ends up before spending # considerable time on rewriting the tests return { - "event_metadata": {"project_id": project.id, "ingested_at": format_timestamp(now), "debug_info": ""}, + "event_metadata": { + "event_id": event_data["event_id"], + "project_id": project.id, + "ingested_at": format_timestamp(now), + "debug_info": "", + }, "event_data": event_data, "project": project, "digested_at": now, @@ -275,33 +281,46 @@ class IngestViewTestCase(TransactionTestCase): command.stdout = io.StringIO() command.stderr = io.StringIO() - for filename in glob("./ingest/samples/*/*.json")[:1]: # one is enough here - with open(filename) as f: - data = json.loads(f.read()) + SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples") - data["event_id"] = uuid.uuid4().hex + event_samples = glob(SAMPLES_DIR + "/*/*.json") - if "timestamp" not in data: - # as per send_json command ("weirdly enough a large numer of sentry test data don't actually...") - data["timestamp"] = time.time() + if len(event_samples) == 0: + raise Exception(f"No event samples found in {SAMPLES_DIR}; I insist on having some to test with.") - if not command.is_valid(data, filename): - continue + for with_event_id in [True, False]: + for filename in event_samples[:1]: # one is enough here + with open(filename) as f: + data = json.loads(f.read()) - data_bytes = json.dumps(data).encode("utf-8") - data_bytes = (b'{"event_id": "%s"}\n{"type": "event"}\n' % (data["event_id"]).encode("utf-8") + data_bytes) + data["event_id"] = uuid.uuid4().hex # for good measure we reset this to avoid duplicates. 
- response = self.client.post( - f"/api/{ project.id }/envelope/", - content_type="application/json", - headers={ - "X-Sentry-Auth": sentry_auth_header, - "X-BugSink-DebugInfo": filename, - }, - data=data_bytes, - ) - self.assertEqual( - 200, response.status_code, response.content if response.status_code != 302 else response.url) + if "timestamp" not in data: + # as per send_json command ("weirdly enough a large numer of sentry test data don't actually...") + data["timestamp"] = time.time() + + if not command.is_valid(data, filename): + continue + + event_id = data["event_id"] + if not with_event_id: + del data["event_id"] + + data_bytes = json.dumps(data).encode("utf-8") + data_bytes = ( + b'{"event_id": "%s"}\n{"type": "event"}\n' % event_id.encode("utf-8") + data_bytes) + + response = self.client.post( + f"/api/{ project.id }/envelope/", + content_type="application/json", + headers={ + "X-Sentry-Auth": sentry_auth_header, + "X-BugSink-DebugInfo": filename, + }, + data=data_bytes, + ) + self.assertEqual( + 200, response.status_code, response.content if response.status_code != 302 else response.url) def test_envelope_endpoint_digest_non_immediate(self): with override_settings(DIGEST_IMMEDIATELY=False): diff --git a/ingest/views.py b/ingest/views.py index 4335a47..8412e79 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -89,7 +89,7 @@ class BaseIngestAPIView(View): @classmethod def process_event(cls, ingested_at, event_id, event_data_stream, project, request): - event_metadata = cls.get_event_meta(ingested_at, request, project) + event_metadata = cls.get_event_meta(event_id, ingested_at, request, project) if get_settings().DIGEST_IMMEDIATELY: # in this case the stream will be an BytesIO object, so we can actually call .get_value() on it. 
@@ -106,11 +106,12 @@ class BaseIngestAPIView(View): digest.delay(event_id, event_metadata) @classmethod - def get_event_meta(cls, ingested_at, request, project): + def get_event_meta(cls, event_id, ingested_at, request, project): # Meta means: not part of the event data. Basically: information that is available at the time of ingestion, and # that must be passed to digest() in a serializable form. debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "") return { + "event_id": event_id, "project_id": project.id, "ingested_at": format_timestamp(ingested_at), "debug_info": debug_info, @@ -381,19 +382,18 @@ class IngestEventAPIView(BaseIngestAPIView): # keep up. # # In particular I'd like to just call process_event() here, but that takes both an event_id and an unparsed data - # stream, and we don't have an event_id here before parsing (and we don't want to parse twice). + # stream, and we don't have an event_id here before parsing (and we don't want to parse twice). similarly, + # event_metadata construction requires the event_id. # # Instead, we just copy/pasted the relevant parts of process_event() here, and take only one branch (the one - # that digests immediately). + # that digests immediately); i.e. we always digest immediately, independent of the setting. - event_metadata = self.get_event_meta(ingested_at, request, project) - - # if get_settings().DIGEST_IMMEDIATELY: this is the only branch we implemented here, i.e. we always digest - # immediately, independent of the setting. 
event_data = json.loads( MaxDataReader("MAX_EVENT_SIZE", content_encoding_reader( MaxDataReader("MAX_EVENT_COMPRESSED_SIZE", request))).read()) + event_metadata = self.get_event_meta(event_data["event_id"], ingested_at, request, project) + self.digest_event(event_metadata, event_data, project=project) return HttpResponse() diff --git a/issues/tests.py b/issues/tests.py index eddf9c9..63c0ca5 100644 --- a/issues/tests.py +++ b/issues/tests.py @@ -476,6 +476,9 @@ class IntegrationTest(TransactionTestCase): with open(filename) as f: data = json.loads(f.read()) + # we do this because our samples do not have unique event_ids; additionally this sets the event_id if it's + # not set in the sample (it sometimes isn't); (the fact that we can deal with that case is separately + # tested) data["event_id"] = uuid.uuid4().hex if not command.is_valid(data, filename):