mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-10 08:01:17 +00:00
Envelope parsing: validate headers as per the docs
Here, "headers" means both envelope headers and item headers. This provides more robustness and a layer of defense-in-depth. Only those headers that we might rely on in the near future (event-based) are included. See #173
This commit is contained in:
@@ -58,3 +58,18 @@ def get_header_value(sentry_dsn):
|
|||||||
def get_sentry_key(sentry_dsn):
    """Return the username component of `sentry_dsn` (None when the DSN has no username)."""
    return urllib.parse.urlsplit(sentry_dsn).username
|
||||||
|
|
||||||
|
|
||||||
|
def validate_sentry_dsn(sentry_dsn):
    """Check that `sentry_dsn` looks like ``<scheme>://<public_key>@<hostname>/<project_id>``.

    Returns:
        True when the DSN passes all checks.

    Raises:
        ValueError: if the value is not a string, if scheme, hostname or public key is missing, if the scheme is
            not http or https, or if the path does not end in a non-empty segment after its last "/".
    """
    # urlsplit() fails with AttributeError/TypeError (not ValueError) on non-string input such as an int or a
    # list; callers of this function only expect ValueError, so reject such input explicitly.
    if not isinstance(sentry_dsn, str):
        raise ValueError("Invalid Sentry DSN format. It must contain a scheme, hostname, and public_key.")

    parts = urllib.parse.urlsplit(sentry_dsn)

    if not parts.scheme or not parts.hostname or not parts.username:
        raise ValueError("Invalid Sentry DSN format. It must contain a scheme, hostname, and public_key.")

    if parts.scheme not in ("http", "https"):
        raise ValueError("Invalid Sentry DSN scheme. It must be 'http' or 'https'.")

    # The project id is whatever follows the last "/" of the path; it must be present and non-empty.
    if (not parts.path) or ("/" not in parts.path) or (not parts.path.rsplit("/", 1)[1]):
        raise ValueError("Invalid DSN: path must include '/<project_id>'")

    return True
|
||||||
|
|||||||
2
ingest/exceptions.py
Normal file
2
ingest/exceptions.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
class ParseError(Exception):
    """Raised when an envelope, or one of its headers, is rejected during parsing or validation."""
|
||||||
101
ingest/header_validators.py
Normal file
101
ingest/header_validators.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from compat.dsn import validate_sentry_dsn
|
||||||
|
from .exceptions import ParseError
|
||||||
|
|
||||||
|
|
||||||
|
# Based on the documentation here:
|
||||||
|
#
|
||||||
|
# https://develop.sentry.dev/sdk/data-model/envelopes/
|
||||||
|
# https://develop.sentry.dev/sdk/data-model/envelope-items/
|
||||||
|
#
|
||||||
|
# From the docs, we deduced validation for
|
||||||
|
#
|
||||||
|
# * envelope headers -> all of them
|
||||||
|
# * item headers -> only those that are relevant for "event" items
|
||||||
|
|
||||||
|
|
||||||
|
_RFC3339_Z = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z$")
|
||||||
|
_UUID32 = re.compile(r"^[0-9a-fA-F]{32}$")
|
||||||
|
_UUID36 = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
|
||||||
|
|
||||||
|
|
||||||
|
def validate_dsn(v):
    """Validate the envelope header "dsn".

    Raises:
        ParseError: if `v` is not a string, or if validate_sentry_dsn rejects it with a ValueError.
    """
    # The header value is not guaranteed to be a string; reject other types here so that they surface as a
    # ParseError rather than as whatever validate_sentry_dsn happens to raise for them.
    if not isinstance(v, str):
        raise ParseError(f'Envelope header "dsn" invalid: must be a string, got: {v}')

    try:
        validate_sentry_dsn(v)
    except ValueError as e:
        # Chain the original error so the underlying reason stays visible in tracebacks.
        raise ParseError(f'Envelope header "dsn" invalid: {e}') from e
|
||||||
|
|
||||||
|
|
||||||
|
def validate_sdk(v):
    """Require the envelope header "sdk" to be a dict; raise ParseError for anything else."""
    if isinstance(v, dict):
        return

    raise ParseError('Envelope header "sdk" must be an object')
|
||||||
|
|
||||||
|
|
||||||
|
def validate_sent_at(v):
    """Validate the envelope header "sent_at".

    Accepts strings of the form ``YYYY-MM-DDTHH:MM:SS[.fraction]Z`` that denote an existing date and time.

    Raises:
        ParseError: if `v` is not a string, does not have that form, or has out-of-range fields (e.g. month 13).
    """
    # isascii() + fullmatch(): neither non-ASCII digits nor a trailing newline can slip through, regardless of
    # how the pattern itself is anchored.
    if not isinstance(v, str) or not v.isascii() or not _RFC3339_Z.fullmatch(v):
        raise ParseError(f'Envelope header "sent_at" must be an RFC3339 UTC timestamp ending in Z: {v}')

    # The pattern has already pinned the shape (the optional fraction is digits only), so only the ranges of the
    # calendar/clock fields remain to be checked. Drop the "Z" and the fraction and let strptime check those;
    # its ValueError must not escape, because callers expect validation failures to be ParseError.
    whole_seconds = v[:-1].partition(".")[0]
    try:
        datetime.strptime(whole_seconds, "%Y-%m-%dT%H:%M:%S")
    except ValueError as e:
        raise ParseError(f'Envelope header "sent_at" is not a valid date and time: {v}') from e
|
||||||
|
|
||||||
|
|
||||||
|
def validate_event_id(v):
    """Validate the envelope header "event_id".

    Accepts a UUID written as 32 hex digits, or as 36 characters in the hyphenated 8-4-4-4-12 layout.

    Raises:
        ParseError: if `v` is not a string or matches neither layout.
    """
    # fullmatch() rather than match(): a "$"-anchored pattern would otherwise also accept the value followed by
    # a trailing newline.
    if not isinstance(v, str) or not (_UUID32.fullmatch(v) or _UUID36.fullmatch(v)):
        raise ParseError(f'Envelope header "event_id" must be a valid UUID string: {v}')
|
||||||
|
|
||||||
|
|
||||||
|
# Envelope header name -> validator. Headers that are not listed here are not validated.
envelope_validators = {
    "dsn": validate_dsn,
    "sdk": validate_sdk,
    "sent_at": validate_sent_at,
    "event_id": validate_event_id,
}


def validate_envelope_headers(headers):
    """Run the registered validator for every envelope header in `headers` that has one.

    Headers are visited in the order in which `headers` yields them; headers without a registered validator are
    ignored. Whatever a validator raises propagates to the caller.
    """
    for name, value in headers.items():
        check = envelope_validators.get(name)
        if check is not None:
            check(value)
|
||||||
|
|
||||||
|
|
||||||
|
# Item types known to this module.
ALLOWED_TYPES = {
    "event",
    "transaction",
    "attachment",
    "session",
    "sessions",
    "feedback",
    "user_report",
    "client_report",
    "replay_event",
    "replay_recording",
    "profile",
    "profile_chunk",
    "check_in",
    "log",
    "otel_log",
}
|
||||||
|
|
||||||
|
|
||||||
|
def validate_type(v):
    """Accept any item header "type" value; this validator intentionally checks nothing.

    Two stricter alternatives exist for values that are not in ALLOWED_TYPES:

    1. log a warning ('Item header "type" is not recognized') and carry on. Sentry's protocol might add new
       item types in the future; we don't want to raise an error for those.
    2. raise a ParseError ('Item header "type" must be one of ..., got: ...').
    """
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_length(v):
    """Validate the item header "length".

    Raises:
        ParseError: if `v` is not an integer, is a bool, or is negative.
    """
    # bool is a subclass of int, so True/False would pass a plain isinstance(v, int) check and then be treated
    # as the lengths 1/0; reject them explicitly.
    if isinstance(v, bool) or not isinstance(v, int) or v < 0:
        raise ParseError(f'Item header "length" must be a non-negative integer, got: {v}')
|
||||||
|
|
||||||
|
|
||||||
|
# Item header name -> validator. Headers that are not listed here are not validated.
item_validators = {
    "type": validate_type,
    "length": _validate_length,
}


def validate_item_headers(headers):
    """Run the registered validator for every item header in `headers`, but only for items of type "event".

    For any other item type (or when "type" is absent) nothing is validated. Whatever a validator raises
    propagates to the caller.
    """
    # Only validate item headers for events. Reason: it's the only type of item that we actually process; rather
    # than trying to keep the validation in sync with a part of the protocol that we don't use, we skip it.
    if headers.get("type") == "event":
        for name, value in headers.items():
            check = item_validators.get(name)
            if check is not None:
                check(value)
|
||||||
@@ -3,9 +3,8 @@ import io
|
|||||||
|
|
||||||
from bugsink.streams import MaxDataWriter
|
from bugsink.streams import MaxDataWriter
|
||||||
|
|
||||||
|
from .exceptions import ParseError
|
||||||
class ParseError(Exception):
|
from .header_validators import validate_envelope_headers, validate_item_headers
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class NewlineFinder:
|
class NewlineFinder:
|
||||||
@@ -150,6 +149,7 @@ class StreamingEnvelopeParser:
|
|||||||
if self.envelope_headers is None:
|
if self.envelope_headers is None:
|
||||||
# see test_eof_after_envelope_headers for why we don't error on EOF-after-header here
|
# see test_eof_after_envelope_headers for why we don't error on EOF-after-header here
|
||||||
self.envelope_headers = self._parse_headers(empty_is_error=True, eof_after_header_is_error=False)
|
self.envelope_headers = self._parse_headers(empty_is_error=True, eof_after_header_is_error=False)
|
||||||
|
validate_envelope_headers(self.envelope_headers)
|
||||||
|
|
||||||
return self.envelope_headers
|
return self.envelope_headers
|
||||||
|
|
||||||
@@ -161,10 +161,13 @@ class StreamingEnvelopeParser:
|
|||||||
|
|
||||||
while not self.at_eof:
|
while not self.at_eof:
|
||||||
item_headers = self._parse_headers(empty_is_error=False, eof_after_header_is_error=True)
|
item_headers = self._parse_headers(empty_is_error=False, eof_after_header_is_error=True)
|
||||||
|
|
||||||
if item_headers is None:
|
if item_headers is None:
|
||||||
self.at_eof = True
|
self.at_eof = True
|
||||||
break
|
break
|
||||||
|
|
||||||
|
validate_item_headers(item_headers)
|
||||||
|
|
||||||
if "length" in item_headers:
|
if "length" in item_headers:
|
||||||
length = item_headers["length"]
|
length = item_headers["length"]
|
||||||
finder = LengthFinder(length, error_for_eof="EOF while reading item with explicitly specified length")
|
finder = LengthFinder(length, error_for_eof="EOF while reading item with explicitly specified length")
|
||||||
|
|||||||
Reference in New Issue
Block a user