api_catch_all: header-based

Rather than try-and-recover, just look at the headers and show body/POST etc.
This avoids hard-to-reason-about situations where either of those won't work
because the other has already been executed; in combination with reasoning
about max size usage, the explicit solution is simply easier to reason about.

further:

* makes api_catch_all one of the content_encoding-ready views.
* implements a max length for the ingest api view
This commit is contained in:
Klaas van Schelven
2025-11-11 15:25:51 +01:00
parent 937df4cbb8
commit ab065a6329
4 changed files with 37 additions and 16 deletions

View File

@@ -32,6 +32,8 @@ class ContentEncodingCheckMiddleware:
"ingest-store",
"ingest-envelope",
"ingest-minidump",
"api_catch_all",
]
def __init__(self, get_response):

View File

@@ -165,7 +165,7 @@ def content_encoding_reader(request):
return request
def handle_request_content_encoding(request):
def handle_request_content_encoding(request, max_length):
"""Turns a request w/ Content-Encoding into an unpacked equivalent; for further "regular" (POST, FILES) handling
by Django.
"""
@@ -173,7 +173,7 @@ def handle_request_content_encoding(request):
encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower()
if encoding in ["gzip", "deflate", "br"]:
assert_(not request._read_started)
request._stream = content_encoding_reader(request)
request._stream = MaxDataReader(max_length, content_encoding_reader(request))
request.META["CONTENT_LENGTH"] = str(pow(2, 32) - 1) # large enough (we can't predict the decompressed value)
request.META.pop("HTTP_CONTENT_ENCODING") # the resulting request is no longer encoded

View File

@@ -13,6 +13,7 @@ from sentry.assemble import ChunkFileState
from bugsink.app_settings import get_settings
from bugsink.transaction import durable_atomic, immediate_atomic
from bugsink.streams import handle_request_content_encoding
from bsmain.models import AuthToken
from .models import Chunk, File, FileMetadata
@@ -292,6 +293,8 @@ def api_catch_all(request, subpath):
# the existence of this view (and the associated URL pattern) has the effect of `APPEND_SLASH=False` for our API
# endpoints, which is a good thing: for API endpoints you generally don't want this kind of magic (explicit breakage
# is desirable for APIs, and redirects don't even work for POST/PUT data)
MAX_API_CATCH_ALL_SIZE = 1_000_000 # security and usability meet at this value (or below)
handle_request_content_encoding(request, MAX_API_CATCH_ALL_SIZE)
if not get_settings().API_LOG_UNIMPLEMENTED_CALLS:
raise Http404("Unimplemented API endpoint: /api/" + subpath)
@@ -302,27 +305,44 @@ def api_catch_all(request, subpath):
f" Method: {request.method}",
]
interesting_meta_keys = ["CONTENT_TYPE", "CONTENT_LENGTH", "HTTP_TRANSFER_ENCODING"]
interesting_headers = {
k: request.META[k] for k in interesting_meta_keys if k in request.META
}
if interesting_headers:
lines.append(" Headers:")
for k, v in interesting_headers.items():
lines.append(f" {k}: {v}")
if request.GET:
lines.append(f" GET: {request.GET.dict()}")
body = request.body # note: must be above request.POST access to avoid "You cannot access body after reading ..."
if request.POST:
lines.append(f" POST: {request.POST.dict()}")
content_type = request.META.get("CONTENT_TYPE", "")
if content_type == "application/x-www-form-urlencoded" or content_type.startswith("multipart/form-data"):
if request.POST:
lines.append(f" POST: {request.POST.dict()}")
if request.FILES:
lines.append(f" FILES: {[f.name for f in request.FILES.values()]}")
if body:
try:
decoded = body.decode("utf-8", errors="replace").strip()
lines.append(" Body:")
lines.append(f" {decoded[:500]}")
else:
body = request.read(MAX_API_CATCH_ALL_SIZE)
decoded = body.decode("utf-8", errors="replace").strip()
if content_type == "application/json":
shown_pretty = False
try:
parsed = json.loads(decoded)
pretty = json.dumps(parsed, indent=2)[:10_000]
pretty = json.dumps(parsed, indent=2)
lines.append(" JSON body:")
lines.extend(f" {line}" for line in pretty.splitlines())
shown_pretty = True
except json.JSONDecodeError:
pass
except Exception as e:
lines.append(f" Body: <decode error: {e}>")
if not shown_pretty:
lines.append(" Body:")
lines.append(f" {body}")
logger.info("\n".join(lines))
raise Http404("Unimplemented API endpoint: /api/" + subpath)

View File

@@ -734,9 +734,8 @@ class MinidumpAPIView(BaseIngestAPIView):
def post(self, request, project_pk=None):
# not reusing the CORS stuff here; minidump-from-browser doesn't make sense.
# TODO: actually implement max (we just use Django defaults now, which will switch to write-to-tmp after 2.5M
# for the file, but this can still swamp your CPU/tmp dir.
handle_request_content_encoding(request)
# TODO: actually pick/configure max
handle_request_content_encoding(request, 50 * 1024 * 1024)
ingested_at = datetime.now(timezone.utc)
project = self.get_project_for_request(project_pk, request)