api_catch_all: header-based

Rather than try-and-recover, just look at the headers and, based on those, show either the body or POST/FILES. This avoids hard-to-reason-about situations where one of those won't work because the other has already been accessed; combined with the need to reason about maximum size usage, the explicit solution is simply easier to reason about. Further: * make api_catch_all one of the content_encoding-ready views. * implement a max length for the ingest API view.
2026-03-10 08:01:17 +00:00 · 2025-11-11 15:25:51 +01:00
parent 937df4cbb8
commit ab065a6329
4 changed files with 37 additions and 16 deletions
--- a/bugsink/middleware.py
+++ b/bugsink/middleware.py
@@ -32,6 +32,8 @@ class ContentEncodingCheckMiddleware:
        "ingest-store",
        "ingest-envelope",
        "ingest-minidump",
+
+        "api_catch_all",
    ]

    def __init__(self, get_response):
--- a/bugsink/streams.py
+++ b/bugsink/streams.py
@@ -165,7 +165,7 @@ def content_encoding_reader(request):
    return request


-def handle_request_content_encoding(request):
+def handle_request_content_encoding(request, max_length):
    """Turns a request w/ Content-Encoding into an unpacked equivalent; for further "regular" (POST, FILES) handling
    by Django.
    """
@@ -173,7 +173,7 @@ def handle_request_content_encoding(request):
    encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower()
    if encoding in ["gzip", "deflate", "br"]:
        assert_(not request._read_started)
-        request._stream = content_encoding_reader(request)
+        request._stream = MaxDataReader(max_length, content_encoding_reader(request))

        request.META["CONTENT_LENGTH"] = str(pow(2, 32) - 1)  # large enough (we can't predict the decompressed value)
        request.META.pop("HTTP_CONTENT_ENCODING")  # the resulting request is no longer encoded
--- a/files/views.py
+++ b/files/views.py
@@ -13,6 +13,7 @@ from sentry.assemble import ChunkFileState

 from bugsink.app_settings import get_settings
 from bugsink.transaction import durable_atomic, immediate_atomic
+from bugsink.streams import handle_request_content_encoding
 from bsmain.models import AuthToken

 from .models import Chunk, File, FileMetadata
@@ -292,6 +293,8 @@ def api_catch_all(request, subpath):
    # the existance of this view (and the associated URL pattern) has the effect of `APPEND_SLASH=False` for our API
    # endpoints, which is a good thing: for API enpoints you generally don't want this kind of magic (explicit breakage
    # is desirable for APIs, and redirects don't even work for POST/PUT data)
+    MAX_API_CATCH_ALL_SIZE = 1_000_000  # security and usability meet at this value (or below)
+    handle_request_content_encoding(request, MAX_API_CATCH_ALL_SIZE)

    if not get_settings().API_LOG_UNIMPLEMENTED_CALLS:
        raise Http404("Unimplemented API endpoint: /api/" + subpath)
@@ -302,27 +305,44 @@ def api_catch_all(request, subpath):
        f"  Method: {request.method}",
    ]

+    interesting_meta_keys = ["CONTENT_TYPE", "CONTENT_LENGTH", "HTTP_TRANSFER_ENCODING"]
+    interesting_headers = {
+        k: request.META[k] for k in interesting_meta_keys if k in request.META
+    }
+
+    if interesting_headers:
+        lines.append("  Headers:")
+        for k, v in interesting_headers.items():
+            lines.append(f"    {k}: {v}")
+
    if request.GET:
        lines.append(f"  GET:    {request.GET.dict()}")

-    body = request.body  # note: must be above request.POST access to avoid "You cannot access body after reading ..."
-    if request.POST:
-        lines.append(f"  POST:   {request.POST.dict()}")
+    content_type = request.META.get("CONTENT_TYPE", "")
+    if content_type == "application/x-www-form-urlencoded" or content_type.startswith("multipart/form-data"):
+        if request.POST:
+            lines.append(f"  POST:   {request.POST.dict()}")
+        if request.FILES:
+            lines.append(f"  FILES:  {[f.name for f in request.FILES.values()]}")

-    if body:
-        try:
-            decoded = body.decode("utf-8", errors="replace").strip()
-            lines.append("  Body:")
-            lines.append(f"    {decoded[:500]}")
+    else:
+        body = request.read(MAX_API_CATCH_ALL_SIZE)
+        decoded = body.decode("utf-8", errors="replace").strip()
+
+        if content_type == "application/json":
+            shown_pretty = False
            try:
                parsed = json.loads(decoded)
-                pretty = json.dumps(parsed, indent=2)[:10_000]
+                pretty = json.dumps(parsed, indent=2)
                lines.append("  JSON body:")
                lines.extend(f"    {line}" for line in pretty.splitlines())
+                shown_pretty = True
            except json.JSONDecodeError:
                pass
-        except Exception as e:
-            lines.append(f"  Body: <decode error: {e}>")
+
+        if not shown_pretty:
+            lines.append("  Body:")
+            lines.append(f"    {body}")

    logger.info("\n".join(lines))
    raise Http404("Unimplemented API endpoint: /api/" + subpath)
--- a/ingest/views.py
+++ b/ingest/views.py
@@ -734,9 +734,8 @@ class MinidumpAPIView(BaseIngestAPIView):
    def post(self, request, project_pk=None):
        # not reusing the CORS stuff here; minidump-from-browser doesn't make sense.

-        # TODO: actually implement max (we just use Django defaults now, which will switch to write-to-tmp after 2.5M
-        # for the file, but this can still swamp your CPU/tmp dir.
-        handle_request_content_encoding(request)
+        # TODO: actually pick/configure max
+        handle_request_content_encoding(request, 50 * 1024 * 1024)

        ingested_at = datetime.now(timezone.utc)
        project = self.get_project_for_request(project_pk, request)