Merge pull request #80 from bugsink/sourcemaps-spike

Sourcemaps: first version
This commit is contained in:
Klaas van Schelven
2025-04-14 11:08:19 +02:00
committed by GitHub
30 changed files with 966 additions and 7 deletions

View File

@@ -0,0 +1,10 @@
from django.contrib import admin
from .models import AuthToken
@admin.register(AuthToken)
class AuthTokenAdmin(admin.ModelAdmin):
    """Admin for the global (Bugsink-wide) AuthTokens: newest first, filterable by creation time."""
    list_display = ("token", "created_at")
    list_filter = ("created_at",)
    ordering = ("-created_at",)

View File

@@ -0,0 +1,11 @@
from django.core.management.base import BaseCommand
from bsmain.models import AuthToken
class Command(BaseCommand):
    # fixed: the original literal ended in a stray, implicitly-concatenated empty
    # string ('..."""' parsed as "..." + ""); harmless at runtime but confusing.
    help = "Creates an auth_token and prints it on screen"

    def handle(self, *args, **options):
        """Create a fresh AuthToken and write its token value to stdout."""
        auth_token = AuthToken.objects.create()
        # self.stdout.write (rather than print) is the management-command convention;
        # it honors output redirection in tests and the --no-color machinery.
        self.stdout.write(auth_token.token)

View File

@@ -0,0 +1,44 @@
# Generated by Django 4.2.19 on 2025-04-11 11:33
import bsmain.models
import django.core.validators
from django.db import migrations, models
class Migration(migrations.Migration):
    # Initial schema for bsmain: the AuthToken table — auto PK, unique 40-char hex
    # token (defaulting to bsmain.models.generate_token) and a creation timestamp.

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="AuthToken",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "token",
                    models.CharField(
                        default=bsmain.models.generate_token,
                        max_length=40,
                        unique=True,
                        validators=[
                            django.core.validators.RegexValidator(
                                message="Token must be a 40-character hexadecimal string.",
                                regex="^[a-f0-9]{40}$",
                            )
                        ],
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
            ],
        ),
    ]

View File

View File

@@ -0,0 +1,20 @@
import secrets
from django.db import models
from django.core.validators import RegexValidator
def generate_token():
    """Return a fresh 40-character lowercase-hex token (20 random bytes)."""
    # token_hex(n) produces 2*n hex characters, so 20 bytes -> 40 chars.
    return secrets.token_hex(20)
class AuthToken(models.Model):
    """Global (Bugsink-wide) token for authentication."""

    # 40 hex chars == 20 random bytes (see generate_token); unique implies an index,
    # which the auth layer also uses for lookups.
    token = models.CharField(max_length=40, unique=True, default=generate_token, validators=[
        RegexValidator(regex=r'^[a-f0-9]{40}$', message='Token must be a 40-character hexadecimal string.'),
    ])
    # set once by auto_now_add; editable=False keeps it out of forms/admin edits
    created_at = models.DateTimeField(auto_now_add=True, editable=False)

    def __str__(self):
        return f"AuthToken(token={self.token})"

View File

@@ -0,0 +1,80 @@
{% extends "base.html" %}
{% load static %}

{% block title %}Auth Tokens · {{ site_title }}{% endblock %}

{% block content %}
<div class="flex items-center justify-center">
    <div class="m-4 max-w-4xl flex-auto">
        {% if messages %}
        <ul class="mb-4">
            {% for message in messages %}
            {# if we introduce different levels we can use {% message.level == DEFAULT_MESSAGE_LEVELS.SUCCESS %} #}
            <li class="bg-cyan-50 border-2 border-cyan-800 p-4 rounded-lg">{{ message }}</li>
            {% endfor %}
        </ul>
        {% endif %}

        <div class="flex">
            <h1 class="text-4xl mt-4 font-bold">Auth Tokens</h1>
            <div class="ml-auto mt-6">
                <form action="{% url "auth_token_create" %}" method="post">
                    {% csrf_token %} {# margins display slightly different from the <a href version that I have for e.g. project members, but I don't care _that_ much #}
                    <button class="font-bold text-slate-800 border-slate-500 pl-4 pr-4 pb-2 pt-2 ml-1 border-2 bg-cyan-200 hover:bg-cyan-400 active:ring rounded-md">Add Token</button>
                </form>
            </div>
        </div>

        <div>
            <form action="." method="post">
                {% csrf_token %}
                <table class="w-full mt-8">
                    {# fixed: <thead> was opened inside <tbody> and never closed; a thead must precede the tbody #}
                    <thead>
                        <tr class="bg-slate-200">
                            <th class="w-full p-4 text-left text-xl" colspan="2">Auth Tokens</th>
                        </tr>
                    </thead>
                    <tbody>
                        {% for auth_token in auth_tokens %}
                        <tr class="bg-white border-slate-200 border-b-2">
                            <td class="w-full p-4">
                                <div>
                                    {{ auth_token.token }}
                                </div>
                            </td>
                            <td class="p-4">
                                <div class="flex justify-end">
                                    <button name="action" value="delete:{{ auth_token.id }}" class="font-bold text-slate-500 border-slate-300 pl-4 pr-4 pb-2 pt-2 ml-2 border-2 hover:bg-slate-200 active:ring rounded-md">Delete</button>
                                </div>
                            </td>
                        </tr>
                        {% empty %}
                        <tr class="bg-white border-slate-200 border-b-2">
                            <td class="w-full p-4">
                                <div>
                                    No Auth Tokens.
                                </div>
                            </td>
                            <td class="p-4">
                            </td>
                        </tr>
                        {% endfor %}
                    </tbody>
                </table>
            </form>
        </div>
    </div>
</div> {# fixed: the outer flex <div> was previously left unclosed #}
{% endblock %}

9
bsmain/urls.py Normal file
View File

@@ -0,0 +1,9 @@
from django.urls import path
from .views import auth_token_list, auth_token_create
# Token management is superuser-only; enforcement happens inside the views themselves.
urlpatterns = [
    path('auth_tokens/', auth_token_list, name='auth_token_list'),
    path('auth_tokens/create/', auth_token_create, name='auth_token_create'),
]

View File

@@ -0,0 +1,39 @@
from django.shortcuts import render, redirect
from django.http import Http404
from django.contrib import messages
from django.contrib.auth.decorators import user_passes_test
from bugsink.decorators import atomic_for_request_method
from .models import AuthToken
@atomic_for_request_method
@user_passes_test(lambda u: u.is_superuser)
def auth_token_list(request):
    """List all AuthTokens; a POST with action="delete:<pk>" deletes a single token."""
    auth_tokens = AuthToken.objects.all()

    if request.method == 'POST':
        # actions are encoded as "verb:pk" in the submit button's value
        # (was a Dutch placeholder comment: "DIT KOMT ZO WEL" ~ "this will come shortly")
        full_action_str = request.POST.get('action', '')

        # guard: a POST without (or with a malformed) "action" previously crashed on
        # None.split / unpacking; now it simply redirects back without doing anything.
        if ":" in full_action_str:
            action, pk = full_action_str.split(":", 1)
            if action == "delete":
                AuthToken.objects.get(pk=pk).delete()
                messages.success(request, 'Token deleted')

        return redirect('auth_token_list')

    return render(request, 'bsmain/auth_token_list.html', {
        'auth_tokens': auth_tokens,
    })
@atomic_for_request_method
@user_passes_test(lambda u: u.is_superuser)
def auth_token_create(request):
    """Create a new AuthToken (POST only; superuser only) and go back to the list."""
    if request.method == 'POST':
        AuthToken.objects.create()
        return redirect("auth_token_list")
    # anything but POST is treated as a missing page
    raise Http404("Invalid request method")

View File

@@ -77,6 +77,7 @@ BUGSINK_APPS = [
'releases',
'ingest',
'issues',
'files',
'events',
'tags',
'alerts',

View File

@@ -3,6 +3,7 @@ from django.conf import settings
from django.contrib import admin
from django.urls import include, path
from django.contrib.auth import views as auth_views
from django.views.generic import RedirectView
from alerts.views import debug_email as debug_alerts_email
from users.views import debug_email as debug_users_email
@@ -10,6 +11,7 @@ from teams.views import debug_email as debug_teams_email
from bugsink.app_settings import get_settings
from users.views import signup, confirm_email, resend_confirmation, request_reset_password, reset_password, preferences
from ingest.views import download_envelope
from files.views import chunk_upload, artifact_bundle_assemble
from .views import home, trigger_error, favicon, settings_view, silence_email_system_warning
from .debug_views import csrf_debug
@@ -38,6 +40,13 @@ urlpatterns = [
# many user-related views are directly exposed above (/accounts/), the rest is here:
path("users/", include("users.urls")),
# these are sentry-cli endpoints for uploading; they're unrelated to e.g. the ingestion API.
# the /api/0/ is just a hard prefix (for the ingest API, that position indicates the project id, but here it's just
# a prefix)
path("api/0/organizations/<slug:organization_slug>/chunk-upload/", chunk_upload, name="chunk_upload"),
path("api/0/organizations/<slug:organization_slug>/artifactbundle/assemble/", artifact_bundle_assemble,
name="artifact_bundle_assemble"),
path('api/', include('ingest.urls')),
# not in /api/ because it's not part of the ingest API, but still part of the ingest app
@@ -47,6 +56,14 @@ urlpatterns = [
path('teams/', include('teams.urls')),
path('events/', include('events.urls')),
path('issues/', include('issues.urls')),
path('files/', include('files.urls')),
# this weird URL is what sentry-cli uses as part of their "login" flow. weird, because the word ':orgslug' shows up
# verbatim. In any case, we simply redirect to the auth token list, such that you can set one up
path('orgredirect/organizations/:orgslug/settings/auth-tokens/',
RedirectView.as_view(url='/bsmain/auth_tokens/', permanent=False)),
path('bsmain/', include('bsmain.urls')),
path('admin/', admin.site.urls),

View File

@@ -1,3 +1,15 @@
import json
import sourcemap
from issues.utils import get_values
from files.models import FileMetadata
# Dijkstra, Sourcemaps and Python lists start at 0, but editors and our UI show lines starting at 1.
FROM_DISPLAY = -1
TO_DISPLAY = 1
class IncompleteList(list):
"""A list that indicates how many items were trimmed from the list."""
def __init__(self, lst, cnt):
@@ -75,3 +87,59 @@ def annotate_var_with_meta(var, meta_var):
var[at(meta_k)] = annotate_var_with_meta(var[at(meta_k)], meta_v)
return var
def apply_sourcemaps(event_data):
    """Rewrite minified stacktrace frames in event_data (in place) using uploaded sourcemaps.

    Sourcemaps are located via debug_meta.images (matched on debug_id); frames whose
    sourcemap also embeds the original source get their lineno and context lines rewritten
    to the original-source equivalents. Frames without an uploaded sourcemap are untouched.
    """
    images = event_data.get("debug_meta", {}).get("images", [])
    if not images:
        return

    debug_id_for_filename = {
        image["code_file"]: image["debug_id"]
        for image in images
        # .get("type"): an image without a "type" key must be skipped, not raise KeyError
        if "debug_id" in image and "code_file" in image and image.get("type") == "sourcemap"
    }

    metadata_obj_lookup = {
        str(metadata_obj.debug_id): metadata_obj
        for metadata_obj in FileMetadata.objects.filter(
            debug_id__in=debug_id_for_filename.values(), file_type="source_map").select_related("file")
    }

    filenames_with_metas = [
        (filename, metadata_obj_lookup[debug_id])
        for (filename, debug_id) in debug_id_for_filename.items()
        if debug_id in metadata_obj_lookup  # if not: sourcemap not uploaded
    ]

    sourcemap_for_filename = {
        filename: sourcemap.loads(meta.file.data)
        for (filename, meta) in filenames_with_metas
    }

    source_for_filename = {}
    for filename, meta in filenames_with_metas:
        sm_data = json.loads(meta.file.data)
        if "sourcesContent" not in sm_data or len(sm_data["sourcesContent"]) != 1:
            # our assumption is: 1 sourcemap, 1 source. The fact that both "sources" (a list of filenames) and
            # "sourcesContent" are lists seems to indicate that this assumption does not generally hold. But it not
            # holding does not play well with the idea of debug_id, I think?
            continue

        source_for_filename[filename] = sm_data["sourcesContent"][0].splitlines()

    for exception in get_values(event_data.get("exception", {})):
        for frame in exception.get("stacktrace", {}).get("frames", []):
            # NOTE: try/except in the loop would allow us to selectively skip frames that we fail to process
            if frame.get("filename") in sourcemap_for_filename and frame["filename"] in source_for_filename:
                sm = sourcemap_for_filename[frame["filename"]]
                lines = source_for_filename[frame["filename"]]

                token = sm.lookup(frame["lineno"] + FROM_DISPLAY, frame["colno"])

                frame["pre_context"] = lines[max(0, token.src_line - 5):token.src_line]
                frame["context_line"] = lines[token.src_line]
                # +6 (was +5): the slice [src_line+1, src_line+6) yields up to 5 lines of
                # post-context, symmetric with the 5 lines of pre_context; +5 was an
                # off-by-one that produced only 4.
                frame["post_context"] = lines[token.src_line + 1:token.src_line + 6]

                frame["lineno"] = token.src_line + TO_DISPLAY
                # frame["colno"] = token.src_col + TO_DISPLAY  # not actually used

0
files/__init__.py Normal file
View File

31
files/admin.py Normal file
View File

@@ -0,0 +1,31 @@
from django.contrib import admin
from django.urls import reverse
from django.utils.html import format_html
from .models import Chunk, File, FileMetadata
@admin.register(Chunk)
class ChunkAdmin(admin.ModelAdmin):
    """Admin for raw upload chunks; the binary payload is shown read-only."""
    list_display = ('checksum', 'size')
    search_fields = ('checksum',)
    readonly_fields = ('data',)
@admin.register(File)
class FileAdmin(admin.ModelAdmin):
    """Admin for assembled files; download_link gives one-click access to the raw bytes."""
    list_display = ('filename', 'checksum', 'size', 'download_link')
    search_fields = ('checksum',)
    readonly_fields = ('data', 'download_link')

    def download_link(self, obj):
        # renders an <a> pointing at the (superuser-only) download_file view
        return format_html(
            '<a href="{}">{}</a>',
            reverse("download_file", args=(obj.checksum,)), str(obj.filename),
        )
@admin.register(FileMetadata)
class FileMetadataAdmin(admin.ModelAdmin):
    """Admin for debug_id/file_type -> File associations (from bundle manifests)."""
    list_display = ('debug_id', 'file_type', 'file')
    search_fields = ('file__checksum', 'debug_id', 'file_type')

6
files/apps.py Normal file
View File

@@ -0,0 +1,6 @@
from django.apps import AppConfig
class FilesConfig(AppConfig):
    """App config for the 'files' app (chunk upload / artifact-bundle storage)."""
    default_auto_field = "django.db.models.BigAutoField"
    name = "files"

View File

@@ -0,0 +1,77 @@
# Generated by Django 4.2.19 on 2025-04-10 08:15
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    # Initial schema for the files app: Chunk (raw uploaded pieces), File (assembled
    # content, both addressed by sha1 checksum) and FileMetadata (debug_id/file_type
    # pairs from artifact-bundle manifests, pointing at a File).

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Chunk",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("checksum", models.CharField(max_length=40, unique=True)),
                ("size", models.PositiveIntegerField()),
                ("data", models.BinaryField()),
            ],
        ),
        migrations.CreateModel(
            name="File",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("checksum", models.CharField(max_length=40, unique=True)),
                ("filename", models.CharField(max_length=255)),
                ("size", models.PositiveIntegerField()),
                ("data", models.BinaryField()),
            ],
        ),
        migrations.CreateModel(
            name="FileMetadata",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("debug_id", models.UUIDField(blank=True, null=True)),
                ("file_type", models.CharField(blank=True, max_length=255, null=True)),
                ("data", models.TextField()),
                (
                    "file",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="metadatas",
                        to="files.file",
                    ),
                ),
            ],
            options={
                "unique_together": {("debug_id", "file_type")},
            },
        ),
    ]

View File

48
files/models.py Normal file
View File

@@ -0,0 +1,48 @@
from django.db import models
class Chunk(models.Model):
    """A single uploaded chunk, addressed by the sha1 checksum of its content."""

    checksum = models.CharField(max_length=40, unique=True)  # unique implies index, which we also use for lookups
    size = models.PositiveIntegerField()
    data = models.BinaryField(null=False)  # as with Events, we can "eventually" move this out of the database

    def __str__(self):
        return self.checksum
class File(models.Model):
    """A fully-assembled file (concatenation of chunks), addressed by sha1 checksum."""

    # NOTE: since we do single-chunk uploads, optimizations are imaginable. Make it work first though
    checksum = models.CharField(max_length=40, unique=True)  # unique implies index, which we also use for lookups

    # the filename is not unique, nor meaningful in the sense that you could use it to identify the file. It is only
    # here for convenience, i.e. to eye-ball the file in a list. note that we store by checksum, and the filename gets
    # associated on the first successful store. i.e. it's possible that a file would be stored again with a different
    # name but that would go undetected by us. all that is to say: convenience thingie without strong guarantees.
    filename = models.CharField(max_length=255)

    size = models.PositiveIntegerField()
    data = models.BinaryField(null=False)  # as with Events, we can "eventually" move this out of the database

    def __str__(self):
        return self.filename
class FileMetadata(models.Model):
    """Associates a (debug_id, file_type) pair from a bundle manifest with a File."""

    file = models.ForeignKey(File, null=False, on_delete=models.CASCADE, related_name="metadatas")

    # debug_id & file_type nullability: such data exists in manifest.json; we are future-proof for it although we
    # currently don't store it as such.
    # fixed: the stray max_length=40 was dropped — Django's UUIDField forces max_length=32
    # internally and silently discards a passed value (the generated migration accordingly
    # has no max_length either).
    debug_id = models.UUIDField(null=True, blank=True)
    file_type = models.CharField(max_length=255, null=True, blank=True)

    data = models.TextField()  # we just dump the rest in here; let's see how much we really need.

    def __str__(self):
        # somewhat useless when debug_id is None; but that's not the case we care about ATM
        return f"debug_id: {self.debug_id} ({self.file_type})"

    class Meta:
        # it's _imaginable_ that the below does not actually hold (we just trust the CLI, after all), but that wouldn't
        # make any sense, so we just enforce a property that makes sense. Pro: lookups work. Con: if the client sends
        # garbage, this is not exposed.
        unique_together = (("debug_id", "file_type"),)

84
files/tasks.py Normal file
View File

@@ -0,0 +1,84 @@
from zipfile import ZipFile
import json
from hashlib import sha1
from io import BytesIO
from os.path import basename
from snappea.decorators import shared_task
from bugsink.transaction import immediate_atomic
from .models import Chunk, File, FileMetadata
@shared_task
def assemble_artifact_bundle(bundle_checksum, chunk_checksums):
    """Assemble the bundle zip from its chunks and register each contained file.

    For every manifest entry carrying a debug-id and a type, a FileMetadata record
    is created pointing at the (deduplicated, checksum-addressed) File.
    """
    # arguably, you could just wrap-around each operation, "around everything" guarantees a fully consistent update on
    # the data and we don't do this that often that it's assumed to matter.
    with immediate_atomic():
        # NOTE: as it stands we don't store the (optional) extra info of release/dist.
        # NOTE: there's also the concept of an artifact bundle as _tied_ to a release, i.e. without debug_ids. We don't
        # support that, but if we ever were to support it we'd need a separate method/param to distinguish it.
        bundle_file, _ = assemble_file(bundle_checksum, chunk_checksums, filename=f"{bundle_checksum}.zip")

        bundle_zip = ZipFile(BytesIO(bundle_file.data))  # NOTE: in-memory handling of zips.
        manifest_bytes = bundle_zip.read("manifest.json")
        manifest = json.loads(manifest_bytes.decode("utf-8"))

        for path_in_zip, manifest_entry in manifest["files"].items():
            file_data = bundle_zip.read(path_in_zip)
            checksum = sha1(file_data).hexdigest()

            # fixed: the loop variable used to be reassigned here ("filename" served both
            # as zip-path and as stored name); distinct names make the two roles explicit.
            stored_filename = basename(manifest_entry.get("url", path_in_zip))[:255]

            file, _ = File.objects.get_or_create(
                checksum=checksum,
                defaults={
                    "filename": stored_filename,
                    "size": len(file_data),
                    "data": file_data,
                })

            debug_id = manifest_entry.get("headers", {}).get("debug-id", None)
            file_type = manifest_entry.get("type", None)

            if debug_id is None or file_type is None:
                # such records exist and we could store them, but we don't, since we don't have a purpose for them.
                continue

            FileMetadata.objects.get_or_create(
                debug_id=debug_id,
                file_type=file_type,
                defaults={
                    "file": file,
                    "data": json.dumps(manifest_entry),
                }
            )

        # NOTE we _could_ get rid of the file at this point (but we don't). Ties in to broader questions of retention.
def assemble_file(checksum, chunk_checksums, filename):
    """Assemble a File from previously-uploaded Chunks; returns (file, created).

    Raises on a sha1 mismatch of the concatenated data, and (implicitly, via
    KeyError) when a referenced chunk is missing.
    """
    # NOTE: unimplemented checks/tricks
    # * total file-size v.s. some max
    # * explicit check chunk availability (as it stands, our processing is synchronous, so no need)
    # * skip-on-checksum-exists

    chunks = Chunk.objects.filter(checksum__in=chunk_checksums)
    chunk_by_checksum = {chunk.checksum: chunk for chunk in chunks}

    # fixed: the comprehension variable was named "checksum", shadowing the parameter
    # of the same name (harmless in Py3 comprehension scope, but needlessly confusing).
    chunks_in_order = [chunk_by_checksum[cs] for cs in chunk_checksums]  # implicitly checks chunk availability

    data = b"".join([chunk.data for chunk in chunks_in_order])
    if sha1(data).hexdigest() != checksum:
        raise Exception("checksum mismatch")

    return File.objects.get_or_create(
        checksum=checksum,
        defaults={
            "size": len(data),
            "data": data,
            "filename": filename,
        })

134
files/tests.py Normal file
View File

@@ -0,0 +1,134 @@
import json
import gzip
from io import BytesIO
import os
from glob import glob
from django.contrib.auth import get_user_model
from compat.dsn import get_header_value
from bugsink.test_utils import TransactionTestCase25251 as TransactionTestCase
from projects.models import Project, ProjectMembership
from events.models import Event
from bsmain.models import AuthToken
User = get_user_model()
class FilesTests(TransactionTestCase):
    # Integration-test of file-upload and does-it-render-sourcemaps

    def setUp(self):
        # superuser not needed: the upload API authenticates via AuthToken, not session
        super().setUp()
        self.user = User.objects.create_user(username='test', password='test')
        self.project = Project.objects.create()
        ProjectMembership.objects.create(project=self.project, user=self.user)
        self.client.force_login(self.user)
        self.auth_token = AuthToken.objects.create()
        self.token_headers = {"Authorization": f"Bearer {self.auth_token.token}"}

    def test_auth_no_header(self):
        response = self.client.get("/api/0/organizations/anyorg/chunk-upload/", headers={})
        self.assertEqual(401, response.status_code)
        self.assertEqual({"error": "Authorization header not found"}, response.json())

    def test_auth_empty_header(self):
        response = self.client.get("/api/0/organizations/anyorg/chunk-upload/", headers={"Authorization": ""})
        self.assertEqual(401, response.status_code)
        self.assertEqual({"error": "Authorization header not found"}, response.json())

    def test_auth_overfull_header(self):
        # more than two whitespace-separated parts in the header is rejected
        response = self.client.get("/api/0/organizations/anyorg/chunk-upload/", headers={"Authorization": "Bearer a b"})
        self.assertEqual(401, response.status_code)
        self.assertEqual({"error": "Expecting 'Authorization: Token abc123...' but got 'Bearer a b'"}, response.json())

    def test_auth_wrong_token(self):
        response = self.client.get("/api/0/organizations/anyorg/chunk-upload/", headers={"Authorization": "Bearer xxx"})
        self.assertEqual(401, response.status_code)
        self.assertEqual({"error": "Invalid token"}, response.json())

    def test_assemble_artifact_bundle(self):
        # end-to-end: chunk-upload -> assemble -> ingest a sample event -> render its stacktrace page
        SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples")
        event_samples = [SAMPLES_DIR + fn for fn in ["/bugsink/uglifyjs-minified-sourcemaps-in-bundle.json"]]

        artifact_bundles = glob(SAMPLES_DIR + "/*/artifact_bundles/*.zip")
        if len(artifact_bundles) == 0:
            raise Exception(f"No artifact bundles found in {SAMPLES_DIR}; I insist on having some to test with.")

        for filename in artifact_bundles:
            with open(filename, 'rb') as f:
                data = f.read()

            # bundle files are named <sha1>.zip, so the basename carries the checksum
            checksum = os.path.basename(filename).split(".")[0]

            gzipped_file = BytesIO(gzip.compress(data))
            gzipped_file.name = checksum

            # 1. chunk-upload
            response = self.client.post(
                "/api/0/organizations/anyorg/chunk-upload/",
                data={"file_gzip": gzipped_file},
                headers=self.token_headers,
            )
            self.assertEqual(
                200, response.status_code, "Error in %s: %s" % (
                    filename, response.content if response.status_code != 302 else response.url))

            # 2. artifactbundle/assemble
            data = {
                "checksum": checksum,
                "chunks": [
                    checksum,  # single-chunk upload, so this works
                ],
                "projects": [
                    "unused_for_now"
                ]
            }
            response = self.client.post(
                "/api/0/organizations/anyorg/artifactbundle/assemble/",
                json.dumps(data),
                content_type="application/json",
                headers=self.token_headers,
            )
            self.assertEqual(
                200, response.status_code, "Error in %s: %s" % (
                    filename, response.content if response.status_code != 302 else response.url))

        sentry_auth_header = get_header_value(f"http://{ self.project.sentry_key }@hostisignored/{ self.project.id }")

        for filename in event_samples:
            # minimal assertions on correctness in this loop; this will be caught by our general sample-testing
            with open(filename) as f:
                data = json.loads(f.read())

            response = self.client.post(
                f"/api/{ self.project.id }/store/",
                json.dumps(data),
                content_type="application/json",
                headers={
                    "X-Sentry-Auth": sentry_auth_header,
                },
            )
            self.assertEqual(
                200, response.status_code, "Error in %s: %s" % (
                    filename, response.content if response.status_code != 302 else response.url))

        for event in Event.objects.all():
            url = f'/issues/issue/{ event.issue.id }/event/{ event.id }/'
            try:
                # we just check for a 200; this at least makes sure we have no failing template rendering
                response = self.client.get(url)
                self.assertEqual(
                    200, response.status_code, response.content if response.status_code != 302 else response.url)

                # we could/should make this more general later; this is great for example nr.1:
                key_phrase = '<span class="font-bold">captureException</span> line <span class="font-bold">15</span>'
                self.assertTrue(key_phrase in response.content.decode('utf-8'))
            except Exception as e:
                # we want to know _which_ event failed, hence the raise-from-e here
                raise AssertionError("Error rendering event %s" % event.debug_info) from e

27
files/urls.py Normal file
View File

@@ -0,0 +1,27 @@
from django.urls import path
from django.urls import register_converter
from .views import download_file
def regex_converter(passed_regex):
    """Build a URL path-converter class matching *passed_regex*.

    Conversion in both directions is the identity (the matched string is passed
    through unchanged). Same trick as in issues/urls.py (copy/pasta).
    """
    class _Converter:
        regex = passed_regex

        @staticmethod
        def to_python(value):
            return value

        @staticmethod
        def to_url(value):
            return value

    return _Converter
# "sha1": exactly 40 lowercase hex chars — the checksum under which files are stored
register_converter(regex_converter("[0-9a-f]{40}"), "sha1")

urlpatterns = [
    path('downloads/<sha1:checksum>/', download_file, name='download_file'),
]

195
files/views.py Normal file
View File

@@ -0,0 +1,195 @@
import json
from functools import wraps
from gzip import GzipFile
from hashlib import sha1
from io import BytesIO
from django.http import JsonResponse, HttpResponse
from django.views.decorators.csrf import csrf_exempt
from django.contrib.auth.decorators import user_passes_test
from sentry.assemble import ChunkFileState
from bugsink.app_settings import get_settings
from bugsink.transaction import durable_atomic, immediate_atomic
from bsmain.models import AuthToken
from .models import Chunk, File
from .tasks import assemble_artifact_bundle
_KIBIBYTE = 1024
_MEBIBYTE = 1024 * _KIBIBYTE
class NamedBytesIO(BytesIO):
    """A BytesIO that, like a real uploaded file object, also carries a ``name``."""

    def __init__(self, data, name):
        self.name = name
        super().__init__(data)
def get_chunk_upload_settings(request, organization_slug):
    """Return the JSON settings dict that sentry-cli consults before uploading chunks."""
    # Sentry / Sentry-CLI has a whole bunch of logic surrounding URLs, which I do not understand and which presumably
    # doesn't make it past Bugsink's cost/benefit-analysis of feature-completeness. For now, we just return our own URL
    # which seems to "just work". If we ever want to go down this path :
    #
    # https://github.com/getsentry/sentry/pull/7095/files <= upload-url-prefix: introduced, but rationale not explained
    #
    # 2 more starting points for the whole "relative" idea
    # * https://github.com/getsentry/sentry-cli/issues/839
    # * https://github.com/getsentry/sentry/pull/29347
    url = get_settings().BASE_URL + "/api/0/organizations/" + organization_slug + "/chunk-upload/"

    # Our "chunk_upload" is chunked in name only; i.e. we only "speak chunked" for the purpose of API-compatibility with
    # sentry-cli, but we provide params here such that that cli will only send a single chunk.
    return JsonResponse({
        "url": url,

        # For now, staying close to the default MAX_ENVELOPE_COMPRESSED_SIZE, which is 20MiB;
        # I _think_ I saw a note somewhere on (one of) these values having to be a power of 2; hence 32 here.
        #
        # When implementing uploading, it was done to support sourcemaps. It seems that over at Sentry, the reason they
        # went so complicated in the first place was to enable DIF support (hundreds of MiB regularly).
        "chunkSize": 32 * _MEBIBYTE,
        "maxRequestSize": 32 * _MEBIBYTE,

        # I didn't check the supposed relationship between maxRequestSize and maxFileSize, but assume something similar
        # to what happens w/ envelopes; hence harmonizing with MAX_ENVELOPE_SIZE (and rounding up to a power of 2) here
        "maxFileSize": 128 * _MEBIBYTE,

        # force single-chunk by setting these to 1.
        "concurrency": 1,
        "chunksPerRequest": 1,

        "hashAlgorithm": "sha1",
        "compression": ["gzip"],
        "accept": [
            # I don't claim to fully understand how the sentry-cli switches based on these advertised capabilities, but
            # the list below works for now. Any understanding that I did gain is documented.
            # for a full list of types we _could_ accept, see src/sentry/api/endpoints/chunk.py
            #
            # If the below is off, sentry-cli complains "A release slug is required". Because release-less artifacts are
            # actually the simpler thing, that's undesirable. Other consequences of turning it on have not been charted
            # yet.
            "release_files",

            # this would seem to be the "javascript sourcemaps" thing, but how exactly I did not check yet.
            "sources",

            # https://github.com/getsentry/sentry/discussions/46967
            # artifact_bundles is a concept originating from sentry that uses debug_ids to link maps & sources. Despite
            # it being relatively new, it's my _first_ target for getting sourcemaps to work, because it's actually the
            # most simple and reliable thing (uuid, bidirectional mapping)
            "artifact_bundles",

            # AFAIU the only thing _v2 would signify is the ability to "Implement de-duplication with chunking in the
            # assemble endpoint for artifact bundles (#51224)". Which is needlessly complex from my point of view.
            # "artifact_bundles_v2",

            # the rest of the options are below:
            # "debug_files",
            # "release_files",
            # "pdbs",
            # "bcsymbolmaps",
            # "il2cpp",
            # "portablepdbs",
            # "artifact_bundles",
            # "proguard",
        ]
    })
def requires_auth_token(view_function):
    """Decorator: reject requests without a valid AuthToken via a JSON 401 response.

    {"error": "..."} (status=401) responses are API-compatible; for that to work we need the present function to be a
    decorator (so we can return, rather than raise, which plain-Django doesn't support for 401).
    """
    @wraps(view_function)  # fixed: the original only copied __name__, losing __doc__/__module__ etc.
    def first_require_auth_token(request, *args, **kwargs):
        header_value = request.META.get("HTTP_AUTHORIZATION")
        if not header_value:
            return JsonResponse({"error": "Authorization header not found"}, status=401)

        header_values = header_value.split()
        if len(header_values) != 2:
            return JsonResponse(
                {"error": "Expecting 'Authorization: Token abc123...' but got '%s'" % header_value}, status=401)

        # the scheme word ("Bearer"/"Token") is deliberately not validated
        _the_word_bearer, token = header_values

        # .exists() instead of .count() < 1: cheaper query, states the intent directly
        if not AuthToken.objects.filter(token=token).exists():
            return JsonResponse({"error": "Invalid token"}, status=401)

        return view_function(request, *args, **kwargs)

    return first_require_auth_token
@csrf_exempt
@requires_auth_token
def chunk_upload(request, organization_slug):
    """GET: return upload settings for sentry-cli; POST: store uploaded chunks.

    Bugsink has a single-organization model; we simply ignore organization_slug.
    """
    # NOTE: we don't check against chunkSize, maxRequestSize and chunksPerRequest (yet), we expect the CLI to behave.
    if request.method == "GET":
        # a GET at this endpoint returns a dict of settings that the CLI takes into account when uploading
        return get_chunk_upload_settings(request, organization_slug)

    # POST: upload (full-size) "chunks" and store them as Chunk objects; file.name should be the sha1 of the content.
    chunks = []
    if request.FILES:
        chunks = request.FILES.getlist("file")

    # NOTE: we read the whole unzipped file into memory; we _could_ take an approach like bugsink/streams.py.
    # (Note that, because of the auth layer in front, we're slightly less worried about adversarial scenarios)
    chunks += [
        NamedBytesIO(GzipFile(fileobj=file_gzip, mode="rb").read(), name=file_gzip.name)
        for file_gzip in request.FILES.getlist("file_gzip")]

    for chunk in chunks:
        data = chunk.getvalue()
        # file.name carries the claimed sha1; a mismatch means a corrupt upload
        if sha1(data).hexdigest() != chunk.name:
            raise Exception("checksum mismatch")

        with immediate_atomic():  # a snug fit around the only DB-writing thing we do here to ensure minimal blocking
            _, _ = Chunk.objects.get_or_create(
                checksum=chunk.name,
                defaults={
                    "size": len(data),
                    "data": data,  # NOTE: further possible optimization: don't even read the file when already existing
                })

    return HttpResponse()
@csrf_exempt  # we're in API context here; this could potentially be pulled up to a higher level though
@requires_auth_token
def artifact_bundle_assemble(request, organization_slug):
    """Kick off assembly of an uploaded artifact bundle from its (already stored) chunks.

    Bugsink has a single-organization model; we simply ignore organization_slug.
    """
    # NOTE a JSON-schema for this endpoint is available under Apache 2 license (2 year anniversary rule) at
    # https://github.com/getsentry/sentry/blob/8df7543848b4/src/sentry/api/endpoints/organization_artifactbundle_assemble.py#L24
    # (not worth the trouble of extracting right now, since our /sentry dir contains BSD-3 licensed code (2019 version)
    data = json.loads(request.body)

    assemble_artifact_bundle.delay(data["checksum"], data["chunks"])

    # NOTE sentry & glitchtip _always_ return an empty list for "missingChunks" in this view; I don't really understand
    # what's being achieved with that, but it seems to be the expected behavior. Working hypothesis: this was introduced
    # for DIF uploads, and the present endpoint doesn't use it at all. Not even for "v2", surprisingly.
    # In the ALWAYS_EAGER setup, we process the bundle inline, so arguably we could return "OK" here too; "CREATED" is
    # what sentry returns though, so for faithful mimicking it's the safest bet.
    return JsonResponse({"state": ChunkFileState.CREATED, "missingChunks": []})
@user_passes_test(lambda u: u.is_superuser)
@durable_atomic
def download_file(request, checksum):
    """Serve the File with the given sha1 checksum as an attachment (superuser only)."""
    try:
        file = File.objects.get(checksum=checksum)
    except File.DoesNotExist:
        # fixed: an unknown checksum used to bubble up as an unhandled exception (500);
        # a wrong URL is a client error, so answer 404 instead.
        return HttpResponse("File not found", status=404, content_type="text/plain")

    response = HttpResponse(file.data, content_type="application/octet-stream")
    response["Content-Disposition"] = f"attachment; filename={file.filename}"
    return response

View File

@@ -419,6 +419,10 @@ class ViewTests(TransactionTestCase):
response = self.client.get(f"/issues/issue/{self.issue.id}/tags/")
self.assertContains(response, self.issue.title())
def test_issue_grouping(self):
    # smoke test: the grouping tab renders (200) and mentions the issue title
    response = self.client.get(f"/issues/issue/{self.issue.id}/grouping/")
    self.assertContains(response, self.issue.title())
def test_issue_history(self):
    # smoke test: the history tab renders (200) and mentions the issue title
    response = self.client.get(f"/issues/issue/{self.issue.id}/history/")
    self.assertContains(response, self.issue.title())

View File

@@ -1,6 +1,7 @@
from collections import namedtuple
import json
import sentry_sdk
import logging
from django.db.models import Q
from django.utils import timezone
@@ -13,8 +14,10 @@ from django.core.exceptions import PermissionDenied
from django.http import Http404
from django.core.paginator import Paginator, Page
from django.db.utils import OperationalError
from django.conf import settings
from sentry.utils.safe import get_path
from sentry_sdk_extensions import capture_or_log_exception
from bugsink.decorators import project_membership_required, issue_membership_required, atomic_for_request_method
from bugsink.transaction import durable_atomic
@@ -31,7 +34,9 @@ from tags.search import search_issues, search_events, search_events_optimized
from .models import Issue, IssueQuerysetStateManager, IssueStateManager, TurningPoint, TurningPointKind
from .forms import CommentForm
from .utils import get_values, get_main_exception
from events.utils import annotate_with_meta
from events.utils import annotate_with_meta, apply_sourcemaps
logger = logging.getLogger("bugsink.issues")
MuteOption = namedtuple("MuteOption", ["for_or_until", "period_name", "nr_of_periods", "gte_threshold"])
@@ -401,6 +406,16 @@ def issue_event_stacktrace(request, issue, event_pk=None, digest_order=None, nav
# swallow the error in that case.
sentry_sdk.capture_exception(e)
try:
apply_sourcemaps(parsed_data)
except Exception as e:
if settings.DEBUG or settings.I_AM_RUNNING == "TEST":
# when developing/testing, I _do_ want to get notified
raise
# sourcemaps are still experimental; we don't want to fail on them, so we just log the error and move on.
capture_or_log_exception(e, logger)
# NOTE: I considered making this a clickable button of some sort, but decided against it in the end. Getting the UI
# right is quite hard (https://ux.stackexchange.com/questions/1318) but more generally I would assume that having
# your whole screen turned upside down is not something you do willy-nilly. Better to just have good defaults and

View File

@@ -46,6 +46,7 @@ include = [
"ee*",
"ingest*",
"issues*",
"files*",
"performance*",
"phonehome*",
"projects*",

View File

@@ -15,3 +15,4 @@ monofy==1.1.*
user-agents==2.2.*
fastjsonschema==2.21.*
verbose_csrf_middleware==1.0.*
sourcemap==0.2.*

16
sentry/assemble.py Normal file
View File

@@ -0,0 +1,16 @@
# from src/sentry/tasks/assemble.py
def enum(**named_values):
    """Create an ad-hoc enum-like class whose attributes are the given keyword values."""
    return type("Enum", (), dict(named_values))


# Lifecycle states reported by the chunk-upload/assemble endpoints (values mirror sentry's).
ChunkFileState = enum(
    OK="ok",                  # File in database
    NOT_FOUND="not_found",    # File not found in database
    CREATED="created",        # File was created in the request and sent to the worker for assembling
    ASSEMBLING="assembling",  # File still being processed by worker
    ERROR="error",            # Error happened during assembling
)

View File

@@ -1,3 +1,4 @@
import traceback
import types
from sentry_sdk.utils import current_stacktrace
@@ -111,3 +112,19 @@ def capture_stacktrace(message):
}
}
sentry_sdk.capture_event(event)
def capture_or_log_exception(e, logger):
    """Report *e* to Sentry when the SDK is initialized; otherwise log its full traceback.

    Never raises: any failure inside the reporting itself is printed and swallowed,
    so error-handling code cannot become the source of a new error.
    """
    try:
        if sentry_sdk.is_initialized():
            sentry_sdk.capture_exception(e)
        else:
            # Pass (type, value, tb) explicitly: the single-argument form of
            # traceback.format_exception() only exists on Python 3.10+; this form
            # produces identical output and works on older versions too.
            # this gnarly line-by-line approach makes it so that each line of the traceback gets the
            # same prefixes (dates etc)
            for chunk in traceback.format_exception(type(e), e, e.__traceback__):
                for line in chunk.splitlines():
                    # Note: when .is_initialized() is True, .error is spammy (it gets captured) but we
                    # don't have that problem in this branch.
                    logger.error(line)
    except Exception as e2:
        # We just never want our error-handling code to be the cause of an error.
        print("Error in capture_or_log_exception", str(e2), "during handling of", str(e))

View File

@@ -10,11 +10,12 @@ import time
import signal
import threading
from inotify_simple import INotify, flags
from sentry_sdk import capture_exception
import sentry_sdk
from django.conf import settings
from django.db import connections
from sentry_sdk_extensions import capture_or_log_exception
from performance.context_managers import time_to_logger
from bugsink.transaction import durable_atomic
@@ -180,9 +181,11 @@ class Foreman:
function(*inner_args, **inner_kwargs)
except Exception as e:
# Potential TODO: make this configurable / depend on our existing config in bugsink/settings.py
logger.warning("Snappea caught Exception: %s", str(e))
capture_exception(e)
if sentry_sdk.is_initialized():
# Only for the case where full error is captured to Dogfooded Bugsink, do we want to draw some
# attention to this; in the other case the big error in the logs (full traceback) is clear enough.
logger.warning("Snappea caught Exception: %s", str(e))
capture_or_log_exception(e, logger)
finally:
# equivalent to the below, but slightly more general (and thus more future-proof). In both cases nothing
# happens with already-closed/never opened connections):
@@ -324,7 +327,7 @@ class Foreman:
logger.error('Create workers: can\'t execute "%s": %s', task.task_name, e)
with time_to_logger(performance_logger, "Snappea delete Task"):
task.delete() # we delete the task because we can't do anything with it, and we don't want to hang
capture_exception(e)
capture_or_log_exception(e, logger)
self.worker_semaphore.release()
continue

View File

@@ -1,4 +1,4 @@
{% extends "base.html" %}
{% extends "bare_base.html" %}
{% comment %}
Note on security: the basic principle is "the more you expose, the more an attacker knows."
In this case, I think it should be fine, because all we expose here is stuff about mismatched hosts, and:

View File

@@ -33,6 +33,7 @@
{% if user.is_superuser %}
<a href="/users/"><div class="px-4 py-2 my-2 hover:bg-slate-300 rounded-xl">Users</div></a>
<a href="/bsmain/auth_tokens/"><div class="px-4 py-2 my-2 hover:bg-slate-300 rounded-xl">Tokens</div></a>
{% endif %}
{% if logged_in_user.is_anonymous %}