Merge pull request #80 from bugsink/sourcemaps-spike

Sourcemaps: first version
This commit is contained in:
Klaas van Schelven
2025-04-14 11:08:19 +02:00
committed by GitHub
30 changed files with 966 additions and 7 deletions

View File

@@ -0,0 +1,10 @@
from django.contrib import admin
from .models import AuthToken
@admin.register(AuthToken)
class AuthTokenAdmin(admin.ModelAdmin):
    """Admin for the global (Bugsink-wide) AuthTokens: newest first, filterable by creation time."""
    list_display = ("token", "created_at")
    list_filter = ("created_at",)
    ordering = ("-created_at",)

View File

@@ -0,0 +1,11 @@
from django.core.management.base import BaseCommand
from bsmain.models import AuthToken
class Command(BaseCommand):
    # fixed: the original literal ended in a stray, implicitly-concatenated empty
    # string ('..."""' parsed as "..." + ""); harmless at runtime but confusing.
    help = "Creates an auth_token and prints it on screen"

    def handle(self, *args, **options):
        """Create a fresh AuthToken and write its token value to stdout."""
        auth_token = AuthToken.objects.create()
        # self.stdout.write (rather than print) is the management-command convention;
        # it honors output redirection in tests and the --no-color machinery.
        self.stdout.write(auth_token.token)

View File

@@ -0,0 +1,44 @@
# Generated by Django 4.2.19 on 2025-04-11 11:33
import bsmain.models
import django.core.validators
from django.db import migrations, models
class Migration(migrations.Migration):
    # Initial schema for bsmain: the AuthToken table — auto PK, unique 40-char hex
    # token (defaulting to bsmain.models.generate_token) and a creation timestamp.

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="AuthToken",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "token",
                    models.CharField(
                        default=bsmain.models.generate_token,
                        max_length=40,
                        unique=True,
                        validators=[
                            django.core.validators.RegexValidator(
                                message="Token must be a 40-character hexadecimal string.",
                                regex="^[a-f0-9]{40}$",
                            )
                        ],
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
            ],
        ),
    ]

View File

View File

@@ -0,0 +1,20 @@
import secrets
from django.db import models
from django.core.validators import RegexValidator
def generate_token():
    """Return a fresh 40-character lowercase-hex token (20 random bytes)."""
    # token_hex(n) produces 2*n hex characters, so 20 bytes -> 40 chars.
    return secrets.token_hex(20)
class AuthToken(models.Model):
    """Global (Bugsink-wide) token for authentication."""

    # 40 hex chars == 20 random bytes (see generate_token); unique implies an index,
    # which the auth layer also uses for lookups.
    token = models.CharField(max_length=40, unique=True, default=generate_token, validators=[
        RegexValidator(regex=r'^[a-f0-9]{40}$', message='Token must be a 40-character hexadecimal string.'),
    ])
    # set once by auto_now_add; editable=False keeps it out of forms/admin edits
    created_at = models.DateTimeField(auto_now_add=True, editable=False)

    def __str__(self):
        return f"AuthToken(token={self.token})"

View File

@@ -0,0 +1,80 @@
{% extends "base.html" %}
{% load static %}

{% block title %}Auth Tokens · {{ site_title }}{% endblock %}

{% block content %}
<div class="flex items-center justify-center">
    <div class="m-4 max-w-4xl flex-auto">
        {% if messages %}
        <ul class="mb-4">
            {% for message in messages %}
            {# if we introduce different levels we can use {% message.level == DEFAULT_MESSAGE_LEVELS.SUCCESS %} #}
            <li class="bg-cyan-50 border-2 border-cyan-800 p-4 rounded-lg">{{ message }}</li>
            {% endfor %}
        </ul>
        {% endif %}

        <div class="flex">
            <h1 class="text-4xl mt-4 font-bold">Auth Tokens</h1>
            <div class="ml-auto mt-6">
                <form action="{% url "auth_token_create" %}" method="post">
                    {% csrf_token %} {# margins display slightly different from the <a href version that I have for e.g. project members, but I don't care _that_ much #}
                    <button class="font-bold text-slate-800 border-slate-500 pl-4 pr-4 pb-2 pt-2 ml-1 border-2 bg-cyan-200 hover:bg-cyan-400 active:ring rounded-md">Add Token</button>
                </form>
            </div>
        </div>

        <div>
            <form action="." method="post">
                {% csrf_token %}
                <table class="w-full mt-8">
                    {# fixed: <thead> was opened inside <tbody> and never closed; a thead must precede the tbody #}
                    <thead>
                        <tr class="bg-slate-200">
                            <th class="w-full p-4 text-left text-xl" colspan="2">Auth Tokens</th>
                        </tr>
                    </thead>
                    <tbody>
                        {% for auth_token in auth_tokens %}
                        <tr class="bg-white border-slate-200 border-b-2">
                            <td class="w-full p-4">
                                <div>
                                    {{ auth_token.token }}
                                </div>
                            </td>
                            <td class="p-4">
                                <div class="flex justify-end">
                                    <button name="action" value="delete:{{ auth_token.id }}" class="font-bold text-slate-500 border-slate-300 pl-4 pr-4 pb-2 pt-2 ml-2 border-2 hover:bg-slate-200 active:ring rounded-md">Delete</button>
                                </div>
                            </td>
                        </tr>
                        {% empty %}
                        <tr class="bg-white border-slate-200 border-b-2">
                            <td class="w-full p-4">
                                <div>
                                    No Auth Tokens.
                                </div>
                            </td>
                            <td class="p-4">
                            </td>
                        </tr>
                        {% endfor %}
                    </tbody>
                </table>
            </form>
        </div>
    </div>
</div> {# fixed: the outer flex <div> was previously left unclosed #}
{% endblock %}

9
bsmain/urls.py Normal file
View File

@@ -0,0 +1,9 @@
from django.urls import path
from .views import auth_token_list, auth_token_create
# Token management is superuser-only; enforcement happens inside the views themselves.
urlpatterns = [
    path('auth_tokens/', auth_token_list, name='auth_token_list'),
    path('auth_tokens/create/', auth_token_create, name='auth_token_create'),
]

View File

@@ -0,0 +1,39 @@
from django.shortcuts import render, redirect
from django.http import Http404
from django.contrib import messages
from django.contrib.auth.decorators import user_passes_test
from bugsink.decorators import atomic_for_request_method
from .models import AuthToken
@atomic_for_request_method
@user_passes_test(lambda u: u.is_superuser)
def auth_token_list(request):
    """List all AuthTokens; a POST with action="delete:<pk>" deletes a single token."""
    auth_tokens = AuthToken.objects.all()

    if request.method == 'POST':
        # actions are encoded as "verb:pk" in the submit button's value
        # (was a Dutch placeholder comment: "DIT KOMT ZO WEL" ~ "this will come shortly")
        full_action_str = request.POST.get('action', '')

        # guard: a POST without (or with a malformed) "action" previously crashed on
        # None.split / unpacking; now it simply redirects back without doing anything.
        if ":" in full_action_str:
            action, pk = full_action_str.split(":", 1)
            if action == "delete":
                AuthToken.objects.get(pk=pk).delete()
                messages.success(request, 'Token deleted')

        return redirect('auth_token_list')

    return render(request, 'bsmain/auth_token_list.html', {
        'auth_tokens': auth_tokens,
    })
@atomic_for_request_method
@user_passes_test(lambda u: u.is_superuser)
def auth_token_create(request):
    """Create a new AuthToken (POST only; superuser only) and go back to the list."""
    if request.method == 'POST':
        AuthToken.objects.create()
        return redirect("auth_token_list")
    # anything but POST is treated as a missing page
    raise Http404("Invalid request method")

View File

@@ -77,6 +77,7 @@ BUGSINK_APPS = [
'releases',
'ingest',
'issues',
'files',
'events',
'tags',
'alerts',

View File

@@ -3,6 +3,7 @@ from django.conf import settings
from django.contrib import admin
from django.urls import include, path
from django.contrib.auth import views as auth_views
from django.views.generic import RedirectView
from alerts.views import debug_email as debug_alerts_email
from users.views import debug_email as debug_users_email
@@ -10,6 +11,7 @@ from teams.views import debug_email as debug_teams_email
from bugsink.app_settings import get_settings
from users.views import signup, confirm_email, resend_confirmation, request_reset_password, reset_password, preferences
from ingest.views import download_envelope
from files.views import chunk_upload, artifact_bundle_assemble
from .views import home, trigger_error, favicon, settings_view, silence_email_system_warning
from .debug_views import csrf_debug
@@ -38,6 +40,13 @@ urlpatterns = [
# many user-related views are directly exposed above (/accounts/), the rest is here:
path("users/", include("users.urls")),
# these are sentry-cli endpoints for uploading; they're unrelated to e.g. the ingestion API.
# the /api/0/ is just a hard prefix (for the ingest API, that position indicates the project id, but here it's just
# a prefix)
path("api/0/organizations/<slug:organization_slug>/chunk-upload/", chunk_upload, name="chunk_upload"),
path("api/0/organizations/<slug:organization_slug>/artifactbundle/assemble/", artifact_bundle_assemble,
name="artifact_bundle_assemble"),
path('api/', include('ingest.urls')),
# not in /api/ because it's not part of the ingest API, but still part of the ingest app
@@ -47,6 +56,14 @@ urlpatterns = [
path('teams/', include('teams.urls')),
path('events/', include('events.urls')),
path('issues/', include('issues.urls')),
path('files/', include('files.urls')),
# this weird URL is what sentry-cli uses as part of their "login" flow. weird, because the word ':orgslug' shows up
# verbatim. In any case, we simply redirect to the auth token list, such that you can set one up
path('orgredirect/organizations/:orgslug/settings/auth-tokens/',
RedirectView.as_view(url='/bsmain/auth_tokens/', permanent=False)),
path('bsmain/', include('bsmain.urls')),
path('admin/', admin.site.urls),

View File

@@ -1,3 +1,15 @@
import json
import sourcemap
from issues.utils import get_values
from files.models import FileMetadata
# Dijkstra, Sourcemaps and Python lists start at 0, but editors and our UI show lines starting at 1.
FROM_DISPLAY = -1
TO_DISPLAY = 1
class IncompleteList(list):
"""A list that indicates how many items were trimmed from the list."""
def __init__(self, lst, cnt):
@@ -75,3 +87,59 @@ def annotate_var_with_meta(var, meta_var):
var[at(meta_k)] = annotate_var_with_meta(var[at(meta_k)], meta_v)
return var
def apply_sourcemaps(event_data):
    """Rewrite minified stacktrace frames in event_data (in place) using uploaded sourcemaps.

    Sourcemaps are located via debug_meta.images (matched on debug_id); frames whose
    sourcemap also embeds the original source get their lineno and context lines rewritten
    to the original-source equivalents. Frames without an uploaded sourcemap are untouched.
    """
    images = event_data.get("debug_meta", {}).get("images", [])
    if not images:
        return

    debug_id_for_filename = {
        image["code_file"]: image["debug_id"]
        for image in images
        # .get("type"): an image without a "type" key must be skipped, not raise KeyError
        if "debug_id" in image and "code_file" in image and image.get("type") == "sourcemap"
    }

    metadata_obj_lookup = {
        str(metadata_obj.debug_id): metadata_obj
        for metadata_obj in FileMetadata.objects.filter(
            debug_id__in=debug_id_for_filename.values(), file_type="source_map").select_related("file")
    }

    filenames_with_metas = [
        (filename, metadata_obj_lookup[debug_id])
        for (filename, debug_id) in debug_id_for_filename.items()
        if debug_id in metadata_obj_lookup  # if not: sourcemap not uploaded
    ]

    sourcemap_for_filename = {
        filename: sourcemap.loads(meta.file.data)
        for (filename, meta) in filenames_with_metas
    }

    source_for_filename = {}
    for filename, meta in filenames_with_metas:
        sm_data = json.loads(meta.file.data)
        if "sourcesContent" not in sm_data or len(sm_data["sourcesContent"]) != 1:
            # our assumption is: 1 sourcemap, 1 source. The fact that both "sources" (a list of filenames) and
            # "sourcesContent" are lists seems to indicate that this assumption does not generally hold. But it not
            # holding does not play well with the idea of debug_id, I think?
            continue

        source_for_filename[filename] = sm_data["sourcesContent"][0].splitlines()

    for exception in get_values(event_data.get("exception", {})):
        for frame in exception.get("stacktrace", {}).get("frames", []):
            # NOTE: try/except in the loop would allow us to selectively skip frames that we fail to process
            if frame.get("filename") in sourcemap_for_filename and frame["filename"] in source_for_filename:
                sm = sourcemap_for_filename[frame["filename"]]
                lines = source_for_filename[frame["filename"]]

                token = sm.lookup(frame["lineno"] + FROM_DISPLAY, frame["colno"])

                frame["pre_context"] = lines[max(0, token.src_line - 5):token.src_line]
                frame["context_line"] = lines[token.src_line]
                # +6 (was +5): the slice [src_line+1, src_line+6) yields up to 5 lines of
                # post-context, symmetric with the 5 lines of pre_context; +5 was an
                # off-by-one that produced only 4.
                frame["post_context"] = lines[token.src_line + 1:token.src_line + 6]

                frame["lineno"] = token.src_line + TO_DISPLAY
                # frame["colno"] = token.src_col + TO_DISPLAY  # not actually used

0
files/__init__.py Normal file
View File

31
files/admin.py Normal file
View File

@@ -0,0 +1,31 @@
from django.contrib import admin
from django.urls import reverse
from django.utils.html import format_html
from .models import Chunk, File, FileMetadata
@admin.register(Chunk)
class ChunkAdmin(admin.ModelAdmin):
    """Admin for raw upload chunks; the binary payload is shown read-only."""
    list_display = ('checksum', 'size')
    search_fields = ('checksum',)
    readonly_fields = ('data',)
@admin.register(File)
class FileAdmin(admin.ModelAdmin):
    """Admin for assembled files; download_link gives one-click access to the raw bytes."""
    list_display = ('filename', 'checksum', 'size', 'download_link')
    search_fields = ('checksum',)
    readonly_fields = ('data', 'download_link')

    def download_link(self, obj):
        # renders an <a> pointing at the (superuser-only) download_file view
        return format_html(
            '<a href="{}">{}</a>',
            reverse("download_file", args=(obj.checksum,)), str(obj.filename),
        )
@admin.register(FileMetadata)
class FileMetadataAdmin(admin.ModelAdmin):
    """Admin for debug_id/file_type -> File associations (from bundle manifests)."""
    list_display = ('debug_id', 'file_type', 'file')
    search_fields = ('file__checksum', 'debug_id', 'file_type')

6
files/apps.py Normal file
View File

@@ -0,0 +1,6 @@
from django.apps import AppConfig
class FilesConfig(AppConfig):
    """App config for the 'files' app (chunk upload / artifact-bundle storage)."""
    default_auto_field = "django.db.models.BigAutoField"
    name = "files"

View File

@@ -0,0 +1,77 @@
# Generated by Django 4.2.19 on 2025-04-10 08:15
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    # Initial schema for the files app: Chunk (raw uploaded pieces), File (assembled
    # content, both addressed by sha1 checksum) and FileMetadata (debug_id/file_type
    # pairs from artifact-bundle manifests, pointing at a File).

    initial = True

    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Chunk",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("checksum", models.CharField(max_length=40, unique=True)),
                ("size", models.PositiveIntegerField()),
                ("data", models.BinaryField()),
            ],
        ),
        migrations.CreateModel(
            name="File",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("checksum", models.CharField(max_length=40, unique=True)),
                ("filename", models.CharField(max_length=255)),
                ("size", models.PositiveIntegerField()),
                ("data", models.BinaryField()),
            ],
        ),
        migrations.CreateModel(
            name="FileMetadata",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("debug_id", models.UUIDField(blank=True, null=True)),
                ("file_type", models.CharField(blank=True, max_length=255, null=True)),
                ("data", models.TextField()),
                (
                    "file",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="metadatas",
                        to="files.file",
                    ),
                ),
            ],
            options={
                "unique_together": {("debug_id", "file_type")},
            },
        ),
    ]

View File

48
files/models.py Normal file
View File

@@ -0,0 +1,48 @@
from django.db import models
class Chunk(models.Model):
    """A single uploaded chunk, addressed by the sha1 checksum of its content."""

    checksum = models.CharField(max_length=40, unique=True)  # unique implies index, which we also use for lookups
    size = models.PositiveIntegerField()
    data = models.BinaryField(null=False)  # as with Events, we can "eventually" move this out of the database

    def __str__(self):
        return self.checksum
class File(models.Model):
    """A fully-assembled file (concatenation of chunks), addressed by sha1 checksum."""

    # NOTE: since we do single-chunk uploads, optimizations are imaginable. Make it work first though
    checksum = models.CharField(max_length=40, unique=True)  # unique implies index, which we also use for lookups

    # the filename is not unique, nor meaningful in the sense that you could use it to identify the file. It is only
    # here for convenience, i.e. to eye-ball the file in a list. note that we store by checksum, and the filename gets
    # associated on the first successful store. i.e. it's possible that a file would be stored again with a different
    # name but that would go undetected by us. all that is to say: convenience thingie without strong guarantees.
    filename = models.CharField(max_length=255)

    size = models.PositiveIntegerField()
    data = models.BinaryField(null=False)  # as with Events, we can "eventually" move this out of the database

    def __str__(self):
        return self.filename
class FileMetadata(models.Model):
    """Associates a (debug_id, file_type) pair from a bundle manifest with a File."""

    file = models.ForeignKey(File, null=False, on_delete=models.CASCADE, related_name="metadatas")

    # debug_id & file_type nullability: such data exists in manifest.json; we are future-proof for it although we
    # currently don't store it as such.
    # fixed: the stray max_length=40 was dropped — Django's UUIDField forces max_length=32
    # internally and silently discards a passed value (the generated migration accordingly
    # has no max_length either).
    debug_id = models.UUIDField(null=True, blank=True)
    file_type = models.CharField(max_length=255, null=True, blank=True)

    data = models.TextField()  # we just dump the rest in here; let's see how much we really need.

    def __str__(self):
        # somewhat useless when debug_id is None; but that's not the case we care about ATM
        return f"debug_id: {self.debug_id} ({self.file_type})"

    class Meta:
        # it's _imaginable_ that the below does not actually hold (we just trust the CLI, after all), but that wouldn't
        # make any sense, so we just enforce a property that makes sense. Pro: lookups work. Con: if the client sends
        # garbage, this is not exposed.
        unique_together = (("debug_id", "file_type"),)

84
files/tasks.py Normal file
View File

@@ -0,0 +1,84 @@
from zipfile import ZipFile
import json
from hashlib import sha1
from io import BytesIO
from os.path import basename
from snappea.decorators import shared_task
from bugsink.transaction import immediate_atomic
from .models import Chunk, File, FileMetadata
@shared_task
def assemble_artifact_bundle(bundle_checksum, chunk_checksums):
    """Assemble the bundle zip from its chunks and register each contained file.

    For every manifest entry carrying a debug-id and a type, a FileMetadata record
    is created pointing at the (deduplicated, checksum-addressed) File.
    """
    # arguably, you could just wrap-around each operation, "around everything" guarantees a fully consistent update on
    # the data and we don't do this that often that it's assumed to matter.
    with immediate_atomic():
        # NOTE: as it stands we don't store the (optional) extra info of release/dist.
        # NOTE: there's also the concept of an artifact bundle as _tied_ to a release, i.e. without debug_ids. We don't
        # support that, but if we ever were to support it we'd need a separate method/param to distinguish it.
        bundle_file, _ = assemble_file(bundle_checksum, chunk_checksums, filename=f"{bundle_checksum}.zip")

        bundle_zip = ZipFile(BytesIO(bundle_file.data))  # NOTE: in-memory handling of zips.
        manifest_bytes = bundle_zip.read("manifest.json")
        manifest = json.loads(manifest_bytes.decode("utf-8"))

        for path_in_zip, manifest_entry in manifest["files"].items():
            file_data = bundle_zip.read(path_in_zip)
            checksum = sha1(file_data).hexdigest()

            # fixed: the loop variable used to be reassigned here ("filename" served both
            # as zip-path and as stored name); distinct names make the two roles explicit.
            stored_filename = basename(manifest_entry.get("url", path_in_zip))[:255]

            file, _ = File.objects.get_or_create(
                checksum=checksum,
                defaults={
                    "filename": stored_filename,
                    "size": len(file_data),
                    "data": file_data,
                })

            debug_id = manifest_entry.get("headers", {}).get("debug-id", None)
            file_type = manifest_entry.get("type", None)

            if debug_id is None or file_type is None:
                # such records exist and we could store them, but we don't, since we don't have a purpose for them.
                continue

            FileMetadata.objects.get_or_create(
                debug_id=debug_id,
                file_type=file_type,
                defaults={
                    "file": file,
                    "data": json.dumps(manifest_entry),
                }
            )

        # NOTE we _could_ get rid of the file at this point (but we don't). Ties in to broader questions of retention.
def assemble_file(checksum, chunk_checksums, filename):
    """Assemble a File from previously-uploaded Chunks; returns (file, created).

    Raises on a sha1 mismatch of the concatenated data, and (implicitly, via
    KeyError) when a referenced chunk is missing.
    """
    # NOTE: unimplemented checks/tricks
    # * total file-size v.s. some max
    # * explicit check chunk availability (as it stands, our processing is synchronous, so no need)
    # * skip-on-checksum-exists

    chunks = Chunk.objects.filter(checksum__in=chunk_checksums)
    chunk_by_checksum = {chunk.checksum: chunk for chunk in chunks}

    # fixed: the comprehension variable was named "checksum", shadowing the parameter
    # of the same name (harmless in Py3 comprehension scope, but needlessly confusing).
    chunks_in_order = [chunk_by_checksum[cs] for cs in chunk_checksums]  # implicitly checks chunk availability

    data = b"".join([chunk.data for chunk in chunks_in_order])
    if sha1(data).hexdigest() != checksum:
        raise Exception("checksum mismatch")

    return File.objects.get_or_create(
        checksum=checksum,
        defaults={
            "size": len(data),
            "data": data,
            "filename": filename,
        })

134
files/tests.py Normal file
View File

@@ -0,0 +1,134 @@
import json
import gzip
from io import BytesIO
import os
from glob import glob
from django.contrib.auth import get_user_model
from compat.dsn import get_header_value
from bugsink.test_utils import TransactionTestCase25251 as TransactionTestCase
from projects.models import Project, ProjectMembership
from events.models import Event
from bsmain.models import AuthToken
User = get_user_model()
class FilesTests(TransactionTestCase):
    # Integration-test of file-upload and does-it-render-sourcemaps

    def setUp(self):
        # superuser not needed: the upload API authenticates via AuthToken, not session
        super().setUp()
        self.user = User.objects.create_user(username='test', password='test')
        self.project = Project.objects.create()
        ProjectMembership.objects.create(project=self.project, user=self.user)
        self.client.force_login(self.user)
        self.auth_token = AuthToken.objects.create()
        self.token_headers = {"Authorization": f"Bearer {self.auth_token.token}"}

    def test_auth_no_header(self):
        response = self.client.get("/api/0/organizations/anyorg/chunk-upload/", headers={})
        self.assertEqual(401, response.status_code)
        self.assertEqual({"error": "Authorization header not found"}, response.json())

    def test_auth_empty_header(self):
        response = self.client.get("/api/0/organizations/anyorg/chunk-upload/", headers={"Authorization": ""})
        self.assertEqual(401, response.status_code)
        self.assertEqual({"error": "Authorization header not found"}, response.json())

    def test_auth_overfull_header(self):
        # more than two whitespace-separated parts in the header is rejected
        response = self.client.get("/api/0/organizations/anyorg/chunk-upload/", headers={"Authorization": "Bearer a b"})
        self.assertEqual(401, response.status_code)
        self.assertEqual({"error": "Expecting 'Authorization: Token abc123...' but got 'Bearer a b'"}, response.json())

    def test_auth_wrong_token(self):
        response = self.client.get("/api/0/organizations/anyorg/chunk-upload/", headers={"Authorization": "Bearer xxx"})
        self.assertEqual(401, response.status_code)
        self.assertEqual({"error": "Invalid token"}, response.json())

    def test_assemble_artifact_bundle(self):
        # end-to-end: chunk-upload -> assemble -> ingest a sample event -> render its stacktrace page
        SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples")
        event_samples = [SAMPLES_DIR + fn for fn in ["/bugsink/uglifyjs-minified-sourcemaps-in-bundle.json"]]

        artifact_bundles = glob(SAMPLES_DIR + "/*/artifact_bundles/*.zip")
        if len(artifact_bundles) == 0:
            raise Exception(f"No artifact bundles found in {SAMPLES_DIR}; I insist on having some to test with.")

        for filename in artifact_bundles:
            with open(filename, 'rb') as f:
                data = f.read()

            # bundle files are named <sha1>.zip, so the basename carries the checksum
            checksum = os.path.basename(filename).split(".")[0]

            gzipped_file = BytesIO(gzip.compress(data))
            gzipped_file.name = checksum

            # 1. chunk-upload
            response = self.client.post(
                "/api/0/organizations/anyorg/chunk-upload/",
                data={"file_gzip": gzipped_file},
                headers=self.token_headers,
            )
            self.assertEqual(
                200, response.status_code, "Error in %s: %s" % (
                    filename, response.content if response.status_code != 302 else response.url))

            # 2. artifactbundle/assemble
            data = {
                "checksum": checksum,
                "chunks": [
                    checksum,  # single-chunk upload, so this works
                ],
                "projects": [
                    "unused_for_now"
                ]
            }
            response = self.client.post(
                "/api/0/organizations/anyorg/artifactbundle/assemble/",
                json.dumps(data),
                content_type="application/json",
                headers=self.token_headers,
            )
            self.assertEqual(
                200, response.status_code, "Error in %s: %s" % (
                    filename, response.content if response.status_code != 302 else response.url))

        sentry_auth_header = get_header_value(f"http://{ self.project.sentry_key }@hostisignored/{ self.project.id }")

        for filename in event_samples:
            # minimal assertions on correctness in this loop; this will be caught by our general sample-testing
            with open(filename) as f:
                data = json.loads(f.read())

            response = self.client.post(
                f"/api/{ self.project.id }/store/",
                json.dumps(data),
                content_type="application/json",
                headers={
                    "X-Sentry-Auth": sentry_auth_header,
                },
            )
            self.assertEqual(
                200, response.status_code, "Error in %s: %s" % (
                    filename, response.content if response.status_code != 302 else response.url))

        for event in Event.objects.all():
            url = f'/issues/issue/{ event.issue.id }/event/{ event.id }/'
            try:
                # we just check for a 200; this at least makes sure we have no failing template rendering
                response = self.client.get(url)
                self.assertEqual(
                    200, response.status_code, response.content if response.status_code != 302 else response.url)

                # we could/should make this more general later; this is great for example nr.1:
                key_phrase = '<span class="font-bold">captureException</span> line <span class="font-bold">15</span>'
                self.assertTrue(key_phrase in response.content.decode('utf-8'))
            except Exception as e:
                # we want to know _which_ event failed, hence the raise-from-e here
                raise AssertionError("Error rendering event %s" % event.debug_info) from e

27
files/urls.py Normal file
View File

@@ -0,0 +1,27 @@
from django.urls import path
from django.urls import register_converter
from .views import download_file
def regex_converter(passed_regex):
    """Build a URL path-converter class matching *passed_regex*.

    Conversion in both directions is the identity (the matched string is passed
    through unchanged). Same trick as in issues/urls.py (copy/pasta).
    """
    class _Converter:
        regex = passed_regex

        @staticmethod
        def to_python(value):
            return value

        @staticmethod
        def to_url(value):
            return value

    return _Converter
# "sha1": exactly 40 lowercase hex chars — the checksum under which files are stored
register_converter(regex_converter("[0-9a-f]{40}"), "sha1")

urlpatterns = [
    path('downloads/<sha1:checksum>/', download_file, name='download_file'),
]

195
files/views.py Normal file
View File

@@ -0,0 +1,195 @@
import json
from functools import wraps
from gzip import GzipFile
from hashlib import sha1
from io import BytesIO
from django.http import JsonResponse, HttpResponse
from django.views.decorators.csrf import csrf_exempt
from django.contrib.auth.decorators import user_passes_test
from sentry.assemble import ChunkFileState
from bugsink.app_settings import get_settings
from bugsink.transaction import durable_atomic, immediate_atomic
from bsmain.models import AuthToken
from .models import Chunk, File
from .tasks import assemble_artifact_bundle
_KIBIBYTE = 1024
_MEBIBYTE = 1024 * _KIBIBYTE
class NamedBytesIO(BytesIO):
    """A BytesIO that, like a real uploaded file object, also carries a ``name``."""

    def __init__(self, data, name):
        self.name = name
        super().__init__(data)
def get_chunk_upload_settings(request, organization_slug):
    """Return the JSON settings dict that sentry-cli consults before uploading chunks."""
    # Sentry / Sentry-CLI has a whole bunch of logic surrounding URLs, which I do not understand and which presumably
    # doesn't make it past Bugsink's cost/benefit-analysis of feature-completeness. For now, we just return our own URL
    # which seems to "just work". If we ever want to go down this path :
    #
    # https://github.com/getsentry/sentry/pull/7095/files <= upload-url-prefix: introduced, but rationale not explained
    #
    # 2 more starting points for the whole "relative" idea
    # * https://github.com/getsentry/sentry-cli/issues/839
    # * https://github.com/getsentry/sentry/pull/29347
    url = get_settings().BASE_URL + "/api/0/organizations/" + organization_slug + "/chunk-upload/"

    # Our "chunk_upload" is chunked in name only; i.e. we only "speak chunked" for the purpose of API-compatibility with
    # sentry-cli, but we provide params here such that that cli will only send a single chunk.
    return JsonResponse({
        "url": url,

        # For now, staying close to the default MAX_ENVELOPE_COMPRESSED_SIZE, which is 20MiB;
        # I _think_ I saw a note somewhere on (one of) these values having to be a power of 2; hence 32 here.
        #
        # When implementing uploading, it was done to support sourcemaps. It seems that over at Sentry, the reason they
        # went so complicated in the first place was to enable DIF support (hundreds of MiB regularly).
        "chunkSize": 32 * _MEBIBYTE,
        "maxRequestSize": 32 * _MEBIBYTE,

        # I didn't check the supposed relationship between maxRequestSize and maxFileSize, but assume something similar
        # to what happens w/ envelopes; hence harmonizing with MAX_ENVELOPE_SIZE (and rounding up to a power of 2) here
        "maxFileSize": 128 * _MEBIBYTE,

        # force single-chunk by setting these to 1.
        "concurrency": 1,
        "chunksPerRequest": 1,

        "hashAlgorithm": "sha1",
        "compression": ["gzip"],
        "accept": [
            # I don't claim to fully understand how the sentry-cli switches based on these advertised capabilities, but
            # the list below works for now. Any understanding that I did gain is documented.
            # for a full list of types we _could_ accept, see src/sentry/api/endpoints/chunk.py
            #
            # If the below is off, sentry-cli complains "A release slug is required". Because release-less artifacts are
            # actually the simpler thing, that's undesirable. Other consequences of turning it on have not been charted
            # yet.
            "release_files",

            # this would seem to be the "javascript sourcemaps" thing, but how exactly I did not check yet.
            "sources",

            # https://github.com/getsentry/sentry/discussions/46967
            # artifact_bundles is a concept originating from sentry that uses debug_ids to link maps & sources. Despite
            # it being relatively new, it's my _first_ target for getting sourcemaps to work, because it's actually the
            # most simple and reliable thing (uuid, bidirectional mapping)
            "artifact_bundles",

            # AFAIU the only thing _v2 would signify is the ability to "Implement de-duplication with chunking in the
            # assemble endpoint for artifact bundles (#51224)". Which is needlessly complex from my point of view.
            # "artifact_bundles_v2",

            # the rest of the options are below:
            # "debug_files",
            # "release_files",
            # "pdbs",
            # "bcsymbolmaps",
            # "il2cpp",
            # "portablepdbs",
            # "artifact_bundles",
            # "proguard",
        ]
    })
def requires_auth_token(view_function):
    """Decorator: reject requests without a valid AuthToken via a JSON 401 response.

    {"error": "..."} (status=401) responses are API-compatible; for that to work we need the present function to be a
    decorator (so we can return, rather than raise, which plain-Django doesn't support for 401).
    """
    @wraps(view_function)  # fixed: the original only copied __name__, losing __doc__/__module__ etc.
    def first_require_auth_token(request, *args, **kwargs):
        header_value = request.META.get("HTTP_AUTHORIZATION")
        if not header_value:
            return JsonResponse({"error": "Authorization header not found"}, status=401)

        header_values = header_value.split()
        if len(header_values) != 2:
            return JsonResponse(
                {"error": "Expecting 'Authorization: Token abc123...' but got '%s'" % header_value}, status=401)

        # the scheme word ("Bearer"/"Token") is deliberately not validated
        _the_word_bearer, token = header_values

        # .exists() instead of .count() < 1: cheaper query, states the intent directly
        if not AuthToken.objects.filter(token=token).exists():
            return JsonResponse({"error": "Invalid token"}, status=401)

        return view_function(request, *args, **kwargs)

    return first_require_auth_token
@csrf_exempt
@requires_auth_token
def chunk_upload(request, organization_slug):
    """GET: return upload settings for sentry-cli; POST: store uploaded chunks.

    Bugsink has a single-organization model; we simply ignore organization_slug.
    """
    # NOTE: we don't check against chunkSize, maxRequestSize and chunksPerRequest (yet), we expect the CLI to behave.
    if request.method == "GET":
        # a GET at this endpoint returns a dict of settings that the CLI takes into account when uploading
        return get_chunk_upload_settings(request, organization_slug)

    # POST: upload (full-size) "chunks" and store them as Chunk objects; file.name should be the sha1 of the content.
    chunks = []
    if request.FILES:
        chunks = request.FILES.getlist("file")

    # NOTE: we read the whole unzipped file into memory; we _could_ take an approach like bugsink/streams.py.
    # (Note that, because of the auth layer in front, we're slightly less worried about adversarial scenarios)
    chunks += [
        NamedBytesIO(GzipFile(fileobj=file_gzip, mode="rb").read(), name=file_gzip.name)
        for file_gzip in request.FILES.getlist("file_gzip")]

    for chunk in chunks:
        data = chunk.getvalue()
        # file.name carries the claimed sha1; a mismatch means a corrupt upload
        if sha1(data).hexdigest() != chunk.name:
            raise Exception("checksum mismatch")

        with immediate_atomic():  # a snug fit around the only DB-writing thing we do here to ensure minimal blocking
            _, _ = Chunk.objects.get_or_create(
                checksum=chunk.name,
                defaults={
                    "size": len(data),
                    "data": data,  # NOTE: further possible optimization: don't even read the file when already existing
                })

    return HttpResponse()
@csrf_exempt  # we're in API context here; this could potentially be pulled up to a higher level though
@requires_auth_token
def artifact_bundle_assemble(request, organization_slug):
    """Kick off assembly of an uploaded artifact bundle from its (already stored) chunks.

    Bugsink has a single-organization model; we simply ignore organization_slug.
    """
    # NOTE a JSON-schema for this endpoint is available under Apache 2 license (2 year anniversary rule) at
    # https://github.com/getsentry/sentry/blob/8df7543848b4/src/sentry/api/endpoints/organization_artifactbundle_assemble.py#L24
    # (not worth the trouble of extracting right now, since our /sentry dir contains BSD-3 licensed code (2019 version)
    data = json.loads(request.body)

    assemble_artifact_bundle.delay(data["checksum"], data["chunks"])

    # NOTE sentry & glitchtip _always_ return an empty list for "missingChunks" in this view; I don't really understand
    # what's being achieved with that, but it seems to be the expected behavior. Working hypothesis: this was introduced
    # for DIF uploads, and the present endpoint doesn't use it at all. Not even for "v2", surprisingly.
    # In the ALWAYS_EAGER setup, we process the bundle inline, so arguably we could return "OK" here too; "CREATED" is
    # what sentry returns though, so for faithful mimicking it's the safest bet.
    return JsonResponse({"state": ChunkFileState.CREATED, "missingChunks": []})
@user_passes_test(lambda u: u.is_superuser)
@durable_atomic
def download_file(request, checksum):
    """Serve the File with the given sha1 checksum as an attachment (superuser only)."""
    try:
        file = File.objects.get(checksum=checksum)
    except File.DoesNotExist:
        # fixed: an unknown checksum used to bubble up as an unhandled exception (500);
        # a wrong URL is a client error, so answer 404 instead.
        return HttpResponse("File not found", status=404, content_type="text/plain")

    response = HttpResponse(file.data, content_type="application/octet-stream")
    response["Content-Disposition"] = f"attachment; filename={file.filename}"
    return response

View File

@@ -419,6 +419,10 @@ class ViewTests(TransactionTestCase):
response = self.client.get(f"/issues/issue/{self.issue.id}/tags/")
self.assertContains(response, self.issue.title())
def test_issue_grouping(self):
    # smoke test: the grouping tab renders (200) and mentions the issue title
    response = self.client.get(f"/issues/issue/{self.issue.id}/grouping/")
    self.assertContains(response, self.issue.title())
def test_issue_history(self):
    # smoke test: the history tab renders (200) and mentions the issue title
    response = self.client.get(f"/issues/issue/{self.issue.id}/history/")
    self.assertContains(response, self.issue.title())

View File

@@ -1,6 +1,7 @@
from collections import namedtuple
import json
import sentry_sdk
import logging
from django.db.models import Q
from django.utils import timezone
@@ -13,8 +14,10 @@ from django.core.exceptions import PermissionDenied
from django.http import Http404
from django.core.paginator import Paginator, Page
from django.db.utils import OperationalError
from django.conf import settings
from sentry.utils.safe import get_path
from sentry_sdk_extensions import capture_or_log_exception
from bugsink.decorators import project_membership_required, issue_membership_required, atomic_for_request_method
from bugsink.transaction import durable_atomic
@@ -31,7 +34,9 @@ from tags.search import search_issues, search_events, search_events_optimized
from .models import Issue, IssueQuerysetStateManager, IssueStateManager, TurningPoint, TurningPointKind
from .forms import CommentForm
from .utils import get_values, get_main_exception
from events.utils import annotate_with_meta
from events.utils import annotate_with_meta, apply_sourcemaps
logger = logging.getLogger("bugsink.issues")
MuteOption = namedtuple("MuteOption", ["for_or_until", "period_name", "nr_of_periods", "gte_threshold"])
@@ -401,6 +406,16 @@ def issue_event_stacktrace(request, issue, event_pk=None, digest_order=None, nav
# swallow the error in that case.
sentry_sdk.capture_exception(e)
try:
apply_sourcemaps(parsed_data)
except Exception as e:
if settings.DEBUG or settings.I_AM_RUNNING == "TEST":
# when developing/testing, I _do_ want to get notified
raise
# sourcemaps are still experimental; we don't want to fail on them, so we just log the error and move on.
capture_or_log_exception(e, logger)
# NOTE: I considered making this a clickable button of some sort, but decided against it in the end. Getting the UI
# right is quite hard (https://ux.stackexchange.com/questions/1318) but more generally I would assume that having
# your whole screen turned upside down is not something you do willy-nilly. Better to just have good defaults and

View File

@@ -46,6 +46,7 @@ include = [
"ee*",
"ingest*",
"issues*",
"files*",
"performance*",
"phonehome*",
"projects*",

View File

@@ -15,3 +15,4 @@ monofy==1.1.*
user-agents==2.2.*
fastjsonschema==2.21.*
verbose_csrf_middleware==1.0.*
sourcemap==0.2.*

16
sentry/assemble.py Normal file
View File

@@ -0,0 +1,16 @@
# from src/sentry/tasks/assemble.py
def enum(**named_values):
    """Create an ad-hoc enum-like class whose attributes are the given keyword values."""
    return type("Enum", (), dict(named_values))


# Lifecycle states reported by the chunk-upload/assemble endpoints (values mirror sentry's).
ChunkFileState = enum(
    OK="ok",                  # File in database
    NOT_FOUND="not_found",    # File not found in database
    CREATED="created",        # File was created in the request and sent to the worker for assembling
    ASSEMBLING="assembling",  # File still being processed by worker
    ERROR="error",            # Error happened during assembling
)

View File

@@ -1,3 +1,4 @@
import traceback
import types
from sentry_sdk.utils import current_stacktrace
@@ -111,3 +112,19 @@ def capture_stacktrace(message):
}
}
sentry_sdk.capture_event(event)
def capture_or_log_exception(e, logger):
    """Report *e* to Sentry when the SDK is initialized; otherwise log its full traceback.

    Never raises: any failure inside the reporting itself is printed and swallowed,
    so error-handling code cannot become the source of a new error.
    """
    try:
        if sentry_sdk.is_initialized():
            sentry_sdk.capture_exception(e)
        else:
            # Pass (type, value, tb) explicitly: the single-argument form of
            # traceback.format_exception() only exists on Python 3.10+; this form
            # produces identical output and works on older versions too.
            # this gnarly line-by-line approach makes it so that each line of the traceback gets the
            # same prefixes (dates etc)
            for chunk in traceback.format_exception(type(e), e, e.__traceback__):
                for line in chunk.splitlines():
                    # Note: when .is_initialized() is True, .error is spammy (it gets captured) but we
                    # don't have that problem in this branch.
                    logger.error(line)
    except Exception as e2:
        # We just never want our error-handling code to be the cause of an error.
        print("Error in capture_or_log_exception", str(e2), "during handling of", str(e))

View File

@@ -10,11 +10,12 @@ import time
import signal
import threading
from inotify_simple import INotify, flags
from sentry_sdk import capture_exception
import sentry_sdk
from django.conf import settings
from django.db import connections
from sentry_sdk_extensions import capture_or_log_exception
from performance.context_managers import time_to_logger
from bugsink.transaction import durable_atomic
@@ -180,9 +181,11 @@ class Foreman:
function(*inner_args, **inner_kwargs)
except Exception as e:
# Potential TODO: make this configurable / depend on our existing config in bugsink/settings.py
logger.warning("Snappea caught Exception: %s", str(e))
capture_exception(e)
if sentry_sdk.is_initialized():
# Only for the case where full error is captured to Dogfooded Bugsink, do we want to draw some
# attention to this; in the other case the big error in the logs (full traceback) is clear enough.
logger.warning("Snappea caught Exception: %s", str(e))
capture_or_log_exception(e, logger)
finally:
# equivalent to the below, but slightly more general (and thus more future-proof). In both cases nothing
# happens with already-closed/never opened connections):
@@ -324,7 +327,7 @@ class Foreman:
logger.error('Create workers: can\'t execute "%s": %s', task.task_name, e)
with time_to_logger(performance_logger, "Snappea delete Task"):
task.delete() # we delete the task because we can't do anything with it, and we don't want to hang
capture_exception(e)
capture_or_log_exception(e, logger)
self.worker_semaphore.release()
continue

View File

@@ -1,4 +1,4 @@
{% extends "base.html" %}
{% extends "bare_base.html" %}
{% comment %}
Note on security: the basic principle is "the more you expose, the more an attacker knows."
In this case, I think it should be fine, because all we expose here is stuff about mismatched hosts, and:

View File

@@ -33,6 +33,7 @@
{% if user.is_superuser %}
<a href="/users/"><div class="px-4 py-2 my-2 hover:bg-slate-300 rounded-xl">Users</div></a>
<a href="/bsmain/auth_tokens/"><div class="px-4 py-2 my-2 hover:bg-slate-300 rounded-xl">Tokens</div></a>
{% endif %}
{% if logged_in_user.is_anonymous %}