mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-10 08:01:17 +00:00
denormalize/cache last_frame_* and transaction on Event and Issue
for performance, but also fixes: * not just the 'last frame' but the 'last relevant frame' (in-app) * truncation is properly done (matching the DB size, and for each of the fields)
This commit is contained in:
@@ -0,0 +1,26 @@
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('events', '0016_alter_event_unique_together'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='event',
|
||||
name='last_frame_filename',
|
||||
field=models.CharField(blank=True, default='', max_length=255),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='event',
|
||||
name='last_frame_function',
|
||||
field=models.CharField(blank=True, default='', max_length=255),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='event',
|
||||
name='last_frame_module',
|
||||
field=models.CharField(blank=True, default='', max_length=255),
|
||||
),
|
||||
]
|
||||
39
events/migrations/0018_fill_denormalized_fields.py
Normal file
39
events/migrations/0018_fill_denormalized_fields.py
Normal file
@@ -0,0 +1,39 @@
|
||||
# Generated by Django 4.2.11 on 2024-04-09 19:54
|
||||
|
||||
import json
|
||||
from django.db import migrations
|
||||
from issues.utils import get_denormalized_fields_for_data
|
||||
|
||||
|
||||
def fill_denormalized_fields(apps, schema_editor):
|
||||
# This function does both events and issues; we don't care about the fact that this is "formally incorrect" as long
|
||||
# as we can keep developing for now.
|
||||
|
||||
Event = apps.get_model('events', 'Event')
|
||||
|
||||
for event in Event.objects.all():
|
||||
denormalized_fields = get_denormalized_fields_for_data(json.loads(event.data))
|
||||
for k, v in denormalized_fields.items():
|
||||
setattr(event, k, v)
|
||||
event.save()
|
||||
|
||||
# inefficient, because we do each issue multiple times, but who cares, this is just to have something "for now"
|
||||
issue = event.issue
|
||||
for k, v in denormalized_fields.items():
|
||||
setattr(issue, k, v)
|
||||
issue.save()
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('events', '0017_event_last_frame_filename_event_last_frame_function_and_more'),
|
||||
('ingest', '0003_decompressedevent_debug_info'),
|
||||
('issues', '0021_issue_last_frame_filename_issue_last_frame_function_and_more'),
|
||||
('projects', '0008_set_project_slugs'),
|
||||
('releases', '0003_alter_release_version'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.RunPython(fill_denormalized_fields),
|
||||
]
|
||||
@@ -139,6 +139,10 @@ class Event(models.Model):
|
||||
# denormalized/cached fields:
|
||||
calculated_type = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
calculated_value = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
# transaction = models.CharField(max_length=200, blank=True, null=False, default="") defined first-class above
|
||||
last_frame_filename = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
last_frame_module = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
last_frame_function = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
|
||||
# 1-based, because this is for human consumption only, and using 0-based internally when we don't actually do
|
||||
# anything with this value other than showing it to humans is super-confusing. Sorry Dijkstra!
|
||||
@@ -165,7 +169,7 @@ class Event(models.Model):
|
||||
return get_title_for_exception_type_and_value(self.calculated_type, self.calculated_value)
|
||||
|
||||
@classmethod
|
||||
def from_ingested(cls, ingested_event, ingest_order, issue, parsed_data, calculated_type, calculated_value):
|
||||
def from_ingested(cls, ingested_event, ingest_order, issue, parsed_data, denormalized_fields):
|
||||
# 'from_ingested' may be a bit of a misnomer... the full 'from_ingested' is done in 'digest_event' in the views.
|
||||
# below at least puts the parsed_data in the right place, and does some of the basic object set up (FKs to other
|
||||
# objects etc).
|
||||
@@ -184,7 +188,7 @@ class Event(models.Model):
|
||||
|
||||
'level': maybe_empty(parsed_data.get("level", "")),
|
||||
'logger': maybe_empty(parsed_data.get("logger", "")),
|
||||
'transaction': maybe_empty(parsed_data.get("transaction", "")),
|
||||
# 'transaction': maybe_empty(parsed_data.get("transaction", "")), passed as part of denormalized_fields
|
||||
|
||||
'server_name': maybe_empty(parsed_data.get("server_name", "")),
|
||||
'release': maybe_empty(parsed_data.get("release", "")),
|
||||
@@ -200,9 +204,9 @@ class Event(models.Model):
|
||||
|
||||
'debug_info': ingested_event.debug_info,
|
||||
|
||||
'calculated_type': calculated_type,
|
||||
'calculated_value': calculated_value,
|
||||
'ingest_order': ingest_order,
|
||||
|
||||
**denormalized_fields,
|
||||
}
|
||||
)
|
||||
return event, created
|
||||
|
||||
@@ -3,7 +3,6 @@ import json # TODO consider faster APIs
|
||||
|
||||
from django.shortcuts import get_object_or_404
|
||||
from django.conf import settings
|
||||
from django.template.defaultfilters import truncatechars
|
||||
from django.db.models import Max
|
||||
|
||||
from rest_framework import permissions, status
|
||||
@@ -16,7 +15,7 @@ from compat.auth import parse_auth_header_value
|
||||
|
||||
from projects.models import Project
|
||||
from issues.models import Issue, IssueStateManager, Grouping
|
||||
from issues.utils import get_type_and_value_for_data, get_issue_grouper_for_data
|
||||
from issues.utils import get_type_and_value_for_data, get_issue_grouper_for_data, get_denormalized_fields_for_data
|
||||
from issues.regressions import issue_is_regression
|
||||
|
||||
import sentry_sdk_extensions
|
||||
@@ -105,13 +104,22 @@ class BaseIngestAPIView(APIView):
|
||||
# leave this at the top -- it may involve reading from the DB which should come before any DB writing
|
||||
pc_registry = get_pc_registry()
|
||||
|
||||
# I resisted the temptation to put `get_denormalized_fields_for_data` in an if-statement: you basically "always"
|
||||
# need this info... except when duplicate event-ids are sent. But the latter is the exception, and putting this
|
||||
# in an if-statement would require more rework (and possibly extra queries) than it's worth.
|
||||
denormalized_fields = get_denormalized_fields_for_data(event_data)
|
||||
# the 3 lines below are suggestive of a further inlining of the get_type_and_value_for_data function
|
||||
calculated_type, calculated_value = get_type_and_value_for_data(event_data)
|
||||
denormalized_fields["calculated_type"] = calculated_type
|
||||
denormalized_fields["calculated_value"] = calculated_value
|
||||
|
||||
grouping_key = get_issue_grouper_for_data(event_data, calculated_type, calculated_value)
|
||||
|
||||
if not Grouping.objects.filter(project=ingested_event.project, grouping_key=grouping_key).exists():
|
||||
# we don't have Project.issue_count here ('premature optimization') so we just do an aggregate instead.
|
||||
issue_ingest_order = Issue.objects.filter(project=ingested_event.project).aggregate(
|
||||
Max("ingest_order"))["ingest_order__max"] + 1
|
||||
max_current = Issue.objects.filter(project=ingested_event.project).aggregate(
|
||||
Max("ingest_order"))["ingest_order__max"]
|
||||
issue_ingest_order = max_current + 1 if max_current is not None else 1
|
||||
|
||||
issue = Issue.objects.create(
|
||||
ingest_order=issue_ingest_order,
|
||||
@@ -119,8 +127,7 @@ class BaseIngestAPIView(APIView):
|
||||
first_seen=ingested_event.timestamp,
|
||||
last_seen=ingested_event.timestamp,
|
||||
event_count=1,
|
||||
calculated_type=truncatechars(calculated_type, 255),
|
||||
calculated_value=truncatechars(calculated_value, 255),
|
||||
**denormalized_fields,
|
||||
)
|
||||
# even though in our data-model a given grouping does not imply a single Issue (in fact, that's the whole
|
||||
# point of groupings as a data-model), at-creation such implication does exist, because manual information
|
||||
@@ -146,8 +153,7 @@ class BaseIngestAPIView(APIView):
|
||||
issue.event_count if issue_created else issue.event_count + 1,
|
||||
issue,
|
||||
event_data,
|
||||
calculated_type,
|
||||
calculated_value,
|
||||
denormalized_fields,
|
||||
)
|
||||
if not event_created:
|
||||
# note: previously we created the event before the issue, which allowed for one less query. I don't see
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
('issues', '0020_alter_issue_unique_together'),
|
||||
]
|
||||
|
||||
operations = [
|
||||
migrations.AddField(
|
||||
model_name='issue',
|
||||
name='last_frame_filename',
|
||||
field=models.CharField(blank=True, default='', max_length=255),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='issue',
|
||||
name='last_frame_function',
|
||||
field=models.CharField(blank=True, default='', max_length=255),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='issue',
|
||||
name='last_frame_module',
|
||||
field=models.CharField(blank=True, default='', max_length=255),
|
||||
),
|
||||
migrations.AddField(
|
||||
model_name='issue',
|
||||
name='transaction',
|
||||
field=models.CharField(blank=True, default='', max_length=200),
|
||||
),
|
||||
]
|
||||
@@ -38,6 +38,10 @@ class Issue(models.Model):
|
||||
event_count = models.IntegerField(blank=False, null=False)
|
||||
calculated_type = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
calculated_value = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
transaction = models.CharField(max_length=200, blank=True, null=False, default="")
|
||||
last_frame_filename = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
last_frame_module = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
last_frame_function = models.CharField(max_length=255, blank=True, null=False, default="")
|
||||
|
||||
# fields related to resolution:
|
||||
# what does this mean for the release-based use cases? it means what you filter on.
|
||||
@@ -53,37 +57,20 @@ class Issue(models.Model):
|
||||
unmute_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string
|
||||
unmute_after = models.DateTimeField(blank=True, null=True)
|
||||
|
||||
def save(self, *args, **kwargs):
|
||||
if self.ingest_order is None:
|
||||
# testing-only; in production this should never happen and instead have been done in the ingest view.
|
||||
max_current = self.ingest_order = Issue.objects.filter(project=self.project).aggregate(
|
||||
models.Max("ingest_order"))["ingest_order__max"]
|
||||
self.ingest_order = max_current + 1 if max_current is not None else 1
|
||||
super().save(*args, **kwargs)
|
||||
|
||||
def friendly_id(self):
|
||||
return f"{ self.project.slug.upper() }-{ self.ingest_order }"
|
||||
|
||||
def get_absolute_url(self):
|
||||
return f"/issues/issue/{ self.id }/event/last/"
|
||||
|
||||
def parsed_data(self):
|
||||
# TEMP solution; won't scale
|
||||
return json.loads(self.event_set.first().data)
|
||||
|
||||
def get_main_exception(self):
|
||||
# TODO: refactor (its usages) to a (filled-on-create) field
|
||||
|
||||
# Note: first event, last exception
|
||||
|
||||
# We call the last exception in the chain the main exception because it's the one you're most likely to care
|
||||
# about. I'd roughly distinguish 2 cases for reraising:
|
||||
#
|
||||
# 1. intentionally rephrasing/retyping exceptions to more clearly express their meaning. In that case you
|
||||
# certainly care more about the rephrased thing than the original, that's the whole point.
|
||||
#
|
||||
# 2. actual "accidents" happening while error-handling. In that case you care about the accident first (bugsink
|
||||
# is a system to help you think about cases that you didn't properly think about in the first place),
|
||||
# although you may also care about the root cause. (In fact, sometimes you care more about the root cause,
|
||||
# but I'd say you'll have to yak-shave your way there).
|
||||
|
||||
parsed_data = json.loads(self.event_set.first().data)
|
||||
exc = parsed_data.get("exception", {"values": []})
|
||||
values = exc["values"] # required by the json spec, so can be done safely
|
||||
return values[-1] if values else {}
|
||||
|
||||
def title(self):
|
||||
return get_title_for_exception_type_and_value(self.calculated_type, self.calculated_value)
|
||||
|
||||
|
||||
@@ -93,7 +93,7 @@
|
||||
<h1 class="text-4xl font-bold text-ellipsis whitespace-nowrap overflow-hidden pb-1 {# needed for descenders of 'g' #}">{{ issue.calculated_type }}</h1>
|
||||
<div class="text-xl text-ellipsis whitespace-nowrap overflow-hidden">{{ issue.calculated_value }}</div>
|
||||
{% if parsed_data.request %}<div class="italic mt-4">{{ parsed_data.request.method }} {{ parsed_data.request.url }}</div>{% endif %}
|
||||
<div class="text-ellipsis whitespace-nowrap overflow-hidden">{% with issue.get_main_exception.stacktrace.frames|last as last_frame %}<span class="font-bold">{% if last_frame.module %}{{ last_frame.module}}{% else %}{{ last_frame.filename }}{% endif %}</span>{% if last_frame.function %} in <span class="font-bold">{{ last_frame.function }}</span>{% endif %}{% endwith %}</div>
|
||||
<div class="text-ellipsis whitespace-nowrap overflow-hidden"><span class="font-bold">{% if issue.last_frame_module %}{{ issue.last_frame_module}}{% else %}{{ issue.last_frame_filename }}{% endif %}</span>{% if issue.last_frame_function %} in <span class="font-bold">{{ issue.last_frame_function }}</span>{% endif %}</div>
|
||||
</div> {# top, LHS (various texts) #}
|
||||
|
||||
</div>
|
||||
|
||||
@@ -154,7 +154,7 @@ https://flowbite.com/docs/forms/floating-label/
|
||||
</svg> {% endif %}{{ issue.title|truncatechars:100 }}</a>
|
||||
</div>
|
||||
<div class="text-sm">from <b>{{ issue.first_seen|date:"j M G:i" }}</b> | last <b>{{ issue.last_seen|date:"j M G:i" }}</b> | with <b>{{ issue.event_count }}</b> events
|
||||
{% if issue.parsed_data.transaction %}| <span class="font-bold">{{ issue.parsed_data.transaction }} </span>{% endif %}
|
||||
{% if issue.transaction %}| <span class="font-bold">{{ issue.transaction }} </span>{% endif %}
|
||||
</div>
|
||||
</td>
|
||||
|
||||
|
||||
@@ -7,6 +7,11 @@ from sentry.utils.safe import get_path, trim
|
||||
from sentry.utils.strings import strip
|
||||
|
||||
|
||||
def maybe_empty(s):
|
||||
# TODO deduplicate this with events/models.py
|
||||
return "" if not s else s
|
||||
|
||||
|
||||
def get_type_and_value_for_data(data):
|
||||
if "exception" in data and data["exception"]:
|
||||
return get_exception_type_and_value_for_exception(data)
|
||||
@@ -41,7 +46,17 @@ def get_exception_type_and_value_for_exception(data):
|
||||
if len(data["exception"]) == 0:
|
||||
return "<unknown>", ""
|
||||
|
||||
exception = get_path(data, "exception", "values", -1)
|
||||
# We use the last exception in the chain because it's the one you're most likely to care about. I'd roughly
|
||||
# distinguish 2 cases for reraising:
|
||||
#
|
||||
# 1. intentionally rephrasing/retyping exceptions to more clearly express their meaning. In that case you
|
||||
# certainly care more about the rephrased thing than the original, that's the whole point.
|
||||
#
|
||||
# 2. actual "accidents" happening while error-handling. In that case you care about the accident first (bugsink
|
||||
# is a system to help you think about cases that you didn't properly think about in the first place),
|
||||
# although you may also care about the root cause. (In fact, sometimes you care more about the root cause,
|
||||
# but I'd say you'll have to yak-shave your way there).
|
||||
exception = get_path(data, "exception", "values", -1) # .values is required by the json spec, so can be done safely
|
||||
if not exception:
|
||||
return "<unknown>", ""
|
||||
|
||||
@@ -94,6 +109,21 @@ def get_title_for_exception_type_and_value(type_, value):
|
||||
return "{}: {}".format(type_, value.splitlines()[0])
|
||||
|
||||
|
||||
def get_denormalized_fields_for_data(parsed_data):
|
||||
last_frame = get_crash_frame_from_event_data(parsed_data) or {}
|
||||
|
||||
module = maybe_empty(last_frame.get("module", ""))
|
||||
function = maybe_empty(get_function_name_for_frame(last_frame, parsed_data.get("platform")))
|
||||
filename = maybe_empty(last_frame.get("filename", ""))
|
||||
|
||||
return {
|
||||
"transaction": maybe_empty(parsed_data.get("transaction", ""))[:200],
|
||||
"last_frame_filename": filename[:255],
|
||||
"last_frame_module": module[:255],
|
||||
"last_frame_function": function[:255],
|
||||
}
|
||||
|
||||
|
||||
# utilities related to storing and retrieving release-versions; we use the fact that sentry (and we've adopted their
|
||||
# limitation) disallows the use of newlines in release-versions, so we can use newlines as a separator
|
||||
|
||||
|
||||
Reference in New Issue
Block a user