denormalize/cache last_frame_* and transaction on Event and Issue

for performance, but also fixes:

* not just the 'last frame' but the 'last relevant frame' (in-app)
* truncation is properly done (matching the DB size, and for each of the fields)
This commit is contained in:
Klaas van Schelven
2024-04-10 09:12:12 +02:00
parent ba6b158848
commit 4dfefec468
9 changed files with 163 additions and 40 deletions

View File

@@ -0,0 +1,26 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('events', '0016_alter_event_unique_together'),
]
operations = [
migrations.AddField(
model_name='event',
name='last_frame_filename',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AddField(
model_name='event',
name='last_frame_function',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AddField(
model_name='event',
name='last_frame_module',
field=models.CharField(blank=True, default='', max_length=255),
),
]

View File

@@ -0,0 +1,39 @@
# Generated by Django 4.2.11 on 2024-04-09 19:54
import json
from django.db import migrations
from issues.utils import get_denormalized_fields_for_data
def fill_denormalized_fields(apps, schema_editor):
# This function does both events and issues; we don't care about the fact that this is "formally incorrect" as long
# as we can keep developing for now.
Event = apps.get_model('events', 'Event')
for event in Event.objects.all():
denormalized_fields = get_denormalized_fields_for_data(json.loads(event.data))
for k, v in denormalized_fields.items():
setattr(event, k, v)
event.save()
# inefficient, because we do each issue multiple times, but who cares, this is just to have something "for now"
issue = event.issue
for k, v in denormalized_fields.items():
setattr(issue, k, v)
issue.save()
class Migration(migrations.Migration):
dependencies = [
('events', '0017_event_last_frame_filename_event_last_frame_function_and_more'),
('ingest', '0003_decompressedevent_debug_info'),
('issues', '0021_issue_last_frame_filename_issue_last_frame_function_and_more'),
('projects', '0008_set_project_slugs'),
('releases', '0003_alter_release_version'),
]
operations = [
migrations.RunPython(fill_denormalized_fields),
]

View File

@@ -139,6 +139,10 @@ class Event(models.Model):
# denormalized/cached fields:
calculated_type = models.CharField(max_length=255, blank=True, null=False, default="")
calculated_value = models.CharField(max_length=255, blank=True, null=False, default="")
# transaction = models.CharField(max_length=200, blank=True, null=False, default="") defined first-class above
last_frame_filename = models.CharField(max_length=255, blank=True, null=False, default="")
last_frame_module = models.CharField(max_length=255, blank=True, null=False, default="")
last_frame_function = models.CharField(max_length=255, blank=True, null=False, default="")
# 1-based, because this is for human consumption only, and using 0-based internally when we don't actually do
# anything with this value other than showing it to humans is super-confusing. Sorry Dijkstra!
@@ -165,7 +169,7 @@ class Event(models.Model):
return get_title_for_exception_type_and_value(self.calculated_type, self.calculated_value)
@classmethod
def from_ingested(cls, ingested_event, ingest_order, issue, parsed_data, calculated_type, calculated_value):
def from_ingested(cls, ingested_event, ingest_order, issue, parsed_data, denormalized_fields):
# 'from_ingested' may be a bit of a misnomer... the full 'from_ingested' is done in 'digest_event' in the views.
# below at least puts the parsed_data in the right place, and does some of the basic object set up (FKs to other
# objects etc).
@@ -184,7 +188,7 @@ class Event(models.Model):
'level': maybe_empty(parsed_data.get("level", "")),
'logger': maybe_empty(parsed_data.get("logger", "")),
'transaction': maybe_empty(parsed_data.get("transaction", "")),
# 'transaction': maybe_empty(parsed_data.get("transaction", "")), passed as part of denormalized_fields
'server_name': maybe_empty(parsed_data.get("server_name", "")),
'release': maybe_empty(parsed_data.get("release", "")),
@@ -200,9 +204,9 @@ class Event(models.Model):
'debug_info': ingested_event.debug_info,
'calculated_type': calculated_type,
'calculated_value': calculated_value,
'ingest_order': ingest_order,
**denormalized_fields,
}
)
return event, created

View File

@@ -3,7 +3,6 @@ import json # TODO consider faster APIs
from django.shortcuts import get_object_or_404
from django.conf import settings
from django.template.defaultfilters import truncatechars
from django.db.models import Max
from rest_framework import permissions, status
@@ -16,7 +15,7 @@ from compat.auth import parse_auth_header_value
from projects.models import Project
from issues.models import Issue, IssueStateManager, Grouping
from issues.utils import get_type_and_value_for_data, get_issue_grouper_for_data
from issues.utils import get_type_and_value_for_data, get_issue_grouper_for_data, get_denormalized_fields_for_data
from issues.regressions import issue_is_regression
import sentry_sdk_extensions
@@ -105,13 +104,22 @@ class BaseIngestAPIView(APIView):
# leave this at the top -- it may involve reading from the DB which should come before any DB writing
pc_registry = get_pc_registry()
# I resisted the temptation to put `get_denormalized_fields_for_data` in an if-statement: you basically "always"
# need this info... except when duplicate event-ids are sent. But the latter is the exception, and putting this
# in an if-statement would require more rework (and possibly extra queries) than it's worth.
denormalized_fields = get_denormalized_fields_for_data(event_data)
# the 3 lines below are suggestive of a further inlining of the get_type_and_value_for_data function
calculated_type, calculated_value = get_type_and_value_for_data(event_data)
denormalized_fields["calculated_type"] = calculated_type
denormalized_fields["calculated_value"] = calculated_value
grouping_key = get_issue_grouper_for_data(event_data, calculated_type, calculated_value)
if not Grouping.objects.filter(project=ingested_event.project, grouping_key=grouping_key).exists():
# we don't have Project.issue_count here ('premature optimization') so we just do an aggregate instead.
issue_ingest_order = Issue.objects.filter(project=ingested_event.project).aggregate(
Max("ingest_order"))["ingest_order__max"] + 1
max_current = Issue.objects.filter(project=ingested_event.project).aggregate(
Max("ingest_order"))["ingest_order__max"]
issue_ingest_order = max_current + 1 if max_current is not None else 1
issue = Issue.objects.create(
ingest_order=issue_ingest_order,
@@ -119,8 +127,7 @@ class BaseIngestAPIView(APIView):
first_seen=ingested_event.timestamp,
last_seen=ingested_event.timestamp,
event_count=1,
calculated_type=truncatechars(calculated_type, 255),
calculated_value=truncatechars(calculated_value, 255),
**denormalized_fields,
)
# even though in our data-model a given grouping does not imply a single Issue (in fact, that's the whole
# point of groupings as a data-model), at-creation such implication does exist, because manual information
@@ -146,8 +153,7 @@ class BaseIngestAPIView(APIView):
issue.event_count if issue_created else issue.event_count + 1,
issue,
event_data,
calculated_type,
calculated_value,
denormalized_fields,
)
if not event_created:
# note: previously we created the event before the issue, which allowed for one less query. I don't see

View File

@@ -0,0 +1,31 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('issues', '0020_alter_issue_unique_together'),
]
operations = [
migrations.AddField(
model_name='issue',
name='last_frame_filename',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AddField(
model_name='issue',
name='last_frame_function',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AddField(
model_name='issue',
name='last_frame_module',
field=models.CharField(blank=True, default='', max_length=255),
),
migrations.AddField(
model_name='issue',
name='transaction',
field=models.CharField(blank=True, default='', max_length=200),
),
]

View File

@@ -38,6 +38,10 @@ class Issue(models.Model):
event_count = models.IntegerField(blank=False, null=False)
calculated_type = models.CharField(max_length=255, blank=True, null=False, default="")
calculated_value = models.CharField(max_length=255, blank=True, null=False, default="")
transaction = models.CharField(max_length=200, blank=True, null=False, default="")
last_frame_filename = models.CharField(max_length=255, blank=True, null=False, default="")
last_frame_module = models.CharField(max_length=255, blank=True, null=False, default="")
last_frame_function = models.CharField(max_length=255, blank=True, null=False, default="")
# fields related to resolution:
# what does this mean for the release-based use cases? it means what you filter on.
@@ -53,37 +57,20 @@ class Issue(models.Model):
unmute_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string
unmute_after = models.DateTimeField(blank=True, null=True)
def save(self, *args, **kwargs):
if self.ingest_order is None:
# testing-only; in production this should never happen and instead have been done in the ingest view.
max_current = self.ingest_order = Issue.objects.filter(project=self.project).aggregate(
models.Max("ingest_order"))["ingest_order__max"]
self.ingest_order = max_current + 1 if max_current is not None else 1
super().save(*args, **kwargs)
def friendly_id(self):
return f"{ self.project.slug.upper() }-{ self.ingest_order }"
def get_absolute_url(self):
return f"/issues/issue/{ self.id }/event/last/"
def parsed_data(self):
# TEMP solution; won't scale
return json.loads(self.event_set.first().data)
def get_main_exception(self):
# TODO: refactor (its usages) to a (filled-on-create) field
# Note: first event, last exception
# We call the last exception in the chain the main exception because it's the one you're most likely to care
# about. I'd roughly distinguish 2 cases for reraising:
#
# 1. intentionally rephrasing/retyping exceptions to more clearly express their meaning. In that case you
# certainly care more about the rephrased thing than the original, that's the whole point.
#
# 2. actual "accidents" happening while error-handling. In that case you care about the accident first (bugsink
# is a system to help you think about cases that you didn't properly think about in the first place),
# although you may also care about the root cause. (In fact, sometimes you care more about the root cause,
# but I'd say you'll have to yak-shave your way there).
parsed_data = json.loads(self.event_set.first().data)
exc = parsed_data.get("exception", {"values": []})
values = exc["values"] # required by the json spec, so can be done safely
return values[-1] if values else {}
def title(self):
return get_title_for_exception_type_and_value(self.calculated_type, self.calculated_value)

View File

@@ -93,7 +93,7 @@
<h1 class="text-4xl font-bold text-ellipsis whitespace-nowrap overflow-hidden pb-1 {# needed for descenders of 'g' #}">{{ issue.calculated_type }}</h1>
<div class="text-xl text-ellipsis whitespace-nowrap overflow-hidden">{{ issue.calculated_value }}</div>
{% if parsed_data.request %}<div class="italic mt-4">{{ parsed_data.request.method }} {{ parsed_data.request.url }}</div>{% endif %}
<div class="text-ellipsis whitespace-nowrap overflow-hidden">{% with issue.get_main_exception.stacktrace.frames|last as last_frame %}<span class="font-bold">{% if last_frame.module %}{{ last_frame.module}}{% else %}{{ last_frame.filename }}{% endif %}</span>{% if last_frame.function %} in <span class="font-bold">{{ last_frame.function }}</span>{% endif %}{% endwith %}</div>
<div class="text-ellipsis whitespace-nowrap overflow-hidden"><span class="font-bold">{% if issue.last_frame_module %}{{ issue.last_frame_module}}{% else %}{{ issue.last_frame_filename }}{% endif %}</span>{% if issue.last_frame_function %} in <span class="font-bold">{{ issue.last_frame_function }}</span>{% endif %}</div>
</div> {# top, LHS (various texts) #}
</div>

View File

@@ -154,7 +154,7 @@ https://flowbite.com/docs/forms/floating-label/
</svg>&nbsp;&nbsp;{% endif %}{{ issue.title|truncatechars:100 }}</a>
</div>
<div class="text-sm">from <b>{{ issue.first_seen|date:"j M G:i" }}</b> | last <b>{{ issue.last_seen|date:"j M G:i" }}</b> | with <b>{{ issue.event_count }}</b> events
{% if issue.parsed_data.transaction %}| <span class="font-bold">{{ issue.parsed_data.transaction }} </span>{% endif %}
{% if issue.transaction %}| <span class="font-bold">{{ issue.transaction }} </span>{% endif %}
</div>
</td>

View File

@@ -7,6 +7,11 @@ from sentry.utils.safe import get_path, trim
from sentry.utils.strings import strip
def maybe_empty(s):
# TODO deduplicate this with events/models.py
return "" if not s else s
def get_type_and_value_for_data(data):
if "exception" in data and data["exception"]:
return get_exception_type_and_value_for_exception(data)
@@ -41,7 +46,17 @@ def get_exception_type_and_value_for_exception(data):
if len(data["exception"]) == 0:
return "<unknown>", ""
exception = get_path(data, "exception", "values", -1)
# We use the last exception in the chain because it's the one you're most likely to care about. I'd roughly
# distinguish 2 cases for reraising:
#
# 1. intentionally rephrasing/retyping exceptions to more clearly express their meaning. In that case you
# certainly care more about the rephrased thing than the original, that's the whole point.
#
# 2. actual "accidents" happening while error-handling. In that case you care about the accident first (bugsink
# is a system to help you think about cases that you didn't properly think about in the first place),
# although you may also care about the root cause. (In fact, sometimes you care more about the root cause,
# but I'd say you'll have to yak-shave your way there).
exception = get_path(data, "exception", "values", -1) # .values is required by the json spec, so can be done safely
if not exception:
return "<unknown>", ""
@@ -94,6 +109,21 @@ def get_title_for_exception_type_and_value(type_, value):
return "{}: {}".format(type_, value.splitlines()[0])
def get_denormalized_fields_for_data(parsed_data):
last_frame = get_crash_frame_from_event_data(parsed_data) or {}
module = maybe_empty(last_frame.get("module", ""))
function = maybe_empty(get_function_name_for_frame(last_frame, parsed_data.get("platform")))
filename = maybe_empty(last_frame.get("filename", ""))
return {
"transaction": maybe_empty(parsed_data.get("transaction", ""))[:200],
"last_frame_filename": filename[:255],
"last_frame_module": module[:255],
"last_frame_function": function[:255],
}
# utilities related to storing and retrieving release-versions; we use the fact that sentry (and we've adopted their
# limitation) disallows the use of newlines in release-versions, so we can use newlines as a separator