diff --git a/events/migrations/0017_event_last_frame_filename_event_last_frame_function_and_more.py b/events/migrations/0017_event_last_frame_filename_event_last_frame_function_and_more.py new file mode 100644 index 0000000..37d32b0 --- /dev/null +++ b/events/migrations/0017_event_last_frame_filename_event_last_frame_function_and_more.py @@ -0,0 +1,26 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('events', '0016_alter_event_unique_together'), + ] + + operations = [ + migrations.AddField( + model_name='event', + name='last_frame_filename', + field=models.CharField(blank=True, default='', max_length=255), + ), + migrations.AddField( + model_name='event', + name='last_frame_function', + field=models.CharField(blank=True, default='', max_length=255), + ), + migrations.AddField( + model_name='event', + name='last_frame_module', + field=models.CharField(blank=True, default='', max_length=255), + ), + ] diff --git a/events/migrations/0018_fill_denormalized_fields.py b/events/migrations/0018_fill_denormalized_fields.py new file mode 100644 index 0000000..2985080 --- /dev/null +++ b/events/migrations/0018_fill_denormalized_fields.py @@ -0,0 +1,39 @@ +# Generated by Django 4.2.11 on 2024-04-09 19:54 + +import json +from django.db import migrations +from issues.utils import get_denormalized_fields_for_data + + +def fill_denormalized_fields(apps, schema_editor): + # This function does both events and issues; we don't care about the fact that this is "formally incorrect" as long + # as we can keep developing for now. 
+ + Event = apps.get_model('events', 'Event') + + for event in Event.objects.all(): + denormalized_fields = get_denormalized_fields_for_data(json.loads(event.data)) + for k, v in denormalized_fields.items(): + setattr(event, k, v) + event.save() + + # inefficient, because we do each issue multiple times, but who cares, this is just to have something "for now" + issue = event.issue + for k, v in denormalized_fields.items(): + setattr(issue, k, v) + issue.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ('events', '0017_event_last_frame_filename_event_last_frame_function_and_more'), + ('ingest', '0003_decompressedevent_debug_info'), + ('issues', '0021_issue_last_frame_filename_issue_last_frame_function_and_more'), + ('projects', '0008_set_project_slugs'), + ('releases', '0003_alter_release_version'), + ] + + operations = [ + migrations.RunPython(fill_denormalized_fields), + ] diff --git a/events/models.py b/events/models.py index 938f143..d8ac394 100644 --- a/events/models.py +++ b/events/models.py @@ -139,6 +139,10 @@ class Event(models.Model): # denormalized/cached fields: calculated_type = models.CharField(max_length=255, blank=True, null=False, default="") calculated_value = models.CharField(max_length=255, blank=True, null=False, default="") + # transaction = models.CharField(max_length=200, blank=True, null=False, default="") defined first-class above + last_frame_filename = models.CharField(max_length=255, blank=True, null=False, default="") + last_frame_module = models.CharField(max_length=255, blank=True, null=False, default="") + last_frame_function = models.CharField(max_length=255, blank=True, null=False, default="") # 1-based, because this is for human consumption only, and using 0-based internally when we don't actually do # anything with this value other than showing it to humans is super-confusing. Sorry Dijkstra! 
@@ -165,7 +169,7 @@ class Event(models.Model): return get_title_for_exception_type_and_value(self.calculated_type, self.calculated_value) @classmethod - def from_ingested(cls, ingested_event, ingest_order, issue, parsed_data, calculated_type, calculated_value): + def from_ingested(cls, ingested_event, ingest_order, issue, parsed_data, denormalized_fields): # 'from_ingested' may be a bit of a misnomer... the full 'from_ingested' is done in 'digest_event' in the views. # below at least puts the parsed_data in the right place, and does some of the basic object set up (FKs to other # objects etc). @@ -184,7 +188,7 @@ class Event(models.Model): 'level': maybe_empty(parsed_data.get("level", "")), 'logger': maybe_empty(parsed_data.get("logger", "")), - 'transaction': maybe_empty(parsed_data.get("transaction", "")), + # 'transaction': maybe_empty(parsed_data.get("transaction", "")), passed as part of denormalized_fields 'server_name': maybe_empty(parsed_data.get("server_name", "")), 'release': maybe_empty(parsed_data.get("release", "")), @@ -200,9 +204,9 @@ class Event(models.Model): 'debug_info': ingested_event.debug_info, - 'calculated_type': calculated_type, - 'calculated_value': calculated_value, 'ingest_order': ingest_order, + + **denormalized_fields, } ) return event, created diff --git a/ingest/views.py b/ingest/views.py index 1d09f8d..8af4e61 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -3,7 +3,6 @@ import json # TODO consider faster APIs from django.shortcuts import get_object_or_404 from django.conf import settings -from django.template.defaultfilters import truncatechars from django.db.models import Max from rest_framework import permissions, status @@ -16,7 +15,7 @@ from compat.auth import parse_auth_header_value from projects.models import Project from issues.models import Issue, IssueStateManager, Grouping -from issues.utils import get_type_and_value_for_data, get_issue_grouper_for_data +from issues.utils import get_type_and_value_for_data, 
get_issue_grouper_for_data, get_denormalized_fields_for_data from issues.regressions import issue_is_regression import sentry_sdk_extensions @@ -105,13 +104,22 @@ class BaseIngestAPIView(APIView): # leave this at the top -- it may involve reading from the DB which should come before any DB writing pc_registry = get_pc_registry() + # I resisted the temptation to put `get_denormalized_fields_for_data` in an if-statement: you basically "always" + # need this info... except when duplicate event-ids are sent. But the latter is the exception, and putting this + # in an if-statement would require more rework (and possibly extra queries) than it's worth. + denormalized_fields = get_denormalized_fields_for_data(event_data) + # the 3 lines below are suggestive of a further inlining of the get_type_and_value_for_data function calculated_type, calculated_value = get_type_and_value_for_data(event_data) + denormalized_fields["calculated_type"] = calculated_type + denormalized_fields["calculated_value"] = calculated_value + grouping_key = get_issue_grouper_for_data(event_data, calculated_type, calculated_value) if not Grouping.objects.filter(project=ingested_event.project, grouping_key=grouping_key).exists(): # we don't have Project.issue_count here ('premature optimization') so we just do an aggregate instead. 
- issue_ingest_order = Issue.objects.filter(project=ingested_event.project).aggregate( - Max("ingest_order"))["ingest_order__max"] + 1 + max_current = Issue.objects.filter(project=ingested_event.project).aggregate( + Max("ingest_order"))["ingest_order__max"] + issue_ingest_order = max_current + 1 if max_current is not None else 1 issue = Issue.objects.create( ingest_order=issue_ingest_order, @@ -119,8 +127,7 @@ class BaseIngestAPIView(APIView): first_seen=ingested_event.timestamp, last_seen=ingested_event.timestamp, event_count=1, - calculated_type=truncatechars(calculated_type, 255), - calculated_value=truncatechars(calculated_value, 255), + **denormalized_fields, ) # even though in our data-model a given grouping does not imply a single Issue (in fact, that's the whole # point of groupings as a data-model), at-creation such implication does exist, because manual information @@ -146,8 +153,7 @@ class BaseIngestAPIView(APIView): issue.event_count if issue_created else issue.event_count + 1, issue, event_data, - calculated_type, - calculated_value, + denormalized_fields, ) if not event_created: # note: previously we created the event before the issue, which allowed for one less query. 
I don't see diff --git a/issues/migrations/0021_issue_last_frame_filename_issue_last_frame_function_and_more.py b/issues/migrations/0021_issue_last_frame_filename_issue_last_frame_function_and_more.py new file mode 100644 index 0000000..754621a --- /dev/null +++ b/issues/migrations/0021_issue_last_frame_filename_issue_last_frame_function_and_more.py @@ -0,0 +1,31 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('issues', '0020_alter_issue_unique_together'), + ] + + operations = [ + migrations.AddField( + model_name='issue', + name='last_frame_filename', + field=models.CharField(blank=True, default='', max_length=255), + ), + migrations.AddField( + model_name='issue', + name='last_frame_function', + field=models.CharField(blank=True, default='', max_length=255), + ), + migrations.AddField( + model_name='issue', + name='last_frame_module', + field=models.CharField(blank=True, default='', max_length=255), + ), + migrations.AddField( + model_name='issue', + name='transaction', + field=models.CharField(blank=True, default='', max_length=200), + ), + ] diff --git a/issues/models.py b/issues/models.py index 7656869..160cc41 100644 --- a/issues/models.py +++ b/issues/models.py @@ -38,6 +38,10 @@ class Issue(models.Model): event_count = models.IntegerField(blank=False, null=False) calculated_type = models.CharField(max_length=255, blank=True, null=False, default="") calculated_value = models.CharField(max_length=255, blank=True, null=False, default="") + transaction = models.CharField(max_length=200, blank=True, null=False, default="") + last_frame_filename = models.CharField(max_length=255, blank=True, null=False, default="") + last_frame_module = models.CharField(max_length=255, blank=True, null=False, default="") + last_frame_function = models.CharField(max_length=255, blank=True, null=False, default="") # fields related to resolution: # what does this mean for the release-based use cases? 
it means what you filter on. @@ -53,37 +57,20 @@ unmute_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string unmute_after = models.DateTimeField(blank=True, null=True) + def save(self, *args, **kwargs): + if self.ingest_order is None: + # testing-only; in production this should never happen and instead have been done in the ingest view. + max_current = Issue.objects.filter(project=self.project).aggregate( + models.Max("ingest_order"))["ingest_order__max"] + self.ingest_order = max_current + 1 if max_current is not None else 1 + super().save(*args, **kwargs) + def friendly_id(self): return f"{ self.project.slug.upper() }-{ self.ingest_order }" def get_absolute_url(self): return f"/issues/issue/{ self.id }/event/last/" - def parsed_data(self): - # TEMP solution; won't scale - return json.loads(self.event_set.first().data) - - def get_main_exception(self): - # TODO: refactor (its usages) to a (filled-on-create) field - - # Note: first event, last exception - - # We call the last exception in the chain the main exception because it's the one you're most likely to care - # about. I'd roughly distinguish 2 cases for reraising: - # - # 1. intentionally rephrasing/retyping exceptions to more clearly express their meaning. In that case you - # certainly care more about the rephrased thing than the original, that's the whole point. - # - # 2. actual "accidents" happening while error-handling. In that case you care about the accident first (bugsink - # is a system to help you think about cases that you didn't properly think about in the first place), - # although you may also care about the root cause. (In fact, sometimes you care more about the root cause, - # but I'd say you'll have to yak-shave your way there). 
- - parsed_data = json.loads(self.event_set.first().data) - exc = parsed_data.get("exception", {"values": []}) - values = exc["values"] # required by the json spec, so can be done safely - return values[-1] if values else {} - def title(self): return get_title_for_exception_type_and_value(self.calculated_type, self.calculated_value) diff --git a/issues/templates/issues/base.html b/issues/templates/issues/base.html index 27730f1..945e1b0 100644 --- a/issues/templates/issues/base.html +++ b/issues/templates/issues/base.html @@ -93,7 +93,7 @@

{{ issue.calculated_type }}

{{ issue.calculated_value }}
{% if parsed_data.request %}
{{ parsed_data.request.method }} {{ parsed_data.request.url }}
{% endif %} -
{% with issue.get_main_exception.stacktrace.frames|last as last_frame %}{% if last_frame.module %}{{ last_frame.module}}{% else %}{{ last_frame.filename }}{% endif %}{% if last_frame.function %} in {{ last_frame.function }}{% endif %}{% endwith %}
+
{% if issue.last_frame_module %}{{ issue.last_frame_module }}{% else %}{{ issue.last_frame_filename }}{% endif %}{% if issue.last_frame_function %} in {{ issue.last_frame_function }}{% endif %}
{# top, LHS (various texts) #} diff --git a/issues/templates/issues/issue_list.html b/issues/templates/issues/issue_list.html index f2e9541..814e541 100644 --- a/issues/templates/issues/issue_list.html +++ b/issues/templates/issues/issue_list.html @@ -154,7 +154,7 @@ https://flowbite.com/docs/forms/floating-label/   {% endif %}{{ issue.title|truncatechars:100 }}
from {{ issue.first_seen|date:"j M G:i" }} | last {{ issue.last_seen|date:"j M G:i" }} | with {{ issue.event_count }} events - {% if issue.parsed_data.transaction %}| {{ issue.parsed_data.transaction }} {% endif %} + {% if issue.transaction %}| {{ issue.transaction }} {% endif %}
diff --git a/issues/utils.py b/issues/utils.py index f89fa63..9745c1e 100644 --- a/issues/utils.py +++ b/issues/utils.py @@ -7,6 +7,11 @@ from sentry.utils.safe import get_path, trim from sentry.utils.strings import strip +def maybe_empty(s): + # TODO deduplicate this with events/models.py + return "" if not s else s + + def get_type_and_value_for_data(data): if "exception" in data and data["exception"]: return get_exception_type_and_value_for_exception(data) @@ -41,7 +46,17 @@ def get_exception_type_and_value_for_exception(data): if len(data["exception"]) == 0: return "", "" - exception = get_path(data, "exception", "values", -1) + # We use the last exception in the chain because it's the one you're most likely to care about. I'd roughly + # distinguish 2 cases for reraising: + # + # 1. intentionally rephrasing/retyping exceptions to more clearly express their meaning. In that case you + # certainly care more about the rephrased thing than the original, that's the whole point. + # + # 2. actual "accidents" happening while error-handling. In that case you care about the accident first (bugsink + # is a system to help you think about cases that you didn't properly think about in the first place), + # although you may also care about the root cause. (In fact, sometimes you care more about the root cause, + # but I'd say you'll have to yak-shave your way there). 
+ exception = get_path(data, "exception", "values", -1) # .values is required by the json spec, so can be done safely if not exception: return "", "" @@ -94,6 +109,21 @@ def get_title_for_exception_type_and_value(type_, value): return "{}: {}".format(type_, value.splitlines()[0]) +def get_denormalized_fields_for_data(parsed_data): + last_frame = get_crash_frame_from_event_data(parsed_data) or {} + + module = maybe_empty(last_frame.get("module", "")) + function = maybe_empty(get_function_name_for_frame(last_frame, parsed_data.get("platform"))) + filename = maybe_empty(last_frame.get("filename", "")) + + return { + "transaction": maybe_empty(parsed_data.get("transaction", ""))[:200], + "last_frame_filename": filename[:255], + "last_frame_module": module[:255], + "last_frame_function": function[:255], + } + + # utilities related to storing and retrieving release-versions; we use the fact that sentry (and we've adopted their # limitation) disallows the use of newlines in release-versions, so we can use newlines as a separator