Issue.delete_deferred(): first version (WIP)

Implemented using a batch-wise dependency-scanner in delayed
(snappea) style.

* no tests yet.
* no real point-of-entry in the (regular, non-admin) UI yet.
* no hiding of Issues which are delete-in-progress from the UI
* file storage not yet cleaned up
* project issue counts not yet updated
* dangling tag values: no cleanup mechanism yet.

See #50
This commit is contained in:
Klaas van Schelven
2025-06-27 11:39:03 +02:00
parent b5ffa154c1
commit e5dbeae514
7 changed files with 191 additions and 4 deletions

View File

@@ -1,7 +1,10 @@
from collections import defaultdict
from urllib.parse import urlparse from urllib.parse import urlparse
from django.core.mail import EmailMultiAlternatives from django.core.mail import EmailMultiAlternatives
from django.template.loader import get_template from django.template.loader import get_template
from django.apps import apps
from django.db.models import ForeignKey
from .version import version from .version import version
@@ -161,3 +164,67 @@ def eat_your_own_dogfood(sentry_dsn, **kwargs):
sentry_sdk.init( sentry_sdk.init(
**default_kwargs, **default_kwargs,
) )
def get_model_topography():
"""
Returns a dependency graph mapping:
referenced_model_key -> [
(referrer_model_class, fk_name),
...
]
"""
dep_graph = defaultdict(list)
for model in apps.get_models():
for field in model._meta.get_fields(include_hidden=True):
if isinstance(field, ForeignKey):
referenced_model = field.related_model
referenced_key = f"{referenced_model._meta.app_label}.{referenced_model.__name__}"
dep_graph[referenced_key].append((model, field.name))
return dep_graph
def delete_deps_with_budget(referring_model, fk_name, referred_ids, budget, dep_graph):
"""
Deletes all objects of type referring_model that refer to any of the referred_ids via fk_name.
Returns the number of deleted objects.
And does this recursively (i.e. if there are further dependencies, it will delete those as well).
"""
num_deleted = 0
# Fetch ids of referring objects and their referred ids
relevant_ids_here = list(
referring_model.objects.filter(**{f"{fk_name}__in": referred_ids}).order_by(f"{fk_name}_id", 'pk').values(
'pk', fk_name
)[:budget]
)
if not relevant_ids_here:
# we didn't find any referring objects. optimization: skip any recursion and referring_model.delete()
return 0
# The recursing bit:
for_recursion = dep_graph.get(f"{referring_model._meta.app_label}.{referring_model.__name__}", [])
for model_for_recursion, fk_name_for_recursion in for_recursion:
this_num_deleted = delete_deps_with_budget(
model_for_recursion,
fk_name_for_recursion,
[d["pk"] for d in relevant_ids_here],
budget - num_deleted,
dep_graph,
)
num_deleted += this_num_deleted
if num_deleted >= budget:
return num_deleted
# If this point is reached: we have deleted all referring objects that we could delete, and we still have budget
# left. We can now delete the referring objects themselves (limited by budget).
relevant_ids_after_rec = relevant_ids_here[:budget - num_deleted]
my_num_deleted, _ = referring_model.objects.filter(pk__in=[d['pk'] for d in relevant_ids_after_rec]).delete()
num_deleted += my_num_deleted
return num_deleted

View File

@@ -1,8 +1,14 @@
from django.contrib import admin from django.contrib import admin
from bugsink.transaction import immediate_atomic
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_protect
from .models import Issue, Grouping, TurningPoint from .models import Issue, Grouping, TurningPoint
from .forms import IssueAdminForm from .forms import IssueAdminForm
csrf_protect_m = method_decorator(csrf_protect)
class GroupingInline(admin.TabularInline): class GroupingInline(admin.TabularInline):
model = Grouping model = Grouping
@@ -79,3 +85,28 @@ class IssueAdmin(admin.ModelAdmin):
'digested_event_count', 'digested_event_count',
'stored_event_count', 'stored_event_count',
] ]
def get_deleted_objects(self, objs, request):
to_delete = list(objs) + ["...all its related objects... (delayed)"]
model_count = {
Issue: len(objs),
}
perms_needed = set()
protected = []
return to_delete, model_count, perms_needed, protected
def delete_queryset(self, request, queryset):
# NOTE: not the most efficient; it will do for a first version.
with immediate_atomic():
for obj in queryset:
obj.delete_deferred()
def delete_model(self, request, obj):
with immediate_atomic():
obj.delete_deferred()
@csrf_protect_m
def delete_view(self, request, object_id, extra_context=None):
# the superclass version, but with the transaction.atomic context manager commented out (we do this ourselves)
# with transaction.atomic(using=router.db_for_write(self.model)):
return self._delete_view(request, object_id, extra_context)

View File

@@ -0,0 +1,16 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("issues", "0017_issue_list_indexes_must_start_with_project"),
]
operations = [
migrations.AddField(
model_name="issue",
name="is_deleted",
field=models.BooleanField(default=False),
),
]

View File

@@ -0,0 +1,16 @@
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("issues", "0018_issue_is_deleted"),
]
operations = [
migrations.AlterField(
model_name="grouping",
name="grouping_key_hash",
field=models.CharField(max_length=64, null=True),
),
]

View File

@@ -10,6 +10,7 @@ from django.conf import settings
from django.utils.functional import cached_property from django.utils.functional import cached_property
from bugsink.volume_based_condition import VolumeBasedCondition from bugsink.volume_based_condition import VolumeBasedCondition
from bugsink.transaction import delay_on_commit
from alerts.tasks import send_unmute_alert from alerts.tasks import send_unmute_alert
from compat.timestamp import parse_timestamp, format_timestamp from compat.timestamp import parse_timestamp, format_timestamp
from tags.models import IssueTag, TagValue from tags.models import IssueTag, TagValue
@@ -18,6 +19,8 @@ from .utils import (
parse_lines, serialize_lines, filter_qs_for_fixed_at, exclude_qs_for_fixed_at, parse_lines, serialize_lines, filter_qs_for_fixed_at, exclude_qs_for_fixed_at,
get_title_for_exception_type_and_value) get_title_for_exception_type_and_value)
from .tasks import delete_issue_deps
class IncongruentStateException(Exception): class IncongruentStateException(Exception):
pass pass
@@ -34,6 +37,8 @@ class Issue(models.Model):
project = models.ForeignKey( project = models.ForeignKey(
"projects.Project", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' "projects.Project", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
is_deleted = models.BooleanField(default=False)
# 1-based for the same reasons as Event.digest_order # 1-based for the same reasons as Event.digest_order
digest_order = models.PositiveIntegerField(blank=False, null=False) digest_order = models.PositiveIntegerField(blank=False, null=False)
@@ -72,6 +77,21 @@ class Issue(models.Model):
self.digest_order = max_current + 1 if max_current is not None else 1 self.digest_order = max_current + 1 if max_current is not None else 1
super().save(*args, **kwargs) super().save(*args, **kwargs)
def delete_deferred(self):
"""Marks the issue as deleted, and schedules deletion of all related objects"""
self.is_deleted = True
self.save(update_fields=["is_deleted"])
# we set grouping_key_hash to None to ensure that event digests that happen simultaneously with the delayed
# cleanup will get their own fresh Grouping and hence Issue. This matches with the behavior that would happen
# if Issue deletion would have been instantaneous (i.e. it's the least surprising behavior).
#
# `issue=None` is explicitly _not_ part of this update, such that the actual deletion of the Groupings will be
# picked up as part of the delete_issue_deps task.
self.grouping_set.all().update(grouping_key_hash=None)
delay_on_commit(delete_issue_deps, str(self.id))
def friendly_id(self): def friendly_id(self):
return f"{ self.project.slug.upper() }-{ self.digest_order }" return f"{ self.project.slug.upper() }-{ self.digest_order }"
@@ -198,7 +218,7 @@ class Grouping(models.Model):
grouping_key = models.TextField(blank=False, null=False) grouping_key = models.TextField(blank=False, null=False)
# we hash the key to make it indexable on MySQL, see https://code.djangoproject.com/ticket/2495 # we hash the key to make it indexable on MySQL, see https://code.djangoproject.com/ticket/2495
grouping_key_hash = models.CharField(max_length=64, blank=False, null=False) grouping_key_hash = models.CharField(max_length=64, blank=False, null=True)
issue = models.ForeignKey("Issue", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' issue = models.ForeignKey("Issue", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'

37
issues/tasks.py Normal file
View File

@@ -0,0 +1,37 @@
from snappea.decorators import shared_task
from bugsink.utils import get_model_topography, delete_deps_with_budget
from bugsink.transaction import immediate_atomic, delay_on_commit
@shared_task
def delete_issue_deps(issue_id):
from .models import Issue # avoid circular import
with immediate_atomic():
budget = 500
num_deleted = 0
dep_graph = get_model_topography()
for model_for_recursion, fk_name_for_recursion in dep_graph["issues.Issue"]:
this_num_deleted = delete_deps_with_budget(
model_for_recursion,
fk_name_for_recursion,
[issue_id],
budget - num_deleted,
dep_graph,
)
num_deleted += this_num_deleted
if num_deleted >= budget:
delay_on_commit(delete_issue_deps, issue_id)
return
if budget - num_deleted <= 0:
# no more budget for the self-delete.
delay_on_commit(delete_issue_deps, issue_id)
else:
# final step: delete the issue itself
Issue.objects.filter(pk=issue_id).delete()

View File

@@ -75,7 +75,7 @@ class EventTag(models.Model):
value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.CASCADE) value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.CASCADE)
# issue is a denormalization that allows for a single-table-index for efficient search. # issue is a denormalization that allows for a single-table-index for efficient search.
# SET_NULL: Issue deletion is not actually possible yet (in the regular UI), so this is somewhat moot (for now). # SET_NULL: to be re-evaulated in the context of Issue.delete_deferred
issue = models.ForeignKey( issue = models.ForeignKey(
'issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name="event_tags") 'issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name="event_tags")
@@ -84,7 +84,7 @@ class EventTag(models.Model):
# DO_NOTHING: we manually implement CASCADE (i.e. when an event is cleaned up, clean up associated tags) in the # DO_NOTHING: we manually implement CASCADE (i.e. when an event is cleaned up, clean up associated tags) in the
# eviction process. Why CASCADE? [1] you'll have to do it "at some point", so you might as well do it right when # eviction process. Why CASCADE? [1] you'll have to do it "at some point", so you might as well do it right when
# evicting (async in the 'most resilient setup' anyway, b/c that happens when ingesting) [2] the order of magnitude # evicting (async in the 'most resilient setup' anyway, b/c that happens when digesting) [2] the order of magnitude
# is "tens of deletions per event", so that's no reason to postpone. "Why manually" is explained in events/retention # is "tens of deletions per event", so that's no reason to postpone. "Why manually" is explained in events/retention
event = models.ForeignKey('events.Event', blank=False, null=False, on_delete=models.DO_NOTHING, related_name='tags') event = models.ForeignKey('events.Event', blank=False, null=False, on_delete=models.DO_NOTHING, related_name='tags')
@@ -115,7 +115,7 @@ class IssueTag(models.Model):
# value already implies key in our current setup. # value already implies key in our current setup.
value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.CASCADE) value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.CASCADE)
# SET_NULL: Issue deletion is not actually possible yet, so this is moot (for now). # SET_NULL: to be re-evaulated in the context of Issue.delete_deferred
issue = models.ForeignKey('issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name='tags') issue = models.ForeignKey('issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name='tags')
# 1. As it stands, there is only a single counter per issue-tagvalue combination. In principle/theory this type of # 1. As it stands, there is only a single counter per issue-tagvalue combination. In principle/theory this type of