diff --git a/bugsink/utils.py b/bugsink/utils.py index 2682820..71466d4 100644 --- a/bugsink/utils.py +++ b/bugsink/utils.py @@ -1,7 +1,10 @@ +from collections import defaultdict from urllib.parse import urlparse from django.core.mail import EmailMultiAlternatives from django.template.loader import get_template +from django.apps import apps +from django.db.models import ForeignKey from .version import version @@ -161,3 +164,67 @@ def eat_your_own_dogfood(sentry_dsn, **kwargs): sentry_sdk.init( **default_kwargs, ) + + +def get_model_topography(): + """ + Returns a dependency graph mapping: + referenced_model_key -> [ + (referrer_model_class, fk_name), + ... + ] + """ + dep_graph = defaultdict(list) + for model in apps.get_models(): + for field in model._meta.get_fields(include_hidden=True): + if isinstance(field, ForeignKey): + referenced_model = field.related_model + referenced_key = f"{referenced_model._meta.app_label}.{referenced_model.__name__}" + dep_graph[referenced_key].append((model, field.name)) + return dep_graph + + +def delete_deps_with_budget(referring_model, fk_name, referred_ids, budget, dep_graph): + """ + Deletes all objects of type referring_model that refer to any of the referred_ids via fk_name. + Returns the number of deleted objects. + And does this recursively (i.e. if there are further dependencies, it will delete those as well). + """ + num_deleted = 0 + + # Fetch ids of referring objects and their referred ids + relevant_ids_here = list( + referring_model.objects.filter(**{f"{fk_name}__in": referred_ids}).order_by(f"{fk_name}_id", 'pk').values( + 'pk', fk_name + )[:budget] + ) + + if not relevant_ids_here: + # we didn't find any referring objects. optimization: skip any recursion and referring_model.delete() + return 0 + + # The recursing bit: + for_recursion = dep_graph.get(f"{referring_model._meta.app_label}.{referring_model.__name__}", []) + + for model_for_recursion, fk_name_for_recursion in for_recursion: + this_num_deleted = delete_deps_with_budget( + model_for_recursion, + fk_name_for_recursion, + [d["pk"] for d in relevant_ids_here], + budget - num_deleted, + dep_graph, + ) + + num_deleted += this_num_deleted + + if num_deleted >= budget: + return num_deleted + + # If this point is reached: we have deleted all referring objects that we could delete, and we still have budget + # left. We can now delete the referring objects themselves (limited by budget). + relevant_ids_after_rec = relevant_ids_here[:budget - num_deleted] + + my_num_deleted, _ = referring_model.objects.filter(pk__in=[d['pk'] for d in relevant_ids_after_rec]).delete() + num_deleted += my_num_deleted + + return num_deleted diff --git a/issues/admin.py b/issues/admin.py index 67508f7..cc9402c 100644 --- a/issues/admin.py +++ b/issues/admin.py @@ -1,8 +1,14 @@ from django.contrib import admin +from bugsink.transaction import immediate_atomic +from django.utils.decorators import method_decorator +from django.views.decorators.csrf import csrf_protect + from .models import Issue, Grouping, TurningPoint from .forms import IssueAdminForm +csrf_protect_m = method_decorator(csrf_protect) + class GroupingInline(admin.TabularInline): model = Grouping @@ -79,3 +85,28 @@ class IssueAdmin(admin.ModelAdmin): 'digested_event_count', 'stored_event_count', ] + + def get_deleted_objects(self, objs, request): + to_delete = list(objs) + ["...all its related objects... (delayed)"] + model_count = { + Issue: len(objs), + } + perms_needed = set() + protected = [] + return to_delete, model_count, perms_needed, protected + + def delete_queryset(self, request, queryset): + # NOTE: not the most efficient; it will do for a first version. + with immediate_atomic(): + for obj in queryset: + obj.delete_deferred() + + def delete_model(self, request, obj): + with immediate_atomic(): + obj.delete_deferred() + + @csrf_protect_m + def delete_view(self, request, object_id, extra_context=None): + # the superclass version, but with the transaction.atomic context manager commented out (we do this ourselves) + # with transaction.atomic(using=router.db_for_write(self.model)): + return self._delete_view(request, object_id, extra_context) diff --git a/issues/migrations/0018_issue_is_deleted.py b/issues/migrations/0018_issue_is_deleted.py new file mode 100644 index 0000000..42f8a92 --- /dev/null +++ b/issues/migrations/0018_issue_is_deleted.py @@ -0,0 +1,16 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0017_issue_list_indexes_must_start_with_project"), + ] + + operations = [ + migrations.AddField( + model_name="issue", + name="is_deleted", + field=models.BooleanField(default=False), + ), + ] diff --git a/issues/migrations/0019_alter_grouping_grouping_key_hash.py b/issues/migrations/0019_alter_grouping_grouping_key_hash.py new file mode 100644 index 0000000..fb6955f --- /dev/null +++ b/issues/migrations/0019_alter_grouping_grouping_key_hash.py @@ -0,0 +1,16 @@ +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0018_issue_is_deleted"), + ] + + operations = [ + migrations.AlterField( + model_name="grouping", + name="grouping_key_hash", + field=models.CharField(max_length=64, null=True), + ), + ] diff --git a/issues/models.py b/issues/models.py index 8297129..d29beb6 100644 --- a/issues/models.py +++ b/issues/models.py @@ -10,6 +10,7 @@ from django.conf import settings from django.utils.functional import cached_property from bugsink.volume_based_condition import VolumeBasedCondition +from bugsink.transaction import delay_on_commit from alerts.tasks import send_unmute_alert from compat.timestamp import parse_timestamp, format_timestamp from tags.models import IssueTag, TagValue @@ -18,6 +19,8 @@ from .utils import ( parse_lines, serialize_lines, filter_qs_for_fixed_at, exclude_qs_for_fixed_at, get_title_for_exception_type_and_value) +from .tasks import delete_issue_deps + class IncongruentStateException(Exception): pass @@ -34,6 +37,8 @@ class Issue(models.Model): project = models.ForeignKey( "projects.Project", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' + is_deleted = models.BooleanField(default=False) + # 1-based for the same reasons as Event.digest_order digest_order = models.PositiveIntegerField(blank=False, null=False) @@ -72,6 +77,21 @@ class Issue(models.Model): self.digest_order = max_current + 1 if max_current is not None else 1 super().save(*args, **kwargs) + def delete_deferred(self): + """Marks the issue as deleted, and schedules deletion of all related objects""" + self.is_deleted = True + self.save(update_fields=["is_deleted"]) + + # we set grouping_key_hash to None to ensure that event digests that happen simultaneously with the delayed + # cleanup will get their own fresh Grouping and hence Issue. This matches with the behavior that would happen + # if Issue deletion would have been instantaneous (i.e. it's the least surprising behavior). + # + # `issue=None` is explicitly _not_ part of this update, such that the actual deletion of the Groupings will be + # picked up as part of the delete_issue_deps task. + self.grouping_set.all().update(grouping_key_hash=None) + + delay_on_commit(delete_issue_deps, str(self.id)) + def friendly_id(self): return f"{ self.project.slug.upper() }-{ self.digest_order }" @@ -198,7 +218,7 @@ class Grouping(models.Model): grouping_key = models.TextField(blank=False, null=False) # we hash the key to make it indexable on MySQL, see https://code.djangoproject.com/ticket/2495 - grouping_key_hash = models.CharField(max_length=64, blank=False, null=False) + grouping_key_hash = models.CharField(max_length=64, blank=False, null=True) issue = models.ForeignKey("Issue", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' diff --git a/issues/tasks.py b/issues/tasks.py new file mode 100644 index 0000000..d1df79b --- /dev/null +++ b/issues/tasks.py @@ -0,0 +1,37 @@ +from snappea.decorators import shared_task + +from bugsink.utils import get_model_topography, delete_deps_with_budget +from bugsink.transaction import immediate_atomic, delay_on_commit + + +@shared_task +def delete_issue_deps(issue_id): + from .models import Issue # avoid circular import + with immediate_atomic(): + budget = 500 + num_deleted = 0 + + dep_graph = get_model_topography() + + for model_for_recursion, fk_name_for_recursion in dep_graph["issues.Issue"]: + this_num_deleted = delete_deps_with_budget( + model_for_recursion, + fk_name_for_recursion, + [issue_id], + budget - num_deleted, + dep_graph, + ) + + num_deleted += this_num_deleted + + if num_deleted >= budget: + delay_on_commit(delete_issue_deps, issue_id) + return + + if budget - num_deleted <= 0: + # no more budget for the self-delete. + delay_on_commit(delete_issue_deps, issue_id) + + else: + # final step: delete the issue itself + Issue.objects.filter(pk=issue_id).delete() diff --git a/tags/models.py b/tags/models.py index 85c9660..84cee27 100644 --- a/tags/models.py +++ b/tags/models.py @@ -75,7 +75,7 @@ class EventTag(models.Model): value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.CASCADE) # issue is a denormalization that allows for a single-table-index for efficient search. - # SET_NULL: Issue deletion is not actually possible yet (in the regular UI), so this is somewhat moot (for now). + # SET_NULL: to be re-evaulated in the context of Issue.delete_deferred issue = models.ForeignKey( 'issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name="event_tags") @@ -84,7 +84,7 @@ class EventTag(models.Model): # DO_NOTHING: we manually implement CASCADE (i.e. when an event is cleaned up, clean up associated tags) in the # eviction process. Why CASCADE? [1] you'll have to do it "at some point", so you might as well do it right when - # evicting (async in the 'most resilient setup' anyway, b/c that happens when ingesting) [2] the order of magnitude + # evicting (async in the 'most resilient setup' anyway, b/c that happens when digesting) [2] the order of magnitude # is "tens of deletions per event", so that's no reason to postpone. "Why manually" is explained in events/retention event = models.ForeignKey('events.Event', blank=False, null=False, on_delete=models.DO_NOTHING, related_name='tags') @@ -115,7 +115,7 @@ class IssueTag(models.Model): # value already implies key in our current setup. value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.CASCADE) - # SET_NULL: Issue deletion is not actually possible yet, so this is moot (for now). + # SET_NULL: to be re-evaulated in the context of Issue.delete_deferred issue = models.ForeignKey('issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name='tags') # 1. As it stands, there is only a single counter per issue-tagvalue combination. In principle/theory this type of