From e45c61d6f0197d408cc6a41c7471add57ca7ae35 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Thu, 3 Jul 2025 10:53:38 +0200 Subject: [PATCH] Various models: .issue and .grouping; SET_NULL => DO_NOTHING I originally thought `SET_NULL` would be a good way to "do stuff later", but that's only so the degree that [1] updates are cheaper than deletes and [2] 2nd-order effects (further deletes in the dep-tree) are avoided. Now that we have explicit Issue-deletion (deps-first, delayed, properly batched) the SET_NULL behavior is always a no-op (but with cost in queries). As a result, in the test for issue deletion (which has deletes for many of the altered models), the following 8 queries are no longer done: ``` SELECT "issues_grouping"."id", [..many fields..] FROM "issues_grouping" WHERE "issues_grouping"."id" IN (1) UPDATE "events_event" SET "grouping_id" = NULL WHERE "events_event"."grouping_id" IN (1) [.. a few moments later..] SELECT "issues_issue"."id", [..many fields..] FROM "issues_issue" WHERE "issues_issue"."id" = 'uuid' UPDATE "issues_grouping" SET "issue_id" = NULL WHERE "issues_grouping"."issue_id" IN ('uuid') UPDATE "issues_turningpoint" SET "issue_id" = NULL WHERE "issues_turningpoint"."issue_id" IN ('uuid') UPDATE "events_event" SET "issue_id" = NULL WHERE "events_event"."issue_id" IN ('uuid') UPDATE "tags_eventtag" SET "issue_id" = NULL WHERE "tags_eventtag"."issue_id" IN ('uuid') UPDATE "tags_issuetag" SET "issue_id" = NULL WHERE "tags_issuetag"."issue_id" IN ('uuid') ``` (breaks the tests b/c of constraints and not always using factories; will fix next) --- ...move_events_with_null_issue_or_grouping.py | 34 +++++++++++++++++++ events/migrations/0021_alter_do_nothing.py | 27 +++++++++++++++ events/models.py | 7 ++-- .../0020_remove_objects_with_null_issue.py | 26 ++++++++++++++ issues/migrations/0021_alter_do_nothing.py | 26 ++++++++++++++ issues/models.py | 4 +-- issues/tests.py | 2 +- .../0003_remove_objects_with_null_issue.py | 26 ++++++++++++++ tags/migrations/0004_alter_do_nothing.py | 31 +++++++++++++++++ tags/models.py | 6 ++-- 10 files changed, 177 insertions(+), 12 deletions(-) create mode 100644 events/migrations/0020_remove_events_with_null_issue_or_grouping.py create mode 100644 events/migrations/0021_alter_do_nothing.py create mode 100644 issues/migrations/0020_remove_objects_with_null_issue.py create mode 100644 issues/migrations/0021_alter_do_nothing.py create mode 100644 tags/migrations/0003_remove_objects_with_null_issue.py create mode 100644 tags/migrations/0004_alter_do_nothing.py diff --git a/events/migrations/0020_remove_events_with_null_issue_or_grouping.py b/events/migrations/0020_remove_events_with_null_issue_or_grouping.py new file mode 100644 index 0000000..888bb1e --- /dev/null +++ b/events/migrations/0020_remove_events_with_null_issue_or_grouping.py @@ -0,0 +1,34 @@ +# Generated by Django 4.2.21 on 2025-07-03 08:30 + +from django.db import migrations + + +def remove_events_with_null_fks(apps, schema_editor): + # Up until now, we have various models w/ .issue=FK(null=True, on_delete=models.SET_NULL) + # Although it is "not expected" in the interface, issue-deletion would have led to those + # objects with a null issue. We're about to change that to .issue=FK(null=False, ...) which + # would crash if we don't remove those objects first. Object-removal is "fine" though, because + # as per the meaning of the SET_NULL, these objects were "dangling" anyway. + + Event = apps.get_model("events", "Event") + + Event.objects.filter(issue__isnull=True).delete() + + # overcomplete b/c .issue would imply this, done anyway in the name of "defensive programming" + Event.objects.filter(grouping__isnull=True).delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ("events", "0019_event_storage_backend"), + + # "in principle" order shouldn't matter, because the various objects that are being deleted here are all "fully + # contained" by the .issue; to be safe, however, we depend on the below, because of Grouping.objects.delete() + # (which would set Event.grouping=NULL, which the present migration takes into account). + ("issues", "0020_remove_objects_with_null_issue"), + ] + + operations = [ + migrations.RunPython(remove_events_with_null_fks, reverse_code=migrations.RunPython.noop), + ] diff --git a/events/migrations/0021_alter_do_nothing.py b/events/migrations/0021_alter_do_nothing.py new file mode 100644 index 0000000..a8a9c34 --- /dev/null +++ b/events/migrations/0021_alter_do_nothing.py @@ -0,0 +1,27 @@ +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0021_alter_do_nothing"), + ("events", "0020_remove_events_with_null_issue_or_grouping"), + ] + + operations = [ + migrations.AlterField( + model_name="event", + name="grouping", + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, to="issues.grouping" + ), + ), + migrations.AlterField( + model_name="event", + name="issue", + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, to="issues.issue" + ), + ), + ] diff --git a/events/models.py b/events/models.py index 974108b..d24fbb6 100644 --- a/events/models.py +++ b/events/models.py @@ -72,11 +72,8 @@ class Event(models.Model): ingested_at = models.DateTimeField(blank=False, null=False) digested_at = models.DateTimeField(db_index=True, blank=False, null=False) - # not actually expected to be null, but we want to be able to delete issues without deleting events (cleanup later) - issue = models.ForeignKey("issues.Issue", blank=False, null=True, on_delete=models.SET_NULL) - - # not actually expected to be null - grouping = models.ForeignKey("issues.Grouping", blank=False, null=True, on_delete=models.SET_NULL) + issue = models.ForeignKey("issues.Issue", blank=False, null=False, on_delete=models.DO_NOTHING) + grouping = models.ForeignKey("issues.Grouping", blank=False, null=False, on_delete=models.DO_NOTHING) # The docs say: # > Required. Hexadecimal string representing a uuid4 value. The length is exactly 32 characters. Dashes are not diff --git a/issues/migrations/0020_remove_objects_with_null_issue.py b/issues/migrations/0020_remove_objects_with_null_issue.py new file mode 100644 index 0000000..cfb26ce --- /dev/null +++ b/issues/migrations/0020_remove_objects_with_null_issue.py @@ -0,0 +1,26 @@ +from django.db import migrations + + +def remove_objects_with_null_issue(apps, schema_editor): + # Up until now, we have various models w/ .issue=FK(null=True, on_delete=models.SET_NULL) + # Although it is "not expected" in the interface, issue-deletion would have led to those + # objects with a null issue. We're about to change that to .issue=FK(null=False, ...) which + # would crash if we don't remove those objects first. Object-removal is "fine" though, because + # as per the meaning of the SET_NULL, these objects were "dangling" anyway. + + Grouping = apps.get_model("issues", "Grouping") + TurningPoint = apps.get_model("issues", "TurningPoint") + + Grouping.objects.filter(issue__isnull=True).delete() + TurningPoint.objects.filter(issue__isnull=True).delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0019_alter_grouping_grouping_key_hash"), + ] + + operations = [ + migrations.RunPython(remove_objects_with_null_issue, reverse_code=migrations.RunPython.noop), + ] diff --git a/issues/migrations/0021_alter_do_nothing.py b/issues/migrations/0021_alter_do_nothing.py new file mode 100644 index 0000000..1824073 --- /dev/null +++ b/issues/migrations/0021_alter_do_nothing.py @@ -0,0 +1,26 @@ +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0020_remove_objects_with_null_issue"), + ] + + operations = [ + migrations.AlterField( + model_name="grouping", + name="issue", + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, to="issues.issue" + ), + ), + migrations.AlterField( + model_name="turningpoint", + name="issue", + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, to="issues.issue" + ), + ), + ] diff --git a/issues/models.py b/issues/models.py index d29beb6..467cfd3 100644 --- a/issues/models.py +++ b/issues/models.py @@ -220,7 +220,7 @@ class Grouping(models.Model): # we hash the key to make it indexable on MySQL, see https://code.djangoproject.com/ticket/2495 grouping_key_hash = models.CharField(max_length=64, blank=False, null=True) - issue = models.ForeignKey("Issue", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' + issue = models.ForeignKey("Issue", blank=False, null=False, on_delete=models.DO_NOTHING) def __str__(self): return self.grouping_key @@ -491,7 +491,7 @@ class TurningPoint(models.Model): # basically: an Event, but that name was already taken in our system :-) alternative names I considered: # "milestone", "state_change", "transition", "annotation", "episode" - issue = models.ForeignKey("Issue", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later' + issue = models.ForeignKey("Issue", blank=False, null=False, on_delete=models.DO_NOTHING) triggering_event = models.ForeignKey("events.Event", blank=True, null=True, on_delete=models.DO_NOTHING) # null: the system-user diff --git a/issues/tests.py b/issues/tests.py index f02334b..fd818f6 100644 --- a/issues/tests.py +++ b/issues/tests.py @@ -705,7 +705,7 @@ class IssueDeletionTestCase(TransactionTestCase): # assertNumQueries() is brittle and opaque. But at least the brittle part is quick to fix (a single number) and # provides a canary for performance regressions. - with self.assertNumQueries(25): + with self.assertNumQueries(17): self.issue.delete_deferred() # tests run w/ TASK_ALWAYS_EAGER, so in the below we can just check the database directly diff --git a/tags/migrations/0003_remove_objects_with_null_issue.py b/tags/migrations/0003_remove_objects_with_null_issue.py new file mode 100644 index 0000000..a28d427 --- /dev/null +++ b/tags/migrations/0003_remove_objects_with_null_issue.py @@ -0,0 +1,26 @@ +from django.db import migrations + + +def remove_objects_with_null_issue(apps, schema_editor): + # Up until now, we have various models w/ .issue=FK(null=True, on_delete=models.SET_NULL) + # Although it is "not expected" in the interface, issue-deletion would have led to those + # objects with a null issue. We're about to change that to .issue=FK(null=False, ...) which + # would crash if we don't remove those objects first. Object-removal is "fine" though, because + # as per the meaning of the SET_NULL, these objects were "dangling" anyway. + + EventTag = apps.get_model("tags", "EventTag") + IssueTag = apps.get_model("tags", "IssueTag") + + EventTag.objects.filter(issue__isnull=True).delete() + IssueTag.objects.filter(issue__isnull=True).delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ("tags", "0002_no_cascade"), + ] + + operations = [ + migrations.RunPython(remove_objects_with_null_issue, reverse_code=migrations.RunPython.noop), + ] diff --git a/tags/migrations/0004_alter_do_nothing.py b/tags/migrations/0004_alter_do_nothing.py new file mode 100644 index 0000000..f48e77e --- /dev/null +++ b/tags/migrations/0004_alter_do_nothing.py @@ -0,0 +1,31 @@ +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0021_alter_do_nothing"), + ("tags", "0003_remove_objects_with_null_issue"), + ] + + operations = [ + migrations.AlterField( + model_name="eventtag", + name="issue", + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, + related_name="event_tags", + to="issues.issue", + ), + ), + migrations.AlterField( + model_name="issuetag", + name="issue", + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, + related_name="tags", + to="issues.issue", + ), + ), + ] diff --git a/tags/models.py b/tags/models.py index d73d7b5..ee195c6 100644 --- a/tags/models.py +++ b/tags/models.py @@ -75,9 +75,8 @@ class EventTag(models.Model): value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.DO_NOTHING) # issue is a denormalization that allows for a single-table-index for efficient search. - # SET_NULL: to be re-evaulated in the context of Issue.delete_deferred issue = models.ForeignKey( - 'issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name="event_tags") + 'issues.Issue', blank=False, null=False, on_delete=models.DO_NOTHING, related_name="event_tags") # digest_order is a denormalization that allows for a single-table-index for efficient search. digest_order = models.PositiveIntegerField(blank=False, null=False) @@ -115,8 +114,7 @@ class IssueTag(models.Model): # value already implies key in our current setup. value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.DO_NOTHING) - # SET_NULL: to be re-evaulated in the context of Issue.delete_deferred - issue = models.ForeignKey('issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name='tags') + issue = models.ForeignKey('issues.Issue', blank=False, null=False, on_delete=models.DO_NOTHING, related_name='tags') # 1. As it stands, there is only a single counter per issue-tagvalue combination. In principle/theory this type of # denormalization will break down when you want to show this kind of information filtered by some other dimension,