Various models: .issue and .grouping; SET_NULL => DO_NOTHING

I originally thought `SET_NULL` would be a good way to "do stuff later", but
that's only so the degree that [1] updates are cheaper than deletes and [2]
2nd-order effects (further deletes in the dep-tree) are avoided.

Now that we have explicit Issue-deletion (deps-first, delayed, properly batched)
the SET_NULL behavior is always a no-op (but with cost in queries).

As a result, in the test for issue deletion (which has deletes for many
of the altered models), the following 8 queries are no longer done:

```
SELECT "issues_grouping"."id", [..many fields..] FROM "issues_grouping" WHERE "issues_grouping"."id" IN (1)
UPDATE "events_event" SET "grouping_id" = NULL WHERE "events_event"."grouping_id" IN (1)

[.. a few moments later..]

SELECT "issues_issue"."id", [..many fields..] FROM "issues_issue" WHERE "issues_issue"."id" = 'uuid'
UPDATE "issues_grouping" SET "issue_id" = NULL WHERE "issues_grouping"."issue_id" IN ('uuid')
UPDATE "issues_turningpoint" SET "issue_id" = NULL WHERE "issues_turningpoint"."issue_id" IN ('uuid')
UPDATE "events_event" SET "issue_id" = NULL WHERE "events_event"."issue_id" IN ('uuid')
UPDATE "tags_eventtag" SET "issue_id" = NULL WHERE "tags_eventtag"."issue_id" IN ('uuid')
UPDATE "tags_issuetag" SET "issue_id" = NULL WHERE "tags_issuetag"."issue_id" IN ('uuid')
```

(breaks the tests b/c of constraints and not always using factories; will fix next)
This commit is contained in:
Klaas van Schelven
2025-07-03 10:53:38 +02:00
parent e58be0018f
commit e45c61d6f0
10 changed files with 177 additions and 12 deletions

View File

@@ -0,0 +1,34 @@
# Generated by Django 4.2.21 on 2025-07-03 08:30
from django.db import migrations
def remove_events_with_null_fks(apps, schema_editor):
# Up until now, we have various models w/ .issue=FK(null=True, on_delete=models.SET_NULL)
# Although it is "not expected" in the interface, issue-deletion would have led to those
# objects with a null issue. We're about to change that to .issue=FK(null=False, ...) which
# would crash if we don't remove those objects first. Object-removal is "fine" though, because
# as per the meaning of the SET_NULL, these objects were "dangling" anyway.
Event = apps.get_model("events", "Event")
Event.objects.filter(issue__isnull=True).delete()
# overcomplete b/c .issue would imply this, done anyway in the name of "defensive programming"
Event.objects.filter(grouping__isnull=True).delete()
class Migration(migrations.Migration):
dependencies = [
("events", "0019_event_storage_backend"),
# "in principle" order shouldn't matter, because the various objects that are being deleted here are all "fully
# contained" by the .issue; to be safe, however, we depend on the below, because of Grouping.objects.delete()
# (which would set Event.grouping=NULL, which the present migration takes into account).
("issues", "0020_remove_objects_with_null_issue"),
]
operations = [
migrations.RunPython(remove_events_with_null_fks, reverse_code=migrations.RunPython.noop),
]

View File

@@ -0,0 +1,27 @@
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("issues", "0021_alter_do_nothing"),
("events", "0020_remove_events_with_null_issue_or_grouping"),
]
operations = [
migrations.AlterField(
model_name="event",
name="grouping",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING, to="issues.grouping"
),
),
migrations.AlterField(
model_name="event",
name="issue",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING, to="issues.issue"
),
),
]

View File

@@ -72,11 +72,8 @@ class Event(models.Model):
ingested_at = models.DateTimeField(blank=False, null=False)
digested_at = models.DateTimeField(db_index=True, blank=False, null=False)
# not actually expected to be null, but we want to be able to delete issues without deleting events (cleanup later)
issue = models.ForeignKey("issues.Issue", blank=False, null=True, on_delete=models.SET_NULL)
# not actually expected to be null
grouping = models.ForeignKey("issues.Grouping", blank=False, null=True, on_delete=models.SET_NULL)
issue = models.ForeignKey("issues.Issue", blank=False, null=False, on_delete=models.DO_NOTHING)
grouping = models.ForeignKey("issues.Grouping", blank=False, null=False, on_delete=models.DO_NOTHING)
# The docs say:
# > Required. Hexadecimal string representing a uuid4 value. The length is exactly 32 characters. Dashes are not

View File

@@ -0,0 +1,26 @@
from django.db import migrations
def remove_objects_with_null_issue(apps, schema_editor):
# Up until now, we have various models w/ .issue=FK(null=True, on_delete=models.SET_NULL)
# Although it is "not expected" in the interface, issue-deletion would have led to those
# objects with a null issue. We're about to change that to .issue=FK(null=False, ...) which
# would crash if we don't remove those objects first. Object-removal is "fine" though, because
# as per the meaning of the SET_NULL, these objects were "dangling" anyway.
Grouping = apps.get_model("issues", "Grouping")
TurningPoint = apps.get_model("issues", "TurningPoint")
Grouping.objects.filter(issue__isnull=True).delete()
TurningPoint.objects.filter(issue__isnull=True).delete()
class Migration(migrations.Migration):
dependencies = [
("issues", "0019_alter_grouping_grouping_key_hash"),
]
operations = [
migrations.RunPython(remove_objects_with_null_issue, reverse_code=migrations.RunPython.noop),
]

View File

@@ -0,0 +1,26 @@
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("issues", "0020_remove_objects_with_null_issue"),
]
operations = [
migrations.AlterField(
model_name="grouping",
name="issue",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING, to="issues.issue"
),
),
migrations.AlterField(
model_name="turningpoint",
name="issue",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING, to="issues.issue"
),
),
]

View File

@@ -220,7 +220,7 @@ class Grouping(models.Model):
# we hash the key to make it indexable on MySQL, see https://code.djangoproject.com/ticket/2495
grouping_key_hash = models.CharField(max_length=64, blank=False, null=True)
issue = models.ForeignKey("Issue", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
issue = models.ForeignKey("Issue", blank=False, null=False, on_delete=models.DO_NOTHING)
def __str__(self):
return self.grouping_key
@@ -491,7 +491,7 @@ class TurningPoint(models.Model):
# basically: an Event, but that name was already taken in our system :-) alternative names I considered:
# "milestone", "state_change", "transition", "annotation", "episode"
issue = models.ForeignKey("Issue", blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
issue = models.ForeignKey("Issue", blank=False, null=False, on_delete=models.DO_NOTHING)
triggering_event = models.ForeignKey("events.Event", blank=True, null=True, on_delete=models.DO_NOTHING)
# null: the system-user

View File

@@ -705,7 +705,7 @@ class IssueDeletionTestCase(TransactionTestCase):
# assertNumQueries() is brittle and opaque. But at least the brittle part is quick to fix (a single number) and
# provides a canary for performance regressions.
with self.assertNumQueries(25):
with self.assertNumQueries(17):
self.issue.delete_deferred()
# tests run w/ TASK_ALWAYS_EAGER, so in the below we can just check the database directly

View File

@@ -0,0 +1,26 @@
from django.db import migrations
def remove_objects_with_null_issue(apps, schema_editor):
# Up until now, we have various models w/ .issue=FK(null=True, on_delete=models.SET_NULL)
# Although it is "not expected" in the interface, issue-deletion would have led to those
# objects with a null issue. We're about to change that to .issue=FK(null=False, ...) which
# would crash if we don't remove those objects first. Object-removal is "fine" though, because
# as per the meaning of the SET_NULL, these objects were "dangling" anyway.
EventTag = apps.get_model("tags", "EventTag")
IssueTag = apps.get_model("tags", "IssueTag")
EventTag.objects.filter(issue__isnull=True).delete()
IssueTag.objects.filter(issue__isnull=True).delete()
class Migration(migrations.Migration):
dependencies = [
("tags", "0002_no_cascade"),
]
operations = [
migrations.RunPython(remove_objects_with_null_issue, reverse_code=migrations.RunPython.noop),
]

View File

@@ -0,0 +1,31 @@
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("issues", "0021_alter_do_nothing"),
("tags", "0003_remove_objects_with_null_issue"),
]
operations = [
migrations.AlterField(
model_name="eventtag",
name="issue",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="event_tags",
to="issues.issue",
),
),
migrations.AlterField(
model_name="issuetag",
name="issue",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING,
related_name="tags",
to="issues.issue",
),
),
]

View File

@@ -75,9 +75,8 @@ class EventTag(models.Model):
value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.DO_NOTHING)
# issue is a denormalization that allows for a single-table-index for efficient search.
# SET_NULL: to be re-evaulated in the context of Issue.delete_deferred
issue = models.ForeignKey(
'issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name="event_tags")
'issues.Issue', blank=False, null=False, on_delete=models.DO_NOTHING, related_name="event_tags")
# digest_order is a denormalization that allows for a single-table-index for efficient search.
digest_order = models.PositiveIntegerField(blank=False, null=False)
@@ -115,8 +114,7 @@ class IssueTag(models.Model):
# value already implies key in our current setup.
value = models.ForeignKey(TagValue, blank=False, null=False, on_delete=models.DO_NOTHING)
# SET_NULL: to be re-evaulated in the context of Issue.delete_deferred
issue = models.ForeignKey('issues.Issue', blank=False, null=True, on_delete=models.SET_NULL, related_name='tags')
issue = models.ForeignKey('issues.Issue', blank=False, null=False, on_delete=models.DO_NOTHING, related_name='tags')
# 1. As it stands, there is only a single counter per issue-tagvalue combination. In principle/theory this type of
# denormalization will break down when you want to show this kind of information filtered by some other dimension,