mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-10 08:01:17 +00:00
Problem: on MySQL `make_consistent` cannot always clean up `Event`s, because `EventTag` objects still point to them, leading to an IntegrityError. The problem does not happen for `sqlite`, because sqlite does FK-checks on-commit. And the offending `EventTag` objects are "eventually cleaned up" (in the same transaction, in make_consistent). This is the "mostly works" solution, for the scenario we've encountered. Namely: remove EventTags which have no issue before removing Events. This works in practice because of the way Events-to-cleanup were created in the UI in practice, namely by removal of some Issue in the admin, triggering a `SET_NULL` on the `issue_id`. Removal of the issue implies an analogous `SET_NULL` on the `EventTag`'s `issue_id`, and by removing those `EventTag`s before proceeding with the `Event`s, you avoid the FK constraint triggering. We don't want to fully reimplement `CASCADE` (as in Django) here, the values of `on_delete` are "Design Decision Needed" and non-homogeneous anyway, and we might soon implement proper deletions (see #50), so the "mostly works" solution will have to do for now. Fixes #132
158 lines
6.4 KiB
Python
158 lines
6.4 KiB
Python
from django.core.management.base import BaseCommand
|
|
from django.db.models import Count
|
|
|
|
from releases.models import Release
|
|
from issues.models import Issue, Grouping, TurningPoint
|
|
from events.models import Event
|
|
from projects.models import Project
|
|
from tags.models import TagKey, TagValue, EventTag, IssueTag
|
|
|
|
from bugsink.transaction import immediate_atomic
|
|
from bugsink.timed_sqlite_backend.base import allow_long_running_queries
|
|
from bugsink.moreiterutils import batched
|
|
|
|
|
|
class DryRunException(Exception):
    """Raised to abort (and thereby roll back) the enclosing atomic block.

    transaction.rollback doesn't work in atomic blocks; a poor man's substitute
    is to just raise something specific and catch it outside the block.
    """
|
|
|
|
|
|
def _delete_for_missing_fk(clazz, field_name):
    """
    Delete all objects of class `clazz` of which the field `field_name` points to a non-existing object or is NULL.

    ## Dangling FKs:
    Non-existing objects may come into being when people muddle in the database directly with foreign key checks turned
    off (note that fk checks are turned off by default in sqlite's CLI for backwards compatibility reasons).

    In the future it's further possible that there will be pieces of the actual Bugsink code where FK-checks are turned
    off temporarily (e.g. when deleting a project with very many related objects). (In March 2025 there was no such code
    yet)

    To make make_consistent() do what it says on the can, we need to delete these dangling objects.
    """
    BATCH_SIZE = 1_000

    dangling_fks = set()

    field = clazz._meta.get_field(field_name)
    related_model = field.related_model

    # We just load _all_ available PKs into memory; it's expected that environments with millions of records will
    # have enough RAM to deal with this (millions would translate into ~Gigabytes of RAM).
    available_keys = set(related_model.objects.values_list('pk', flat=True))

    # construct "dangling_fks": every distinct FK value on `clazz` that points at a no-longer-existing related row.
    # Iterating in batches bounds the size of any single result set pulled from the DB cursor.
    for batch in batched(clazz.objects.values_list(field.get_attname(), flat=True).distinct(), BATCH_SIZE):
        for key in batch:
            if key not in available_keys:
                dangling_fks.add(key)

    def _del(deletion_kwargs, msg_kind):
        # delete() returns (total, {label: count}); we only report objects of `clazz` itself, not cascaded deletions.
        _, d_of_counts = clazz.objects.filter(**deletion_kwargs).delete()
        count = d_of_counts.get(clazz._meta.label, 0)
        if count == 0:
            return
        print("Deleted %d %ss, because their %s was %s" % (count, clazz.__name__, field_name, msg_kind))

    _del({field.get_attname(): None}, "NULL")
    # Delete in batches to avoid issuing a single statement with an unbounded IN (...) clause.
    for batch in batched(dangling_fks, BATCH_SIZE):
        _del({field.get_attname() + '__in': batch}, "non-existing")
|
|
|
|
|
|
def make_consistent():
    """Bring the database back into a consistent state.

    Deletes dangling objects (issues without events, rows whose FKs point at deleted or NULL targets),
    pins events referenced by a TurningPoint, and recomputes the denormalized counters/flags on
    Issue and Project. Intended to run inside a single transaction (see Command.handle).
    """
    # Delete all issues that have no events; they should not exist and in fact break various templates and views
    for issue in Issue.objects.annotate(fresh_event_count=Count('event')).filter(fresh_event_count=0):
        print("Deleting issue %s, because it has 0 events" % issue)
        issue.delete()

    # Various "dangling pointer" deletions:
    _delete_for_missing_fk(Issue, 'project')

    _delete_for_missing_fk(Grouping, 'project')
    _delete_for_missing_fk(Grouping, 'issue')

    _delete_for_missing_fk(TurningPoint, 'issue')

    _delete_for_missing_fk(Release, 'project')

    _delete_for_missing_fk(EventTag, 'issue')  # See #132 for the ordering of this statement

    _delete_for_missing_fk(Event, 'project')
    _delete_for_missing_fk(Event, 'issue')

    _delete_for_missing_fk(IssueTag, 'project')
    _delete_for_missing_fk(IssueTag, 'value')
    _delete_for_missing_fk(IssueTag, 'issue')

    _delete_for_missing_fk(EventTag, 'project')
    _delete_for_missing_fk(EventTag, 'value')
    _delete_for_missing_fk(EventTag, 'event')

    _delete_for_missing_fk(TagValue, 'project')
    _delete_for_missing_fk(TagValue, 'key')

    _delete_for_missing_fk(TagKey, 'project')

    # Events referenced by a TurningPoint must never be evicted, or the TurningPoint would dangle.
    for event in Event.objects.filter(turningpoint__isnull=False, never_evict=False).distinct():
        print("Setting event %s to never_evict because it has a turningpoint" % event)
        event.never_evict = True
        event.save()

    # Update counters; each .count() is a separate COUNT query, so compute it once per row instead of up to 3 times.
    for issue in Issue.objects.all():
        actual_count = issue.event_set.count()
        if issue.stored_event_count != actual_count:
            print("Updating event count for issue %s from %d to %d" % (
                issue, issue.stored_event_count, actual_count))
            issue.stored_event_count = actual_count
            issue.save()

    for project in Project.objects.all():
        actual_count = project.event_set.count()
        if project.stored_event_count != actual_count:
            print("Updating event count for project %s from %d to %d" % (
                project, project.stored_event_count, actual_count))
            project.stored_event_count = actual_count
            project.save()

    # Alternatively the counters could be fixed in a single UPDATE with a Subquery of Count('event') per row
    # (but does that work in MySQL?); kept as per-row loops for portability and per-row reporting.

    # Update Project.has_releases; .exists() is a DB query as well, so compute it once per row.
    for project in Project.objects.all():
        actual_has_releases = project.release_set.exists()
        if project.has_releases != actual_has_releases:
            print("Updating has_releases for project %s from %s to %s" % (
                project, project.has_releases, actual_has_releases))
            project.has_releases = actual_has_releases
            project.save()
|
|
|
|
|
|
class Command(BaseCommand):
    help = """Make the database consistent by deleting dangling objects (issues, events, etc) and updating counters."""

    # In theory, this command should not be required, because Bugsink _should_ leave itself in a consistent state
    # after every operation. However, in practice Bugsink may not always do as promised, people reach into the
    # database for whatever reason, or things go out of whack during development.

    def add_arguments(self, parser):
        parser.add_argument('--dry-run', action='store_true', help="Roll back all changes after making them.")

    def handle(self, *args, **options):
        allow_long_running_queries()

        dry_run = options['dry_run']

        try:
            with immediate_atomic():
                make_consistent()

                if dry_run:
                    # raising out of the atomic block is the only way to undo the changes (see DryRunException)
                    raise DryRunException("Dry run requested; rolling back changes.")
        except DryRunException:
            print("Changes have been rolled back (dry-run)")
|