diff --git a/events/management/commands/cleanup_events.py b/events/management/commands/cleanup_events.py index de889a6..a5570c2 100644 --- a/events/management/commands/cleanup_events.py +++ b/events/management/commands/cleanup_events.py @@ -1,4 +1,5 @@ from django.core.management.base import BaseCommand +from django.core.management import call_command from issues.models import Issue from events.models import Event @@ -17,3 +18,5 @@ class Command(BaseCommand): print("nuking") Issue.objects.all().delete() Event.objects.all().delete() + + call_command("make_consistent") diff --git a/events/management/commands/make_consistent.py b/events/management/commands/make_consistent.py index 007e59a..d97da16 100644 --- a/events/management/commands/make_consistent.py +++ b/events/management/commands/make_consistent.py @@ -4,6 +4,7 @@ from django.db.models import Count from releases.models import Release from issues.models import Issue, Grouping, TurningPoint from events.models import Event +from projects.models import Project def make_consistent(): @@ -46,6 +47,21 @@ def make_consistent(): event.never_evict = True event.save() + # counter reset: do it last, because the above deletions may have changed the counts + for issue in Issue.objects.all(): + if issue.stored_event_count != issue.event_set.count(): + print("Updating event count for issue %s from %d to %d" % ( + issue, issue.stored_event_count, issue.event_set.count())) + issue.stored_event_count = issue.event_set.count() + issue.save() + + for project in Project.objects.all(): + if project.stored_event_count != project.event_set.count(): + print("Updating event count for project %s from %d to %d" % ( + project, project.stored_event_count, project.event_set.count())) + project.stored_event_count = project.event_set.count() + project.save() + class Command(BaseCommand): help = """In 'normal operation', this command should not be used, because normal operation leaves the DB in a diff --git a/events/retention.py b/events/retention.py index 522b6da..9839001 100644 --- a/events/retention.py +++ b/events/retention.py @@ -227,7 +227,7 @@ def evict_for_max_events(project, timestamp, stored_event_count=None, include_ne if max_total_irrelevance < -1: # < -1: as in `evict_for_irrelevance` if not include_never_evict: # everything that remains is 'never_evict'. 'never say never' and evict those as a last measure - return evict_for_max_events(project, timestamp, stored_event_count - evicted, True) + return evicted + evict_for_max_events(project, timestamp, stored_event_count - evicted, True) raise Exception("No more effective eviction possible but target not reached") # "should not happen" @@ -239,7 +239,7 @@ def evict_for_max_events(project, timestamp, stored_event_count=None, include_ne stored_event_count - evicted - 1, # down to: -1, because the +1 happens post-eviction orig_max_total_irrelevance, max_total_irrelevance, phase0.took, phase1.took, phase0.count, phase1.count) - return max_total_irrelevance + return evicted def evict_for_irrelevance( diff --git a/ingest/tests.py b/ingest/tests.py index fea2fb7..afc3644 100644 --- a/ingest/tests.py +++ b/ingest/tests.py @@ -521,6 +521,19 @@ class IngestViewTestCase(TransactionTestCase): # but at least this closes the door for the next event self.assertEqual(now + relativedelta(minutes=5), project.quota_exceeded_until) + def test_ingest_updates_stored_event_counts(self): + request = self.request_factory.post("/api/1/store/") + + BaseIngestAPIView().digest_event(**_digest_params(create_event_data(), self.quiet_project, request)) + + self.assertEqual(1, Issue.objects.count()) + self.assertEqual(1, Issue.objects.get().stored_event_count) + self.assertEqual(1, Project.objects.get(id=self.quiet_project.id).stored_event_count) + + BaseIngestAPIView().digest_event(**_digest_params(create_event_data(), self.quiet_project, request)) + self.assertEqual(2, Issue.objects.get().stored_event_count) + self.assertEqual(2, Project.objects.get(id=self.quiet_project.id).stored_event_count) + class TestParser(RegularTestCase): diff --git a/ingest/views.py b/ingest/views.py index e01eba8..d8ab9fa 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -236,6 +236,7 @@ class BaseIngestAPIView(View): first_seen=ingested_at, last_seen=ingested_at, digested_event_count=1, + stored_event_count=0, # we increment this below **denormalized_fields, ) issue_created = True @@ -255,11 +256,8 @@ class BaseIngestAPIView(View): issue.last_seen = ingested_at issue.digested_event_count += 1 - # NOTE: possibly expensive. "in theory" we can just do some bookkeeping for a denormalized value, but that may - # be hard to keep in-sync in practice. Let's check the actual cost first. # +1 because we're about to add one event. - project_stored_event_count = (project.event_set.count() or 0) + 1 - issue_stored_event_count = (issue.event_set.count() or 0) + 1 + project_stored_event_count = project.stored_event_count + 1 if should_evict(project, digested_at, project_stored_event_count): # Note: I considered pushing this into some async process, but it makes reasoning much harder, and it's @@ -268,13 +266,19 @@ class BaseIngestAPIView(View): # cron-like. (not exactly the same, because for cron-like time savings are possible if the cron-likeness # causes the work to be outside of the 'rush hour' -- OTOH this also introduces a lot of complexity about # "what is a limit anyway, if you can go either over it, or work is done before the limit is reached") - evict_for_max_events(project, digested_at, project_stored_event_count) + evicted = evict_for_max_events(project, digested_at, project_stored_event_count) + else: + evicted = 0 + + issue.stored_event_count = issue.stored_event_count + 1 - evicted # +1 because we're about to add one event + project.stored_event_count = project_stored_event_count - evicted + project.save(update_fields=["stored_event_count"]) event, event_created = Event.from_ingested( event_metadata, digested_at, issue.digested_event_count, - issue_stored_event_count, + issue.stored_event_count, issue, grouping, event_data, diff --git a/issues/admin.py b/issues/admin.py index f1a1c65..67508f7 100644 --- a/issues/admin.py +++ b/issues/admin.py @@ -51,6 +51,7 @@ class IssueAdmin(admin.ModelAdmin): 'unmute_on_volume_based_conditions', 'unmute_after', 'digested_event_count', + 'stored_event_count', ] inlines = [ @@ -62,6 +63,7 @@ class IssueAdmin(admin.ModelAdmin): "title", "project", "digested_event_count", + "stored_event_count", ] list_filter = [ "project", @@ -75,4 +77,5 @@ class IssueAdmin(admin.ModelAdmin): 'calculated_type', 'calculated_value', 'digested_event_count', + 'stored_event_count', ] diff --git a/issues/migrations/0008_issue_stored_event_count.py b/issues/migrations/0008_issue_stored_event_count.py new file mode 100644 index 0000000..32279e1 --- /dev/null +++ b/issues/migrations/0008_issue_stored_event_count.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.18 on 2025-02-06 10:04 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0004_b_squashed"), + ] + + operations = [ + migrations.AddField( + model_name="issue", + name="stored_event_count", + field=models.IntegerField(default=0, editable=False), + ), + ] diff --git a/issues/migrations/0009_fill_stored_event_count.py b/issues/migrations/0009_fill_stored_event_count.py new file mode 100644 index 0000000..4476a87 --- /dev/null +++ b/issues/migrations/0009_fill_stored_event_count.py @@ -0,0 +1,24 @@ +from django.db import migrations + + +def unfill_stored_event_count(apps, schema_editor): + Issue = apps.get_model("issues", "Issue") + Issue.objects.all().update(stored_event_count=0) + + +def fill_stored_event_count(apps, schema_editor): + Issue = apps.get_model("issues", "Issue") + for issue in Issue.objects.all(): + issue.stored_event_count = issue.event_set.count() + issue.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0008_issue_stored_event_count"), + ] + + operations = [ + migrations.RunPython(fill_stored_event_count, unfill_stored_event_count), + ] diff --git a/issues/models.py b/issues/models.py index 94ea7d2..0b7866c 100644 --- a/issues/models.py +++ b/issues/models.py @@ -39,6 +39,7 @@ class Issue(models.Model): last_seen = models.DateTimeField(blank=False, null=False) # based on event.ingested_at first_seen = models.DateTimeField(blank=False, null=False) # based on event.ingested_at digested_event_count = models.IntegerField(blank=False, null=False) + stored_event_count = models.IntegerField(blank=False, null=False, default=0, editable=False) calculated_type = models.CharField(max_length=255, blank=True, null=False, default="") calculated_value = models.CharField(max_length=255, blank=True, null=False, default="") transaction = models.CharField(max_length=200, blank=True, null=False, default="") diff --git a/issues/templates/issues/event_list.html b/issues/templates/issues/event_list.html index 9148dd1..edcb2dd 100644 --- a/issues/templates/issues/event_list.html +++ b/issues/templates/issues/event_list.html @@ -27,7 +27,7 @@
Showing {{ page_obj.start_index }} - {{ page_obj.end_index }} of {{ page_obj.paginator.count }} - {% if project.digested_event_count != project.event_count %} + {% if project.digested_event_count != project.stored_event_count %} available events ({{ project.digested_event_count }} total observed). {% else %} total events. diff --git a/issues/views.py b/issues/views.py index bd63196..eb68692 100644 --- a/issues/views.py +++ b/issues/views.py @@ -46,6 +46,18 @@ GLOBAL_MUTE_OPTIONS = [ ] +class KnownCountPaginator(Paginator): + """optimization: we know the total count of the queryset, so we can avoid a count() query""" + + def __init__(self, *args, **kwargs): + self._count = kwargs.pop("count") + super().__init__(*args, **kwargs) + + @property + def count(self): + return self._count + + def _is_valid_action(action, issue): """We take the 'strict' approach of complaining even when the action is simply a no-op, because you're already in the desired state.""" @@ -510,7 +522,7 @@ def issue_event_list(request, issue): event_list = issue.event_set.order_by("digest_order") # re 250: in general "big is good" because it allows a lot "at a glance". - paginator = Paginator(event_list, 250) + paginator = KnownCountPaginator(event_list, 250, count=issue.stored_event_count) page_number = request.GET.get("page") page_obj = paginator.get_page(page_number) diff --git a/phonehome/tasks.py b/phonehome/tasks.py index 88ba624..70b5299 100644 --- a/phonehome/tasks.py +++ b/phonehome/tasks.py @@ -103,8 +103,10 @@ def _make_message_body(): "project_count": Project.objects.count(), "team_count": Team.objects.count(), - # event-counts [per some interval (e.g. 24 hours)] is a possible future enhancement. If that turns out to be - # expensive, one thing to consider is pulling _make_message_body() out of the immediate_atomic() block. + # event-counts [per some interval (e.g. 24 hours)] is a possible future enhancement. We've already seen that + # such counts are expensive though, but if _make_message_body() is pulled out of the immediate_atomic() + # block (which is OK, since there is no need for some kind of 'transactional consistency' to register this + # simple metadata fact), then it might be OK to add some more expensive queries here. }, } diff --git a/projects/migrations/0010_project_stored_event_count.py b/projects/migrations/0010_project_stored_event_count.py new file mode 100644 index 0000000..7a124c3 --- /dev/null +++ b/projects/migrations/0010_project_stored_event_count.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.18 on 2025-02-06 10:04 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("projects", "0003_project_projects_pr_name_11d782_idx"), + ] + + operations = [ + migrations.AddField( + model_name="project", + name="stored_event_count", + field=models.IntegerField(default=0, editable=False), + ), + ] diff --git a/projects/migrations/0011_fill_stored_event_count.py b/projects/migrations/0011_fill_stored_event_count.py new file mode 100644 index 0000000..9b79d14 --- /dev/null +++ b/projects/migrations/0011_fill_stored_event_count.py @@ -0,0 +1,23 @@ +from django.db import migrations + +def unfill_stored_event_count(apps, schema_editor): + Project = apps.get_model("projects", "Project") + Project.objects.all().update(stored_event_count=0) + + +def fill_stored_event_count(apps, schema_editor): + Project = apps.get_model("projects", "Project") + for project in Project.objects.all(): + project.stored_event_count = project.event_set.count() + project.save() + + +class Migration(migrations.Migration): + + dependencies = [ + ("projects", "0010_project_stored_event_count"), + ] + + operations = [ + migrations.RunPython(fill_stored_event_count, unfill_stored_event_count), + ] diff --git a/projects/models.py b/projects/models.py index c347f93..0afe493 100644 --- a/projects/models.py +++ b/projects/models.py @@ -52,6 +52,7 @@ class Project(models.Model): # denormalized/cached/counted fields below has_releases = models.BooleanField(editable=False, default=False) digested_event_count = models.PositiveIntegerField(null=False, blank=False, default=0, editable=False) + stored_event_count = models.IntegerField(blank=False, null=False, default=0, editable=False) # alerting conditions alert_on_new_issue = models.BooleanField(default=True)