diff --git a/tags/tasks.py b/tags/tasks.py
index c6ba68f..13a9908 100644
--- a/tags/tasks.py
+++ b/tags/tasks.py
@@ -103,7 +103,11 @@ def vacuum_eventless_issuetags(min_id=0):
     #
     # This task aims to reconcile that, in a delayed and resumable fashion.
 
-    BATCH_SIZE = 512  # close to 500, and a multiple of 64
+    # Empirically determined: at this size, each batch is approx .3s (local dev, sqlite); Note that we're "nearer to the
+    # edge of the object-graph" than for e.g. event-retention, so we can both afford bigger batches (less cascading
+    # effects per item) as well as need bigger batches (because there are more expected items in a fanning-out
+    # object-graph).
+    BATCH_SIZE = 2048
 
     # Community wisdom (says ChatGPT, w/o source): queries with dozens of OR clauses can slow down significantly. 64 is
     # a safe, batch size that avoids planner overhead and keeps things fast across databases.
diff --git a/tags/tests.py b/tags/tests.py
index 806a609..318f1af 100644
--- a/tags/tests.py
+++ b/tags/tests.py
@@ -323,10 +323,10 @@ class VacuumEventlessIssueTagsTestCase(TransactionTestCase):
 
     def test_many_tags_spanning_chunks(self):
         event = create_event(self.project, issue=self.issue)
-        store_tags(event, self.issue, {f"key-{i}": f"value-{i}" for i in range(512 + 1)})  # bigger than BATCH_SIZE
+        store_tags(event, self.issue, {f"key-{i}": f"value-{i}" for i in range(2048 + 1)})  # bigger than BATCH_SIZE
 
         # check setup: all issue tags are there
-        self.assertEqual(IssueTag.objects.filter(issue=self.issue).count(), 513)
+        self.assertEqual(IssueTag.objects.filter(issue=self.issue).count(), 2048 + 1)
 
         event.delete_deferred()
         vacuum_eventless_issuetags()