vacuum_eventless_issuetags: tune batch-size

See #134
This commit is contained in:
Klaas van Schelven
2025-07-08 16:16:54 +02:00
parent 674d84909f
commit 1965b0f8c2
2 changed files with 7 additions and 3 deletions

View File

@@ -103,7 +103,11 @@ def vacuum_eventless_issuetags(min_id=0):
#
# This task aims to reconcile that, in a delayed and resumable fashion.
BATCH_SIZE = 512 # close to 500, and a multiple of 64
# Empirically determined: at this size, each batch is approx .3s (local dev, sqlite); Note that we're "nearer to the
# edge of the object-graph" than for e.g. event-retention, so we can both afford bigger batches (less cascading
# effects per item) as well as need bigger batches (because there are more expected items in a fanning-out
# object-graph).
BATCH_SIZE = 2048
# Community wisdom (says ChatGPT, w/o source): queries with dozens of OR clauses can slow down significantly. 64 is
# a safe batch size that avoids planner overhead and keeps things fast across databases.

View File

@@ -323,10 +323,10 @@ class VacuumEventlessIssueTagsTestCase(TransactionTestCase):
def test_many_tags_spanning_chunks(self):
event = create_event(self.project, issue=self.issue)
store_tags(event, self.issue, {f"key-{i}": f"value-{i}" for i in range(512 + 1)}) # bigger than BATCH_SIZE
store_tags(event, self.issue, {f"key-{i}": f"value-{i}" for i in range(2048 + 1)}) # bigger than BATCH_SIZE
# check setup: all issue tags are there
self.assertEqual(IssueTag.objects.filter(issue=self.issue).count(), 513)
self.assertEqual(IssueTag.objects.filter(issue=self.issue).count(), 2048 + 1)
event.delete_deferred()
vacuum_eventless_issuetags()