From d62e53fdf8e7d216a00f02c5dec945e3bd2c2630 Mon Sep 17 00:00:00 2001
From: Klaas van Schelven
Date: Tue, 8 Jul 2025 15:21:26 +0200
Subject: [PATCH] store_tags: support 'very many' (~500) tags

---
 tags/models.py | 12 ++++++++++++
 tags/tests.py  |  7 +++++++
 2 files changed, 19 insertions(+)

diff --git a/tags/models.py b/tags/models.py
index 1964d4a..3c5981f 100644
--- a/tags/models.py
+++ b/tags/models.py
@@ -23,6 +23,7 @@ from django.db.models import Q, F
 
 from projects.models import Project
 from tags.utils import deduce_tags, is_mostly_unique
+from bugsink.moreiterutils import batched
 
 # Notes on .project as it lives on TagValue, IssueTag and EventTag:
 # In all cases, project could be derived through other means: for TagValue it's implied by TagKey.project; for IssueTag
@@ -165,6 +166,17 @@ def digest_tags(event_data, event, issue):
 
 
 def store_tags(event, issue, tags):
+    # observed: a non-batched implementation of store_tags() would crash (e.g. in sqlite: Expression tree is too large
+    # (maximum depth 1000));
+
+    # The value of 64 was arrived at by trying all powers of 2 (locally, on sqlite), observing that 256 was the last
+    # non-failing one, and then taking a factor 4 safety-margin. Realistically, 64 tags _per event_ "should be enough
+    # for anyone".
+    for kv_batch in batched(tags.items(), 64):
+        _store_tags(event, issue, {k: v for k, v in kv_batch})
+
+
+def _store_tags(event, issue, tags):
     if not tags:
         return  # short-circuit; which is a performance optimization which also avoids some the need for further guards
 
diff --git a/tags/tests.py b/tags/tests.py
index 703ea39..8eed0b5 100644
--- a/tags/tests.py
+++ b/tags/tests.py
@@ -129,6 +129,13 @@ class StoreTagsTestCase(DjangoTestCase):
         self.assertEqual(IssueTag.objects.count(), 2)
         self.assertEqual(2, IssueTag.objects.filter(value__key__key="foo").count())
 
+    def test_store_many_tags(self):
+        # observed: a non-batched implementation of store_tags() would crash (e.g. in sqlite: Expression tree is too
+        # large (maximum depth 1000)); if the below doesn't crash, we've got a batched implementation that works
+        event = create_event(self.project, issue=self.issue)
+        store_tags(event, self.issue, {f"key-{i}": f"value-{i}" for i in range(512)})
+        self.assertEqual(IssueTag.objects.filter(issue=self.issue).count(), 512)
+
 
 class SearchParserTestCase(RegularTestCase):
 