store_tags: support 'very many' (~500) tags

This commit is contained in:
Klaas van Schelven
2025-07-08 15:21:26 +02:00
parent a247528baa
commit d62e53fdf8
2 changed files with 19 additions and 0 deletions

View File

@@ -23,6 +23,7 @@ from django.db.models import Q, F
from projects.models import Project from projects.models import Project
from tags.utils import deduce_tags, is_mostly_unique from tags.utils import deduce_tags, is_mostly_unique
from bugsink.moreiterutils import batched
# Notes on .project as it lives on TagValue, IssueTag and EventTag: # Notes on .project as it lives on TagValue, IssueTag and EventTag:
# In all cases, project could be derived through other means: for TagValue it's implied by TagKey.project; for IssueTag # In all cases, project could be derived through other means: for TagValue it's implied by TagKey.project; for IssueTag
@@ -165,6 +166,17 @@ def digest_tags(event_data, event, issue):
def store_tags(event, issue, tags):
    """Store the given tags for `event` and `issue`, one batch of tags at a time.

    `tags` is a mapping of tag key to tag value (its `.items()` are consumed). The actual storing is delegated to
    `_store_tags()`, which is called once for each batch of 64 key/value pairs as produced by `batched()`.
    """
    # observed: a non-batched implementation of store_tags() would crash (e.g. in sqlite: Expression tree is too large
    # (maximum depth 1000));

    # The value of 64 was arrived at by trying all powers of 2 (locally, on sqlite), observing that 256 was the last
    # non-failing one, and then taking a factor 4 safety-margin. Realistically, 64 tags _per event_ "should be enough
    # for anyone".
    for kv_batch in batched(tags.items(), 64):
        # each batch is an iterable of (key, value) pairs, so dict() rebuilds the per-batch mapping directly
        _store_tags(event, issue, dict(kv_batch))
def _store_tags(event, issue, tags):
if not tags: if not tags:
return # short-circuit; which is a performance optimization which also avoids some the need for further guards return # short-circuit; which is a performance optimization which also avoids some the need for further guards

View File

@@ -129,6 +129,13 @@ class StoreTagsTestCase(DjangoTestCase):
self.assertEqual(IssueTag.objects.count(), 2) self.assertEqual(IssueTag.objects.count(), 2)
self.assertEqual(2, IssueTag.objects.filter(value__key__key="foo").count()) self.assertEqual(2, IssueTag.objects.filter(value__key__key="foo").count())
def test_store_many_tags(self):
    """Storing 512 distinct tags in a single store_tags() call must succeed and yield 512 IssueTags."""
    # observed: a non-batched implementation of store_tags() would crash (e.g. in sqlite: Expression tree is too
    # large (maximum depth 1000)); if the below doesn't crash, we've got a batched implementation that works
    many_tags = {f"key-{i}": f"value-{i}" for i in range(512)}
    event = create_event(self.project, issue=self.issue)

    store_tags(event, self.issue, many_tags)

    tags_for_issue = IssueTag.objects.filter(issue=self.issue)
    self.assertEqual(tags_for_issue.count(), 512)
class SearchParserTestCase(RegularTestCase): class SearchParserTestCase(RegularTestCase):