issue tags: single query rather than one-per-tag

2026-03-10 15:48:31 +00:00 · 2025-03-03 13:42:18 +01:00
parent e6bc660731
commit bbfee84c6a
1 changed files with 15 additions and 11 deletions
--- a/issues/models.py
+++ b/issues/models.py
@@ -1,6 +1,7 @@
 import json
 import uuid
 from functools import partial
+from itertools import groupby

 from django.db import models, transaction
 from django.db.models import F, Value
@@ -127,20 +128,23 @@ class Issue(models.Model):
    def _get_issue_tags(self, other_cutoff, other_label):
        result = []

-        ds = self.tags.filter(value__key__mostly_unique=False).order_by("value__key__key").values("value__key")\
-            .annotate(cnt=models.Count("value")).distinct()
+        all_issue_tags = (
+            IssueTag.objects
+            .filter(project_id=self.project_id, issue=self)
+            .order_by("value__key__key", "-count")
+            .select_related("value", "value__key"))

-        for d in ds:
-            issue_tags = [
-                issue_tag
-                for issue_tag in
-                (IssueTag.objects
-                 .filter(project_id=self.project_id, issue=self, value__key=d['value__key'])
-                 .order_by("-count")
-                 .select_related("value", "value__key"))
-            ]
+        # NOTE: at some point in the past we did grouping in SQL first; and then did 1 per-group query to get the
+        # the actual values. This was less efficient in terms of the number of queries, but had the advantage of quickly
+        # stopping work when there were very many values for a certain key. We now rely on the 'mostly_unique' property
+        # for the line below to not do way too much work; if it ever turns out to be so, we may even push the
+        # 'mostly_unique' property to be dynamically calculated (setting it to True when very many values are observed)
+        grouped = groupby(all_issue_tags, lambda x: x.value.key.key)

+        for key, group in grouped:
+            issue_tags = list(group)
            total_seen = sum(issue_tag.count for issue_tag in issue_tags)
+
            seen_till_now = 0
            if len(issue_tags) > other_cutoff:
                issue_tags = issue_tags[:other_cutoff - 1]  # cut off one more to make room for "Other"