mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-10 08:01:17 +00:00
Add 'mostly_unique' property to tags
This commit is contained in:
@@ -116,7 +116,7 @@ class Issue(models.Model):
|
||||
# the 2-step process allows for the filter on count;
|
||||
# one could argue that this is also possible in a single query though...
|
||||
|
||||
ds = self.tags.order_by("value__key__key").values("value__key")\
|
||||
ds = self.tags.filter(value__key__mostly_unique=False).order_by("value__key__key").values("value__key")\
|
||||
.annotate(cnt=models.Count("value")).distinct()
|
||||
|
||||
for d in ds:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Generated by Django 4.2.19 on 2025-02-27 19:46
|
||||
# Generated by Django 4.2.19 on 2025-03-03 09:42
|
||||
|
||||
from django.db import migrations, models
|
||||
import django.db.models.deletion
|
||||
@@ -10,8 +10,8 @@ class Migration(migrations.Migration):
|
||||
|
||||
dependencies = [
|
||||
("issues", "0010_issue_list_indexes"),
|
||||
("projects", "0011_fill_stored_event_count"),
|
||||
("events", "0019_event_storage_backend"),
|
||||
("projects", "0011_fill_stored_event_count"),
|
||||
]
|
||||
|
||||
operations = [
|
||||
@@ -28,6 +28,7 @@ class Migration(migrations.Migration):
|
||||
),
|
||||
),
|
||||
("key", models.CharField(max_length=32)),
|
||||
("mostly_unique", models.BooleanField(default=False)),
|
||||
(
|
||||
"project",
|
||||
models.ForeignKey(
|
||||
|
||||
@@ -5,8 +5,9 @@ counting. Some notes:
|
||||
* Arbitrary Tags can be set programmatically in the SDKs, which we need to support (Sentry API Compatibility).
|
||||
* Some "synthetic" Tags are introduced by Bugsink itself: attributes of an Event are deduced and stored explicitly as a
|
||||
Tag. The main reason to do this: stay flexible in terms of DB design and allow for generic code for searching and
|
||||
counting. _However_, we don't make a commitment to any particular implementation, and if the deduce-and-store approach
|
||||
turns out to be a performance bottleneck, it may be replaced. Particular notes on what we deduce are in `deduce_tags`.
|
||||
counting (especially in the light of Issues, where a single tag can have many values). _However_, we don't make a
|
||||
commitment to any particular implementation, and if the deduce-and-store approach turns out to be a performance
|
||||
bottleneck, it may be replaced. Particular notes on what we deduce are in `deduce_tags`.
|
||||
|
||||
https://docs.sentry.io/platforms/python/enriching-events/tags/
|
||||
|
||||
@@ -21,13 +22,18 @@ from django.db import models
|
||||
from django.db.models import Q, F
|
||||
|
||||
from projects.models import Project
|
||||
from tags.utils import deduce_tags
|
||||
from tags.utils import deduce_tags, is_mostly_unique
|
||||
|
||||
|
||||
class TagKey(models.Model):
|
||||
project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
|
||||
key = models.CharField(max_length=32, blank=False, null=False)
|
||||
|
||||
# Tags that are "mostly unique" are not displayed in the issue tag counts, because the distribution of values is
|
||||
# too flat to provide useful information. Another way of thinking about this is "this is a tag for searching, but
|
||||
# not for counting".
|
||||
mostly_unique = models.BooleanField(default=False)
|
||||
|
||||
# I briefly considered being explicit about is_deduced; but it's annoying to store this info on the TagKey, and it's
|
||||
# probably redundant if we just come up with a list of "reserved" tags or similar.
|
||||
# is_deduced = models.BooleanField(default=False)
|
||||
@@ -136,7 +142,8 @@ def store_tags(event, issue, tags):
|
||||
# # why this is only worth it for very small numbers of tags (1 in the current setup).
|
||||
#
|
||||
# for key, value in tags.items():
|
||||
# tag_key, _ = TagKey.objects.get_or_create(project_id=event.project_id, key=key)
|
||||
# tag_key, _ = TagKey.objects.get_or_create(
|
||||
# project_id=event.project_id, key=key, mostly_unique=is_mostly_unique(key))
|
||||
# tag_value, _ = TagValue.objects.get_or_create(project_id=event.project_id, key=tag_key, value=value)
|
||||
# EventTag.objects.get_or_create(project_id=event.project_id, value=tag_value, event=event)
|
||||
# IssueTag.objects.get_or_create(project_id=event.project_id, value=tag_value, issue=issue)
|
||||
@@ -144,8 +151,11 @@ def store_tags(event, issue, tags):
|
||||
# # the 0-case is implied here too, which avoids some further guards in the code below
|
||||
# return
|
||||
|
||||
# there is some principled point here that there is always a single value of mostly_unique per key, but this point
|
||||
# is not formalized in our database schema; it "just happens to work correctly" (at least as long as we don't change
|
||||
# the list of mostly unique keys, at which point we'll have to do a datamigration).
|
||||
TagKey.objects.bulk_create([
|
||||
TagKey(project_id=event.project_id, key=key) for key in tags.keys()
|
||||
TagKey(project_id=event.project_id, key=key, mostly_unique=is_mostly_unique(key)) for key in tags.keys()
|
||||
], ignore_conflicts=True)
|
||||
|
||||
# Select-back what we just created (or was already there); this is needed because "Enabling the ignore_conflicts or
|
||||
|
||||
@@ -79,3 +79,16 @@ def deduce_tags(event_data):
|
||||
# mechanism
|
||||
|
||||
return tags
|
||||
|
||||
|
||||
def is_mostly_unique(key):
    """Return True when the tag `key` is classified as "mostly unique".

    A key is classified as mostly unique when it starts with "user" or
    "trace", or when it is exactly "browser" or "browser.version". Every
    other key yields False.

    Mostly-unique tags have a value distribution that is too flat to be
    useful for counting; they remain useful for searching.

    `key` is expected to be a string.
    """
    # str.startswith accepts a tuple of prefixes, so both prefix families are
    # checked in a single call.
    if key.startswith(("user", "trace")):
        return True

    # Exact matches only: other "browser.*" keys (e.g. "browser.name") are not
    # treated as mostly unique.
    return key in ("browser.version", "browser")
|
||||
|
||||
Reference in New Issue
Block a user