From 89cab4f3c8642d83b52d0c44bede0279aec499f1 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Fri, 5 Jan 2024 20:20:14 +0100 Subject: [PATCH] WIP: midway checkin in the PeriodCounter registry stuff --- bugsink/period_counter.py | 15 ++++- bugsink/registry.py | 92 +++++++++++++++++++++++++++++++ bugsink/volume_based_condition.py | 2 +- events/models.py | 1 + ingest/views.py | 20 ++++++- issues/models.py | 3 + projects/models.py | 6 ++ 7 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 bugsink/registry.py diff --git a/bugsink/period_counter.py b/bugsink/period_counter.py index 2aea12d..158a0cd 100644 --- a/bugsink/period_counter.py +++ b/bugsink/period_counter.py @@ -25,6 +25,10 @@ TL_HOUR = 4 TL_MINUTE = 5 +def noop(): + pass + + def _prev_tup(tup, n=1): aslist = list(tup) @@ -85,6 +89,9 @@ class PeriodCounter(object): self.event_listeners = {tuple_length: {} for tuple_length in range(TL_MINUTE + 1)} def inc(self, datetime_utc, n=1): + # we only allow UTC, and we generally use Django model fields, which are UTC, so this should be good: + assert datetime_utc.tzinfo == timezone.utc + tup = datetime_utc.timetuple() for tl, mx in enumerate([MAX_TOTALS, MAX_YEARS, MAX_MONTHS, MAX_DAYS, MAX_HOURS, MAX_MINUTES]): @@ -105,8 +112,12 @@ class PeriodCounter(object): event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, True) wbt() - def add_event_listener(self, period_name, nr_of_periods, gte_threshold, when_becomes_true, when_becomes_false, - initial_event_state=None, tup=None): + def add_event_listener(self, period_name, nr_of_periods, gte_threshold, when_becomes_true=noop, + when_becomes_false=noop, initial_event_state=None, tup=None): + # note: the 'events' here are not bugsink-events; but the more general concept of 'an event'; we may consider a + # different name for this in the future because of that. + + # tup means: tup for the current time, which can be used to determine the initial state of the event listener. 
if len([arg for arg in [initial_event_state, tup] if arg is None]) != 1: # either be explicit, or let us deduce diff --git a/bugsink/registry.py b/bugsink/registry.py new file mode 100644 index 0000000..b4d2757 --- /dev/null +++ b/bugsink/registry.py @@ -0,0 +1,92 @@ +import json + +from .period_counter import PeriodCounter +from .volume_based_condition import VolumeBasedCondition + +from issues.models import Issue + + +def create_unmute_issue_handler(issue_id): + def unmute(): + # or just push this into a classmethod + # or make this using .update to avoid 1 of the 2 DB queries + issue = Issue.objects.get(id=issue_id) + issue.is_muted = False + issue.unmute_on_volume_based_conditions = "[]" + issue.save() + + return unmute + + +class PeriodCounterRegistry(object): + + def load_from_scratch(self, projects, issues, ordered_events, now_tup): + by_project = {} + by_issue = {} + issue_pcs_by_project = {} + + for project in projects: + by_project[project.id] = PeriodCounter() + + for issue in issues: + by_issue[issue.id] = PeriodCounter() + if issue.project_id not in issue_pcs_by_project: # first issue seen for this project + issue_pcs_by_project[issue.project_id] = [] + issue_pcs_by_project[issue.project_id].append(by_issue[issue.id]) + + for event in ordered_events: + project_pc = by_project[event.project_id] + project_pc.inc(event.timestamp) + + issue_pc = by_issue[event.issue_id] + issue_pc.inc(event.timestamp) + + for project in projects: + project_pc = by_project[project.id] + + volume_based_conditions = [ + VolumeBasedCondition.from_dict(vbc_s) + for vbc_s in json.loads(project.alert_on_volume_based_conditions) + ] + + for issue_pc in issue_pcs_by_project.get(project.id, []): # a project without issues has no entry + for vbc in volume_based_conditions: + issue_pc.add_event_listener( + period_name=vbc.period_name, + nr_of_periods=vbc.nr_of_periods, + gte_threshold=vbc.volume, + when_becomes_true=..., # do the alert. 
and stop monitoring, at least when it's 'first time' + # however, questions arise again around "shouldn't you simply use (un)muting as the mechanism?" + # another question arises: how useful is that "any time" really? because: once exceeded, + # you will often keep exceeding the same condition... + # answer: I would say "ignore any information pre-tuple-x"... but this is all fairly + # advanced + when_becomes_false=..., # what... really? 'stop monitoring' could be the answer. + tup=now_tup, + ) + + for issue in issues.filter(is_muted=True): + issue_pc = by_issue[issue.id] + + unmute_vbcs = [ + VolumeBasedCondition.from_dict(vbc_s) + for vbc_s in json.loads(issue.unmute_on_volume_based_conditions) + ] + + for vbc in unmute_vbcs: + issue_pc.add_event_listener( + period_name=vbc.period_name, + nr_of_periods=vbc.nr_of_periods, + gte_threshold=vbc.volume, + when_becomes_true=create_unmute_issue_handler(issue.id), + tup=now_tup, + auto_remove=True, # (at least when it's 'first time'... but this may be the only thing we support) + ) + + return by_project, by_issue + + +# some TODOs: +# +# * quotas (per project, per issue) +# * alerting (settings live on project, but alerts and therefore the listeners are per issue) diff --git a/bugsink/volume_based_condition.py b/bugsink/volume_based_condition.py index 21bc747..2a26aae 100644 --- a/bugsink/volume_based_condition.py +++ b/bugsink/volume_based_condition.py @@ -10,7 +10,7 @@ class VolumeBasedCondition(object): self.volume = volume @classmethod - def from_json_str(cls, json_str): + def from_json_str(cls, json_str): # TODO this should simply have taken a dict json_dict = json.loads(json_str) return VolumeBasedCondition( json_dict['any_or_first'], diff --git a/events/models.py b/events/models.py index c1cc295..16799c4 100644 --- a/events/models.py +++ b/events/models.py @@ -47,6 +47,7 @@ class Event(models.Model): # https://develop.sentry.dev/sdk/event-payloads/types/ (more up-to-date and complete) id = 
models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # This ID is internal to bugsink + server_side_timestamp = models.DateTimeField(db_index=True, blank=False, null=False) # TODO set this on-ingest # > Required. Hexadecimal string representing a uuid4 value. The length is exactly 32 characters. Dashes are not # > allowed. Has to be lowercase. diff --git a/ingest/views.py b/ingest/views.py index 25223e0..3004849 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -1,3 +1,4 @@ +from datetime import datetime, timezone import json # TODO consider faster APIs from django.shortcuts import get_object_or_404 @@ -55,9 +56,18 @@ class BaseIngestAPIView(APIView): return get_object_or_404(Project, pk=project_id, sentry_key=sentry_key) def process_event(self, event_data, request, project): + # because we want to count events before having created event objects (quota may block the latter) we cannot + # depend on event.timestamp; instead, we look at the clock once here, and then use that for both the project + # and issue period counters. + now = datetime.now(timezone.utc) + + project_pc = project_period_counters[project.id] + project_pc.inc(now) + DecompressedEvent.objects.create( project=project, data=json.dumps(event_data), # TODO don't parse-then-print for BaseIngestion + timestamp=now, # TODO this doesn't work because of auto_now_add ) debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "") @@ -76,11 +86,17 @@ class BaseIngestAPIView(APIView): ) issue.events.add(event) + issue_pc = issue_period_counters[issue.id] + issue_pc.inc(now) + if issue_created: - pass # alerting code goes here + if project.alert_on_new_issue: + alert_for_new_issue.delay(issue) elif issue_is_regression(issue, event.release): # new issues cannot be regressions by definition, hence 'else' - pass # alerting code goes here + if project.alert_on_regression: + alert_for_regression.delay(issue) + IssueResolver.reopen(issue) # TODO bookkeeping of events_at goes here. 
diff --git a/issues/models.py b/issues/models.py index c360d21..a639b6e 100644 --- a/issues/models.py +++ b/issues/models.py @@ -22,6 +22,9 @@ class Issue(models.Model): fixed_at = models.TextField(blank=False, null=False, default='[]') events_at = models.TextField(blank=False, null=False, default='[]') + is_muted = models.BooleanField(default=False) + unmute_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string + def get_absolute_url(self): return f"/issues/issue/{ self.id }/event/last/" diff --git a/projects/models.py b/projects/models.py index 1819995..80261b5 100644 --- a/projects/models.py +++ b/projects/models.py @@ -44,6 +44,12 @@ class Project(models.Model): ] """ + # alerting conditions + alert_on_new_issue = models.BooleanField(default=True) + alert_on_regression = models.BooleanField(default=True) + alert_on_unmute = models.BooleanField(default=True) + alert_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string + def get_latest_release(self): # TODO perfomance considerations... this can be denormalized/cached at the project level from releases.models import ordered_releases