WIP: midway check-in of the PeriodCounter registry work

This commit is contained in:
Klaas van Schelven
2024-01-05 20:20:14 +01:00
parent 700c9e68b4
commit 89cab4f3c8
7 changed files with 134 additions and 5 deletions

View File

@@ -25,6 +25,10 @@ TL_HOUR = 4
TL_MINUTE = 5
def noop():
    """Do nothing; default callback for optional event-listener hooks."""
    pass
def _prev_tup(tup, n=1):
aslist = list(tup)
@@ -85,6 +89,9 @@ class PeriodCounter(object):
self.event_listeners = {tuple_length: {} for tuple_length in range(TL_MINUTE + 1)}
def inc(self, datetime_utc, n=1):
# we only allow UTC, and we generally use Django model fields, which are UTC, so this should be good:
assert datetime_utc.tzinfo == timezone.utc
tup = datetime_utc.timetuple()
for tl, mx in enumerate([MAX_TOTALS, MAX_YEARS, MAX_MONTHS, MAX_DAYS, MAX_HOURS, MAX_MINUTES]):
@@ -105,8 +112,12 @@ class PeriodCounter(object):
event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, True)
wbt()
def add_event_listener(self, period_name, nr_of_periods, gte_threshold, when_becomes_true, when_becomes_false,
initial_event_state=None, tup=None):
def add_event_listener(self, period_name, nr_of_periods, gte_threshold, when_becomes_true=noop,
when_becomes_false=noop, initial_event_state=None, tup=None):
# note: the 'events' here are not bugsink-events; but the more general concept of 'an event'; we may consider a
# different name for this in the future because of that.
# tup means: tup for the current time, which can be used to determine the initial state of the event listener.
if len([arg for arg in [initial_event_state, tup] if arg is None]) != 1:
# either be explicit, or let us deduce

92
bugsink/registry.py Normal file
View File

@@ -0,0 +1,92 @@
import json
from .period_counter import PeriodCounter
from .volume_based_condition import VolumeBasedCondition
from issues.models import Issue
def create_unmute_issue_handler(issue_id):
    """Return a zero-argument callback that unmutes the Issue with the given id.

    The returned callable clears ``is_muted`` and resets the stored
    unmute conditions to an empty JSON list.
    """
    def _handler():
        # NOTE: fetch-then-save costs two DB queries; a queryset .update()
        # (or a classmethod on Issue) could do it in one.
        issue = Issue.objects.get(id=issue_id)
        issue.is_muted = False
        issue.unmute_on_volume_based_conditions = "[]"
        issue.save()

    return _handler
class PeriodCounterRegistry(object):
    """Builds the in-memory PeriodCounters (per project and per issue) and
    attaches the volume-based-condition event listeners to them."""

    def load_from_scratch(self, projects, issues, ordered_events, now_tup):
        """Reconstruct all period counters by replaying the event history.

        projects: iterable of Project instances.
        issues: queryset of Issue instances (``.filter()`` is used below).
        ordered_events: all events in timestamp order; each is ``inc``-ed
            into its project's and its issue's counter.
        now_tup: time-tuple for "now"; handed to the listeners so they can
            deduce their initial state.

        Returns ``(by_project, by_issue)``: dicts mapping ids to PeriodCounter.
        """
        by_project = {}
        by_issue = {}
        issue_pcs_by_project = {}

        for project in projects:
            by_project[project.id] = PeriodCounter()

        for issue in issues:
            by_issue[issue.id] = PeriodCounter()
            # bugfix: test key-membership on the dict itself; the original
            # indexed issue_pcs_by_project[issue.project_id] before the key
            # existed, which raised KeyError for the first issue of a project.
            if issue.project_id not in issue_pcs_by_project:
                issue_pcs_by_project[issue.project_id] = []
            issue_pcs_by_project[issue.project_id].append(by_issue[issue.id])

        # replay history so every counter reflects the actual volumes
        for event in ordered_events:
            project_pc = by_project[event.project_id]
            project_pc.inc(event.timestamp)

            issue_pc = by_issue[event.issue_id]
            issue_pc.inc(event.timestamp)

        for project in projects:
            project_pc = by_project[project.id]

            volume_based_conditions = [
                VolumeBasedCondition.from_dict(vbc_s)
                for vbc_s in json.loads(project.alert_on_volume_based_conditions)
            ]

            # .get(..., []): a project may have no issues yet, in which case
            # no entry was created above (the original raised KeyError here).
            for issue_pc in issue_pcs_by_project.get(project.id, []):
                for vbc in volume_based_conditions:
                    issue_pc.add_event_listener(
                        period_name=vbc.period_name,
                        nr_of_periods=vbc.nr_of_periods,
                        gte_threshold=vbc.volume,
                        # WIP: do the alert, and stop monitoring, at least
                        # when it's 'first time'.
                        when_becomes_true=...,
                        # NOTE(review): open questions remain -- shouldn't
                        # (un)muting simply be the mechanism here? And how
                        # useful is "any time" really? Once exceeded, the same
                        # condition tends to stay exceeded. One answer would be
                        # "ignore any information pre-tuple-x", but that is
                        # rather advanced.
                        when_becomes_false=...,  # WIP: 'stop monitoring' could be the answer.
                        tup=now_tup,
                    )

        for issue in issues.filter(is_muted=True):
            issue_pc = by_issue[issue.id]

            unmute_vbcs = [
                VolumeBasedCondition.from_dict(vbc_s)
                for vbc_s in json.loads(issue.unmute_on_volume_based_conditions)
            ]

            for vbc in unmute_vbcs:
                issue_pc.add_event_listener(
                    period_name=vbc.period_name,
                    nr_of_periods=vbc.nr_of_periods,
                    gte_threshold=vbc.volume,
                    when_becomes_true=create_unmute_issue_handler(issue.id),
                    tup=now_tup,
                    # NOTE(review): auto_remove is not in the visible
                    # add_event_listener signature -- confirm it exists.
                    # (at least when it's 'first time'... but this may be the
                    # only thing we support)
                    auto_remove=True,
                )

        return by_project, by_issue
# some TODOs:
#
# * quotas (per project, per issue)
# * alerting (settings live on project, but alerts and therefore the listeners are per issue)

View File

@@ -10,7 +10,7 @@ class VolumeBasedCondition(object):
self.volume = volume
@classmethod
def from_json_str(cls, json_str):
def from_json_str(cls, json_str): # TODO this should simply have taken a dict
json_dict = json.loads(json_str)
return VolumeBasedCondition(
json_dict['any_or_first'],

View File

@@ -47,6 +47,7 @@ class Event(models.Model):
# https://develop.sentry.dev/sdk/event-payloads/types/ (more up-to-date and complete)
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) # This ID is internal to bugsink
server_side_timestamp = models.DateTimeField(db_index=True, blank=False, null=False) # TODO set this on-ingest
# > Required. Hexadecimal string representing a uuid4 value. The length is exactly 32 characters. Dashes are not
# > allowed. Has to be lowercase.

View File

@@ -1,3 +1,4 @@
from datetime import datetime, timezone
import json # TODO consider faster APIs
from django.shortcuts import get_object_or_404
@@ -55,9 +56,18 @@ class BaseIngestAPIView(APIView):
return get_object_or_404(Project, pk=project_id, sentry_key=sentry_key)
def process_event(self, event_data, request, project):
# because we want to count events before having created event objects (quota may block the latter) we cannot
# depend on event.timestamp; instead, we look on the clock once here, and then use that for both the project
# and issue period counters.
now = datetime.now(timezone.utc)
project_pc = project_period_counters[project.id]
project_pc.inc(now)
DecompressedEvent.objects.create(
project=project,
data=json.dumps(event_data), # TODO don't parse-then-print for BaseIngestion
timestamp=now, # TODO this doesn't work because of auto_add_now
)
debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "")
@@ -76,11 +86,17 @@ class BaseIngestAPIView(APIView):
)
issue.events.add(event)
issue_pc = issue_period_counters[issue.id]
issue_pc.inc(now)
if issue_created:
pass # alerting code goes here
if project.alert_on_new_issue:
alert_for_new_issue.delay(issue)
elif issue_is_regression(issue, event.release): # new issues cannot be regressions by definition, hence 'else'
pass # alerting code goes here
if project.alert_on_regression:
alert_for_regression.delay(issue)
IssueResolver.reopen(issue)
# TODO bookkeeping of events_at goes here.

View File

@@ -22,6 +22,9 @@ class Issue(models.Model):
fixed_at = models.TextField(blank=False, null=False, default='[]')
events_at = models.TextField(blank=False, null=False, default='[]')
is_muted = models.BooleanField(default=False)
unmute_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string
def get_absolute_url(self):
    """Canonical URL for this issue: the page showing its most recent event."""
    return "/issues/issue/%s/event/last/" % (self.id,)

View File

@@ -44,6 +44,12 @@ class Project(models.Model):
]
"""
# alerting conditions
alert_on_new_issue = models.BooleanField(default=True)
alert_on_regression = models.BooleanField(default=True)
alert_on_unmute = models.BooleanField(default=True)
alert_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string
def get_latest_release(self):
# TODO perfomance considerations... this can be denormalized/cached at the project level
from releases.models import ordered_releases