From 6bc00c366a381c61a1bfc4a135ffd4eb98d46e18 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Fri, 5 Jan 2024 20:31:51 +0100 Subject: [PATCH] no volume-based alerts (only volume-based muting) --- DESIGN-alerts.md | 52 +++++++++++++++++++++---------- bugsink/period_counter.py | 16 +++++++--- bugsink/registry.py | 29 ----------------- bugsink/tests.py | 4 +-- bugsink/volume_based_condition.py | 4 +-- projects/models.py | 1 - 6 files changed, 49 insertions(+), 57 deletions(-) diff --git a/DESIGN-alerts.md b/DESIGN-alerts.md index 2431d53..0fdefb9 100644 --- a/DESIGN-alerts.md +++ b/DESIGN-alerts.md @@ -4,28 +4,46 @@ Default setting, may be adapted at organisational level. -You should get alerts for: +You should get alerts for any state change for the worse, such as: -* **Any State Change for the Worse:** - - New issues - - Regressions - - Unmuting (could be due to volume) +* New issues +* Regressions +* Unmuting (could be due to volume) - The basic belief underlying this is: "you should care about the errors on your project". And therefore, you'll be - alerted when they occur. +The basic belief underlying this is: "you should care about the errors on your project". And therefore, you'll be +alerted when they occur. -You may configure alerts for: +### Volume-Based Rules for unmuting -* **Volume-Based Rules:** - - _Any time_ more than 5 events per the last 3 hours occur - - _First time_ more than 10 events per day - - _First time_ the total number of events exceeds 100 - - The idea is: to avoid getting swamped, set thresholds slightly higher then 1 (over some period). +Volume based rules may be configured per-issue for unmuting. e.g. - An alternative approach is to automatically unmute issues based on such rules (and to initially mute them as long as - they don't match the rules yet); and then send the notification on-unmute. (by the definition of unmuting, the alert - occurs only the _first time_ the condition occurs) +- First time more than 5 events per the last 3 hours occur +- First time more than 10 events per day +- First time the total number of events exceeds 100 + +The idea is: "I know about this issue, I've determined it's not important, and I only want to get notified when it start +occurrung a lot (for some definition of 'a lot')" + +The tie-in with alerts is: you can send notification on-unmute. (by the definition of unmuting, the alert +occurs only the _first time_ the condition occurs) + +## "Later": auto-mute w/ default unmute rules + +At some point we may make it so that you can configure per-project that issues are initially muted (on create) but with +some predefined set of volume-based unmute conditions, which is set on the muted issues on-create. + +(This is in place of project-level volume-based alert settings. The main reason for this is that I want to keep a +symmetry/tracability between what's going on in the UI and the alerts that are going out) + +## "Later": more than first-time-only + +(The below only works if you let go of the idea that auto-mute is the only way you get volume-based alerts). + +Rather than just having first-time only rules, you could have alerting rules that are triggered when a condition +persists. e.g. + +* Any time more than 5 events occur the past 3 hours (ignoring any events that occured before the last time this was + triggered). ## Personal Notification Settings diff --git a/bugsink/period_counter.py b/bugsink/period_counter.py index 158a0cd..3c7848e 100644 --- a/bugsink/period_counter.py +++ b/bugsink/period_counter.py @@ -98,22 +98,28 @@ class PeriodCounter(object): is_new_period = _inc(self.counts[tl], tup[:tl], n, mx) event_listeners_for_tl = self.event_listeners[tl] - for ((nr_of_periods, gte_threshold), (wbt, wbf, is_true)) in list(event_listeners_for_tl.items()): + for ((nr_of_periods, gte_threshold), (wbt, wbf, ar, is_true)) in list(event_listeners_for_tl.items()): if is_true: if not is_new_period: continue # no new period means: never becomes false, because no old period becomes irrelevant if not self._get_event_state(tup[:tl], tl, nr_of_periods, gte_threshold): - event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, False) + if ar: + del event_listeners_for_tl[(nr_of_periods, gte_threshold)] + else: + event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, ar, False) wbf() else: if self._get_event_state(tup[:tl], tl, nr_of_periods, gte_threshold): - event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, True) + if ar: + del event_listeners_for_tl[(nr_of_periods, gte_threshold)] + else: + event_listeners_for_tl[(nr_of_periods, gte_threshold)] = (wbt, wbf, ar, True) wbt() def add_event_listener(self, period_name, nr_of_periods, gte_threshold, when_becomes_true=noop, - when_becomes_false=noop, initial_event_state=None, tup=None): + when_becomes_false=noop, auto_remove=False, initial_event_state=None, tup=None): # note: the 'events' here are not bugsink-events; but the more general concept of 'an event'; we may consider a # different name for this in the future because of that. @@ -128,7 +134,7 @@ class PeriodCounter(object): initial_event_state = self._get_event_state(tup, tl, nr_of_periods, gte_threshold) self.event_listeners[tl][(nr_of_periods, gte_threshold)] = \ - (when_becomes_true, when_becomes_false, initial_event_state) + (when_becomes_true, when_becomes_false, auto_remove, initial_event_state) def _tl_for_period(self, period_name): return { diff --git a/bugsink/registry.py b/bugsink/registry.py index b4d2757..7683ed7 100644 --- a/bugsink/registry.py +++ b/bugsink/registry.py @@ -23,16 +23,12 @@ class PeriodCounterRegistry(object): def load_from_scratch(self, projects, issues, ordered_events, now_tup): by_project = {} by_issue = {} - issue_pcs_by_project = {} for project in projects: by_project[project.id] = PeriodCounter() for issue in issues: by_issue[issue.id] = PeriodCounter() - if issue.project_id not in issue_pcs_by_project[issue.project_id]: - issue_pcs_by_project[issue.project_id] = [] - issue_pcs_by_project[issue.project_id].append(by_issue[issue.id]) for event in ordered_events: project_pc = by_project[event.project_id] @@ -41,30 +37,6 @@ class PeriodCounterRegistry(object): issue_pc = by_issue[event.issue_id] issue_pc.inc(event.timestamp) - for project in projects: - project_pc = by_project[project.id] - - volume_based_conditions = [ - VolumeBasedCondition.from_dict(vbc_s) - for vbc_s in json.loads(project.alert_on_volume_based_conditions) - ] - - for issue_pc in issue_pcs_by_project[project.id]: - for vbc in volume_based_conditions: - issue_pc.add_event_listener( - period_name=vbc.period_name, - nr_of_periods=vbc.nr_of_periods, - gte_threshold=vbc.volume, - when_becomes_true=..., # do the alert. and stop monitoring, at least when it's 'first time' - # er rijzen echter opnieuw vragen rond "moet je niet gewoon (un)muting gebruiken als middel? - # ook rijst de vraag: hoe nuttig is die "any time" nou helemaal? want: eenmaal overschreden - # blijf je vaak dezelfde conditie overschrijden... - # antwoord: ik zou zeggen "ignore any information pre-tuple-x"... maar allemaal tamelijk - # advanced - when_becomes_false=..., # what... really? 'stop monitoring' could be the answer. - tup=now_tup, - ) - for issue in issues.filter(is_muted=True): issue_pc = by_issue[issue.id] @@ -89,4 +61,3 @@ class PeriodCounterRegistry(object): # some TODOs: # # * quotas (per project, per issue) -# * alerting (settings live on project, but alerts and therefore the listeners are per issue) diff --git a/bugsink/tests.py b/bugsink/tests.py index 45649e6..123eaad 100644 --- a/bugsink/tests.py +++ b/bugsink/tests.py @@ -123,9 +123,9 @@ class PeriodCounterTestCase(TestCase): class VolumeBasedConditionTestCase(TestCase): def test_serialization(self): - vbc = VolumeBasedCondition("any", "day", 1, 100) + vbc = VolumeBasedCondition("day", 1, 100) json_str = vbc.to_json_str() - self.assertEquals('{"any_or_first": "any", "period": "day", "nr_of_periods": 1, "volume": 100}', json_str) + self.assertEquals('{"period": "day", "nr_of_periods": 1, "volume": 100}', json_str) vbc2 = VolumeBasedCondition.from_json_str(json_str) self.assertEquals(vbc, vbc2) diff --git a/bugsink/volume_based_condition.py b/bugsink/volume_based_condition.py index 2a26aae..5ec214b 100644 --- a/bugsink/volume_based_condition.py +++ b/bugsink/volume_based_condition.py @@ -3,8 +3,7 @@ import json class VolumeBasedCondition(object): - def __init__(self, any_or_first, period, nr_of_periods, volume): - self.any_or_first = any_or_first + def __init__(self, period, nr_of_periods, volume): self.period = period self.nr_of_periods = nr_of_periods self.volume = volume @@ -13,7 +12,6 @@ class VolumeBasedCondition(object): def from_json_str(cls, json_str): # TODO had toch gewoon dict moeten wezen json_dict = json.loads(json_str) return VolumeBasedCondition( - json_dict['any_or_first'], json_dict['period'], json_dict['nr_of_periods'], json_dict['volume'], diff --git a/projects/models.py b/projects/models.py index 80261b5..90ef439 100644 --- a/projects/models.py +++ b/projects/models.py @@ -48,7 +48,6 @@ class Project(models.Model): alert_on_new_issue = models.BooleanField(default=True) alert_on_regression = models.BooleanField(default=True) alert_on_unmute = models.BooleanField(default=True) - alert_on_volume_based_conditions = models.TextField(blank=False, null=False, default="[]") # json string def get_latest_release(self): # TODO perfomance considerations... this can be denormalized/cached at the project level