mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-09 23:51:20 +00:00
Nothing worrying, but good to have checked this regardless and important to have a green pipeline. Fix #175
72 lines
4.2 KiB
Python
72 lines
4.2 KiB
Python
from datetime import timezone, datetime
|
|
|
|
from django.db.models import Min
|
|
|
|
from bugsink.period_utils import add_periods_to_datetime, sub_periods_from_datetime
|
|
from bugsink.utils import assert_
|
|
|
|
|
|
def _filter_for_periods(qs, period_name, nr_of_periods, now):
|
|
if period_name == "total":
|
|
return qs
|
|
|
|
return qs.filter(digested_at__gte=sub_periods_from_datetime(now, nr_of_periods, period_name))
|
|
|
|
|
|
def check_for_thresholds(qs, now, thresholds, add_for_current=0):
    """Evaluate a list of volume thresholds against `qs` and report the state of each.

    Parameters:
        qs: queryset-like object supporting `.filter(digested_at__gte=...)`, `.count()` and `.aggregate(...)`.
        now: timezone-aware datetime; must be in UTC (asserted).
        thresholds: [(period_name, nr_of_periods, gte_threshold), ...]
        add_for_current: number added to each observed count (e.g. 1 for an item that is not yet in `qs`).

    Returns:
        [(state, below_threshold_from, check_again_after, (period_name, nr_of_periods, gte_threshold)), ...]
        with one entry per threshold, in input order, where:
          * state: True when the (adjusted) count is >= gte_threshold;
          * below_threshold_from: the first moment the condition is expected to stop applying, or None when
            `state` is False;
          * check_again_after: number of further observations after which a re-check is useful; always >= 1.

    This function does aggregation, so it's reasonably expensive (I haven't measured exactly, but it seems to be at
    least as expensive per-call as the whole of the rest of digestion). We solve this by not calling it often, using
    the `check_again_after` mechanism (which relies on simple counting, and the fact that a threshold for any given
    period of time will certainly not be crossed sooner than that the number of observations over _any_ time period
    exceeds the given threshold). The amortization then happens over the difference between the threshold and the
    actually observed number of events over the relevant time-period; in other words, unless for some weird reason
    you are consistently super-close to the quota but not over-quota the amortization will happen over a reasonable
    fraction of the quota, and if the quota is reasonably high (which is the only relevant-for-performance case
    anyway) this means the cost will be amortized away. (e.g. quota of 1_000; a check every 100 events in a bad
    case). The only relevant cost that this mechanism thus adds is the per-project counting of digested events.
    """
    # we only allow UTC, and we generally use Django model fields, which are UTC, so this should be good:
    assert_(now.tzinfo == timezone.utc)

    states = []

    for (period_name, nr_of_periods, gte_threshold) in thresholds:
        # Build the windowed queryset once; it is reused for both the count and (when needed) the Min aggregate.
        period_qs = _filter_for_periods(qs, period_name, nr_of_periods, now)

        count = period_qs.count() + add_for_current
        state = count >= gte_threshold

        if state:
            if period_name == "total":
                # when counting the 'total', there will never be a time when the condition becomes false. We
                # just pick an arbitrarily large date; we'll deal with it by the end of the myria-annum.
                # unlikely to actually end up in the DB (because it would imply the use of a quota for total).
                below_threshold_from = datetime(9999, 12, 31, 23, 59, tzinfo=timezone.utc)

            else:
                # `below_threshold_from` is the first moment in time where the condition no longer applies. Assuming
                # the present function is called "often enough" (i.e. is called for the moment the switch to
                # state=True happens, and not thereafter), there will be _exactly_ `gte_threshold` items in the qs
                # (potentially with 1 implied one if `add_for_current` applies). Taking the min of those and adding
                # the time period brings us to the point in time where the condition will become False again.
                #
                # (The assumption of "often enough, and no more" holds for us because for quota we stop accepting
                # events once the quota is met; for muted we remove the vbc once unmuted). For the "overshoot" case
                # (see tests, not really expected) this has the consequence of seeing a result that is "too old",
                # and hence going back to accepting too soon. But this is self-correcting, so no need to deal with
                # it.
                oldest_digested_at = period_qs.aggregate(
                    agg=Min('digested_at'))['agg'] or now  # `or now` to handle funny `gte_threshold==0`

                below_threshold_from = add_periods_to_datetime(oldest_digested_at, nr_of_periods, period_name)

        else:
            below_threshold_from = None

        # Once the threshold is met (or overshot) the raw difference is zero or negative, which is meaningless as a
        # "number of observations until the next check"; clamp so that the earliest re-check is the next observation.
        check_again_after = max(1, gte_threshold - count)

        states.append((state, below_threshold_from, check_again_after, (period_name, nr_of_periods, gte_threshold)))

    return states
|