diff --git a/snappea/settings.py b/snappea/settings.py index e8b4b90..5d8c3e6 100644 --- a/snappea/settings.py +++ b/snappea/settings.py @@ -32,6 +32,8 @@ DEFAULTS = { "TASK_QS_LIMIT": 100, + "STATS_RETENTION_MINUTES": 60 * 24 * 7, + "HOOK_ADD_TASK_KWARGS": "snappea.utils.dont_add_anything", "HOOK_RUN_TASK_CONTEXT": "snappea.utils.no_context", } diff --git a/snappea/stats.py b/snappea/stats.py index 7d17bb2..e444161 100644 --- a/snappea/stats.py +++ b/snappea/stats.py @@ -10,6 +10,7 @@ from bugsink.timed_sqlite_backend.base import different_runtime_limit from performance.context_managers import time_to_logger from .models import Task, Stat +from .settings import get_settings performance_logger = logging.getLogger("bugsink.performance.snappea") @@ -57,7 +58,7 @@ class Stats: def _possibly_write(self): # we only write once-a-minute; this means the cost of writing stats is amortized (at least when it matters, i.e. - # under pressure) by approx 1/(60*30); (the cost (see time_to_logger) was 8ms on my local env in initial tests) + # under pressure) by approx 1/(60*30); (the cost (see time_to_logger) was 15ms on my local env in initial tests) # # "edge" cases, in which nothing is written: # * snappea-shutdown @@ -74,30 +75,41 @@ class Stats: # the Stat w/ timestamp x is for the one-minute bucket from that point in time forwards: timestamp = datetime(*(self.last_write_at), tzinfo=timezone.utc) - with time_to_logger(performance_logger, "Snappea write Stats"): - with immediate_atomic(using="snappea"): # explicit is better than impl.; and we combine read/write here - # having stats is great, but I don't want to hog task-processing too long (which would happen - # precisely when the backlog grows large) - with different_runtime_limit(0.1): - try: - task_counts = Task.objects.values("task_name").annotate(count=Count("task_name")) - except OperationalError as e: - if e.args[0] != "interrupted": - raise - task_counts = None - - task_counts_d = {d['task_name']: d['count'] for d in task_counts} if task_counts else None - stats = [ - Stat( - timestamp=timestamp, - task_name=task_name, - task_count=task_counts_d.get(task_name, 0) if task_counts is not None else None, - **kwargs, - ) for task_name, kwargs in self.d.items() - ] - - Stat.objects.bulk_create(stats) + self._write(timestamp) # re-init: self.last_write_at = tup self.d = {} + + def _write(self, timestamp): + if get_settings().STATS_RETENTION_MINUTES == 0: + # no stats retention; don't write anything either. this won't attempt to clean up either, which is OK in my + # book + return + + with time_to_logger(performance_logger, "Snappea write Stats"): + with immediate_atomic(using="snappea"): # explicit is better than impl.; and we combine read/write here + # having stats is great, but I don't want to hog task-processing too long (which would happen + # precisely when the backlog grows large) + with different_runtime_limit(0.1): + try: + task_counts = Task.objects.values("task_name").annotate(count=Count("task_name")) + except OperationalError as e: + if e.args[0] != "interrupted": + raise + task_counts = None + + task_counts_d = {d['task_name']: d['count'] for d in task_counts} if task_counts else None + stats = [ + Stat( + timestamp=timestamp, + task_name=task_name, + task_count=task_counts_d.get(task_name, 0) if task_counts is not None else None, + **kwargs, + ) for task_name, kwargs in self.d.items() + ] + + Stat.objects.bulk_create(stats) + + Stat.objects.filter( + timestamp__lt=timestamp - timedelta(minutes=get_settings().STATS_RETENTION_MINUTES)).delete()