Snappea stats: configurable retention

Klaas van Schelven
2025-04-16 17:10:15 +02:00
parent 06342010dd
commit 4cedffc1b7
2 changed files with 38 additions and 24 deletions

View File

@@ -32,6 +32,8 @@ DEFAULTS = {
     "TASK_QS_LIMIT": 100,
+    "STATS_RETENTION_MINUTES": 60 * 24 * 7,
     "HOOK_ADD_TASK_KWARGS": "snappea.utils.dont_add_anything",
     "HOOK_RUN_TASK_CONTEXT": "snappea.utils.no_context",
 }
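
The newly configurable STATS_RETENTION_MINUTES defaults to 60 * 24 * 7, i.e. one week of per-minute stats. A minimal sketch of overriding it, assuming snappea's get_settings() merges a project-level SNAPPEA dict over these DEFAULTS (the override mechanism itself is not part of this diff):

# in the deploying project's Django settings -- hypothetical override
SNAPPEA = {
    "STATS_RETENTION_MINUTES": 60 * 24,  # keep one day of stats instead of the default week
    # "STATS_RETENTION_MINUTES": 0,  # 0 disables stats writing (and cleanup) entirely; see _write() below
}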

View File

@@ -10,6 +10,7 @@ from bugsink.timed_sqlite_backend.base import different_runtime_limit
 from performance.context_managers import time_to_logger
 
 from .models import Task, Stat
+from .settings import get_settings
 
 performance_logger = logging.getLogger("bugsink.performance.snappea")
@@ -57,7 +58,7 @@ class Stats:
     def _possibly_write(self):
         # we only write once-a-minute; this means the cost of writing stats is amortized (at least when it matters, i.e.
-        # under pressure) by approx 1/(60*30); (the cost (see time_to_logger) was 8ms on my local env in initial tests)
+        # under pressure) by approx 1/(60*30); (the cost (see time_to_logger) was 15ms on my local env in initial tests)
         #
         # "edge" cases, in which nothing is written:
         # * snappea-shutdown
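
To put the amortization comment in numbers: a back-of-the-envelope sketch, assuming the 1/(60*30) factor reads as one write per minute spread over roughly 30 task-completions per second:

# rough arithmetic only; the ~30 tasks/second figure is an assumed reading of the comment above
write_cost_ms = 15                      # measured cost of one stats write, per the updated comment
tasks_per_write = 60 * 30               # one write per minute at ~30 tasks/second
print(write_cost_ms / tasks_per_write)  # ~0.0083 ms, i.e. about 8 microseconds per task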
@@ -74,30 +75,41 @@ class Stats:
         # the Stat w/ timestamp x is for the one-minute bucket from that point in time forwards:
         timestamp = datetime(*(self.last_write_at), tzinfo=timezone.utc)
 
-        with time_to_logger(performance_logger, "Snappea write Stats"):
-            with immediate_atomic(using="snappea"):  # explicit is better than impl.; and we combine read/write here
-                # having stats is great, but I don't want to hog task-processing too long (which would happen
-                # precisely when the backlog grows large)
-                with different_runtime_limit(0.1):
-                    try:
-                        task_counts = Task.objects.values("task_name").annotate(count=Count("task_name"))
-                    except OperationalError as e:
-                        if e.args[0] != "interrupted":
-                            raise
-                        task_counts = None
-
-                task_counts_d = {d['task_name']: d['count'] for d in task_counts} if task_counts else None
-
-                stats = [
-                    Stat(
-                        timestamp=timestamp,
-                        task_name=task_name,
-                        task_count=task_counts_d.get(task_name, 0) if task_counts is not None else None,
-                        **kwargs,
-                    ) for task_name, kwargs in self.d.items()
-                ]
-
-                Stat.objects.bulk_create(stats)
+        self._write(timestamp)
 
         # re-init:
         self.last_write_at = tup
         self.d = {}
+
+    def _write(self, timestamp):
+        if get_settings().STATS_RETENTION_MINUTES == 0:
+            # no stats retention; don't write anything either. this won't attempt to clean up either, which is OK in my
+            # book
+            return
+
+        with time_to_logger(performance_logger, "Snappea write Stats"):
+            with immediate_atomic(using="snappea"):  # explicit is better than impl.; and we combine read/write here
+                # having stats is great, but I don't want to hog task-processing too long (which would happen
+                # precisely when the backlog grows large)
+                with different_runtime_limit(0.1):
+                    try:
+                        task_counts = Task.objects.values("task_name").annotate(count=Count("task_name"))
+                    except OperationalError as e:
+                        if e.args[0] != "interrupted":
+                            raise
+                        task_counts = None
+
+                task_counts_d = {d['task_name']: d['count'] for d in task_counts} if task_counts else None
+
+                stats = [
+                    Stat(
+                        timestamp=timestamp,
+                        task_name=task_name,
+                        task_count=task_counts_d.get(task_name, 0) if task_counts is not None else None,
+                        **kwargs,
+                    ) for task_name, kwargs in self.d.items()
+                ]
+
+                Stat.objects.bulk_create(stats)
+
+                Stat.objects.filter(
+                    timestamp__lt=timestamp - timedelta(minutes=get_settings().STATS_RETENTION_MINUTES)).delete()
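
The cleanup itself is a single delete of everything older than the retention window, piggybacking on the same once-a-minute write transaction. A self-contained sketch of the cutoff arithmetic (retention_cutoff is a hypothetical helper; names mirror the diff):

from datetime import datetime, timedelta, timezone

def retention_cutoff(timestamp, retention_minutes):
    # Stats with timestamp < cutoff are deleted on the next write; the
    # retention_minutes == 0 case never reaches this point, since _write()
    # returns early and performs no cleanup at all.
    return timestamp - timedelta(minutes=retention_minutes)

now = datetime(2025, 4, 16, 17, 10, tzinfo=timezone.utc)
print(retention_cutoff(now, 60 * 24 * 7))  # default: 2025-04-09 17:10:00+00:00, one week back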