Snappea: workaholic mode

2026-03-10 08:01:17 +00:00 · 2024-08-28 08:58:35 +02:00
parent 66bece58c1
commit 5d6983042a
2 changed files with 34 additions and 0 deletions
--- a/snappea/foreman.py
+++ b/snappea/foreman.py
@@ -261,6 +261,13 @@ class Foreman:
            while self.create_workers() == self.settings.TASK_QS_LIMIT:
                pass  # `== TASK_QS_LIMIT`: as documented above

+            # in workaholic mode, the checks-after-unblock may have led to the conclusion "I need to stop but I can't
+            # because there is still work to do". For that case, we need to do another check at the moment the work
+            # might have actually been done and before blocking again, i.e. here. Note that the other place where we
+            # block is the .acquire() in create_workers(), but there we _know_ we're not out of Tasks (by definition of
+            # acquiring the worker semaphore), so this single check is enough.
+            self.check_for_stopping()
+
    def create_workers(self):
        """returns the number of workers created (AKA tasks done)"""

@@ -336,6 +343,13 @@ class Foreman:
    def check_for_stopping(self):
        if not self.stopping:
            return
+
+        if self.settings.WORKAHOLIC:
+            with durable_atomic(using="snappea"):
+                if Task.objects.exists():
+                    logger.info("Not stopping yet: Workaholic mode, waiting for all tasks to finish")
+                    return
+
        logger.info("Stopping")

        # Loop over all tasks, waiting for them to finish. If they don't finish in time (GRACEFUL_TIMEOUT), we'll kill
--- a/snappea/settings.py
+++ b/snappea/settings.py
@@ -8,6 +8,26 @@ DEFAULTS = {
    "WAKEUP_CALLS_DIR": "/tmp/snappea.wakeup",

    "NUM_WORKERS": 4,
+
+    # Workaholic mode: I will not stop, even when I'm told to, until _all_ of my tasks are done. This was built for the
+    # case of Docker but might just be useful outside it. Consider:
+    #
+    # * snappea and the server are in the same container, and communicate via an sqlite queue (file) in the container.
+    # * containers are supposed to be disposable; the message queue will be disposed of when the container is; the
+    #   ingested (but not yet digested) events in the /tmp/ dir will be too, by the way.
+    # * snappea may get a TERM signal either because the container is being stopped, or because the server exits (via
+    #   bugsink-server-unified).
+    #
+    # Given the above, it's better for snappea to do all the work it can before it gets killed the drastic way when
+    # Docker gets impatient, than to quickly shut down and leave the server with a bunch of unprocessed events. This is
+    # what the "workaholic" mode is for.
+    #
+    # Note about a scenario that we don't fully deal with: on docker-stop, SIGTERM is sent to both processes at the
+    # same time. Gunicorn may then take some time to fully shut down while still serving requests, and during that
+    # time it may enqueue new tasks; such tasks would not be picked up, even in workaholic mode. An improvement could
+    # be to shut down in order, but for now this is in "perfectionism" territory for us.
+    "WORKAHOLIC": False,
+
    "GRACEFUL_TIMEOUT": 10,

    "TASK_QS_LIMIT": 100,