diff --git a/bugsink/context_processors.py b/bugsink/context_processors.py
index 76679c6..7093b8f 100644
--- a/bugsink/context_processors.py
+++ b/bugsink/context_processors.py
@@ -40,7 +40,7 @@ def get_snappea_warnings():
         return []
 
     with durable_atomic(using="snappea"):
-        with different_runtime_limit(0.1):
+        with different_runtime_limit(0.1, using="snappea"):
             try:
                 task_count = Task.objects.all().count()
             except OperationalError as e:
diff --git a/bugsink/settings/development.py b/bugsink/settings/development.py
index 03bafdc..b6b23dc 100644
--- a/bugsink/settings/development.py
+++ b/bugsink/settings/development.py
@@ -64,8 +64,12 @@
 elif os.getenv("DB", "sqlite") == "sqlite":
     # store databases.
     DATABASES["default"]["NAME"] = BASE_DIR / 'db.sqlite3'
     DATABASES["default"]["TEST"]["NAME"] = BASE_DIR / 'test.sqlite3'
-    DATABASES["default"]["OPTIONS"]["query_timeout"] = 0.1  # canary config: fail-fast in development.
+    DATABASES["default"]["OPTIONS"]["query_timeout"] = 0.11  # canary config: fail-fast in development.
+    DATABASES["snappea"]["NAME"] = BASE_DIR / 'snappea.sqlite3'
+    # canary config: fail-fast. slightly different value from the above to allow for easier debugging of the timeout
+    # mechanism itself (i.e. to eyeball where the value came from).
+    DATABASES["snappea"]["OPTIONS"]["query_timeout"] = 0.12
 
 else:
     raise ValueError("Unknown DB", os.getenv("DB"))
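For reference, the resulting two-alias configuration boils down to the sketch below. This is illustrative, not part of the patch: it assumes the backend package is "bugsink.timed_sqlite_backend" (the directory holding the patched base.py) and elides all other settings. Note that query_timeout rides along in each alias's OPTIONS and is popped off by DatabaseWrapper.__init__ before the dict reaches sqlite3, so it is never passed on as a connection parameter:

    DATABASES = {
        "default": {
            "ENGINE": "bugsink.timed_sqlite_backend",  # assumed engine path
            "NAME": BASE_DIR / "db.sqlite3",
            "OPTIONS": {"query_timeout": 0.11},  # becomes the per-alias default for "default"
        },
        "snappea": {
            "ENGINE": "bugsink.timed_sqlite_backend",
            "NAME": BASE_DIR / "snappea.sqlite3",
            "OPTIONS": {"query_timeout": 0.12},  # distinct value, recognizable at a glance
        },
    }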
diff --git a/bugsink/timed_sqlite_backend/base.py b/bugsink/timed_sqlite_backend/base.py
index 4cca055..df76a4e 100644
--- a/bugsink/timed_sqlite_backend/base.py
+++ b/bugsink/timed_sqlite_backend/base.py
@@ -1,33 +1,78 @@
+from collections import namedtuple
 from copy import deepcopy
 import time
 from contextlib import contextmanager
 
 from django.conf import settings
+from threading import local
 from django.db import DEFAULT_DB_ALIAS
 from django.db.backends.sqlite3.base import (
     DatabaseWrapper as UnpatchedDatabaseWrapper,
     SQLiteCursorWrapper as UnpatchedSQLiteCursorWrapper,
 )
 
+# We distinguish between the default runtime limit for a connection (set in the settings) and a runtime limit set
+# temporarily, via the "with different_runtime_limit" idiom. The reason we need to track these two concepts explicitly,
+# and provide the fallback getter mechanism (CM value if available, otherwise connection default), rather than have the
+# program's stack determine this implicitly, is that we do not generally know the moment the connection default is set:
+# it is set when a connection is first needed, which may in fact be _after_ some CMs have already been entered.
+# (For nested CMs we do not have this problem, so we just keep track of "old" values inside the CMs as they live on the
+# Python stack.)
+Runtimes = namedtuple("Runtimes", ["default_for_connection", "override_by_cm"])
+NoneRuntimes = Runtimes(None, None)
 
-_runtime_limit = 5.0
+thread_locals = local()
+
+
+def _set_runtime_limit(using, is_default_for_connection, seconds):
+    if using is None:
+        using = DEFAULT_DB_ALIAS
+
+    if not hasattr(thread_locals, "runtime_limits"):
+        thread_locals.runtime_limits = {}
+
+    if is_default_for_connection:
+        thread_locals.runtime_limits[using] = Runtimes(
+            default_for_connection=seconds,
+            override_by_cm=thread_locals.runtime_limits.get(using, NoneRuntimes).override_by_cm,
+        )
+    else:
+        thread_locals.runtime_limits[using] = Runtimes(
+            default_for_connection=thread_locals.runtime_limits.get(using, NoneRuntimes).default_for_connection,
+            override_by_cm=seconds,
+        )
+
+
+def _get_runtime_limit(using=None):
+    if using is None:
+        using = DEFAULT_DB_ALIAS
+
+    if not hasattr(thread_locals, "runtime_limits"):
+        # somewhat overly defensive, since you'd always pass through the DatabaseWrapper, which sets this at least once.
+        thread_locals.runtime_limits = {}
+
+    tup = thread_locals.runtime_limits.get(using, NoneRuntimes)
+    return tup.override_by_cm if tup.override_by_cm is not None else tup.default_for_connection
 
 
 def allow_long_running_queries():
     """Set a global flag to allow long-running queries. Useful for one-off commands, where slowness is expected."""
-    global _runtime_limit
-    _runtime_limit = float("inf")
+    # we use is_default_for_connection=False here, to make this act like a "context manager that doesn't reset", i.e.
+    # what we do here takes precedence over the connection default value.
+    _set_runtime_limit(using=None, is_default_for_connection=False, seconds=float("inf"))
 
 
 @contextmanager
-def different_runtime_limit(seconds):
-    global _runtime_limit
-    old = _runtime_limit
-    _runtime_limit = seconds
+def different_runtime_limit(seconds, using=None):
+    if using is None:
+        using = DEFAULT_DB_ALIAS
+
+    old = _get_runtime_limit(using=using)
+    _set_runtime_limit(using=using, is_default_for_connection=False, seconds=seconds)
     try:
         yield
     finally:
-        _runtime_limit = old
+        _set_runtime_limit(using=using, is_default_for_connection=False, seconds=old)
 
 
 @contextmanager
@@ -35,7 +80,7 @@ def limit_runtime(conn):
     start = time.time()
 
     def check_time():
-        if time.time() > start + _runtime_limit:
+        if time.time() > start + _get_runtime_limit():
             return 1
         return 0
@@ -74,11 +119,9 @@ class PrintOnClose(object):
 
 class DatabaseWrapper(UnpatchedDatabaseWrapper):
     def __init__(self, settings_dict, alias=DEFAULT_DB_ALIAS):
-        global _runtime_limit
         settings_dict = deepcopy(settings_dict)
-        # clobbering of the global var should not be a problem, because connections are single-threaded in Django
-        configured_runtime_limit = settings_dict.get("OPTIONS").pop("query_timeout", 5.0)
-        _runtime_limit = configured_runtime_limit
+        configured_runtime_limit = settings_dict.get("OPTIONS", {}).pop("query_timeout", 5.0)
+        _set_runtime_limit(using=alias, is_default_for_connection=True, seconds=configured_runtime_limit)
 
         super().__init__(settings_dict, alias=alias)
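To make the two-level lookup concrete, here is a short walkthrough of the behaviour the comment block above describes. Illustrative only: it calls the module's underscore-prefixed helpers directly, which in real code only DatabaseWrapper.__init__ and the context manager do.

    from bugsink.timed_sqlite_backend.base import (
        _get_runtime_limit, _set_runtime_limit, different_runtime_limit,
    )

    # creating a connection establishes the per-alias default...
    _set_runtime_limit(using="snappea", is_default_for_connection=True, seconds=0.12)
    assert _get_runtime_limit(using="snappea") == 0.12

    # ...and a CM override takes precedence while active; this also works when the
    # connection default only arrives after the CM was entered, the scenario that
    # motivates tracking the two values separately.
    with different_runtime_limit(0.5, using="snappea"):
        assert _get_runtime_limit(using="snappea") == 0.5
    assert _get_runtime_limit(using="snappea") == 0.12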
diff --git a/bugsink/views.py b/bugsink/views.py
index 4858bbb..475844e 100644
--- a/bugsink/views.py
+++ b/bugsink/views.py
@@ -182,8 +182,8 @@ def counts(request):
     # when you have some 7 - 10 models (tag-related, events, issues) that can have many instances, spending max .3 on
     # each before giving up would seem reasonable to stay below th 5s limit; the rest is via the caches anyway.
-    with different_runtime_limit(0.3):
-        for app_label in interesting_apps:
+    for app_label in interesting_apps:
+        with different_runtime_limit(0.3, using="snappea" if app_label == "snappea" else "default"):
             counts[app_label] = {}
             app_config = apps.apps.get_app_config(app_label)
             for model in app_config.get_models():
diff --git a/snappea/stats.py b/snappea/stats.py
index 53723e2..491f580 100644
--- a/snappea/stats.py
+++ b/snappea/stats.py
@@ -107,7 +107,7 @@ class Stats:
         with immediate_atomic(using="snappea"):  # explicit is better than impl.; and we combine read/write here
             # having stats is great, but I don't want to hog task-processing too long (which would happen
             # precisely when the backlog grows large)
-            with different_runtime_limit(0.1):
+            with different_runtime_limit(0.1, using="snappea"):
                 try:
                     task_counts = Task.objects.values("task_name").annotate(count=Count("task_name"))
                 except OperationalError as e:
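The abort mechanism all of these limits feed into is check_time() returning 1, which matches the contract of sqlite's progress handler (presumably how limit_runtime registers it; that part of base.py is outside the hunks shown). sqlite invokes the handler every N virtual-machine instructions, and a nonzero return aborts the running statement with OperationalError("interrupted"), which is what the except-clauses at the call sites above catch. A self-contained demonstration, independent of Django; the table, row count and 0.1s budget are arbitrary:

    import sqlite3
    import time

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE t (x)")
    conn.executemany("INSERT INTO t VALUES (?)", [(i,) for i in range(100_000)])

    deadline = time.time() + 0.1

    def check_time():
        # called by sqlite every 1000 VM instructions; nonzero aborts the statement
        return 1 if time.time() > deadline else 0

    conn.set_progress_handler(check_time, 1000)

    try:
        conn.execute("SELECT count(*) FROM t a, t b").fetchone()  # deliberately slow self-join
    except sqlite3.OperationalError as e:
        print("query aborted:", e)  # prints: query aborted: interrupted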