Remove the periodCounter and the PC registry

direct consequence of switching to SQL-based counting
2026-03-10 08:01:17 +00:00 · 2024-07-15 14:24:58 +02:00
parent 5ce840f62f
commit d56a8663a7
10 changed files with 4 additions and 784 deletions
--- a/performance/management/commands/document_performance_insights.py
+++ b/performance/management/commands/document_performance_insights.py
@@ -1,165 +0,0 @@
-from django.core.management.base import BaseCommand
-
-import random
-import time
-from datetime import datetime, timezone
-
-from django.conf import settings
-
-from bugsink.period_counter import _prev_tup, PeriodCounter
-from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time
-from bugsink.registry import get_pc_registry
-
-from projects.models import Project
-from issues.models import Issue
-from events.models import Event
-
-
-# this file is the beginning of an approach to getting a handle on performance.
-
-
-class Command(BaseCommand):
-    help = "..."
-
-    def handle(self, *args, **options):
-        if "performance" not in str(settings.DATABASES["default"]["NAME"]):
-            raise ValueError("This command should only be run on the performance-test database")
-
-        print_thoughts_about_prev_tup()
-        print_thoughts_about_inc()
-        print_thoughts_about_event_evaluation()
-        print_thoughts_about_pc_registry()
-
-
-class passed_time(object):
-    def __enter__(self):
-        self.t0 = time.time()
-        return self
-
-    def __exit__(self, type, value, traceback):
-        self.elapsed = (time.time() - self.t0) * 1_000  # miliseconds is a good unit for timeing things
-
-
-def print_thoughts_about_prev_tup():
-    v = (2020, 1, 1, 10, 10)
-    with passed_time() as t:
-        for i in range(1_000):
-            v = _prev_tup(v)
-
-    print(f"""## _prev_tup()
-
-1_000 iterations of _prev_tup in {t.elapsed:.3f}ms. The main thing we care about is not this little
-private helper though, but PeriodCounter.inc(). Let's test that next.
-
-""")
-
-
-def print_thoughts_about_inc():
-    random.seed(42)
-
-    pc = PeriodCounter()
-
-    # make sure the pc has some data before we start
-    for point in buckets_to_points_in_time(
-        generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
-        datetime(2020, 10, 15, tzinfo=timezone.utc),
-        datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
-        10_000,
-            ):
-
-        pc.inc(point)
-
-    points = buckets_to_points_in_time(
-        generate_bursty_data(num_buckets=25, expected_nr_of_bursts=5),
-        datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
-        datetime(2021, 10, 16, 10, 5, tzinfo=timezone.utc),
-        1000)
-
-    with passed_time() as t:
-        for point in points:
-            pc.inc(point)
-
-    print(f"""## PeriodCounter.inc()
-
-1_000 iterations of PeriodCounter.inc() in {t.elapsed:.3f}ms. We care about evaluation of some event more though. Let's
-test that next.
-""")
-
-
-def print_thoughts_about_event_evaluation():
-    random.seed(42)
-
-    pc = PeriodCounter()
-
-    def noop():
-        pass
-
-    # Now, let's add some event-listeners. These are chosen to match a typical setup of quota for a given Issue or
-    # Project. In this setup, the monthly maximum is spread out in a way that the smaller parts are a bit more than just
-    # splitting things equally. Why? We want some flexibility for bursts of activity without using up the entire budget
-    # for a longer time all at once.
-    pc.add_event_listener("day",    30, 10_000, noop, noop, initial_event_state=False)  # 1 month rolling window
-    pc.add_event_listener("hour",   24,  1_000, noop, noop, initial_event_state=False)  # 1 day rolling window
-    pc.add_event_listener("minute", 60,    200, noop, noop, initial_event_state=False)  # 1 hour rolling window
-
-    # make sure the pc has some data before we start. we pick a 1-month period to match the listeners in the above.
-    for point in buckets_to_points_in_time(
-        generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
-        datetime(2021, 10, 15, tzinfo=timezone.utc),
-        datetime(2021, 11, 15, 10, 5, tzinfo=timezone.utc),
-        10_000,
-            ):
-
-        pc.inc(point)
-
-    # now we start the test: we generate a bursty data-set for a 1-day period, and see how long it takes to evaluate
-    points = buckets_to_points_in_time(
-        generate_bursty_data(num_buckets=25, expected_nr_of_bursts=5),
-        datetime(2021, 11, 15, 10, 5, tzinfo=timezone.utc),
-        datetime(2021, 11, 16, 10, 5, tzinfo=timezone.utc),
-        1000)
-
-    with passed_time() as t:
-        for point in points:
-            pc.inc(point)
-
-    print(f"""## PeriodCounter.inc()
-
-1_000 iterations of PeriodCounter.inc() in {t.elapsed:.3f}ms. (when 3 event-listeners are active). I'm not sure exactly
-what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second,
-the period-counter uses up 3% of the budget. A first guess would be: this is good enough.""")
-
-
-def print_thoughts_about_pc_registry():
-    # note: in load_performance_insights we use minimal (non-data-containing) events here. this may not be
-    # representative of real world performance. having said that: this immediately triggers the thought that for real
-    # initialization only timestamps and issue_ids are needed, and that we should adjust the code accordingly
-
-    with passed_time() as t:
-        get_pc_registry()
-
-    print(f"""## get_pc_registry()
-
-getting the pc-registry takes {t.elapsed:.3f}ms. (with the default fixtures, which contain
-
-* { Project.objects.count() } projects,
-* { Issue.objects.count() } issues,
-* { Event.objects.count() } events
-
-This means (surprisingly) we can take our eye off optimizing this particular part of code (for now), because:
-
-* in the (expected) production setup where we we cut ingestion and handling in 2 parts, 6s delay on the handling server
-  boot is fine.
-* in the debugserver (integrated ingestion/handling) we don't expect 100k events; and even if we did a 6s delay on the
-  first event/request is fine.
-
-Counterpoint: on playground.bugsink.com I just observed 42s to initalize 150k events, which is ~5 times more slow than
-the above. It's also a "real hiccup". Anyway, there's too many questions about period counter (e.g. how to share
-across processes, or the consequences of quota) to focus on this particular point first.
-
-Ways forward once we do decide to improve:
-
-* regular saving of state (savepoint in time, with "unhandled after") (the regularity of saving is left as an exercise
-  to the reader)
-* more granular caching/loading, e.g. load per project/issue on demand
-""")
--- a/performance/out/some_script.txt
+++ b/performance/out/some_script.txt
@@ -1,41 +0,0 @@
-## _prev_tup()
-
-1_000 iterations of _prev_tup in 0.832ms. The main thing we care about is not this little
-private helper though, but PeriodCounter.inc(). Let's test that next.
-
-
-## PeriodCounter.inc()
-
-1_000 iterations of PeriodCounter.inc() in 7.885ms. We care about evaluation of some event more though. Let's
-test that next.
-
-## PeriodCounter.inc()
-
-1_000 iterations of PeriodCounter.inc() in 29.567ms. (when 3 event-listeners are active). I'm not sure exactly
-what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second,
-the period-counter uses up 3% of the budget. A first guess would be: this is good enough.
-## get_pc_registry()
-
-getting the pc-registry takes 6615.371ms. (with the default fixtures, which contain
-
-* 10 projects,
-* 1000 issues,
-* 100000 events
-
-This means (surprisingly) we can take our eye off optimizing this particular part of code (for now), because:
-
-* in the (expected) production setup where we we cut ingestion and handling in 2 parts, 6s delay on the handling server
-  boot is fine.
-* in the debugserver (integrated ingestion/handling) we don't expect 100k events; and even if we did a 6s delay on the
-  first event/request is fine.
-
-Counterpoint: on playground.bugsink.com I just observed 42s to initalize 150k events, which is ~5 times more slow than
-the above. It's also a "real hiccup". Anyway, there's too many questions about period counter (e.g. how to share
-across processes, or the consequences of quota) to focus on this particular point first.
-
-Ways forward once we do decide to improve:
-
-* regular saving of state (savepoint in time, with "unhandled after") (the regularity of saving is left as an exercise
-  to the reader)
-* more granular caching/loading, e.g. load per project/issue on demand
-