thoughts about pc registry; performance of the pc registry

This commit is contained in:
Klaas van Schelven
2024-01-05 23:07:31 +01:00
parent 3810ba18f4
commit 5cf1445eeb
8 changed files with 279 additions and 3 deletions

View File

@@ -41,6 +41,8 @@ INSTALLED_APPS = [
'ingest', 'ingest',
'issues', 'issues',
'events', 'events',
'performance',
] ]
TAILWIND_APP_NAME = 'theme' TAILWIND_APP_NAME = 'theme'

0
performance/__init__.py Normal file
View File

View File

View File

@@ -0,0 +1,161 @@
from django.core.management.base import BaseCommand
import random
import time
from datetime import datetime, timezone
from django.conf import settings
from bugsink.period_counter import _prev_tup, PeriodCounter
from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time
from bugsink.registry import get_pc_registry
from projects.models import Project
from issues.models import Issue
from events.models import Event
# this file is the beginning of an approach to getting a handle on performance.
class Command(BaseCommand):
help = "..."
def handle(self, *args, **options):
if "performance" not in str(settings.DATABASES["default"]["NAME"]):
raise ValueError("This command should only be run on the performance-test database")
print_thoughts_about_prev_tup()
print_thoughts_about_inc()
print_thoughts_about_event_evaluation()
print_thoughts_about_pc_registry()
class passed_time(object):
def __enter__(self):
self.t0 = time.time()
return self
def __exit__(self, type, value, traceback):
self.elapsed = (time.time() - self.t0) * 1_000 # miliseconds is a good unit for timeing things
def print_thoughts_about_prev_tup():
v = (2020, 1, 1, 10, 10)
with passed_time() as t:
for i in range(1_000):
v = _prev_tup(v)
print(f"""## _prev_tup()
1_000 iterations of _prev_tup in {t.elapsed:.3f}ms. The main thing we care about is not this little
private helper though, but PeriodCounter.inc(). Let's test that next.
""")
def print_thoughts_about_inc():
random.seed(42)
pc = PeriodCounter()
# make sure the pc has some data before we start
for point in buckets_to_points_in_time(
generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
datetime(2020, 10, 15, tzinfo=timezone.utc),
datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
10_000,
):
pc.inc(point)
points = buckets_to_points_in_time(
generate_bursty_data(num_buckets=25, expected_nr_of_bursts=5),
datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
datetime(2021, 10, 16, 10, 5, tzinfo=timezone.utc),
1000)
with passed_time() as t:
for point in points:
pc.inc(point)
print(f"""## PeriodCounter.inc()
1_000 iterations of PeriodCounter.inc() in {t.elapsed:.3f}ms. We care about evaluation of some event more though. Let's
test that next.
""")
def print_thoughts_about_event_evaluation():
random.seed(42)
pc = PeriodCounter()
def noop():
pass
# Now, let's add some event-listeners. These are chosen to match a typical setup of quota for a given Issue or
# Project. In this setup, the monthly maximum is spread out in a way that the smaller parts are a bit more than just
# splitting things equally. Why? We want some flexibility for bursts of activity without using up the entire budget
# for a longer time all at once.
pc.add_event_listener("day", 30, 10_000, noop, noop, initial_event_state=False) # 1 month rolling window
pc.add_event_listener("hour", 24, 1_000, noop, noop, initial_event_state=False) # 1 day rolling window
pc.add_event_listener("minute", 60, 200, noop, noop, initial_event_state=False) # 1 hour rolling window
# make sure the pc has some data before we start. we pick a 1-month period to match the listeners in the above.
for point in buckets_to_points_in_time(
generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
datetime(2021, 10, 15, tzinfo=timezone.utc),
datetime(2021, 11, 15, 10, 5, tzinfo=timezone.utc),
10_000,
):
pc.inc(point)
# now we start the test: we generate a bursty data-set for a 1-day period, and see how long it takes to evaluate
points = buckets_to_points_in_time(
generate_bursty_data(num_buckets=25, expected_nr_of_bursts=5),
datetime(2021, 11, 15, 10, 5, tzinfo=timezone.utc),
datetime(2021, 11, 16, 10, 5, tzinfo=timezone.utc),
1000)
with passed_time() as t:
for point in points:
pc.inc(point)
print(f"""## PeriodCounter.inc()
1_000 iterations of PeriodCounter.inc() in {t.elapsed:.3f}ms. (when 3 event-listeners are active). I'm not sure exactly
what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second,
the period-counter uses up 3% of the budget. A first guess would be: this is good enough.""")
def print_thoughts_about_pc_registry():
# note: in load_performance_insights we use minimal (non-data-containing) events here. this may not be
# representative of real world performance. having said that: this immediately triggers the thought that for real
# initialization only timestamps and issue_ids are needed, and that we should adjust the code accordingly
with passed_time() as t:
get_pc_registry()
print(f"""## get_pc_registry()
getting the pc-registry takes {t.elapsed:.3f}ms. (with the default fixtures, which contain
* { Project.objects.count() } projects,
* { Issue.objects.count() } issues,
* { Event.objects.count() } events
This means (surprisingly) we can take our eye off optimizing this particular part of code (for now), because:
* in the (expected) production setup where we we cut ingestion and handling in 2 parts, 6s delay on the handling server
boot is fine.
* in the debugserver (integrated ingestion/handling) we don't expect 100k events; and even if we did a 6s delay on the
first event/request is fine.
Ways forward once we do decide to improve:
* regular saving of state (savepoint in time, with "unhandled after") (the regularity of saving is left as an exercise
to the reader)
* more granular caching/loading, e.g. load per project/issue on demand
""")

View File

@@ -0,0 +1,61 @@
from django.core.management.base import BaseCommand
import uuid
import random
from datetime import datetime, timezone
from django.conf import settings
from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time
from projects.models import Project
from issues.models import Issue
from events.models import Event
class Command(BaseCommand):
help = "..."
def handle(self, *args, **options):
if "performance" not in str(settings.DATABASES["default"]["NAME"]):
raise ValueError("This command should only be run on the performance-test database")
Project.objects.all().delete()
Issue.objects.all().delete()
Event.objects.all().delete()
# as a first approach, let's focus on a 'typical' (whatever that means) local setup (not hosted), for a small
# team. maybe 10 people would work on max 10 projects. let's assume we have 10k per-project limits for events
# set up. and let's assume 100 issues per project (far from inbox-zero, approach bug-sewer territory)
#
projects = [Project.objects.create(name="project %s" % i) for i in range(10)]
issues_by_project = {}
for p in projects:
issues_by_project[p.id] = [Issue.objects.create(project=p, hash="hash %d" % i) for i in range(100)]
# now we have 10 projects, each with 100 issues. let's create 10k events for each project.
for p in projects:
print("loading 10k events for project", p.name)
points = buckets_to_points_in_time(
generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
datetime(2020, 10, 15, tzinfo=timezone.utc),
datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
10_000,
)
for i, point in enumerate(points):
if i % 1_000 == 0:
print("loaded", i, "events")
# note: because we use such minimal (non-data-containing) events here, the setup in the below may
# actually not be representative of real world performance.
Event.objects.create(
project=p,
issue=random.choice(issues_by_project[p.id]),
server_side_timestamp=point,
timestamp=point,
event_id=uuid.uuid4().hex,
has_exception=True,
has_logentry=True,
data="{}",
)

View File

@@ -1,16 +1,37 @@
## _prev_tup() ## _prev_tup()
1_000 iterations of _prev_tup in 0.816ms. The main thing we care about is not this little 1_000 iterations of _prev_tup in 0.832ms. The main thing we care about is not this little
private helper though, but PeriodCounter.inc(). Let's test that next. private helper though, but PeriodCounter.inc(). Let's test that next.
## PeriodCounter.inc() ## PeriodCounter.inc()
1_000 iterations of PeriodCounter.inc() in 7.766ms. We care about evaluation of some event more though. Let's 1_000 iterations of PeriodCounter.inc() in 7.885ms. We care about evaluation of some event more though. Let's
test that next. test that next.
## PeriodCounter.inc() ## PeriodCounter.inc()
1_000 iterations of PeriodCounter.inc() in 29.593ms. (when 3 event-listeners are active). I'm not sure exactly 1_000 iterations of PeriodCounter.inc() in 29.567ms. (when 3 event-listeners are active). I'm not sure exactly
what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second, what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second,
the period-counter uses up 3% of the budget. A first guess would be: this is good enough. the period-counter uses up 3% of the budget. A first guess would be: this is good enough.
## get_pc_registry()
getting the pc-registry takes 6615.371ms. (with the default fixtures, which contain
* 10 projects,
* 1000 issues,
* 100000 events
This means (surprisingly) we can take our eye off optimizing this particular part of code (for now), because:
* in the (expected) production setup where we we cut ingestion and handling in 2 parts, 6s delay on the handling server
boot is fine.
* in the debugserver (integrated ingestion/handling) we don't expect 100k events; and even if we did a 6s delay on the
first event/request is fine.
Ways forward once we do decide to improve:
* regular saving of state (savepoint in time, with "unhandled after") (the regularity of saving is left as an exercise
to the reader)
* more granular caching/loading, e.g. load per project/issue on demand

View File

@@ -5,6 +5,9 @@ from datetime import datetime, timezone
from bugsink.period_counter import _prev_tup, PeriodCounter from bugsink.period_counter import _prev_tup, PeriodCounter
from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time from performance.bursty_data import generate_bursty_data, buckets_to_points_in_time
from projects.models import Project
from issues.models import Issue
from events.models import Event
# this file is the beginning of an approach to getting a handle on performance. # this file is the beginning of an approach to getting a handle on performance.
@@ -109,6 +112,34 @@ what a good performance would be here, but I can say the following: this means w
the period-counter uses up 3% of the budget. A first guess would be: this is good enough.""") the period-counter uses up 3% of the budget. A first guess would be: this is good enough.""")
def print_thoughts_about_pc_registry():
# as a first approach, let's focus on a 'typical' (whatever that means) local setup (not hosted), for a small team.
# maybe 10 people would work on max 10 projects. let's assume we have 10k per-project limits for events set up. and
# let's assume 100 issues per project (far from inbox-zero, approach bug-sewer territory)
#
projects = [Project.objects.create(name="project %s" % i) for i in range(10)]
issues_by_project = {}
for p in projects:
issues_by_project[p.id] = [Issue.objects.create(project=p, hash="hash %d" % i) for i in range(100)]
# now we have 10 projects, each with 100 issues. let's create 10k events for each project.
for p in projects:
points = buckets_to_points_in_time(
generate_bursty_data(num_buckets=350, expected_nr_of_bursts=10),
datetime(2020, 10, 15, tzinfo=timezone.utc),
datetime(2021, 10, 15, 10, 5, tzinfo=timezone.utc),
10_000,
)
for point in points:
# note: because we use such minimal (non-data-containing) events here, the setup in the below may actually
# not be representative of real world performance. having said that: this immediately triggers the thought
# that for real initialization only timestamps and issue_ids are needed, and that we should adjust the code
# accordingly
Event.objects.create(project=p, issue=random.choice(issues_by_project[p.id]), server_side_timestamp=point)
print_thoughts_about_prev_tup() print_thoughts_about_prev_tup()
print_thoughts_about_inc() print_thoughts_about_inc()
print_thoughts_about_event_evaluation() print_thoughts_about_event_evaluation()