From da04ebe6c7fa7a096538a1792f64614925f7b693 Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Fri, 5 Jan 2024 17:47:59 +0100 Subject: [PATCH] prev_tup with n parameter this makes the usages of prev_tup approx 3 times faster --- bugsink/period_counter.py | 30 +++++++++++++++++++----------- bugsink/tests.py | 10 +++++++++- performance/out/some_script.txt | 8 ++++---- performance/some_script.py | 2 +- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/bugsink/period_counter.py b/bugsink/period_counter.py index afd935d..77c3986 100644 --- a/bugsink/period_counter.py +++ b/bugsink/period_counter.py @@ -25,30 +25,38 @@ TL_HOUR = 4 TL_MINUTE = 5 -def apply_n(f, n, v): - for i in range(n): - v = f(v) - return v - - -def _prev_tup(tup): +def _prev_tup(tup, n=1): aslist = list(tup) + + # if n > 1 we try to first remove the largest possible chunk from the last element of the tuple, so that we can + # then do the remainder in the loop (for performance reasons) + if n > 1: + DONE_IN_LOOP = 1 + first_chunk = min(n - DONE_IN_LOOP, max(0, tup[-1] - MIN_VALUE_AT_TUP_INDEX[-1] - DONE_IN_LOOP)) + aslist[-1] -= first_chunk + remainder = n - first_chunk - DONE_IN_LOOP + else: + remainder = 0 + for tup_index, val in reversed(list(enumerate(aslist))): if aslist[tup_index] == MIN_VALUE_AT_TUP_INDEX[tup_index]: if tup_index == 2: # day roll-over: just use a datetime aslist = list((datetime(*aslist, tzinfo=timezone.utc) - timedelta(days=1)).timetuple()[:len(tup)]) - break + break # we've used a timedelta, so we don't need to do months/years "by hand" in the loop else: # roll over to max aslist[tup_index] = MAX_VALUE_AT_TUP_INDEX[tup_index] - # implied because no break: continue with the left hand side + # implied because no break: continue with the left hand side of the tuple else: aslist[tup_index] -= 1 break + if remainder > 0: + return _prev_tup(aslist, remainder) + return tuple(aslist) @@ -58,7 +66,7 @@ def _inc(d, tup, n, max_age): if tup not in d: if len(d) > 0: new_period = True - min_tup = apply_n(_prev_tup, max_age - 1, tup) + min_tup = _prev_tup(tup, max_age - 1) for k, v in list(d.items()): if k < min_tup: del d[k] @@ -123,7 +131,7 @@ class PeriodCounter(object): }[period_name] def _get_event_state(self, tup, tl, how_many_periods, gte_threshold): - min_tup = apply_n(_prev_tup, how_many_periods - 1, tup) + min_tup = _prev_tup(tup, how_many_periods - 1) if tup != () else () d = self.counts[tl] total = sum([v for k, v in d.items() if k >= min_tup]) diff --git a/bugsink/tests.py b/bugsink/tests.py index 79cc715..e4b1f4e 100644 --- a/bugsink/tests.py +++ b/bugsink/tests.py @@ -21,7 +21,7 @@ class callback(object): class PeriodCounterTestCase(TestCase): - def test_prev_tup(self): + def test_prev_tup_near_rollover(self): self.assertEquals((2020,), _prev_tup((2021,))) self.assertEquals((2020, 1), _prev_tup((2020, 2))) @@ -40,12 +40,20 @@ class PeriodCounterTestCase(TestCase): self.assertEquals((2020, 1, 1, 0, 0), _prev_tup((2020, 1, 1, 0, 1))) self.assertEquals((2019, 12, 31, 23, 59), _prev_tup((2020, 1, 1, 0, 0))) + def test_prev_tup_large_number_of_applications(self): self.assertEquals((1920,), apply_n(_prev_tup, 100, (2020,))) self.assertEquals((2010, 5), apply_n(_prev_tup, 120, (2020, 5))) self.assertEquals((2019, 5, 7,), apply_n(_prev_tup, 366, (2020, 5, 7))) self.assertEquals((2020, 5, 6, 20,), apply_n(_prev_tup, 24, (2020, 5, 7, 20,))) self.assertEquals((2020, 5, 6, 20, 12), apply_n(_prev_tup, 1440, (2020, 5, 7, 20, 12))) + def test_prev_tup_with_explicit_n(self): + self.assertEquals(_prev_tup((2020,), 100), apply_n(_prev_tup, 100, (2020,))) + self.assertEquals(_prev_tup((2020, 5), 120), apply_n(_prev_tup, 120, (2020, 5))) + self.assertEquals(_prev_tup((2020, 5, 7), 366), apply_n(_prev_tup, 366, (2020, 5, 7))) + self.assertEquals(_prev_tup((2020, 5, 7, 20,), 24), apply_n(_prev_tup, 24, (2020, 5, 7, 20,))) + self.assertEquals(_prev_tup((2020, 5, 7, 20, 12), 1440), apply_n(_prev_tup, 1440, (2020, 5, 7, 20, 12))) + def test_foo(self): datetime_utc = datetime.now(timezone.utc) # basically I just want to write this down somewhere pc = PeriodCounter() diff --git a/performance/out/some_script.txt b/performance/out/some_script.txt index 7084820..330408c 100644 --- a/performance/out/some_script.txt +++ b/performance/out/some_script.txt @@ -1,16 +1,16 @@ ## _prev_tup() -1_000 iterations of _prev_tup in 0.794ms. The main thing we care about is not this little +1_000 iterations of _prev_tup in 0.816ms. The main thing we care about is not this little private helper though, but PeriodCounter.inc(). Let's test that next. ## PeriodCounter.inc() -1_000 iterations of PeriodCounter.inc() in 25.186ms. We care about evaluation of some event more though. Let's +1_000 iterations of PeriodCounter.inc() in 7.766ms. We care about evaluation of some event more though. Let's test that next. ## PeriodCounter.inc() -1_000 iterations of PeriodCounter.inc() in 93.481ms. (when 3 event-listeners are active). I'm not sure exactly +1_000 iterations of PeriodCounter.inc() in 29.593ms. (when 3 event-listeners are active). I'm not sure exactly what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second, -the period-counter uses up 10% of the budget. A first guess would be: this is good enough. +the period-counter uses up 3% of the budget. A first guess would be: this is good enough. diff --git a/performance/some_script.py b/performance/some_script.py index 4709fca..9c9d6e0 100644 --- a/performance/some_script.py +++ b/performance/some_script.py @@ -106,7 +106,7 @@ def print_thoughts_about_event_evaluation(): 1_000 iterations of PeriodCounter.inc() in {t.elapsed:.3f}ms. (when 3 event-listeners are active). I'm not sure exactly what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second, -the period-counter uses up 10% of the budget. A first guess would be: this is good enough.""") +the period-counter uses up 3% of the budget. A first guess would be: this is good enough.""") print_thoughts_about_prev_tup()