From da04ebe6c7fa7a096538a1792f64614925f7b693 Mon Sep 17 00:00:00 2001
From: Klaas van Schelven <klaas@vanschelven.com>
Date: Fri, 5 Jan 2024 17:47:59 +0100
Subject: [PATCH] prev_tup with n parameter

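Give _prev_tup an n parameter so that it steps back n periods in a single
call, and use that instead of the apply_n(_prev_tup, ...) calls in
period_counter.py. This makes the usages of _prev_tup (e.g.
PeriodCounter.inc(), see performance/out/some_script.txt) approx. 3 times
faster.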
---
 bugsink/period_counter.py       | 30 +++++++++++++++++++-----------
 bugsink/tests.py                | 10 +++++++++-
 performance/out/some_script.txt |  8 ++++----
 performance/some_script.py      |  2 +-
 4 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/bugsink/period_counter.py b/bugsink/period_counter.py
index afd935d..77c3986 100644
--- a/bugsink/period_counter.py
+++ b/bugsink/period_counter.py
@@ -25,30 +25,38 @@ TL_HOUR = 4
 TL_MINUTE = 5
 
 
-def apply_n(f, n, v):
-    for i in range(n):
-        v = f(v)
-    return v
-
-
-def _prev_tup(tup):
+def _prev_tup(tup, n=1):
     aslist = list(tup)
+
+    # if n > 1 we first take the largest possible chunk off the last element of the tuple in a single subtraction;
+    # the loop below then does exactly 1 step, and what remains is done by the recursive call at the end (performance)
+    if n > 1:
+        DONE_IN_LOOP = 1
+        first_chunk = min(n - DONE_IN_LOOP, max(0, tup[-1] - MIN_VALUE_AT_TUP_INDEX[-1] - DONE_IN_LOOP))
+        aslist[-1] -= first_chunk
+        remainder = n - first_chunk - DONE_IN_LOOP
+    else:
+        remainder = 0
+
     for tup_index, val in reversed(list(enumerate(aslist))):
         if aslist[tup_index] == MIN_VALUE_AT_TUP_INDEX[tup_index]:
             if tup_index == 2:
                 # day roll-over: just use a datetime
                 aslist = list((datetime(*aslist, tzinfo=timezone.utc) - timedelta(days=1)).timetuple()[:len(tup)])
-                break
+                break  # we've used a timedelta, so we don't need to do months/years "by hand" in the loop
 
             else:
                 # roll over to max
                 aslist[tup_index] = MAX_VALUE_AT_TUP_INDEX[tup_index]
-                # implied because no break: continue with the left hand side
+                # implied because no break: continue with the left hand side of the tuple
 
         else:
             aslist[tup_index] -= 1
             break
 
+    if remainder > 0:
+        return _prev_tup(aslist, remainder)
+
     return tuple(aslist)
 
 
@@ -58,7 +66,7 @@ def _inc(d, tup, n, max_age):
     if tup not in d:
         if len(d) > 0:
             new_period = True
-            min_tup = apply_n(_prev_tup, max_age - 1, tup)
+            min_tup = _prev_tup(tup, max_age - 1)
             for k, v in list(d.items()):
                 if k < min_tup:
                     del d[k]
@@ -123,7 +131,7 @@ class PeriodCounter(object):
         }[period_name]
 
     def _get_event_state(self, tup, tl, how_many_periods, gte_threshold):
-        min_tup = apply_n(_prev_tup, how_many_periods - 1, tup)
+        min_tup = _prev_tup(tup, how_many_periods - 1) if tup != () else ()
         d = self.counts[tl]
         total = sum([v for k, v in d.items() if k >= min_tup])
 
diff --git a/bugsink/tests.py b/bugsink/tests.py
index 79cc715..e4b1f4e 100644
--- a/bugsink/tests.py
+++ b/bugsink/tests.py
@@ -21,7 +21,7 @@ class callback(object):
 
 class PeriodCounterTestCase(TestCase):
 
-    def test_prev_tup(self):
+    def test_prev_tup_near_rollover(self):
         self.assertEquals((2020,), _prev_tup((2021,)))
 
         self.assertEquals((2020,  1), _prev_tup((2020,  2)))
@@ -40,12 +40,20 @@ class PeriodCounterTestCase(TestCase):
         self.assertEquals((2020,  1,  1,  0,  0), _prev_tup((2020,  1,  1,  0,  1)))
         self.assertEquals((2019, 12, 31, 23, 59), _prev_tup((2020,  1,  1,  0,  0)))
 
+    def test_prev_tup_large_number_of_applications(self):
         self.assertEquals((1920,), apply_n(_prev_tup, 100, (2020,)))
         self.assertEquals((2010, 5), apply_n(_prev_tup, 120, (2020, 5)))
         self.assertEquals((2019, 5, 7,), apply_n(_prev_tup, 366, (2020, 5, 7)))
         self.assertEquals((2020, 5, 6, 20,), apply_n(_prev_tup, 24, (2020, 5, 7, 20,)))
         self.assertEquals((2020, 5, 6, 20, 12), apply_n(_prev_tup, 1440, (2020, 5, 7, 20, 12)))
 
+    def test_prev_tup_with_explicit_n(self):
+        self.assertEquals(_prev_tup((2020,), 100), apply_n(_prev_tup, 100, (2020,)))
+        self.assertEquals(_prev_tup((2020, 5), 120), apply_n(_prev_tup, 120, (2020, 5)))
+        self.assertEquals(_prev_tup((2020, 5, 7), 366), apply_n(_prev_tup, 366, (2020, 5, 7)))
+        self.assertEquals(_prev_tup((2020, 5, 7, 20,), 24), apply_n(_prev_tup, 24, (2020, 5, 7, 20,)))
+        self.assertEquals(_prev_tup((2020, 5, 7, 20, 12), 1440), apply_n(_prev_tup, 1440, (2020, 5, 7, 20, 12)))
+
     def test_foo(self):
         datetime_utc = datetime.now(timezone.utc)  # basically I just want to write this down somewhere
         pc = PeriodCounter()
diff --git a/performance/out/some_script.txt b/performance/out/some_script.txt
index 7084820..330408c 100644
--- a/performance/out/some_script.txt
+++ b/performance/out/some_script.txt
@@ -1,16 +1,16 @@
 ## _prev_tup()
 
-1_000 iterations of _prev_tup in 0.794ms. The main thing we care about is not this little
+1_000 iterations of _prev_tup in 0.816ms. The main thing we care about is not this little
 private helper though, but PeriodCounter.inc(). Let's test that next.
 
 
 ## PeriodCounter.inc()
 
-1_000 iterations of PeriodCounter.inc() in 25.186ms. We care about evaluation of some event more though. Let's
+1_000 iterations of PeriodCounter.inc() in 7.766ms. We care about evaluation of some event more though. Let's
 test that next.
 
 ## PeriodCounter.inc()
 
-1_000 iterations of PeriodCounter.inc() in 93.481ms. (when 3 event-listeners are active). I'm not sure exactly
+1_000 iterations of PeriodCounter.inc() in 29.593ms. (when 3 event-listeners are active). I'm not sure exactly
 what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second,
-the period-counter uses up 10% of the budget. A first guess would be: this is good enough.
+the period-counter uses up 3% of the budget. A first guess would be: this is good enough.
diff --git a/performance/some_script.py b/performance/some_script.py
index 4709fca..9c9d6e0 100644
--- a/performance/some_script.py
+++ b/performance/some_script.py
@@ -106,7 +106,7 @@ def print_thoughts_about_event_evaluation():
 
 1_000 iterations of PeriodCounter.inc() in {t.elapsed:.3f}ms. (when 3 event-listeners are active). I'm not sure exactly
 what a good performance would be here, but I can say the following: this means when a 1,000 events happen in a second,
-the period-counter uses up 10% of the budget. A first guess would be: this is good enough.""")
+the period-counter uses up 3% of the budget. A first guess would be: this is good enough.""")
 
 
 print_thoughts_about_prev_tup()