mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-10 08:01:17 +00:00
Add tool to generate insight into retention (and fix bugs that that insight revealed)
This commit is contained in:
@@ -22,12 +22,12 @@ def get_epoch_bounds(lower, upper=None):
|
||||
return Q()
|
||||
|
||||
if lower is None:
|
||||
return Q(timestamp__lt=datetime_for_epoch(upper))
|
||||
return Q(server_side_timestamp__lt=datetime_for_epoch(upper))
|
||||
|
||||
if upper is None:
|
||||
return Q(timestamp__gte=datetime_for_epoch(lower))
|
||||
return Q(server_side_timestamp__gte=datetime_for_epoch(lower))
|
||||
|
||||
return Q(timestamp__gte=datetime_for_epoch(lower), timestamp__lt=datetime_for_epoch(upper))
|
||||
return Q(server_side_timestamp__gte=datetime_for_epoch(lower), server_side_timestamp__lt=datetime_for_epoch(upper))
|
||||
|
||||
|
||||
def nonzero_leading_bits(n):
|
||||
@@ -188,6 +188,7 @@ def evict_for_max_events(project, timestamp, stored_event_count=None):
|
||||
max_total_irrelevance -= 1
|
||||
|
||||
evict_for_irrelevance(
|
||||
project,
|
||||
max_total_irrelevance,
|
||||
list(filter_for_work(epoch_bounds_with_irrelevance, pairs, max_total_irrelevance)))
|
||||
|
||||
@@ -199,10 +200,13 @@ def evict_for_max_events(project, timestamp, stored_event_count=None):
|
||||
raise Exception("No more effective eviction possible but target not reached")
|
||||
|
||||
# print("Evicted down to %d with a max_total_irrelevance of %d" % (observed_size, max_total_irrelevance)) TODO log
|
||||
for query in connection.queries[pre:]:
|
||||
print(query['sql'])
|
||||
print("Reached", stored_event_count, "events")
|
||||
return max_total_irrelevance
|
||||
|
||||
|
||||
def evict_for_irrelevance(max_total_irrelevance, epoch_bounds_with_irrelevance):
|
||||
def evict_for_irrelevance(project, max_total_irrelevance, epoch_bounds_with_irrelevance):
|
||||
# print("evict_for_irrelevance(%d, %s)" % (max_total_irrelevance, epoch_bounds_with_irrelevance))
|
||||
|
||||
# max_total_irrelevance, i.e. the total may not exceed this (but it may equal it)
|
||||
@@ -210,7 +214,7 @@ def evict_for_irrelevance(max_total_irrelevance, epoch_bounds_with_irrelevance):
|
||||
for (_, epoch_ub_exclusive), irrelevance_for_age in epoch_bounds_with_irrelevance:
|
||||
max_item_irrelevance = max_total_irrelevance - irrelevance_for_age
|
||||
|
||||
evict_for_epoch_and_irrelevance(epoch_ub_exclusive, max_item_irrelevance)
|
||||
evict_for_epoch_and_irrelevance(project, epoch_ub_exclusive, max_item_irrelevance)
|
||||
|
||||
if max_item_irrelevance <= -1:
|
||||
# in the actual eviction, the test on max_item_irrelevance is done exclusively, i.e. only items of greater
|
||||
@@ -219,7 +223,7 @@ def evict_for_irrelevance(max_total_irrelevance, epoch_bounds_with_irrelevance):
|
||||
break
|
||||
|
||||
|
||||
def evict_for_epoch_and_irrelevance(max_epoch, max_irrelevance):
|
||||
def evict_for_epoch_and_irrelevance(project, max_epoch, max_irrelevance):
|
||||
# print("evict_for_epoch_and_irrelevance(%s, %s)" % (max_epoch, max_irrelevance))
|
||||
|
||||
from .models import Event
|
||||
@@ -244,7 +248,7 @@ def evict_for_epoch_and_irrelevance(max_epoch, max_irrelevance):
|
||||
# this call, and only when `B` is cleaned will the points `x` be cleaned. (as-is, they are part of the selection,
|
||||
# but will already have been deleted)
|
||||
|
||||
qs = Event.objects.filter(irrelevance_for_retention__gt=max_irrelevance)
|
||||
qs = Event.objects.filter(project=project, irrelevance_for_retention__gt=max_irrelevance)
|
||||
|
||||
if max_epoch is not None:
|
||||
qs = qs.filter(server_side_timestamp__lt=datetime_for_epoch(max_epoch))
|
||||
|
||||
66
events/retention_insight.py
Normal file
66
events/retention_insight.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from .retention import get_epoch_bounds_with_irrelevance, get_irrelevance_pairs, datetime_for_epoch
|
||||
from .models import Event
|
||||
|
||||
|
||||
def retention_insight_values(project):
    """Yield, per epoch range, a dict mapping irrelevance -> number of stored events for `project`.

    The epoch ranges and the (age-based irrelevance, max observed irrelevance) pairs are obtained from the retention
    module for the current UTC time, and printed for debugging. The ranges are then walked in reverse order; for each
    one, the events are counted per irrelevance value from 0 up to and including the max observed irrelevance.

    Yields tuples `(lb, results)`, where `lb` is the lower bound of the epoch range (possibly None) and `results` is
    the irrelevance -> count dict. After the last range, asserts that the yielded counts add up to the total number of
    events stored for the project.
    """
    now = datetime.now(tz=timezone.utc)

    epoch_bounds_with_irrelevance = get_epoch_bounds_with_irrelevance(project, now)
    pairs = list(get_irrelevance_pairs(project, epoch_bounds_with_irrelevance))

    print("epoch_bounds_with_irrelevance")
    for item in epoch_bounds_with_irrelevance:
        print(item)

    print("pairs")
    for pair in pairs:
        print(pair)

    yielded = 0
    pairs_with_bounds = list(zip(pairs, epoch_bounds_with_irrelevance))

    for (age_based_irrelevance, max_observed_irrelevance), ((lb, ub), _) in pairs_with_bounds[::-1]:
        print("?", age_based_irrelevance, max_observed_irrelevance, lb, ub)

        results = {}
        for irrelevance in range(max_observed_irrelevance + 1):
            # None for either bound means "unbounded on that side", i.e. no filter for it.
            conditions = {"project": project, "irrelevance_for_retention": irrelevance}
            if lb is not None:
                conditions["server_side_timestamp__gte"] = datetime_for_epoch(lb)
            if ub is not None:
                conditions["server_side_timestamp__lt"] = datetime_for_epoch(ub)

            count = Event.objects.filter(**conditions).count()
            results[irrelevance] = count
            yielded += count

        yield (lb, results)  # lb makes more sense visually

    # sanity check: every stored event of the project must have been counted exactly once
    assert Event.objects.filter(project=project).count() == yielded, "%d != %d" % (
        Event.objects.filter(project=project).count(), yielded)
|
||||
|
||||
|
||||
def retention_insight(project):
    """Print a table of stored-event counts for `project`: one column per epoch range, one row per irrelevance.

    The data comes from `retention_insight_values(project)`; the raw data is printed first (for debugging), followed
    by a header line with the lower bound of each epoch range, one line per irrelevance value, and a grand total.
    Returns None; all output goes to stdout.
    """
    data = list(retention_insight_values(project))
    print(data)

    # Highest irrelevance occurring in any column. The defaults cover both "no columns at all" and "a column without
    # any rows"; without them max() raises ValueError on an empty sequence.
    max_irrelevance = max((max(counts, default=0) for _, counts in data), default=0)
    # max_count = max(max(d.values() for _, d in data), default=0)  idea: use for formatting, but dates are bigger

    # len("2000-01-01 16h") == 14 -> 16 for padding; header cells and count cells share this width so that the
    # columns line up, also for the column without a lower bound.
    column_width = 16

    def header_cell(epoch):
        if epoch is None:
            return " " * column_width
        return datetime_for_epoch(epoch).strftime("%Y-%m-%d %Hh").ljust(column_width)

    # headers
    print(" " * 5, end="")  # same width as the "%3d| " row label below
    for epoch, _ in data:
        print(header_cell(epoch), end="")
    print()

    for irrelevance in range(max_irrelevance + 1):
        print("%3d| " % irrelevance, end="")
        for _, counts in data:
            # right-align the number under the 14-character date, then pad to the column width
            print(("%14d" % counts.get(irrelevance, 0)).ljust(column_width), end="")
        print()

    print("Total: ", sum(sum(counts.values()) for _, counts in data))
|
||||
Reference in New Issue
Block a user