diff --git a/events/retention.py b/events/retention.py index ab998db..99ef70d 100644 --- a/events/retention.py +++ b/events/retention.py @@ -22,12 +22,12 @@ def get_epoch_bounds(lower, upper=None): return Q() if lower is None: - return Q(timestamp__lt=datetime_for_epoch(upper)) + return Q(server_side_timestamp__lt=datetime_for_epoch(upper)) if upper is None: - return Q(timestamp__gte=datetime_for_epoch(lower)) + return Q(server_side_timestamp__gte=datetime_for_epoch(lower)) - return Q(timestamp__gte=datetime_for_epoch(lower), timestamp__lt=datetime_for_epoch(upper)) + return Q(server_side_timestamp__gte=datetime_for_epoch(lower), server_side_timestamp__lt=datetime_for_epoch(upper)) def nonzero_leading_bits(n): @@ -188,6 +188,7 @@ def evict_for_max_events(project, timestamp, stored_event_count=None): max_total_irrelevance -= 1 evict_for_irrelevance( + project, max_total_irrelevance, list(filter_for_work(epoch_bounds_with_irrelevance, pairs, max_total_irrelevance))) @@ -199,10 +200,13 @@ def evict_for_max_events(project, timestamp, stored_event_count=None): raise Exception("No more effective eviction possible but target not reached") # print("Evicted down to %d with a max_total_irrelevance of %d" % (observed_size, max_total_irrelevance)) TODO log + for query in connection.queries[pre:]: + print(query['sql']) + print("Reached", stored_event_count, "events") return max_total_irrelevance -def evict_for_irrelevance(max_total_irrelevance, epoch_bounds_with_irrelevance): +def evict_for_irrelevance(project, max_total_irrelevance, epoch_bounds_with_irrelevance): # print("evict_for_irrelevance(%d, %s)" % (max_total_irrelevance, epoch_bounds_with_irrelevance)) # max_total_irrelevance, i.e. 
the total may not exceed this (but it may equal it) @@ -210,7 +214,7 @@ def evict_for_irrelevance(max_total_irrelevance, epoch_bounds_with_irrelevance): for (_, epoch_ub_exclusive), irrelevance_for_age in epoch_bounds_with_irrelevance: max_item_irrelevance = max_total_irrelevance - irrelevance_for_age - evict_for_epoch_and_irrelevance(epoch_ub_exclusive, max_item_irrelevance) + evict_for_epoch_and_irrelevance(project, epoch_ub_exclusive, max_item_irrelevance) if max_item_irrelevance <= -1: # in the actual eviction, the test on max_item_irrelevance is done exclusively, i.e. only items of greater @@ -219,7 +223,7 @@ def evict_for_irrelevance(max_total_irrelevance, epoch_bounds_with_irrelevance): break -def evict_for_epoch_and_irrelevance(max_epoch, max_irrelevance): +def evict_for_epoch_and_irrelevance(project, max_epoch, max_irrelevance): # print("evict_for_epoch_and_irrelevance(%s, %s)" % (max_epoch, max_irrelevance)) from .models import Event @@ -244,7 +248,7 @@ def evict_for_epoch_and_irrelevance(max_epoch, max_irrelevance): # this call, and only when `B` is cleaned will the points `x` be cleaned. 
from datetime import datetime, timezone

from .retention import get_epoch_bounds_with_irrelevance, get_irrelevance_pairs, datetime_for_epoch
from .models import Event

# Width of one table column: len("2000-01-01 16h") == 14, plus 2 characters of padding.
_COLUMN_WIDTH = 16


def retention_insight_values(project):
    """Yield, per epoch range, the number of stored events for each irrelevance value.

    The epoch ranges and their pairs come from `get_epoch_bounds_with_irrelevance` and
    `get_irrelevance_pairs`, evaluated at the current UTC time; both are printed for
    inspection. The ranges are then walked in reverse order. For each range, one
    `COUNT` query is issued per irrelevance value in
    `range(max_observed_irrelevance + 1)`.

    Yields:
        `(lb, results)` tuples, where `lb` is the lower epoch bound of the range
        (possibly `None`) and `results` maps each irrelevance value to the number of
        the project's events with that `irrelevance_for_retention` in the range.

    Raises:
        AssertionError: once the generator is exhausted, if the counts yielded do not
            add up to the project's total number of events.
    """
    timestamp = datetime.now(tz=timezone.utc)

    epoch_bounds_with_irrelevance = get_epoch_bounds_with_irrelevance(project, timestamp)
    pairs = list(get_irrelevance_pairs(project, epoch_bounds_with_irrelevance))

    print("epoch_bounds_with_irrelevance")
    for x in epoch_bounds_with_irrelevance:
        print(x)

    print("pairs")
    for x in pairs:
        print(x)

    yielded = 0
    paired = list(zip(pairs, epoch_bounds_with_irrelevance))
    for (age_based_irrelevance, max_observed_irrelevance), ((lb, ub), _) in reversed(paired):
        print("?", age_based_irrelevance, max_observed_irrelevance, lb, ub)
        results = {}
        for irrelevance in range(max_observed_irrelevance + 1):
            qs = Event.objects.filter(
                project=project,
                irrelevance_for_retention=irrelevance,
            )
            # Bounds are half-open: lower inclusive, upper exclusive; None means unbounded.
            if lb is not None:
                qs = qs.filter(server_side_timestamp__gte=datetime_for_epoch(lb))
            if ub is not None:
                qs = qs.filter(server_side_timestamp__lt=datetime_for_epoch(ub))

            howmany = qs.count()
            results[irrelevance] = howmany
            yielded += howmany

        yield (lb, results)  # lb makes more sense visually

    # Sanity check: every event of the project should have been counted exactly once.
    # The total is queried once so the message reports the value that was compared.
    total = Event.objects.filter(project=project).count()
    assert total == yielded, "%d != %d" % (total, yielded)


def retention_insight(project):
    """Print a table of event counts: one column per epoch range, one row per irrelevance.

    The rows run from 0 up to the largest irrelevance value present in any column.
    Missing combinations are shown as 0. The raw data is printed first and a grand
    total is printed last. Nothing is returned.
    """
    data = list(retention_insight_values(project))
    print(data)

    # Flatten the keys of all columns. `default=0` covers both "no columns at all" and
    # "only empty columns", either of which would make a bare max() raise ValueError.
    max_irrelevance = max((irrelevance for _, counts in data for irrelevance in counts), default=0)
    # idea: the maximum count could drive the column width, but the date headers are wider anyway

    def fmt(epoch):
        # A column without a lower bound gets a blank header of the same width.
        if epoch is None:
            return " " * _COLUMN_WIDTH
        return datetime_for_epoch(epoch).strftime("%Y-%m-%d %Hh").ljust(_COLUMN_WIDTH)

    # headers
    print(" " * 5, end="")  # same width as the "%3d| " row label
    for epoch, _ in data:
        print(fmt(epoch), end="")
    print()

    for irrelevance in range(max_irrelevance + 1):
        print("%3d| " % irrelevance, end="")
        for _, results in data:
            print(("%14d" % results.get(irrelevance, 0)).ljust(_COLUMN_WIDTH), end="")
        print()

    print("Total: ", sum(sum(d.values()) for _, d in data))