mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-10 08:01:17 +00:00
Eviction: use deletion counts to keep track of the work
This saves a query in the (small) loop (namely: selection counts of remaining items) It also allows us to stop sooner (evict less).
This commit is contained in:
@@ -199,18 +199,19 @@ def evict_for_max_events(project, timestamp, stored_event_count=None):
|
||||
max_total_irrelevance = orig_max_total_irrelevance = max(sum(pair) for pair in pairs)
|
||||
|
||||
with time_and_query_count() as phase1:
|
||||
while stored_event_count > lowered_target(project.retention_max_event_count): # > as in `should_evict`
|
||||
deleted = 0
|
||||
while stored_event_count - deleted > lowered_target(project.retention_max_event_count): # > as in should_evict
|
||||
# -1 at the beginning of the loop; this means the actually observed max value is precisely the first thing
|
||||
# that will be evicted (since `evict_for_irrelevance` will evict anything above (but not including) the
|
||||
# given value)
|
||||
max_total_irrelevance -= 1
|
||||
|
||||
evict_for_irrelevance(
|
||||
deleted += evict_for_irrelevance(
|
||||
project,
|
||||
max_total_irrelevance,
|
||||
list(filter_for_work(epoch_bounds_with_irrelevance, pairs, max_total_irrelevance)))
|
||||
|
||||
stored_event_count = Event.objects.filter(project=project).count() + 1
|
||||
list(filter_for_work(epoch_bounds_with_irrelevance, pairs, max_total_irrelevance)),
|
||||
(project.retention_max_event_count - lowered_target(project.retention_max_event_count)) - deleted,
|
||||
)
|
||||
|
||||
if max_total_irrelevance < -1: # < -1: as in `evict_for_irrelevance`
|
||||
# could still happen if there's max_size items that cannot be evicted at all
|
||||
@@ -220,21 +221,22 @@ def evict_for_max_events(project, timestamp, stored_event_count=None):
|
||||
# phase 1: DELETE (evictions) and SELECT total count ("are we there yet?")
|
||||
performance_logger.info(
|
||||
"%6.2fms EVICT; down to %d, max irr. from %d to %d in %dms+%dms and %d+%d queries",
|
||||
phase0.took + phase1.took, stored_event_count, orig_max_total_irrelevance, max_total_irrelevance, phase0.took,
|
||||
phase1.took, phase0.count, phase1.count)
|
||||
phase0.took + phase1.took, stored_event_count - deleted, orig_max_total_irrelevance, max_total_irrelevance,
|
||||
phase0.took, phase1.took, phase0.count, phase1.count)
|
||||
|
||||
return max_total_irrelevance
|
||||
|
||||
|
||||
def evict_for_irrelevance(project, max_total_irrelevance, epoch_bounds_with_irrelevance):
|
||||
# print("evict_for_irrelevance(%d, %s)" % (max_total_irrelevance, epoch_bounds_with_irrelevance))
|
||||
def evict_for_irrelevance(project, max_total_irrelevance, epoch_bounds_with_irrelevance, max_event_count=None):
|
||||
# max_total_irrelevance: the total may not exceed this (but it may equal it)
|
||||
# max_event_count is optional in anticipation of non-count (i.e. size-based) based methods of eviction
|
||||
|
||||
# max_total_irrelevance, i.e. the total may not exceed this (but it may equal it)
|
||||
deleted = 0
|
||||
|
||||
for (_, epoch_ub_exclusive), irrelevance_for_age in epoch_bounds_with_irrelevance:
|
||||
max_item_irrelevance = max_total_irrelevance - irrelevance_for_age
|
||||
|
||||
evict_for_epoch_and_irrelevance(project, epoch_ub_exclusive, max_item_irrelevance)
|
||||
deleted += evict_for_epoch_and_irrelevance(project, epoch_ub_exclusive, max_item_irrelevance)
|
||||
|
||||
if max_item_irrelevance <= -1:
|
||||
# in the actual eviction, the test on max_item_irrelevance is done exclusively, i.e. only items of greater
|
||||
@@ -242,6 +244,15 @@ def evict_for_irrelevance(project, max_total_irrelevance, epoch_bounds_with_irre
|
||||
# with max_item_irrelevance = -1. This means that if we just did such an eviction, we're done for all epochs
|
||||
break
|
||||
|
||||
if max_event_count is not None and deleted >= max_event_count:
|
||||
# We've reached the target; we can stop early. In this case not all events with greater than max_total_irr
|
||||
# will have been evicted; if this is the case older items are more likely to be spared (because epochs are
|
||||
# visited in reverse order).
|
||||
print("BREAKING as just programmed")
|
||||
break
|
||||
|
||||
return deleted
|
||||
|
||||
|
||||
def evict_for_epoch_and_irrelevance(project, max_epoch, max_irrelevance):
|
||||
# print("evict_for_epoch_and_irrelevance(%s, %s)" % (max_epoch, max_irrelevance))
|
||||
@@ -273,4 +284,8 @@ def evict_for_epoch_and_irrelevance(project, max_epoch, max_irrelevance):
|
||||
if max_epoch is not None:
|
||||
qs = qs.filter(server_side_timestamp__lt=datetime_for_epoch(max_epoch))
|
||||
|
||||
qs.delete()
|
||||
# NOTE: we could take the idea of limiting the number of deletions to a desired maximum a bit further here; however,
|
||||
# Django does not support this out of the box (i.e. it does not support LIMIT in DELETE queries). Sqlite does in
|
||||
# fact support it (whereas many other DBs do not), so we have this available as a future improvement.
|
||||
nr_of_deletions, _ = qs.delete()
|
||||
return nr_of_deletions
|
||||
|
||||
Reference in New Issue
Block a user