mirror of
https://github.com/jlengrand/bugsink.git
synced 2026-03-09 23:51:20 +00:00
@@ -1,9 +1,15 @@
|
||||
from datetime import datetime, timezone
|
||||
from uuid import UUID
|
||||
import json
|
||||
import sourcemap
|
||||
from issues.utils import get_values
|
||||
|
||||
from bugsink.transaction import delay_on_commit
|
||||
|
||||
from compat.timestamp import format_timestamp
|
||||
|
||||
from files.models import FileMetadata
|
||||
from files.tasks import record_file_accesses
|
||||
|
||||
|
||||
# Dijkstra, Sourcemaps and Python lists start at 0, but editors and our UI show lines starting at 1.
|
||||
@@ -116,6 +122,9 @@ def apply_sourcemaps(event_data):
|
||||
debug_id__in=debug_id_for_filename.values(), file_type="source_map").select_related("file")
|
||||
}
|
||||
|
||||
metadata_ids = [metadata_obj.id for metadata_obj in metadata_obj_lookup.values()]
|
||||
delay_on_commit(record_file_accesses, metadata_ids, format_timestamp(datetime.now(timezone.utc)))
|
||||
|
||||
filenames_with_metas = [
|
||||
(filename, metadata_obj_lookup[debug_id])
|
||||
for (filename, debug_id) in debug_id_for_filename.items()
|
||||
|
||||
@@ -7,14 +7,14 @@ from .models import Chunk, File, FileMetadata
|
||||
|
||||
@admin.register(Chunk)
class ChunkAdmin(admin.ModelAdmin):
    """Admin for upload Chunks: list by checksum/size/created_at; raw data is read-only."""
    # note: an earlier, immediately-shadowed list_display without 'created_at' was removed (dead code)
    list_display = ('checksum', 'size', 'created_at')
    search_fields = ('checksum',)
    readonly_fields = ('data',)
|
||||
|
||||
|
||||
@admin.register(File)
class FileAdmin(admin.ModelAdmin):
    """Admin for assembled Files: adds created_at/accessed_at to the listing; data stays read-only."""
    # note: an earlier, immediately-shadowed list_display without the timestamp columns was removed (dead code)
    list_display = ('filename', 'checksum', 'size', 'download_link', 'created_at', 'accessed_at')
    search_fields = ('checksum',)
    readonly_fields = ('data', 'download_link')
|
||||
|
||||
@@ -27,5 +27,5 @@ class FileAdmin(admin.ModelAdmin):
|
||||
|
||||
@admin.register(FileMetadata)
class FileMetadataAdmin(admin.ModelAdmin):
    """Admin for FileMetadata: searchable by debug_id/file_type and the underlying file's checksum."""
    # note: an earlier, immediately-shadowed list_display without 'created_at' was removed (dead code)
    list_display = ('debug_id', 'file_type', 'file', 'created_at')
    search_fields = ('file__checksum', 'debug_id', 'file_type')
|
||||
|
||||
0
files/management/__init__.py
Normal file
0
files/management/__init__.py
Normal file
0
files/management/commands/__init__.py
Normal file
0
files/management/commands/__init__.py
Normal file
10
files/management/commands/vacuum_files.py
Normal file
10
files/management/commands/vacuum_files.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from files.tasks import vacuum_files
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that kicks off the background files-vacuum task."""

    help = "Kick off (sourcemaps-)files cleanup by vacuuming old entries."

    def handle(self, *args, **options):
        # Just enqueue; the actual cleanup runs in the background task runner.
        vacuum_files.delay()
        # fixed typo in the message: the task runner is "snappea" (cf. snappea.decorators), not "snapea"
        self.stdout.write("Called vacuum_files.delay(); the task will run in the background (snappea).")
|
||||
@@ -0,0 +1,44 @@
|
||||
from django.db import migrations, models
|
||||
import django.utils.timezone
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    # Adds created_at (Chunk, File, FileMetadata) and accessed_at (File) timestamp
    # columns, all indexed, so stale rows can be expired by the vacuum task.

    dependencies = [
        ("files", "0001_initial"),
    ]

    operations = [
        # Standard Django pattern for adding a non-null timestamp to existing rows:
        # the one-off default (timezone.now) backfills at migration time, and
        # preserve_default=False drops that default from the model afterwards;
        # auto_now_add takes over for new rows.
        migrations.AddField(
            model_name="chunk",
            name="created_at",
            field=models.DateTimeField(
                auto_now_add=True, db_index=True, default=django.utils.timezone.now
            ),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="file",
            name="accessed_at",
            field=models.DateTimeField(
                auto_now_add=True, db_index=True, default=django.utils.timezone.now
            ),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="file",
            name="created_at",
            field=models.DateTimeField(
                auto_now_add=True, db_index=True, default=django.utils.timezone.now
            ),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="filemetadata",
            name="created_at",
            field=models.DateTimeField(
                auto_now_add=True, db_index=True, default=django.utils.timezone.now
            ),
            preserve_default=False,
        ),
    ]
|
||||
@@ -5,6 +5,7 @@ class Chunk(models.Model):
|
||||
checksum = models.CharField(max_length=40, unique=True) # unique implies index, which we also use for lookups
|
||||
size = models.PositiveIntegerField()
|
||||
data = models.BinaryField(null=False) # as with Events, we can "eventually" move this out of the database
|
||||
created_at = models.DateTimeField(auto_now_add=True, editable=False, db_index=True)
|
||||
|
||||
def __str__(self):
    # the (unique) checksum is the most recognizable handle on a Chunk
    return self.checksum
|
||||
@@ -23,6 +24,8 @@ class File(models.Model):
|
||||
|
||||
size = models.PositiveIntegerField()
|
||||
data = models.BinaryField(null=False) # as with Events, we can "eventually" move this out of the database
|
||||
created_at = models.DateTimeField(auto_now_add=True, editable=False, db_index=True)
|
||||
accessed_at = models.DateTimeField(auto_now_add=True, editable=False, db_index=True)
|
||||
|
||||
def __str__(self):
    # filename is the human-readable identity of a File (checksum is shown in admin instead)
    return self.filename
|
||||
@@ -36,6 +39,7 @@ class FileMetadata(models.Model):
|
||||
debug_id = models.UUIDField(max_length=40, null=True, blank=True)
|
||||
file_type = models.CharField(max_length=255, null=True, blank=True)
|
||||
data = models.TextField() # we just dump the rest in here; let's see how much we really need.
|
||||
created_at = models.DateTimeField(auto_now_add=True, editable=False, db_index=True)
|
||||
|
||||
def __str__(self):
|
||||
# somewhat useless when debug_id is None; but that's not the case we care about ATM
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
from datetime import timedelta
|
||||
from zipfile import ZipFile
|
||||
import json
|
||||
from hashlib import sha1
|
||||
from io import BytesIO
|
||||
from os.path import basename
|
||||
from django.utils import timezone
|
||||
|
||||
from compat.timestamp import parse_timestamp
|
||||
from snappea.decorators import shared_task
|
||||
|
||||
from bugsink.transaction import immediate_atomic
|
||||
from bugsink.transaction import immediate_atomic, delay_on_commit
|
||||
from bugsink.app_settings import get_settings
|
||||
|
||||
from .models import Chunk, File, FileMetadata
|
||||
@@ -92,3 +95,54 @@ def assemble_file(checksum, chunk_checksums, filename):
|
||||
# is unlikely.
|
||||
chunks.delete()
|
||||
return result
|
||||
|
||||
|
||||
@shared_task
def record_file_accesses(metadata_ids, accessed_at):
    """Stamp the Files behind the given FileMetadata ids as accessed at `accessed_at`.

    Runs as a task because file-access happens in an otherwise read-only view,
    while recording that it happened is a DB write. Doing this task-based is
    acceptable because: [1] expected throughput is low (UI-driven), so the task
    overhead is fine; [2] it is not critical that every access is recorded (99%
    is enough); and [3] the write is unrelated to the reading transaction — all
    we record is that the access happened.

    The alternative (hoisting the write into the UI view) would be doable but
    annoying ('for_request_method' would no longer work) and, worse, would make
    a key UI view depend on the write lock — bad for responsiveness.
    """
    with immediate_atomic():
        when = parse_timestamp(accessed_at)

        # filtering on IDs is robust against concurrent deletions out-of-the-box
        # (and costs just 2 queries)
        file_id_qs = FileMetadata.objects.filter(id__in=metadata_ids).values_list("file_id", flat=True)
        File.objects.filter(id__in=file_id_qs).update(accessed_at=when)
|
||||
|
||||
|
||||
@shared_task
def vacuum_files():
    """Delete stale file-related rows in bounded batches.

    Chunks expire 1 day after creation (they are used immediately, or not at
    all); Files expire 90 days after last access; FileMetadata rows are removed
    via the cascade from File deletion. At most `budget` rows are deleted per
    run; when the budget is exhausted the task reschedules itself (after the
    transaction commits) to continue.
    """
    now = timezone.now()
    with immediate_atomic():
        # budget is not yet tuned; reasons for high values: we're dealing with "leaves in the model-dep-tree here";
        # reasons for low values: deletion of files might just be expensive.
        budget = 500
        num_deleted = 0

        for model, field_name, max_days in [
            (Chunk, 'created_at', 1),  # 1 is already quite long... Chunks are used immediately, or not at all.
            (File, 'accessed_at', 90),
            # for FileMetadata we rely on cascading from File (which will always happen "eventually")
        ]:

            while num_deleted < budget:
                # BUGFIX: slice by the _remaining_ budget, not the full budget. Previously,
                # when an earlier model had consumed part of the budget, num_deleted could
                # overshoot `budget`, so the `== budget` reschedule check below never fired
                # and leftover rows waited for the next external trigger.
                ids = (model.objects.filter(**{f"{field_name}__lt": now - timedelta(days=max_days)})
                       [:budget - num_deleted].values_list('id', flat=True))

                if len(ids) == 0:
                    break

                model.objects.filter(id__in=ids).delete()
                num_deleted += len(ids)

        if num_deleted >= budget:
            # budget exhausted but possibly more to delete, so we re-schedule the task
            # (">=" rather than "==" as belt-and-braces against any future overshoot)
            delay_on_commit(vacuum_files)
|
||||
|
||||
Reference in New Issue
Block a user