From 0bd899fdfac5d4a6aeed116c36d0b3f80226bdbb Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Thu, 10 Apr 2025 17:00:46 +0200 Subject: [PATCH] File.filename (for display purposes) --- files/admin.py | 2 +- files/migrations/0001_initial.py | 1 + files/models.py | 12 +++++++++--- files/views.py | 8 ++++++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/files/admin.py b/files/admin.py index 532296e..b5163c4 100644 --- a/files/admin.py +++ b/files/admin.py @@ -11,7 +11,7 @@ class ChunkAdmin(admin.ModelAdmin): @admin.register(File) class FileAdmin(admin.ModelAdmin): - list_display = ('checksum', 'size') + list_display = ('filename', 'checksum', 'size') search_fields = ('checksum',) readonly_fields = ('data',) diff --git a/files/migrations/0001_initial.py b/files/migrations/0001_initial.py index 969dda4..9785c9e 100644 --- a/files/migrations/0001_initial.py +++ b/files/migrations/0001_initial.py @@ -41,6 +41,7 @@ class Migration(migrations.Migration): ), ), ("checksum", models.CharField(max_length=40, unique=True)), + ("filename", models.CharField(max_length=255)), ("size", models.PositiveIntegerField()), ("data", models.BinaryField()), ], diff --git a/files/models.py b/files/models.py index 6c58219..91ba42c 100644 --- a/files/models.py +++ b/files/models.py @@ -11,15 +11,21 @@ class Chunk(models.Model): class File(models.Model): - # NOTE: as it stands, this is exactly the same thing as Chunk; and since we do single-chunk uploads, optimizations - # are imaginable. Make it work first though + # NOTE: since we do single-chunk uploads, optimizations are imaginable. Make it work first though checksum = models.CharField(max_length=40, unique=True) # unique implies index, which we also use for lookups + + # the filename is not unique, nor meaningful in the sense that you could use it to identify the file. It is only + # here for convenience, i.e. to eye-ball the file in a list. 
Note that we store by checksum, and the filename gets + # associated on the first successful store, i.e. it's possible that a file would be stored again with a different + # name but that would go undetected by us. All that is to say: convenience thingie without strong guarantees. + filename = models.CharField(max_length=255) + size = models.PositiveIntegerField() data = models.BinaryField(null=False) # as with Events, we can "eventually" move this out of the database def __str__(self): - return self.checksum + return self.filename class FileMetadata(models.Model): diff --git a/files/views.py b/files/views.py index 02854f5..90e1dd2 100644 --- a/files/views.py +++ b/files/views.py @@ -143,7 +143,7 @@ def assemble_artifact_bundle(bundle_checksum, chunk_checksums): # NOTE: there's also the concept of an artifact bundle as _tied_ to a release, i.e. without debug_ids. We don't # support that, but if we ever were to support it we'd need a separate method/param to distinguish it. - bundle_file, _ = assemble_file(bundle_checksum, chunk_checksums) + bundle_file, _ = assemble_file(bundle_checksum, chunk_checksums, filename=f"{bundle_checksum}.zip") bundle_zip = ZipFile(BytesIO(bundle_file.data)) # NOTE: in-memory handling of zips. manifest_bytes = bundle_zip.read("manifest.json") @@ -154,9 +154,12 @@ def assemble_artifact_bundle(bundle_checksum, chunk_checksums): checksum = sha1(file_data).hexdigest() + filename = manifest_entry.get("url", filename)[:255] + file, _ = File.objects.get_or_create( checksum=checksum, defaults={ + "filename": filename, "size": len(file_data), "data": file_data, }) @@ -179,7 +182,7 @@ def assemble_artifact_bundle(bundle_checksum, chunk_checksums): # NOTE we _could_ get rid of the file at this point (but we don't). Ties in to broader questions of retention. 
-def assemble_file(checksum, chunk_checksums): +def assemble_file(checksum, chunk_checksums, filename): """Assembles a file from chunks""" # NOTE: unimplemented checks/tricks @@ -200,6 +203,7 @@ def assemble_file(checksum, chunk_checksums): defaults={ "size": len(data), "data": data, + "filename": filename, })