File.filename (for display purposes)

This commit is contained in:
Klaas van Schelven
2025-04-10 17:00:46 +02:00
parent 87130043e3
commit 0bd899fdfa
4 changed files with 17 additions and 6 deletions

View File

@@ -11,7 +11,7 @@ class ChunkAdmin(admin.ModelAdmin):
@admin.register(File)
class FileAdmin(admin.ModelAdmin):
    """Admin for File: list filename alongside checksum/size so entries can be eye-balled."""

    # 'filename' is display-only convenience (see File model); lookups are by checksum.
    list_display = ('filename', 'checksum', 'size')
    search_fields = ('checksum',)
    # raw binary blob — never editable through the admin form
    readonly_fields = ('data',)

View File

@@ -41,6 +41,7 @@ class Migration(migrations.Migration):
),
),
("checksum", models.CharField(max_length=40, unique=True)),
("filename", models.CharField(max_length=255)),
("size", models.PositiveIntegerField()),
("data", models.BinaryField()),
],

View File

@@ -11,15 +11,21 @@ class Chunk(models.Model):
class File(models.Model):
    """A stored file, keyed (and deduplicated) by content checksum.

    NOTE: since we do single-chunk uploads, optimizations are imaginable. Make it work first though.
    """

    # unique implies index, which we also use for lookups
    checksum = models.CharField(max_length=40, unique=True)

    # the filename is not unique, nor meaningful in the sense that you could use it to identify the file. It is only
    # here for convenience, i.e. to eye-ball the file in a list. note that we store by checksum, and the filename gets
    # associated on the first successful store. i.e. it's possible that a file would be stored again with a different
    # name but that would go undetected by us. all that is to say: convenience thingie without strong guarantees.
    filename = models.CharField(max_length=255)

    size = models.PositiveIntegerField()
    data = models.BinaryField(null=False)  # as with Events, we can "eventually" move this out of the database

    def __str__(self):
        # filename is the human-friendly handle; checksum remains the real identifier
        return self.filename
class FileMetadata(models.Model):

View File

@@ -143,7 +143,7 @@ def assemble_artifact_bundle(bundle_checksum, chunk_checksums):
# NOTE: there's also the concept of an artifact bundle as _tied_ to a release, i.e. without debug_ids. We don't
# support that, but if we ever were to support it we'd need a separate method/param to distinguish it.
bundle_file, _ = assemble_file(bundle_checksum, chunk_checksums)
bundle_file, _ = assemble_file(bundle_checksum, chunk_checksums, filename=f"{bundle_checksum}.zip")
bundle_zip = ZipFile(BytesIO(bundle_file.data)) # NOTE: in-memory handling of zips.
manifest_bytes = bundle_zip.read("manifest.json")
@@ -154,9 +154,12 @@ def assemble_artifact_bundle(bundle_checksum, chunk_checksums):
checksum = sha1(file_data).hexdigest()
filename = manifest_entry.get("url", filename)[:255]
file, _ = File.objects.get_or_create(
checksum=checksum,
defaults={
"filename": filename,
"size": len(file_data),
"data": file_data,
})
@@ -179,7 +182,7 @@ def assemble_artifact_bundle(bundle_checksum, chunk_checksums):
# NOTE we _could_ get rid of the file at this point (but we don't). Ties in to broader questions of retention.
def assemble_file(checksum, chunk_checksums):
def assemble_file(checksum, chunk_checksums, filename):
"""Assembles a file from chunks"""
# NOTE: unimplemented checks/tricks
@@ -200,6 +203,7 @@ def assemble_file(checksum, chunk_checksums):
defaults={
"size": len(data),
"data": data,
"filename": filename,
})