diff --git a/bugsink/app_settings.py b/bugsink/app_settings.py index 2cafd9e..f2696ea 100644 --- a/bugsink/app_settings.py +++ b/bugsink/app_settings.py @@ -45,6 +45,7 @@ DEFAULTS = { "VALIDATE_ON_DIGEST": "none", # other legal values are "warn" and "strict" "KEEP_ENVELOPES": 0, # set to a number to store that many; 0 means "store none". This is for debugging. "API_LOG_UNIMPLEMENTED_CALLS": False, # if True, log unimplemented API calls; see #153 + "KEEP_ARTIFACT_BUNDLES": False, # if True, artifact bundles are kept in the database on-upload (for debugging) # MAX* below mirror the (current) values for the Sentry Relay "MAX_EVENT_SIZE": _MEBIBYTE, diff --git a/files/tasks.py b/files/tasks.py index 71f9c97..53a9c8e 100644 --- a/files/tasks.py +++ b/files/tasks.py @@ -5,7 +5,9 @@ from io import BytesIO from os.path import basename from snappea.decorators import shared_task + from bugsink.transaction import immediate_atomic +from bugsink.app_settings import get_settings from .models import Chunk, File, FileMetadata @@ -56,7 +58,9 @@ def assemble_artifact_bundle(bundle_checksum, chunk_checksums): } ) - # NOTE we _could_ get rid of the file at this point (but we don't). Ties in to broader questions of retention. + if not get_settings().KEEP_ARTIFACT_BUNDLES: + # delete the bundle file after processing, since we don't need it anymore. + bundle_file.delete() def assemble_file(checksum, chunk_checksums, filename): @@ -75,10 +79,16 @@ def assemble_file(checksum, chunk_checksums, filename): if sha1(data).hexdigest() != checksum: raise Exception("checksum mismatch") - return File.objects.get_or_create( + result = File.objects.get_or_create( checksum=checksum, defaults={ "size": len(data), "data": data, "filename": filename, }) + + # the assumption here is: chunks are basically use-once, so we can delete them after use. "in theory" a chunk may + # be used in multiple files (which are still being assembled) but with chunksizes in the order of 1MiB, I'd say this + # is unlikely. + chunks.delete() + return result