diff --git a/bugsink/urls.py b/bugsink/urls.py index 2a65174..9e15360 100644 --- a/bugsink/urls.py +++ b/bugsink/urls.py @@ -14,7 +14,7 @@ from teams.views import debug_email as debug_teams_email from bugsink.app_settings import get_settings from users.views import signup, confirm_email, resend_confirmation, request_reset_password, reset_password, preferences from ingest.views import download_envelope -from files.views import chunk_upload, artifact_bundle_assemble, api_root, api_catch_all +from files.views import chunk_upload, artifact_bundle_assemble, difs_assemble, api_root, api_catch_all from bugsink.decorators import login_exempt from events.api_views import EventViewSet @@ -71,6 +71,9 @@ urlpatterns = [ path("api/0/organizations/<slug:organization_slug>/artifactbundle/assemble/", artifact_bundle_assemble, name="artifact_bundle_assemble"), + path("api/0/projects/<slug:organization_slug>/<slug:project_slug>/files/difs/assemble/", difs_assemble, + name="difs_assemble"), + path('api/', include('ingest.urls')), path('api/0/', api_root, name='api_root'), diff --git a/files/tasks.py b/files/tasks.py index a193708..4e6a62b 100644 --- a/files/tasks.py +++ b/files/tasks.py @@ -105,8 +105,8 @@ def assemble_file(checksum, chunk_checksums, filename): # NOTE: unimplemented checks/tricks # * total file-size v.s. 
some max - # * explicit check chunk availability (as it stands, our processing is synchronous, so no need) - # * skip-on-checksum-exists + # * explicit check chunk availability + # * skip this whole thing when the (whole-file) checksum exists chunks = Chunk.objects.filter(checksum__in=chunk_checksums) chunks_dicts = {chunk.checksum: chunk for chunk in chunks} @@ -117,7 +117,7 @@ def assemble_file(checksum, chunk_checksums, filename): if sha1(data, usedforsecurity=False).hexdigest() != checksum: raise Exception("checksum mismatch") - result = File.objects.get_or_create( + file, created = File.objects.get_or_create( checksum=checksum, defaults={ "size": len(data), @@ -129,7 +129,7 @@ def assemble_file(checksum, chunk_checksums, filename): # be used in multiple files (which are still being assembled) but with chunksizes in the order of 1MiB, I'd say this # is unlikely. chunks.delete() - return result + return file, created @shared_task diff --git a/files/views.py b/files/views.py index 74db12e..e357fe2 100644 --- a/files/views.py +++ b/files/views.py @@ -15,8 +15,8 @@ from bugsink.app_settings import get_settings from bugsink.transaction import durable_atomic, immediate_atomic from bsmain.models import AuthToken -from .models import Chunk, File -from .tasks import assemble_artifact_bundle +from .models import Chunk, File, FileMetadata +from .tasks import assemble_artifact_bundle, assemble_file logger = logging.getLogger("bugsink.api") @@ -86,7 +86,8 @@ def get_chunk_upload_settings(request, organization_slug): # yet. "release_files", - # this would seem to be the "javascript sourcemaps" thing, but how exactly I did not check yet. 
+ # on second reading I would say: this is "actual source code", but I did not check yet and "don't touch it" + # (even though we don't actually have an implementation for sources yet) "sources", # https://github.com/getsentry/sentry/discussions/46967 @@ -100,7 +101,7 @@ def get_chunk_upload_settings(request, organization_slug): # "artifact_bundles_v2", # the rest of the options are below: - # "debug_files", + "debug_files", # "release_files", # "pdbs", # "bcsymbolmaps", @@ -199,6 +200,78 @@ def artifact_bundle_assemble(request, organization_slug): return JsonResponse({"state": ChunkFileState.CREATED, "missingChunks": []}) +@csrf_exempt # we're in API context here; this could potentially be pulled up to a higher level though +@requires_auth_token +def difs_assemble(request, organization_slug, project_slug): + # TODO move to tasks.something.delay + # TODO think about the right transaction around this + data = json.loads(request.body) + + file_checksums = set(data.keys()) + + existing_files = { + f.file.checksum: f + for f in FileMetadata.objects.filter(file__checksum__in=file_checksums) + } + + all_requested_chunks = { + chunk + for file_info in data.values() + for chunk in file_info.get("chunks", []) + } + + available_chunks = set( + Chunk.objects.filter(checksum__in=all_requested_chunks).values_list("checksum", flat=True) + ) + + response = {} + + for file_checksum, file_info in data.items(): + if file_checksum in existing_files: + response[file_checksum] = { + "state": ChunkFileState.OK, + "missingChunks": [], + # "dif": serialize(existing_files[file_checksum]), # TODO: figure out if this is required. + } + continue + + file_chunks = file_info.get("chunks", []) + + # the sentry-cli sends an empty "chunks" list when just polling for file existence; since we already handled the + # case of existing files above, we can simply return NOT_FOUND here. 
+ if not file_chunks: + response[file_checksum] = { + "state": ChunkFileState.NOT_FOUND, + "missingChunks": [], + } + continue + + missing_chunks = [c for c in file_chunks if c not in available_chunks] + if missing_chunks: + response[file_checksum] = { + "state": ChunkFileState.NOT_FOUND, + "missingChunks": missing_chunks, + } + continue + + file, _ = assemble_file(file_checksum, file_chunks, filename=file_info["name"]) + FileMetadata.objects.get_or_create( + debug_id=file_info["debug_id"], + file_type="dif", # I think? check! + defaults={ + "file": file, + "data": "{}", # this is the "catch all" field but I don't think we have anything in this case. + } + ) + + response[file_checksum] = { + "state": ChunkFileState.OK, + "missingChunks": [], + } + + return JsonResponse(response) + + @user_passes_test(lambda u: u.is_superuser) @durable_atomic def download_file(request, checksum):