Warn (in the logs) on multiple-debug-ids source uploads

See #157, #158
This commit is contained in:
Klaas van Schelven
2025-07-23 12:11:20 +02:00
parent e80855a8b9
commit bd1eabe60b

View File

@@ -1,3 +1,5 @@
import re
import logging
from datetime import timedelta
from zipfile import ZipFile
import json
@@ -14,6 +16,16 @@ from bugsink.app_settings import get_settings
from .models import Chunk, File, FileMetadata
# Module-level logger; everything logged here goes to the "bugsink.api" logger.
logger = logging.getLogger("bugsink.api")
# "In the wild", we have run into non-unique debug IDs (one in code, one in comment-at-bottom). This regex matches a
# known pattern for "one in code", such that we can at least warn if it's not the same as the actually reported one.
# The single capture group is the debug ID itself: lowercase hex in the 8-4-4-4-12 UUID layout, in single or double
# quotes, assigned to an `e._sentryDebugIds[...]` entry.
# See #157
IN_CODE_DEBUG_ID_REGEX = re.compile(
r'e\._sentryDebugIds\[.*?\]\s*=\s*["\']([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})["\']'
)
@shared_task
def assemble_artifact_bundle(bundle_checksum, chunk_checksums):
@@ -61,6 +73,18 @@ def assemble_artifact_bundle(bundle_checksum, chunk_checksums):
}
)
# The in-code debug IDs show up in the _minified_ source only (the sourcemap's original source code will not
# have been "polluted" with them yet, since it's the original).
if file_type == "minified_source":
    # Decode leniently: this check only feeds a diagnostic warning, so a minified file that is not valid UTF-8
    # must not abort the bundle assembly with a UnicodeDecodeError. The debug IDs we look for are plain ASCII,
    # so substituting undecodable bytes elsewhere in the file does not change which IDs are found.
    source_text = file_data.decode("utf-8", errors="replace")
    mismatches = set(IN_CODE_DEBUG_ID_REGEX.findall(source_text)) - {debug_id}
    if mismatches:
        logger.warning(
            "File %s contains multiple debug IDs. Uploaded as %s, but also found: %s.",
            filename,
            debug_id,
            ", ".join(sorted(mismatches)),
        )
if not get_settings().KEEP_ARTIFACT_BUNDLES:
# delete the bundle file after processing, since we don't need it anymore.
bundle_file.delete()