def default_hash_input(title: str, culprit: str, type_) -> str:
    """Return the default grouping input for an event.

    The three components are concatenated in order, without a separator.
    """
    return title + culprit + type_


def generate_hash(
    title: str, culprit: str, type_, extra: Optional[List[str]] = None
) -> str:
    """Generate insecure hash used for grouping issues.

    Args:
        title: Event title.
        culprit: Event location / culprit string.
        type_: Name of the event type.
        extra: Optional fingerprint parts. The placeholder
            ``"{{ default }}"`` is replaced by the default hash input; every
            other part is used verbatim. Parts that are not strings are
            converted with ``str()`` instead of making the join fail.

    Returns:
        The hexadecimal MD5 digest of the assembled input.
    """
    if extra:
        # The default input is only computed when a part asks for it, so a
        # fully custom fingerprint never touches title/culprit/type_.
        hash_input = "".join(
            default_hash_input(title, culprit, type_)
            if part == "{{ default }}"
            else str(part)
            for part in extra
        )
    else:
        hash_input = default_hash_input(title, culprit, type_)
    return hashlib.md5(hash_input.encode()).hexdigest()


def get_hash_for_data(data):
    """Compute the grouping hash for a parsed event payload.

    A payload with a truthy ``"exception"`` entry is handled as an
    ``ErrorEvent``, anything else as a ``DefaultEvent``. The title (derived
    from the event metadata), the location, the event class name and the
    optional ``"fingerprint"`` entry are passed on to ``generate_hash``.
    """
    if data.get("exception"):
        eventtype = ErrorEvent()
    else:
        eventtype = DefaultEvent()

    metadata = eventtype.get_metadata(data)

    title = eventtype.get_title(metadata)
    culprit = eventtype.get_location(data)
    return generate_hash(
        title, culprit, type(eventtype).__name__, data.get("fingerprint")
    )
A +# value of 0 would store counts for every minute, and is the lowest level of +# accuracy provided. +MINUTE_NORMALIZATION = 15 + +MAX_TAG_KEY_LENGTH = 32 +MAX_TAG_VALUE_LENGTH = 200 +MAX_CULPRIT_LENGTH = 200 +MAX_EMAIL_FIELD_LENGTH = 75 + +ENVIRONMENT_NAME_PATTERN = r"^[^\n\r\f\/]*$" +ENVIRONMENT_NAME_MAX_LENGTH = 64 + +SENTRY_APP_SLUG_MAX_LENGTH = 64 + +# Team slugs which may not be used. Generally these are top level URL patterns +# which we don't want to worry about conflicts on. +RESERVED_ORGANIZATION_SLUGS = frozenset( + ( + "admin", + "manage", + "login", + "account", + "register", + "api", + "accept", + "organizations", + "teams", + "projects", + "help", + "docs", + "logout", + "404", + "500", + "_static", + "out", + "debug", + "remote", + "get-cli", + "blog", + "welcome", + "features", + "customers", + "integrations", + "signup", + "pricing", + "subscribe", + "enterprise", + "about", + "jobs", + "thanks", + "guide", + "privacy", + "security", + "terms", + "from", + "sponsorship", + "for", + "at", + "platforms", + "branding", + "vs", + "answers", + "_admin", + "support", + "contact", + "onboarding", + "ext", + "extension", + "extensions", + "plugins", + "themonitor", + "settings", + "legal", + "avatar", + "organization-avatar", + "project-avatar", + "team-avatar", + "careers", + "_experiment", + "sentry-apps", + ) +) + +RESERVED_PROJECT_SLUGS = frozenset( + ( + "api-keys", + "audit-log", + "auth", + "members", + "projects", + "rate-limits", + "repos", + "settings", + "teams", + "billing", + "payments", + "legal", + "subscription", + "support", + "integrations", + "developer-settings", + "usage", + ) +) + +LOG_LEVELS = { + logging.NOTSET: "sample", + logging.DEBUG: "debug", + logging.INFO: "info", + logging.WARNING: "warning", + logging.ERROR: "error", + logging.FATAL: "fatal", +} +DEFAULT_LOG_LEVEL = "error" +DEFAULT_LOGGER_NAME = "" +LOG_LEVELS_MAP = {v: k for k, v in LOG_LEVELS.items()} + +# Default alerting threshold values 
+DEFAULT_ALERT_PROJECT_THRESHOLD = (500, 25) # 500%, 25 events +DEFAULT_ALERT_GROUP_THRESHOLD = (1000, 25) # 1000%, 25 events + +# Default sort option for the group stream +DEFAULT_SORT_OPTION = "date" + +# Setup languages for only available locales +# _language_map = dict(settings.LANGUAGES) +# LANGUAGES = [(k, _language_map[k]) for k in get_all_languages() if k in _language_map] +# del _language_map + +# TODO(dcramer): We eventually want to make this user-editable +TAG_LABELS = { + "exc_type": "Exception Type", + "sentry:user": "User", + "sentry:release": "Release", + "sentry:dist": "Distribution", + "os": "OS", + "url": "URL", + "server_name": "Server", +} + +PROTECTED_TAG_KEYS = frozenset(["environment", "release", "sentry:release"]) + +# TODO(dcramer): once this is more flushed out we want this to be extendable +SENTRY_RULES = ( + "sentry.rules.actions.notify_event.NotifyEventAction", + "sentry.rules.actions.notify_event_service.NotifyEventServiceAction", + "sentry.rules.conditions.every_event.EveryEventCondition", + "sentry.rules.conditions.first_seen_event.FirstSeenEventCondition", + "sentry.rules.conditions.regression_event.RegressionEventCondition", + "sentry.rules.conditions.reappeared_event.ReappearedEventCondition", + "sentry.rules.conditions.tagged_event.TaggedEventCondition", + "sentry.rules.conditions.event_frequency.EventFrequencyCondition", + "sentry.rules.conditions.event_frequency.EventUniqueUserFrequencyCondition", + "sentry.rules.conditions.event_attribute.EventAttributeCondition", + "sentry.rules.conditions.level.LevelCondition", +) + +# methods as defined by http://www.w3.org/Protocols/rfc2616/rfc2616-sec9.html + PATCH +HTTP_METHODS = ( + "GET", + "POST", + "PUT", + "OPTIONS", + "HEAD", + "DELETE", + "TRACE", + "CONNECT", + "PATCH", +) + +# See https://github.com/getsentry/semaphore/blob/master/general/src/protocol/constants.rs +# VALID_PLATFORMS = semaphore.VALID_PLATFORMS +VALID_PLATFORMS = [ + "as3", + "c", + "cfml", + "cocoa", + "csharp", 
+ "elixir", + "go", + "groovy", + "haskell", + "java", + "javascript", + "native", + "node", + "objc", + "other", + "perl", + "php", + "python", + "ruby", +] + +OK_PLUGIN_ENABLED = _("The {name} integration has been enabled.") + +OK_PLUGIN_DISABLED = _("The {name} integration has been disabled.") + +OK_PLUGIN_SAVED = _("Configuration for the {name} integration has been saved.") + +WARN_SESSION_EXPIRED = "Your session has expired." # TODO: translate this + +# Maximum length of a symbol +MAX_SYM = 256 + +# Known debug information file mimetypes +KNOWN_DIF_FORMATS = { + "text/x-breakpad": "breakpad", + "application/x-mach-binary": "macho", + "application/x-elf-binary": "elf", + "application/x-dosexec": "pe", + "application/x-ms-pdb": "pdb", + "text/x-proguard+plain": "proguard", + "application/x-sentry-bundle+zip": "sourcebundle", +} + +NATIVE_UNKNOWN_STRING = "" + +# Maximum number of release files that can be "skipped" (i.e., maximum paginator offset) +# inside release files API endpoints. +# If this number is too large, it may cause problems because of inefficient +# LIMIT-OFFSET database queries. +# These problems should be solved after we implement artifact bundles workflow. +MAX_RELEASE_FILES_OFFSET = 20000 + +# to go from an integration id (in _platforms.json) to the platform +# data, such as documentation url or humanized name. 
+# example: java-logback -> {"type": "framework", +# "link": "https://docs.getsentry.com/hosted/clients/java/modules/logback/", +# "id": "java-logback", +# "name": "Logback"} +INTEGRATION_ID_TO_PLATFORM_DATA = {} + + +# def _load_platform_data(): +# INTEGRATION_ID_TO_PLATFORM_DATA.clear() +# data = load_doc("_platforms") + +# if not data: +# return + +# for platform in data["platforms"]: +# integrations = platform.pop("integrations") +# if integrations: +# for integration in integrations: +# integration_id = integration.pop("id") +# if integration["type"] != "language": +# integration["language"] = platform["id"] +# INTEGRATION_ID_TO_PLATFORM_DATA[integration_id] = integration + + +# _load_platform_data() + +# special cases where the marketing slug differs from the integration id +# (in _platforms.json). missing values (for example: "java") should assume +# the marketing slug is the same as the integration id: +# javascript, node, python, php, ruby, go, swift, objc, java, perl, elixir +MARKETING_SLUG_TO_INTEGRATION_ID = { + "kotlin": "java", + "scala": "java", + "spring": "java", + "android": "java-android", + "react": "javascript-react", + "angular": "javascript-angular", + "angular2": "javascript-angular2", + "ember": "javascript-ember", + "backbone": "javascript-backbone", + "vue": "javascript-vue", + "express": "node-express", + "koa": "node-koa", + "django": "python-django", + "flask": "python-flask", + "sanic": "python-sanic", + "tornado": "python-tornado", + "celery": "python-celery", + "rq": "python-rq", + "bottle": "python-bottle", + "pythonawslambda": "python-awslambda", + "pyramid": "python-pyramid", + "pylons": "python-pylons", + "laravel": "php-laravel", + "symfony": "php-symfony2", + "rails": "ruby-rails", + "sinatra": "ruby-sinatra", + "dotnet": "csharp", +} + + +# to go from a marketing page slug like /for/android/ to the integration id +# (in _platforms.json), for looking up documentation urls, etc. 
def get_integration_id_for_marketing_slug(slug):
    """Map a marketing page slug to an integration id.

    Explicit special cases win; otherwise the slug is returned as-is when it
    is itself a known integration id. Returns ``None`` when neither applies.
    """
    if slug in MARKETING_SLUG_TO_INTEGRATION_ID:
        return MARKETING_SLUG_TO_INTEGRATION_ID[slug]

    if slug in INTEGRATION_ID_TO_PLATFORM_DATA:
        return slug


# special cases where the integration sent with the SDK differ from
# the integration id (in _platforms.json)
# {PLATFORM: {INTEGRATION_SENT: integration_id, ...}, ...}
PLATFORM_INTEGRATION_TO_INTEGRATION_ID = {
    "java": {"java.util.logging": "java-logging"},
    # TODO: add more special cases...
}


# to go from event data to the integration id (in _platforms.json),
# for example an event like:
# {"platform": "java",
#  "sdk": {"name": "sentry-java",
#          "integrations": ["java.util.logging"]}} -> java-logging
def get_integration_id_for_event(platform, sdk_name, integrations):
    """Resolve the integration id for an event.

    Lookup order: per-platform special cases and "platform-integration" ids
    for each reported integration, then the normalized SDK name, then the
    bare platform. Returns ``None`` when nothing matches.

    Args:
        platform: Platform string of the event.
        sdk_name: SDK name; may be ``None`` or empty when the event does not
            report an SDK, in which case the SDK-name lookup is skipped.
        integrations: Iterable of integration names, or a falsy value.
    """
    if integrations:
        for integration in integrations:
            # check special cases
            if (
                platform in PLATFORM_INTEGRATION_TO_INTEGRATION_ID
                and integration in PLATFORM_INTEGRATION_TO_INTEGRATION_ID[platform]
            ):
                return PLATFORM_INTEGRATION_TO_INTEGRATION_ID[platform][integration]

            # try "platform-integration", for example "java-log4j"
            integration_id = "%s-%s" % (platform, integration)
            if integration_id in INTEGRATION_ID_TO_PLATFORM_DATA:
                return integration_id

    # try sdk name, for example "sentry-java" -> "java" or "raven-java:log4j" -> "java-log4j"
    # Guarded: calling .lower() on a missing SDK name would raise.
    if sdk_name:
        sdk_name = (
            sdk_name.lower().replace("sentry-", "").replace("raven-", "").replace(":", "-")
        )
        if sdk_name in INTEGRATION_ID_TO_PLATFORM_DATA:
            return sdk_name

    # try platform name, for example "java"
    if platform in INTEGRATION_ID_TO_PLATFORM_DATA:
        return platform


class ObjectStatus(object):
    """Integer status codes for stored objects."""

    VISIBLE = 0
    HIDDEN = 1
    PENDING_DELETION = 2
    DELETION_IN_PROGRESS = 3

    # ACTIVE/DISABLED reuse the values of VISIBLE/HIDDEN.
    ACTIVE = 0
    DISABLED = 1

    @classmethod
    def as_choices(cls):
        """Return ``(value, label)`` pairs for the active/disabled/deletion states."""
        return (
            (cls.ACTIVE, "active"),
            (cls.DISABLED, "disabled"),
            (cls.PENDING_DELETION, "pending_deletion"),
            (cls.DELETION_IN_PROGRESS, "deletion_in_progress"),
        )
class SentryAppStatus(object):
    """Publication status codes of a Sentry app and their string labels."""

    UNPUBLISHED = 0
    PUBLISHED = 1
    INTERNAL = 2
    UNPUBLISHED_STR = "unpublished"
    PUBLISHED_STR = "published"
    INTERNAL_STR = "internal"

    @classmethod
    def as_choices(cls):
        """Return ``(code, label)`` pairs for every status."""
        unpublished = (cls.UNPUBLISHED, cls.UNPUBLISHED_STR)
        published = (cls.PUBLISHED, cls.PUBLISHED_STR)
        internal = (cls.INTERNAL, cls.INTERNAL_STR)
        return (unpublished, published, internal)

    @classmethod
    def as_str(cls, status):
        """Return the label for ``status``, or ``None`` for an unknown code."""
        for code, label in cls.as_choices():
            if status == code:
                return label
        return None


class SentryAppInstallationStatus(object):
    """Installation status codes of a Sentry app and their string labels."""

    PENDING = 0
    INSTALLED = 1
    PENDING_STR = "pending"
    INSTALLED_STR = "installed"

    @classmethod
    def as_choices(cls):
        """Return ``(code, label)`` pairs for every status."""
        pending = (cls.PENDING, cls.PENDING_STR)
        installed = (cls.INSTALLED, cls.INSTALLED_STR)
        return (pending, installed)

    @classmethod
    def as_str(cls, status):
        """Return the label for ``status``, or ``None`` for an unknown code."""
        for code, label in cls.as_choices():
            if status == code:
                return label
        return None
def generate_culprit(data):
    """Build the legacy culprit string for an event payload.

    The stacktrace of the last exception carrying frames is preferred (or
    the top-level stacktrace when there are no exceptions); the request URL
    is the fallback. The result is truncated to ``MAX_CULPRIT_LENGTH``.
    An empty string is returned for synthetic exceptions.
    """
    platform = data.get("platform")
    exceptions = get_path(data, "exception", "values", filter=True)

    if exceptions:
        # Synthetic events no longer get a culprit
        if get_path(get_path(exceptions, -1), "mechanism", "synthetic"):
            return ""
        stacktraces = [
            exc["stacktrace"]
            for exc in exceptions
            if get_path(exc, "stacktrace", "frames")
        ]
    else:
        top_level = data.get("stacktrace")
        stacktraces = [top_level] if top_level and top_level.get("frames") else None

    culprit = None
    if stacktraces:
        culprit = get_stacktrace_culprit(get_path(stacktraces, -1), platform=platform)

    if not culprit and data.get("request"):
        culprit = get_path(data, "request", "url")

    return truncatechars(culprit or "", MAX_CULPRIT_LENGTH)


def get_stacktrace_culprit(stacktrace, platform):
    """Pick a culprit from a stacktrace, scanning frames from last to first.

    The first in-app frame that yields a non-empty culprit wins. Otherwise
    the culprit of the first non-in-app frame encountered is returned
    (``None`` if there was none).
    """
    fallback = None
    for frame in reversed(stacktrace["frames"]):
        if not frame:
            continue
        if not frame.get("in_app"):
            if fallback is None:
                fallback = get_frame_culprit(frame, platform=platform)
            continue
        candidate = get_frame_culprit(frame, platform=platform)
        if candidate:
            return candidate
    return fallback


def get_frame_culprit(frame, platform):
    """Format a single frame as a culprit string.

    Native-style platforms use the bare function name, JavaScript-style
    platforms use ``function(location)``, everything else uses
    ``location in function``. A frame without module/filename gives ``""``
    (except on native-style platforms).
    """
    # If this frame has a platform, we use it instead of the one that
    # was passed in (as that one comes from the exception which might
    # not necessarily be the same platform).
    effective_platform = frame.get("platform") or platform
    function = frame.get("function") or "?"

    if effective_platform in ("objc", "cocoa", "native"):
        return function

    location = frame.get("module") or frame.get("filename")
    if not location:
        return ""

    if effective_platform in ("javascript", "node"):
        # function and location might be unicode here, so let it coerce
        # to a unicode string if needed.
        return "%s(%s)" % (function, location)
    return "%s in %s" % (location, function)
sentry.stacktraces.processing import get_crash_frame_from_event_data +from sentry.utils.safe import get_path, trim, truncatechars + +from .base import BaseEvent + + +def get_crash_location(data): + frame = get_crash_frame_from_event_data( + data, + frame_filter=lambda x: x.get("function") + not in (None, "", ""), + ) + if frame is not None: + func = get_function_name_for_frame(frame, data.get("platform")) + return frame.get("filename") or frame.get("abs_path"), func + + +class ErrorEvent(BaseEvent): + key = "error" + + def get_metadata(self, data): + # Check for undocumented interface where exception has no values. Go SDK does this. + # https://docs.sentry.io/development/sdk-dev/event-payloads/exception/ + # exception can be an list instead of a dictionary + if isinstance(data.get("exception"), list): + if len(data["exception"]) == 0: + return {} + # Force documented interface + data["exception"] = {"values": data["exception"]} + exception = get_path(data, "exception", "values", -1) + if not exception: + return {} + + loc = get_crash_location(data) + rv = {"value": trim(get_path(exception, "value", default=""), 1024)} + + # If the exception mechanism indicates a synthetic exception we do not + # want to record the type and value into the metadata. + if not get_path(exception, "mechanism", "synthetic"): + rv["type"] = trim(get_path(exception, "type", default="Error"), 128) + + # Attach crash location if available + if loc is not None: + fn, func = loc + if fn: + rv["filename"] = fn + if func: + rv["function"] = func + + return rv + + def get_title(self, metadata): + ty = metadata.get("type") + if ty is None: + return metadata.get("function") or "" + if not metadata.get("value"): + return ty + try: + return "{}: {}".format(ty, truncatechars(metadata["value"].splitlines()[0])) + except AttributeError: + # GlitchTip modification + # Exception value is specified as a string, sometimes it isn't. This is a fallback. 
PAIRS = {"(": ")", "{": "}", "[": "]", "<": ">"}


def replace_enclosed_string(s, start, end, replacement=None):
    """Drop or replace every outermost ``start``…``end`` span of ``s``.

    Nested pairs belong to their outermost span. With ``replacement`` left
    as ``None`` the span is removed; a non-callable replacement is inserted
    in its place; a callable is invoked with the enclosed text and the index
    of the opening character, and its return value is inserted. ``s`` is
    returned untouched when it does not contain ``start``.
    """
    if start not in s:
        return s

    pieces = []
    nesting = 0
    opened_at = None

    for position, ch in enumerate(s):
        if ch == start:
            if nesting == 0:
                opened_at = position
            nesting += 1
            continue

        if ch == end:
            nesting -= 1
            if nesting == 0 and replacement is not None:
                pieces.append(
                    replacement(s[opened_at + 1 : position], opened_at)
                    if callable(replacement)
                    else replacement
                )
            continue

        if nesting == 0:
            pieces.append(ch)

    return "".join(pieces)


def split_func_tokens(s):
    """Split ``s`` on whitespace that is outside any bracket pair.

    Bracket pairs are those listed in ``PAIRS``; a bracketed group stays
    attached to the token it belongs to, including any whitespace inside it.
    """
    tokens = []
    current = []
    closers = []
    consumed = 0

    for position, ch in enumerate(s):
        if ch in PAIRS:
            closers.append(PAIRS[ch])
            continue

        if closers:
            # Inside brackets only the matching closer matters.
            if ch == closers[-1]:
                closers.pop()
                if not closers:
                    current.append(s[consumed : position + 1])
                    consumed = position + 1
            continue

        if ch.isspace():
            if current:
                tokens.append("".join(current))
                current = []
        else:
            current.append(s[consumed : position + 1])
        consumed = position + 1

    if current:
        tokens.append("".join(current))

    return tokens
normalize_lambdas=True): + """Given a function value from the frame's function attribute this returns + a trimmed version that can be stored in `function_name`. This is only used + if the client did not supply a value itself already. + """ + if get_behavior_family_for_platform(platform) != "native": + return function + if function in ("", ""): + return function + + original_function = function + function = function.strip() + + # Ensure we don't operate on objc functions + if function.startswith(("[", "+[", "-[")): + return function + + # Chop off C++ trailers + while True: + match = _cpp_trailer_re.search(function) + if match is None: + break + function = function[: match.start()].rstrip() + + # Because operator<< really screws with our balancing, so let's work + # around that by replacing it with a character we do not observe in + # `split_func_tokens` or `replace_enclosed_string`. + function = ( + function.replace("operator<<", "operator⟨⟨") + .replace("operator<", "operator⟨") + .replace("operator()", "operator◯") + .replace(" -> ", " ⟿ ") + .replace("`anonymous namespace'", "〔anonymousnamespace〕") + ) + + # normalize C++ lambdas. This is necessary because different + # compilers use different rules for now to name a lambda and they are + # all quite inconsistent. This does not give us perfect answers to + # this problem but closer. In particular msvc will call a lambda + # something like `lambda_deadbeefeefffeeffeeff` whereas clang for + # instance will name it `main::$_0` which will tell us in which outer + # function it was declared. + if normalize_lambdas: + function = _lambda_re.sub("lambda", function) + + # Normalize MSVC anonymous namespaces from inline functions. For inline + # functions, the compiler inconsistently renders anonymous namespaces with + # their hash. For regular functions, "`anonymous namespace'" is used. + # The regular expression matches the trailing "::" to avoid accidental + # replacement in mangled function names. 
+ if normalize_lambdas: + function = _anon_namespace_re.sub("〔anonymousnamespace〕::", function) + + # Remove the arguments if there is one. + def process_args(value, start): + value = value.strip() + if value in ("anonymous namespace", "operator"): + return "(%s)" % value + return "" + + function = replace_enclosed_string(function, "(", ")", process_args) + + # Resolve generic types, but special case rust which uses things like + # ::baz to denote traits. + def process_generics(value, start): + # Special case for lambdas + if value == "lambda" or _lambda_re.match(value): + return "<%s>" % value + + if start > 0: + return "" + + # Rust special cases + value = _rust_blanket_re.sub("", value) # prefer trait for blanket impls + value = replace_enclosed_string(value, "<", ">", process_generics) + return value.split(" as ", 1)[0] + + function = replace_enclosed_string(function, "<", ">", process_generics) + + tokens = split_func_tokens(function) + + # MSVC demangles generic operator functions with a space between the + # function name and the generics. Ensure that those two components both end + # up in the function name. + if len(tokens) > 1 and tokens[-1] == "": + tokens.pop() + tokens[-1] += " " + + # find the token which is the function name. Since we chopped of C++ + # trailers there are only two cases we care about: the token left to + # the -> return marker which is for instance used in Swift and if that + # is not found, the last token in the last. 
NATIVE_PLATFORMS = frozenset(("objc", "cocoa", "swift", "native", "c"))
JAVASCRIPT_PLATFORMS = frozenset(("javascript", "node"))


def get_behavior_family_for_platform(platform):
    """Return the behavior family of ``platform``.

    The result is ``"native"`` or ``"javascript"`` for members of the
    corresponding platform sets, and ``"other"`` for everything else.
    """
    families = (
        ("native", NATIVE_PLATFORMS),
        ("javascript", JAVASCRIPT_PLATFORMS),
    )
    for family, members in families:
        if platform in members:
            return family
    return "other"
SENTRY_MAX_VARIABLE_SIZE = 512


def get_path(data, *path, **kwargs):
    """
    Safely resolves data from a recursive data structure. A value is only
    returned if the full path exists, otherwise ``None`` is returned.

    Mappings are traversed by key; lists and tuples are traversed by integer
    index (negative indices allowed). A non-integer path segment applied to a
    list or tuple counts as "path does not exist" rather than raising.

    If the ``default`` argument is specified, it is returned instead of ``None``.

    If the ``filter`` argument is specified and the value is a list, it is
    filtered with the given callback. Alternatively, pass ``True`` as filter to
    only filter ``None`` values.

    Raises:
        TypeError: if any keyword argument other than ``default`` or
            ``filter`` is passed.
    """
    default = kwargs.pop("default", None)
    f = kwargs.pop("filter", None)
    for k in kwargs:
        raise TypeError("get_path() got an undefined keyword argument '%s'" % k)

    for p in path:
        if isinstance(data, collections.abc.Mapping) and p in data:
            data = data[p]
        elif (
            isinstance(data, (list, tuple))
            # Comparing a str key against len() would raise TypeError.
            and isinstance(p, int)
            and -len(data) <= p < len(data)
        ):
            data = data[p]
        else:
            return default

    if f and data and isinstance(data, (list, tuple)):
        data = list(filter((lambda x: x is not None) if f is True else f, data))

    return data if data is not None else default


def trim(
    value,
    max_size=SENTRY_MAX_VARIABLE_SIZE,
    max_depth=6,
    object_hook=None,
    _depth=0,
    _size=0,
    **kwargs
):
    """
    Truncates a value to ``max_size`` (``SENTRY_MAX_VARIABLE_SIZE`` by default).
    The method of truncation depends on the type of value.

    Dicts (in sorted key order), lists and tuples are trimmed element by
    element and cut off once the accumulated size reaches ``max_size``;
    strings are shortened with ``truncatechars``; other values pass through.
    Below ``max_depth`` a non-string value is JSON-serialized and trimmed as
    a string. ``object_hook``, when given, is applied to each trimmed result.
    ``_depth`` and ``_size`` carry recursion state; extra keyword arguments
    are accepted and ignored.
    """
    options = {
        "max_depth": max_depth,
        "max_size": max_size,
        "object_hook": object_hook,
        "_depth": _depth + 1,
    }

    if _depth > max_depth:
        if not isinstance(value, str):
            value = json.dumps(value)
        return trim(value, _size=_size, max_size=max_size)

    elif isinstance(value, dict):
        result = {}
        _size += 2
        for k in sorted(value.keys()):
            v = value[k]
            trim_v = trim(v, _size=_size, **options)
            result[k] = trim_v
            _size += len(force_str(trim_v)) + 1
            if _size >= max_size:
                break

    elif isinstance(value, (list, tuple)):
        result = []
        _size += 2
        for v in value:
            trim_v = trim(v, _size=_size, **options)
            result.append(trim_v)
            _size += len(force_str(trim_v))
            if _size >= max_size:
                break
        if isinstance(value, tuple):
            result = tuple(result)

    elif isinstance(value, str):
        result = truncatechars(value, max_size - _size)

    else:
        result = value

    if object_hook is None:
        return result
    return object_hook(result)
def truncatechars(value: str, chars=100):
    """Cut ``value`` to its first ``chars`` characters and append "…".

    Values no longer than ``chars`` are returned unchanged.
    """
    if len(value) <= chars:
        return value
    return value[:chars] + "…"


def strip(value):
    """Return ``value`` as text with surrounding whitespace removed.

    Any falsy value yields an empty string.
    """
    return smart_str(value).strip() if value else ""