getting hash for issue: use GlitchTip's approach as an early stand-in

af9a700a8706f20771b005804d8c92ca95c8b072 in GlitchTip
This commit is contained in:
Klaas van Schelven
2023-11-04 22:14:39 +01:00
parent 1a5bf7d56c
commit fc7e186918
13 changed files with 1114 additions and 1 deletions

View File

@@ -1,8 +1,9 @@
from django.urls import path
from .views import decompressed_event_detail
from .views import decompressed_event_detail, debug_get_hash
urlpatterns = [
path('event/<uuid:pk>/', decompressed_event_detail),
path('debug_get_hash/<uuid:decompressed_event_pk>/', debug_get_hash),
]

View File

@@ -3,6 +3,7 @@ import json
from django.shortcuts import render, get_object_or_404
from ingest.models import DecompressedEvent
from issues.utils import get_hash_for_data
def decompressed_event_detail(request, pk):
@@ -19,3 +20,13 @@ def decompressed_event_detail(request, pk):
"parsed_data": parsed_data,
"exceptions": exceptions,
})
def debug_get_hash(request, decompressed_event_pk):
    """Debug view: compute the grouping hash of one stored event and show it.

    Looks up the ``DecompressedEvent`` with the given primary key (404 if it
    does not exist), parses its JSON payload, computes the issue hash, prints
    it to stdout and returns it as a plain-text response.
    """
    # debug view; not for eternity
    # Local import so this block does not depend on the file's import header.
    from django.http import HttpResponse

    obj = get_object_or_404(DecompressedEvent, pk=decompressed_event_pk)
    parsed_data = json.loads(obj.data)

    issue_hash = get_hash_for_data(parsed_data)
    print(issue_hash)

    # A Django view must return an HttpResponse; returning None makes the
    # request fail with ValueError after the hash has been printed.
    return HttpResponse(issue_hash, content_type="text/plain")

40
issues/utils.py Normal file
View File

@@ -0,0 +1,40 @@
import hashlib
from typing import List, Optional
from sentry.eventtypes.base import DefaultEvent
from sentry.eventtypes.error import ErrorEvent
def default_hash_input(title: str, culprit: str, type_) -> str:
    """Return the default grouping text: title, culprit and type, concatenated."""
    prefix = title + culprit
    return prefix + type_
def generate_hash(
    title: str, culprit: str, type_, extra: Optional[List[str]] = None
) -> str:
    """Generate insecure hash used for grouping issues

    Without ``extra`` the hash covers the default input (title + culprit +
    type). With ``extra`` the parts are concatenated in order, and every part
    equal to ``"{{ default }}"`` is replaced by the default input.
    """
    if not extra:
        hash_input = default_hash_input(title, culprit, type_)
    else:
        pieces = []
        for part in extra:
            if part == "{{ default }}":
                part = default_hash_input(title, culprit, type_)
            pieces.append(part)
        hash_input = "".join(pieces)

    digest = hashlib.md5(hash_input.encode())
    return digest.hexdigest()
def get_hash_for_data(data):
    """Compute the grouping hash for a parsed event payload.

    Events with a non-empty ``exception`` entry are handled as ``ErrorEvent``,
    everything else as ``DefaultEvent``. The hash is built from the event
    title, its location, the event type's class name and the optional
    ``fingerprint`` list found in the payload.
    """
    has_exception = "exception" in data and data["exception"]
    eventtype = ErrorEvent() if has_exception else DefaultEvent()

    title = eventtype.get_title(eventtype.get_metadata(data))
    culprit = eventtype.get_location(data)
    type_name = type(eventtype).__name__

    return generate_hash(title, culprit, type_name, data.get("fingerprint"))

493
sentry/constants.py Normal file
View File

@@ -0,0 +1,493 @@
"""
These settings act as the default (base) settings for the Sentry-provided
web-server
"""
import logging
import os.path
from collections import OrderedDict, namedtuple
from datetime import timedelta
from django.utils.translation import gettext_lazy as _
# from sentry.utils.integrationdocs import load_doc
# from sentry.utils.geo import rust_geoip
# import semaphore
def get_all_languages():
    """List the entries of the package's ``locale`` directory as language codes.

    Entries starting with a dot are skipped. For names containing an
    underscore, the part after the first underscore is lower-cased and joined
    to the first part with a hyphen.
    """
    locale_dir = os.path.join(MODULE_ROOT, "locale")
    languages = []
    for entry in os.listdir(locale_dir):
        if entry.startswith("."):
            continue
        lang, sep, region = entry.partition("_")
        if sep:
            entry = "{}-{}".format(lang, region.lower())
        languages.append(entry)
    return languages
MODULE_ROOT = os.path.dirname(__import__("sentry").__file__)
DATA_ROOT = os.path.join(MODULE_ROOT, "data")
BAD_RELEASE_CHARS = "\n\f\t/"
MAX_VERSION_LENGTH = 200
MAX_COMMIT_LENGTH = 64
COMMIT_RANGE_DELIMITER = ".."
SORT_OPTIONS = OrderedDict(
(
("priority", _("Priority")),
("date", _("Last Seen")),
("new", _("First Seen")),
("freq", _("Frequency")),
)
)
SEARCH_SORT_OPTIONS = OrderedDict(
(("score", _("Score")), ("date", _("Last Seen")), ("new", _("First Seen")))
)
# XXX: Deprecated: use GroupStatus instead
STATUS_UNRESOLVED = 0
STATUS_RESOLVED = 1
STATUS_IGNORED = 2
STATUS_CHOICES = {
"resolved": STATUS_RESOLVED,
"unresolved": STATUS_UNRESOLVED,
"ignored": STATUS_IGNORED,
# TODO(dcramer): remove in 9.0
"muted": STATUS_IGNORED,
}
# Normalize counts to the 15 minute marker. This value MUST be less than 60. A
# value of 0 would store counts for every minute, and is the lowest level of
# accuracy provided.
MINUTE_NORMALIZATION = 15
MAX_TAG_KEY_LENGTH = 32
MAX_TAG_VALUE_LENGTH = 200
MAX_CULPRIT_LENGTH = 200
MAX_EMAIL_FIELD_LENGTH = 75
ENVIRONMENT_NAME_PATTERN = r"^[^\n\r\f\/]*$"
ENVIRONMENT_NAME_MAX_LENGTH = 64
SENTRY_APP_SLUG_MAX_LENGTH = 64
# Team slugs which may not be used. Generally these are top level URL patterns
# which we don't want to worry about conflicts on.
RESERVED_ORGANIZATION_SLUGS = frozenset(
(
"admin",
"manage",
"login",
"account",
"register",
"api",
"accept",
"organizations",
"teams",
"projects",
"help",
"docs",
"logout",
"404",
"500",
"_static",
"out",
"debug",
"remote",
"get-cli",
"blog",
"welcome",
"features",
"customers",
"integrations",
"signup",
"pricing",
"subscribe",
"enterprise",
"about",
"jobs",
"thanks",
"guide",
"privacy",
"security",
"terms",
"from",
"sponsorship",
"for",
"at",
"platforms",
"branding",
"vs",
"answers",
"_admin",
"support",
"contact",
"onboarding",
"ext",
"extension",
"extensions",
"plugins",
"themonitor",
"settings",
"legal",
"avatar",
"organization-avatar",
"project-avatar",
"team-avatar",
"careers",
"_experiment",
"sentry-apps",
)
)
RESERVED_PROJECT_SLUGS = frozenset(
(
"api-keys",
"audit-log",
"auth",
"members",
"projects",
"rate-limits",
"repos",
"settings",
"teams",
"billing",
"payments",
"legal",
"subscription",
"support",
"integrations",
"developer-settings",
"usage",
)
)
LOG_LEVELS = {
logging.NOTSET: "sample",
logging.DEBUG: "debug",
logging.INFO: "info",
logging.WARNING: "warning",
logging.ERROR: "error",
logging.FATAL: "fatal",
}
DEFAULT_LOG_LEVEL = "error"
DEFAULT_LOGGER_NAME = ""
LOG_LEVELS_MAP = {v: k for k, v in LOG_LEVELS.items()}
# Default alerting threshold values
DEFAULT_ALERT_PROJECT_THRESHOLD = (500, 25) # 500%, 25 events
DEFAULT_ALERT_GROUP_THRESHOLD = (1000, 25) # 1000%, 25 events
# Default sort option for the group stream
DEFAULT_SORT_OPTION = "date"
# Setup languages for only available locales
# _language_map = dict(settings.LANGUAGES)
# LANGUAGES = [(k, _language_map[k]) for k in get_all_languages() if k in _language_map]
# del _language_map
# TODO(dcramer): We eventually want to make this user-editable
TAG_LABELS = {
"exc_type": "Exception Type",
"sentry:user": "User",
"sentry:release": "Release",
"sentry:dist": "Distribution",
"os": "OS",
"url": "URL",
"server_name": "Server",
}
PROTECTED_TAG_KEYS = frozenset(["environment", "release", "sentry:release"])
# TODO(dcramer): once this is more fleshed out we want this to be extensible
SENTRY_RULES = (
"sentry.rules.actions.notify_event.NotifyEventAction",
"sentry.rules.actions.notify_event_service.NotifyEventServiceAction",
"sentry.rules.conditions.every_event.EveryEventCondition",
"sentry.rules.conditions.first_seen_event.FirstSeenEventCondition",
"sentry.rules.conditions.regression_event.RegressionEventCondition",
"sentry.rules.conditions.reappeared_event.ReappearedEventCondition",
"sentry.rules.conditions.tagged_event.TaggedEventCondition",
"sentry.rules.conditions.event_frequency.EventFrequencyCondition",
"sentry.rules.conditions.event_frequency.EventUniqueUserFrequencyCondition",
"sentry.rules.conditions.event_attribute.EventAttributeCondition",
"sentry.rules.conditions.level.LevelCondition",
)
# methods as defined by http://www.w3.org/Protocols/rfc2616/rfc2616-sec9.html + PATCH
HTTP_METHODS = (
"GET",
"POST",
"PUT",
"OPTIONS",
"HEAD",
"DELETE",
"TRACE",
"CONNECT",
"PATCH",
)
# See https://github.com/getsentry/semaphore/blob/master/general/src/protocol/constants.rs
# VALID_PLATFORMS = semaphore.VALID_PLATFORMS
VALID_PLATFORMS = [
"as3",
"c",
"cfml",
"cocoa",
"csharp",
"elixir",
"go",
"groovy",
"haskell",
"java",
"javascript",
"native",
"node",
"objc",
"other",
"perl",
"php",
"python",
"ruby",
]
OK_PLUGIN_ENABLED = _("The {name} integration has been enabled.")
OK_PLUGIN_DISABLED = _("The {name} integration has been disabled.")
OK_PLUGIN_SAVED = _("Configuration for the {name} integration has been saved.")
WARN_SESSION_EXPIRED = "Your session has expired." # TODO: translate this
# Maximum length of a symbol
MAX_SYM = 256
# Known debug information file mimetypes
KNOWN_DIF_FORMATS = {
"text/x-breakpad": "breakpad",
"application/x-mach-binary": "macho",
"application/x-elf-binary": "elf",
"application/x-dosexec": "pe",
"application/x-ms-pdb": "pdb",
"text/x-proguard+plain": "proguard",
"application/x-sentry-bundle+zip": "sourcebundle",
}
NATIVE_UNKNOWN_STRING = "<unknown>"
# Maximum number of release files that can be "skipped" (i.e., maximum paginator offset)
# inside release files API endpoints.
# If this number is too large, it may cause problems because of inefficient
# LIMIT-OFFSET database queries.
# These problems should be solved after we implement artifact bundles workflow.
MAX_RELEASE_FILES_OFFSET = 20000
# to go from an integration id (in _platforms.json) to the platform
# data, such as documentation url or humanized name.
# example: java-logback -> {"type": "framework",
# "link": "https://docs.getsentry.com/hosted/clients/java/modules/logback/",
# "id": "java-logback",
# "name": "Logback"}
INTEGRATION_ID_TO_PLATFORM_DATA = {}
# def _load_platform_data():
# INTEGRATION_ID_TO_PLATFORM_DATA.clear()
# data = load_doc("_platforms")
# if not data:
# return
# for platform in data["platforms"]:
# integrations = platform.pop("integrations")
# if integrations:
# for integration in integrations:
# integration_id = integration.pop("id")
# if integration["type"] != "language":
# integration["language"] = platform["id"]
# INTEGRATION_ID_TO_PLATFORM_DATA[integration_id] = integration
# _load_platform_data()
# special cases where the marketing slug differs from the integration id
# (in _platforms.json). missing values (for example: "java") should assume
# the marketing slug is the same as the integration id:
# javascript, node, python, php, ruby, go, swift, objc, java, perl, elixir
MARKETING_SLUG_TO_INTEGRATION_ID = {
"kotlin": "java",
"scala": "java",
"spring": "java",
"android": "java-android",
"react": "javascript-react",
"angular": "javascript-angular",
"angular2": "javascript-angular2",
"ember": "javascript-ember",
"backbone": "javascript-backbone",
"vue": "javascript-vue",
"express": "node-express",
"koa": "node-koa",
"django": "python-django",
"flask": "python-flask",
"sanic": "python-sanic",
"tornado": "python-tornado",
"celery": "python-celery",
"rq": "python-rq",
"bottle": "python-bottle",
"pythonawslambda": "python-awslambda",
"pyramid": "python-pyramid",
"pylons": "python-pylons",
"laravel": "php-laravel",
"symfony": "php-symfony2",
"rails": "ruby-rails",
"sinatra": "ruby-sinatra",
"dotnet": "csharp",
}
# to go from a marketing page slug like /for/android/ to the integration id
# (in _platforms.json), for looking up documentation urls, etc.
def get_integration_id_for_marketing_slug(slug):
    """Map a marketing page slug to an integration id.

    Special-cased slugs are translated through
    ``MARKETING_SLUG_TO_INTEGRATION_ID``; otherwise the slug is returned
    unchanged when it is itself a known integration id. Returns ``None`` when
    neither lookup matches.
    """
    if slug not in MARKETING_SLUG_TO_INTEGRATION_ID:
        return slug if slug in INTEGRATION_ID_TO_PLATFORM_DATA else None
    return MARKETING_SLUG_TO_INTEGRATION_ID[slug]
# special cases where the integration sent with the SDK differ from
# the integration id (in _platforms.json)
# {PLATFORM: {INTEGRATION_SENT: integration_id, ...}, ...}
PLATFORM_INTEGRATION_TO_INTEGRATION_ID = {
"java": {"java.util.logging": "java-logging"},
# TODO: add more special cases...
}
# to go from event data to the integration id (in _platforms.json),
# for example an event like:
# {"platform": "java",
# "sdk": {"name": "sentry-java",
# "integrations": ["java.util.logging"]}} -> java-logging
def get_integration_id_for_event(platform, sdk_name, integrations):
    """Derive an integration id from an event's platform, SDK name and integrations.

    Lookup order: per-integration special cases, then
    ``<platform>-<integration>``, then the normalised SDK name, then the bare
    platform. Returns ``None`` when nothing matches.
    """
    known_ids = INTEGRATION_ID_TO_PLATFORM_DATA

    for integration in integrations or ():
        # check special cases
        if platform in PLATFORM_INTEGRATION_TO_INTEGRATION_ID:
            overrides = PLATFORM_INTEGRATION_TO_INTEGRATION_ID[platform]
            if integration in overrides:
                return overrides[integration]

        # try <platform>-<integration>, for example "java-log4j"
        candidate = "%s-%s" % (platform, integration)
        if candidate in known_ids:
            return candidate

    # try sdk name, for example "sentry-java" -> "java" or "raven-java:log4j" -> "java-log4j"
    sdk_name = sdk_name.lower()
    for old, new in (("sentry-", ""), ("raven-", ""), (":", "-")):
        sdk_name = sdk_name.replace(old, new)
    if sdk_name in known_ids:
        return sdk_name

    # try platform name, for example "java"
    if platform in known_ids:
        return platform

    return None
class ObjectStatus(object):
    """Integer status codes for objects, with two overlapping naming schemes."""

    # visibility-style names
    VISIBLE = 0
    HIDDEN = 1
    PENDING_DELETION = 2
    DELETION_IN_PROGRESS = 3

    # activity-style aliases sharing the values 0 and 1
    ACTIVE = 0
    DISABLED = 1

    @classmethod
    def as_choices(cls):
        """Return ``(value, label)`` pairs for the four statuses offered as choices."""
        names = ("ACTIVE", "DISABLED", "PENDING_DELETION", "DELETION_IN_PROGRESS")
        return tuple((getattr(cls, name), name.lower()) for name in names)
class SentryAppStatus(object):
    """Publication status codes of a Sentry app and their string labels."""

    UNPUBLISHED = 0
    PUBLISHED = 1
    INTERNAL = 2

    UNPUBLISHED_STR = "unpublished"
    PUBLISHED_STR = "published"
    INTERNAL_STR = "internal"

    @classmethod
    def as_choices(cls):
        """Return ``(value, label)`` pairs for all three statuses."""
        codes = (cls.UNPUBLISHED, cls.PUBLISHED, cls.INTERNAL)
        labels = (cls.UNPUBLISHED_STR, cls.PUBLISHED_STR, cls.INTERNAL_STR)
        return tuple(zip(codes, labels))

    @classmethod
    def as_str(cls, status):
        """Return the label for ``status``, or ``None`` for an unknown value."""
        pairs = (
            (cls.UNPUBLISHED, cls.UNPUBLISHED_STR),
            (cls.PUBLISHED, cls.PUBLISHED_STR),
            (cls.INTERNAL, cls.INTERNAL_STR),
        )
        for code, label in pairs:
            if status == code:
                return label
        return None
class SentryAppInstallationStatus(object):
    """Installation status codes of a Sentry app and their string labels."""

    PENDING = 0
    INSTALLED = 1

    PENDING_STR = "pending"
    INSTALLED_STR = "installed"

    @classmethod
    def as_choices(cls):
        """Return ``(value, label)`` pairs for both statuses."""
        pending = (cls.PENDING, cls.PENDING_STR)
        installed = (cls.INSTALLED, cls.INSTALLED_STR)
        return (pending, installed)

    @classmethod
    def as_str(cls, status):
        """Return the label for ``status``, or ``None`` for an unknown value."""
        pairs = (
            (cls.PENDING, cls.PENDING_STR),
            (cls.INSTALLED, cls.INSTALLED_STR),
        )
        for code, label in pairs:
            if status == code:
                return label
        return None
StatsPeriod = namedtuple("StatsPeriod", ("segments", "interval"))
LEGACY_RATE_LIMIT_OPTIONS = frozenset(
("sentry:project-rate-limit", "sentry:account-rate-limit")
)
# We need to limit the range of valid timestamps of an event because that
# timestamp is used to control data retention.
MAX_SECS_IN_FUTURE = 60
MAX_SECS_IN_PAST = 2592000 # 30 days
ALLOWED_FUTURE_DELTA = timedelta(seconds=MAX_SECS_IN_FUTURE)
# DEFAULT_STORE_NORMALIZER_ARGS = dict(
# geoip_lookup=rust_geoip,
# stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT,
# max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES,
# max_secs_in_future=MAX_SECS_IN_FUTURE,
# max_secs_in_past=MAX_SECS_IN_PAST,
# enable_trimming=True,
# )
INTERNAL_INTEGRATION_TOKEN_COUNT_MAX = 20
ALL_ACCESS_PROJECTS = {-1}

74
sentry/culprit.py Normal file
View File

@@ -0,0 +1,74 @@
"""
This file implements the legacy culprit system. The culprit at this point is
just used as a fallback if no transaction is set. When a transaction is set
the culprit is overridden by the transaction value.
Over time we want to fully phase out the culprit. Until then this is the
code that generates it.
"""
from sentry.constants import MAX_CULPRIT_LENGTH
from sentry.utils.safe import get_path
from sentry.utils.strings import truncatechars
def generate_culprit(data):
    """Build the legacy culprit string for an event payload.

    The culprit comes from the last stacktrace that has frames (taken from the
    exceptions, or from the top-level ``stacktrace`` when there are no
    exceptions). If that yields nothing and the event has a ``request``, the
    request URL is used. The result is truncated to ``MAX_CULPRIT_LENGTH``.
    """
    platform = data.get("platform")
    exceptions = get_path(data, "exception", "values", filter=True)

    if exceptions:
        # Synthetic events no longer get a culprit
        newest = get_path(exceptions, -1)
        if get_path(newest, "mechanism", "synthetic"):
            return ""

        stacktraces = []
        for exc in exceptions:
            if get_path(exc, "stacktrace", "frames"):
                stacktraces.append(exc["stacktrace"])
    else:
        stacktrace = data.get("stacktrace")
        has_frames = stacktrace and stacktrace.get("frames")
        stacktraces = [stacktrace] if has_frames else None

    culprit = None
    if stacktraces:
        culprit = get_stacktrace_culprit(get_path(stacktraces, -1), platform=platform)

    if not culprit and data.get("request"):
        culprit = get_path(data, "request", "url")

    return truncatechars(culprit or "", MAX_CULPRIT_LENGTH)
def get_stacktrace_culprit(stacktrace, platform):
    """Pick a culprit from a stacktrace, walking its frames last-to-first.

    The first in-app frame that yields a non-empty culprit wins. Otherwise the
    culprit of the first non-in-app frame encountered is returned, or ``None``
    when there is no such frame.
    """
    fallback = None
    for frame in reversed(stacktrace["frames"]):
        if not frame:
            continue

        if not frame.get("in_app"):
            # remember only the first non-in-app frame seen
            if fallback is None:
                fallback = get_frame_culprit(frame, platform=platform)
            continue

        found = get_frame_culprit(frame, platform=platform)
        if found:
            return found
    return fallback
def get_frame_culprit(frame, platform):
    """Format the culprit string for a single frame.

    Native-style platforms use the function name alone; JavaScript platforms
    use ``function(location)``; everything else uses ``location in function``.
    A frame without module or filename gives ``""`` (except on native-style
    platforms).
    """
    # If this frame has a platform, we use it instead of the one that
    # was passed in (as that one comes from the exception which might
    # not necessarily be the same platform).
    effective_platform = frame.get("platform") or platform
    func_name = frame.get("function") or "?"

    if effective_platform in ("objc", "cocoa", "native"):
        return func_name

    location = frame.get("module") or frame.get("filename")
    if not location:
        return ""

    if effective_platform in ("javascript", "node"):
        # function and location might be unicode here, so let it coerce
        # to a unicode string if needed.
        return "%s(%s)" % (func_name, location)

    return "%s in %s" % (location, func_name)

View File

48
sentry/eventtypes/base.py Normal file
View File

@@ -0,0 +1,48 @@
from django.utils.encoding import force_str
from sentry.culprit import generate_culprit
from sentry.utils.safe import get_path
from sentry.utils.strings import strip, truncatechars
class BaseEvent:
    """Interface shared by the event types used for titles and grouping."""

    id = None

    def get_metadata(self, data):
        """Extract the metadata dict from event ``data``; subclasses implement this."""
        raise NotImplementedError()

    def get_title(self, metadata):
        """Build a title from ``metadata``; subclasses implement this."""
        raise NotImplementedError()

    def get_location(self, data):
        """Return the event's location; the base implementation has none."""
        return None
class DefaultEvent(BaseEvent):
    """Event type for events that carry a message rather than an exception."""

    key = "default"

    def get_metadata(self, data):
        """Return ``{"title": ...}`` from the first line of the event's message.

        The message is the first non-empty value among the formatted log
        entry, the raw log entry message, ``message.formatted`` and
        ``message``. The title is capped at 100 characters; a placeholder is
        used when there is no message.
        """
        raw_message = (
            get_path(data, "logentry", "formatted")
            or get_path(data, "logentry", "message")
            or get_path(data, "message", "formatted")
            or get_path(data, "message")
        )
        message = strip(raw_message)

        if not message:
            return {"title": "<unlabeled event>"}

        first_line = message.splitlines()[0]
        return {"title": truncatechars(first_line, 100)}

    def get_title(self, metadata):
        """Return the stored title, or ``"<untitled>"`` when it is missing or empty."""
        title = metadata.get("title")
        return title if title else "<untitled>"

    def get_location(self, data):
        """Return culprit, transaction or generated culprit (first non-empty) as text."""
        location = (
            data.get("culprit")
            or data.get("transaction")
            or generate_culprit(data)
            or ""
        )
        return force_str(location)

View File

@@ -0,0 +1,75 @@
from django.utils.encoding import force_str
from sentry.culprit import generate_culprit
from sentry.stacktraces.functions import get_function_name_for_frame
from sentry.stacktraces.processing import get_crash_frame_from_event_data
from sentry.utils.safe import get_path, trim, truncatechars
from .base import BaseEvent
def get_crash_location(data):
    """Return ``(filename or abs_path, function name)`` of the crashing frame.

    Frames whose function is missing, ``"<redacted>"`` or ``"<unknown>"`` are
    ignored when selecting the crash frame. Returns ``None`` when no frame
    qualifies.
    """

    def has_usable_function(candidate):
        return candidate.get("function") not in (None, "<redacted>", "<unknown>")

    frame = get_crash_frame_from_event_data(data, frame_filter=has_usable_function)
    if frame is None:
        return None

    func = get_function_name_for_frame(frame, data.get("platform"))
    return frame.get("filename") or frame.get("abs_path"), func
class ErrorEvent(BaseEvent):
    """Event type for events that carry one or more exceptions."""

    key = "error"

    def get_metadata(self, data):
        """Build metadata (value, type, filename, function) from the last exception.

        Note that a list-valued ``data["exception"]`` is rewritten in place to
        the documented ``{"values": [...]}`` shape.
        """
        # Check for undocumented interface where exception has no values. Go SDK does this.
        # https://docs.sentry.io/development/sdk-dev/event-payloads/exception/
        # exception can be an list instead of a dictionary
        raw_exception = data.get("exception")
        if isinstance(raw_exception, list):
            if len(raw_exception) == 0:
                return {}
            # Force documented interface
            data["exception"] = {"values": raw_exception}

        exception = get_path(data, "exception", "values", -1)
        if not exception:
            return {}

        crash_location = get_crash_location(data)
        metadata = {"value": trim(get_path(exception, "value", default=""), 1024)}

        # If the exception mechanism indicates a synthetic exception we do not
        # want to record the type and value into the metadata.
        is_synthetic = get_path(exception, "mechanism", "synthetic")
        if not is_synthetic:
            metadata["type"] = trim(get_path(exception, "type", default="Error"), 128)

        # Attach crash location if available
        if crash_location is not None:
            filename, function = crash_location
            if filename:
                metadata["filename"] = filename
            if function:
                metadata["function"] = function

        return metadata

    def get_title(self, metadata):
        """Return ``"<type>: <first line of value>"``, degrading when parts are missing."""
        exc_type = metadata.get("type")
        if exc_type is None:
            return metadata.get("function") or "<unknown>"
        if not metadata.get("value"):
            return exc_type

        try:
            first_line = metadata["value"].splitlines()[0]
            return "{}: {}".format(exc_type, truncatechars(first_line))
        except AttributeError:
            # GlitchTip modification
            # Exception value is specified as a string, sometimes it isn't. This is a fallback.
            return "{}: {}".format(exc_type, str(metadata["value"]))

    def get_location(self, data):
        """Return culprit, transaction or generated culprit (first non-empty) as text."""
        location = (
            data.get("culprit")
            or data.get("transaction")
            or generate_culprit(data)
            or ""
        )
        return force_str(location)

View File

@@ -0,0 +1,228 @@
import re
from .platform import get_behavior_family_for_platform
_windecl_hash = re.compile(r"^@?(.*?)@[0-9]+$")
_rust_hash = re.compile(r"::h[a-z0-9]{16}$")
_cpp_trailer_re = re.compile(r"(\bconst\b|&)$")
_rust_blanket_re = re.compile(r"^([A-Z] as )")
_lambda_re = re.compile(
r"""(?x)
# gcc
(?:
\{
lambda\(.*?\)\#\d+
\}
) |
# msvc
(?:
\blambda_[a-f0-9]{32}\b
) |
# clang
(?:
\$_\d+\b
)
"""
)
_anon_namespace_re = re.compile(
r"""(?x)
\?A0x[a-f0-9]{8}::
"""
)
PAIRS = {"(": ")", "{": "}", "[": "]", "<": ">"}
def replace_enclosed_string(s, start, end, replacement=None):
if start not in s:
return s
depth = 0
rv = []
pair_start = None
for idx, char in enumerate(s):
if char == start:
if depth == 0:
pair_start = idx
depth += 1
elif char == end:
depth -= 1
if depth == 0:
if replacement is not None:
if callable(replacement):
rv.append(replacement(s[pair_start + 1 : idx], pair_start))
else:
rv.append(replacement)
elif depth == 0:
rv.append(char)
return "".join(rv)
def split_func_tokens(s):
    """Split ``s`` on whitespace, ignoring whitespace inside bracket pairs.

    Brackets listed in ``PAIRS`` open a nested region; text inside it is added
    to the current token only when the outermost bracket closes.
    """
    tokens = []
    pieces = []  # fragments of the token being built
    closers = []  # closing brackets still expected, innermost last
    consumed = 0  # index of the first character not yet copied into pieces

    for pos, ch in enumerate(s):
        closer = PAIRS.get(ch)
        if closer is not None:
            closers.append(closer)
            continue

        if closers:
            if ch == closers[-1]:
                closers.pop()
                if not closers:
                    pieces.append(s[consumed : pos + 1])
                    consumed = pos + 1
            continue

        if ch.isspace():
            if pieces:
                tokens.append("".join(pieces))
            pieces = []
        else:
            pieces.append(s[consumed : pos + 1])
        consumed = pos + 1

    if pieces:
        tokens.append("".join(pieces))
    return tokens
def trim_function_name(function, platform, normalize_lambdas=True):
    """Given a function value from the frame's function attribute this returns
    a trimmed version that can be stored in `function_name`. This is only used
    if the client did not supply a value itself already.

    Only platforms of the "native" behavior family are trimmed; for all other
    platforms, and for the ``<redacted>`` / ``<unknown>`` markers, the value
    is returned unchanged. When ``normalize_lambdas`` is true, compiler
    specific lambda names and MSVC anonymous-namespace hashes are normalised.
    """
    if get_behavior_family_for_platform(platform) != "native":
        return function
    if function in ("<redacted>", "<unknown>"):
        return function

    original_function = function
    function = function.strip()

    # Ensure we don't operate on objc functions
    if function.startswith(("[", "+[", "-[")):
        return function

    # Chop off C++ trailers
    while True:
        match = _cpp_trailer_re.search(function)
        if match is None:
            break
        function = function[: match.start()].rstrip()

    # Because operator<< really screws with our balancing, so let's work
    # around that by replacing it with a character we do not observe in
    # `split_func_tokens` or `replace_enclosed_string`.
    #
    # Every placeholder must be a real, non-empty character: an empty one
    # deletes the construct here and, when mapped back below,
    # `str.replace("", x)` would insert `x` between all characters.
    function = (
        function.replace("operator<<", "operator⟨⟨")
        .replace("operator<", "operator⟨")
        .replace("operator()", "operator◯")
        .replace(" -> ", " ⟿ ")
        .replace("`anonymous namespace'", "anonymousnamespace")
    )

    if normalize_lambdas:
        # normalize C++ lambdas. This is necessary because different
        # compilers use different rules for how to name a lambda and they are
        # all quite inconsistent. This does not give us perfect answers to
        # this problem but closer. In particular msvc will call a lambda
        # something like `lambda_deadbeefeefffeeffeeff` whereas clang for
        # instance will name it `main::$_0` which will tell us in which outer
        # function it was declared.
        function = _lambda_re.sub("lambda", function)

        # Normalize MSVC anonymous namespaces from inline functions. For inline
        # functions, the compiler inconsistently renders anonymous namespaces with
        # their hash. For regular functions, "`anonymous namespace'" is used.
        # The regular expression matches the trailing "::" to avoid accidental
        # replacement in mangled function names.
        function = _anon_namespace_re.sub("anonymousnamespace::", function)

    # Remove the arguments if there is one.
    def process_args(value, start):
        value = value.strip()
        if value in ("anonymous namespace", "operator"):
            return "(%s)" % value
        return ""

    function = replace_enclosed_string(function, "(", ")", process_args)

    # Resolve generic types, but special case rust which uses things like
    # <Foo as Bar>::baz to denote traits.
    def process_generics(value, start):
        # Special case for lambdas
        if value == "lambda" or _lambda_re.match(value):
            return "<%s>" % value

        if start > 0:
            return "<T>"

        # Rust special cases
        value = _rust_blanket_re.sub("", value)  # prefer trait for blanket impls
        value = replace_enclosed_string(value, "<", ">", process_generics)
        return value.split(" as ", 1)[0]

    function = replace_enclosed_string(function, "<", ">", process_generics)

    tokens = split_func_tokens(function)

    # MSVC demangles generic operator functions with a space between the
    # function name and the generics. Ensure that those two components both end
    # up in the function name.
    if len(tokens) > 1 and tokens[-1] == "<T>":
        tokens.pop()
        tokens[-1] += " <T>"

    # find the token which is the function name. Since we chopped off C++
    # trailers there are only two cases we care about: the token left to
    # the -> return marker which is for instance used in Swift and if that
    # is not found, the last token in the list.
    #
    # ["unsigned", "int", "whatever"] -> whatever
    # ["@objc", "whatever", "->", "int"] -> whatever
    try:
        func_token = tokens[tokens.index("⟿") - 1]
    except ValueError:
        func_token = tokens[-1] if tokens else None

    if func_token:
        # Map the placeholders back to the text they stand for.
        function = (
            func_token.replace("⟨", "<")
            .replace("◯", "()")
            .replace(" ⟿ ", " -> ")
            .replace("anonymousnamespace", "`anonymous namespace'")
        )

    # This really should never happen
    else:
        function = original_function

    # trim off rust markers
    function = _rust_hash.sub("", function)

    # trim off windows decl markers
    return _windecl_hash.sub("\\1", function)
def get_function_name_for_frame(frame, platform=None):
    """Given a frame object or dictionary this returns the actual function
    name trimmed.

    Returns ``None`` when the frame has no function and no raw function.
    """
    raw = frame.get_raw_data() if hasattr(frame, "get_raw_data") else frame
    function = raw.get("function")

    # if there is a raw function, prioritize the function unchanged
    if raw.get("raw_function"):
        return function

    if not function:
        return None

    # otherwise trim the function on demand
    return trim_function_name(function, raw.get("platform") or platform)

View File

@@ -0,0 +1,10 @@
NATIVE_PLATFORMS = frozenset(("objc", "cocoa", "swift", "native", "c"))
JAVASCRIPT_PLATFORMS = frozenset(("javascript", "node"))


def get_behavior_family_for_platform(platform):
    """Return ``"native"``, ``"javascript"`` or ``"other"`` for a platform name."""
    families = (
        ("native", NATIVE_PLATFORMS),
        ("javascript", JAVASCRIPT_PLATFORMS),
    )
    for family, members in families:
        if platform in members:
            return family
    return "other"

View File

@@ -0,0 +1,26 @@
from sentry.utils.safe import get_path
def get_crash_frame_from_event_data(data, frame_filter=None):
    """Select the frame that most likely caused the crash.

    Frames come from the last exception's stacktrace, else the top-level
    stacktrace, else the stacktrace of the only thread (when there is exactly
    one). Walking last-to-first, the first in-app frame accepted by
    ``frame_filter`` is returned; failing that, the first accepted frame.
    Returns ``None`` when no frame qualifies.
    """
    frames = get_path(data, "exception", "values", -1, "stacktrace", "frames")
    if not frames:
        frames = get_path(data, "stacktrace", "frames")

    if not frames:
        threads = get_path(data, "threads", "values")
        if threads and len(threads) == 1:
            frames = get_path(threads, 0, "stacktrace", "frames")

    fallback = None
    for frame in reversed(frames or ()):
        if frame is None:
            continue
        if frame_filter is not None and not frame_filter(frame):
            continue
        if frame.get("in_app"):
            return frame
        if fallback is None:
            fallback = frame

    return fallback or None

95
sentry/utils/safe.py Normal file
View File

@@ -0,0 +1,95 @@
import collections
import json
from django.utils.encoding import force_str
from sentry.utils.strings import truncatechars
SENTRY_MAX_VARIABLE_SIZE = 512
def get_path(data, *path, **kwargs):
    """
    Safely resolves data from a recursive data structure. A value is only
    returned if the full path exists, otherwise ``None`` is returned.
    If the ``default`` argument is specified, it is returned instead of ``None``.
    If the ``filter`` argument is specified and the value is a list, it is
    filtered with the given callback. Alternatively, pass ``True`` as filter to
    only filter ``None`` values.

    Mapping segments are looked up by key. List and tuple segments are only
    followed for integer indexes that are in range (negative indexes count
    from the end); any other segment makes the lookup return the default.

    Raises ``TypeError`` for keyword arguments other than ``default`` and
    ``filter``.
    """
    default = kwargs.pop("default", None)
    f = kwargs.pop("filter", None)
    for k in kwargs:
        raise TypeError("get_path() got an undefined keyword argument '%s'" % k)

    for p in path:
        if isinstance(data, collections.abc.Mapping) and p in data:
            data = data[p]
        elif (
            isinstance(data, (list, tuple))
            # A string key applied to a list (e.g. "values" on an SDK that
            # sends ``exception`` as a bare list) must fall through to the
            # default rather than fail on an int/str comparison.
            and isinstance(p, int)
            and -len(data) <= p < len(data)
        ):
            data = data[p]
        else:
            return default

    if f and data and isinstance(data, (list, tuple)):
        data = list(filter((lambda x: x is not None) if f is True else f, data))

    return data if data is not None else default
def trim(
    value,
    max_size=SENTRY_MAX_VARIABLE_SIZE,
    max_depth=6,
    object_hook=None,
    _depth=0,
    _size=0,
    **kwargs
):
    """
    Truncates a value to ```MAX_VARIABLE_SIZE```.
    The method of truncation depends on the type of value.

    - dicts are walked in sorted key order and lists/tuples in order; walking
      stops once the running size reaches ``max_size``, so later entries are
      dropped;
    - strings are cut to the size budget that is still left;
    - any other value is returned unchanged;
    - values nested deeper than ``max_depth`` are JSON-serialised (unless
      already a string) and then trimmed as a string.

    ``object_hook``, when given, is called on the result before it is
    returned (the too-deep branch returns without calling it). ``_depth`` and
    ``_size`` carry the recursion state. Extra keyword arguments are accepted
    and not used by this function.
    """
    # Settings forwarded to recursive calls, one level deeper.
    options = {
        "max_depth": max_depth,
        "max_size": max_size,
        "object_hook": object_hook,
        "_depth": _depth + 1,
    }

    if _depth > max_depth:
        # Too deep: flatten to a string; this call restarts at depth 0 and
        # does not forward object_hook.
        if not isinstance(value, str):
            value = json.dumps(value)
        return trim(value, _size=_size, max_size=max_size)

    elif isinstance(value, dict):
        result = {}
        _size += 2  # fixed cost charged for the container itself
        for k in sorted(value.keys()):
            v = value[k]
            trim_v = trim(v, _size=_size, **options)
            result[k] = trim_v
            # charge the text length of the trimmed value plus one
            _size += len(force_str(trim_v)) + 1
            if _size >= max_size:
                break

    elif isinstance(value, (list, tuple)):
        result = []
        _size += 2  # fixed cost charged for the container itself
        for v in value:
            trim_v = trim(v, _size=_size, **options)
            result.append(trim_v)
            _size += len(force_str(trim_v))
            if _size >= max_size:
                break
        # keep the input's container type
        if isinstance(value, tuple):
            result = tuple(result)

    elif isinstance(value, str):
        # only the budget not yet used up is available to this string
        result = truncatechars(value, max_size - _size)

    else:
        result = value

    if object_hook is None:
        return result
    return object_hook(result)

12
sentry/utils/strings.py Normal file
View File

@@ -0,0 +1,12 @@
from django.utils.encoding import smart_str
def truncatechars(value: str, chars=100):
    """Truncate string and append …

    Strings of at most ``chars`` characters are returned unchanged. Longer
    strings are cut to ``chars`` characters and get a single "…" appended to
    show that text was dropped. A negative ``chars`` is treated as 0.
    """
    # Clamp: a negative limit would otherwise slice from the end of the string.
    limit = max(chars, 0)
    if len(value) <= limit:
        return value
    return value[:limit] + "…"
def strip(value):
    """Return ``value`` as text without surrounding whitespace; falsy values give ``""``."""
    return smart_str(value).strip() if value else ""