getting hash for issue: use GlitchTip's approach as an early stand-in

af9a700a8706f20771b005804d8c92ca95c8b072 in GlitchTip
2026-02-15 10:59:06 -06:00 · 2023-11-04 22:14:39 +01:00
parent 1a5bf7d56c
commit fc7e186918
13 changed files with 1114 additions and 1 deletions
--- a/events/urls.py
+++ b/events/urls.py
@@ -1,8 +1,9 @@
 from django.urls import path

-from .views import decompressed_event_detail
+from .views import decompressed_event_detail, debug_get_hash


 urlpatterns = [
    path('event/<uuid:pk>/', decompressed_event_detail),
+    path('debug_get_hash/<uuid:decompressed_event_pk>/', debug_get_hash),
 ]
--- a/events/views.py
+++ b/events/views.py
@@ -3,6 +3,7 @@ import json
 from django.shortcuts import render, get_object_or_404

 from ingest.models import DecompressedEvent
+from issues.utils import get_hash_for_data


 def decompressed_event_detail(request, pk):
@@ -19,3 +20,13 @@ def decompressed_event_detail(request, pk):
        "parsed_data": parsed_data,
        "exceptions": exceptions,
    })
+
+
+def debug_get_hash(request, decompressed_event_pk):
+    """Debug view: compute the grouping hash of one stored event and return it.
+
+    Looks up the ``DecompressedEvent`` by primary key (404 when it does not
+    exist), parses its JSON payload and responds with the hash as plain text.
+    """
+    # debug view; not for eternity
+
+    # Imported locally so this stand-in view is self-contained.
+    from django.http import HttpResponse
+
+    obj = get_object_or_404(DecompressedEvent, pk=decompressed_event_pk)
+
+    parsed_data = json.loads(obj.data)
+    grouping_hash = get_hash_for_data(parsed_data)
+
+    # Still echoed to the console, as before, for quick inspection.
+    print(grouping_hash)
+
+    # A Django view has to return an HttpResponse; returning None makes the
+    # request fail with a ValueError.
+    return HttpResponse(grouping_hash, content_type="text/plain")
--- a/issues/utils.py
+++ b/issues/utils.py
@@ -0,0 +1,40 @@
+import hashlib
+from typing import List, Optional
+
+from sentry.eventtypes.base import DefaultEvent
+from sentry.eventtypes.error import ErrorEvent
+
+
+def default_hash_input(title: str, culprit: str, type_) -> str:
+    """Build the default grouping input: title, culprit and type, concatenated."""
+    title_and_culprit = title + culprit
+    return title_and_culprit + type_
+
+
+def generate_hash(
+    title: str, culprit: str, type_, extra: Optional[List[str]] = None
+) -> str:
+    """Generate insecure hash used for grouping issues
+
+    Without ``extra`` the hash covers the default input (title, culprit and
+    type). With ``extra`` its parts are concatenated instead, and every
+    ``"{{ default }}"`` part is replaced by the default input. The result is
+    the hexadecimal MD5 digest of that string.
+    """
+    if not extra:
+        hash_input = default_hash_input(title, culprit, type_)
+    else:
+        pieces = []
+        for part in extra:
+            if part == "{{ default }}":
+                pieces.append(default_hash_input(title, culprit, type_))
+            else:
+                pieces.append(part)
+        hash_input = "".join(pieces)
+
+    digest = hashlib.md5(hash_input.encode())
+    return digest.hexdigest()
+
+
+def get_hash_for_data(data):
+    """Return the grouping hash for a parsed event payload.
+
+    Payloads with a non-empty "exception" entry are handled by ``ErrorEvent``,
+    all others by ``DefaultEvent``. The chosen event type supplies the title
+    and location, which are hashed together with the event type's class name
+    and the payload's optional "fingerprint".
+    """
+    has_exception = "exception" in data and data["exception"]
+    eventtype = ErrorEvent() if has_exception else DefaultEvent()
+
+    metadata = eventtype.get_metadata(data)
+    title = eventtype.get_title(metadata)
+    culprit = eventtype.get_location(data)
+    type_name = type(eventtype).__name__
+
+    return generate_hash(title, culprit, type_name, data.get("fingerprint"))
--- a/sentry/constants.py
+++ b/sentry/constants.py
@@ -0,0 +1,493 @@
+"""
+These settings act as the default (base) settings for the Sentry-provided
+web-server
+"""
+
+
+import logging
+import os.path
+from collections import OrderedDict, namedtuple
+from datetime import timedelta
+
+from django.utils.translation import gettext_lazy as _
+
+# from sentry.utils.integrationdocs import load_doc
+# from sentry.utils.geo import rust_geoip
+
+# import semaphore
+
+
+def get_all_languages():
+    """List the language codes found in the package's "locale" directory.
+
+    Entries starting with "." are skipped; a name containing "_" such as
+    "pt_BR" is returned as "pt-br" (part after the first "_" lower-cased).
+    """
+    locale_dir = os.path.join(MODULE_ROOT, "locale")
+    languages = []
+    for entry in os.listdir(locale_dir):
+        if entry.startswith("."):
+            continue
+        if "_" in entry:
+            language, region = entry.split("_", 1)
+            entry = "{}-{}".format(language, region.lower())
+        languages.append(entry)
+    return languages
+
+
+MODULE_ROOT = os.path.dirname(__import__("sentry").__file__)
+DATA_ROOT = os.path.join(MODULE_ROOT, "data")
+
+BAD_RELEASE_CHARS = "\n\f\t/"
+MAX_VERSION_LENGTH = 200
+MAX_COMMIT_LENGTH = 64
+COMMIT_RANGE_DELIMITER = ".."
+
+SORT_OPTIONS = OrderedDict(
+    (
+        ("priority", _("Priority")),
+        ("date", _("Last Seen")),
+        ("new", _("First Seen")),
+        ("freq", _("Frequency")),
+    )
+)
+
+SEARCH_SORT_OPTIONS = OrderedDict(
+    (("score", _("Score")), ("date", _("Last Seen")), ("new", _("First Seen")))
+)
+
+# XXX: Deprecated: use GroupStatus instead
+STATUS_UNRESOLVED = 0
+STATUS_RESOLVED = 1
+STATUS_IGNORED = 2
+
+STATUS_CHOICES = {
+    "resolved": STATUS_RESOLVED,
+    "unresolved": STATUS_UNRESOLVED,
+    "ignored": STATUS_IGNORED,
+    # TODO(dcramer): remove in 9.0
+    "muted": STATUS_IGNORED,
+}
+
+# Normalize counts to the 15 minute marker. This value MUST be less than 60. A
+# value of 0 would store counts for every minute, and is the lowest level of
+# accuracy provided.
+MINUTE_NORMALIZATION = 15
+
+MAX_TAG_KEY_LENGTH = 32
+MAX_TAG_VALUE_LENGTH = 200
+MAX_CULPRIT_LENGTH = 200
+MAX_EMAIL_FIELD_LENGTH = 75
+
+ENVIRONMENT_NAME_PATTERN = r"^[^\n\r\f\/]*$"
+ENVIRONMENT_NAME_MAX_LENGTH = 64
+
+SENTRY_APP_SLUG_MAX_LENGTH = 64
+
+# Team slugs which may not be used. Generally these are top level URL patterns
+# which we don't want to worry about conflicts on.
+RESERVED_ORGANIZATION_SLUGS = frozenset(
+    (
+        "admin",
+        "manage",
+        "login",
+        "account",
+        "register",
+        "api",
+        "accept",
+        "organizations",
+        "teams",
+        "projects",
+        "help",
+        "docs",
+        "logout",
+        "404",
+        "500",
+        "_static",
+        "out",
+        "debug",
+        "remote",
+        "get-cli",
+        "blog",
+        "welcome",
+        "features",
+        "customers",
+        "integrations",
+        "signup",
+        "pricing",
+        "subscribe",
+        "enterprise",
+        "about",
+        "jobs",
+        "thanks",
+        "guide",
+        "privacy",
+        "security",
+        "terms",
+        "from",
+        "sponsorship",
+        "for",
+        "at",
+        "platforms",
+        "branding",
+        "vs",
+        "answers",
+        "_admin",
+        "support",
+        "contact",
+        "onboarding",
+        "ext",
+        "extension",
+        "extensions",
+        "plugins",
+        "themonitor",
+        "settings",
+        "legal",
+        "avatar",
+        "organization-avatar",
+        "project-avatar",
+        "team-avatar",
+        "careers",
+        "_experiment",
+        "sentry-apps",
+    )
+)
+
+RESERVED_PROJECT_SLUGS = frozenset(
+    (
+        "api-keys",
+        "audit-log",
+        "auth",
+        "members",
+        "projects",
+        "rate-limits",
+        "repos",
+        "settings",
+        "teams",
+        "billing",
+        "payments",
+        "legal",
+        "subscription",
+        "support",
+        "integrations",
+        "developer-settings",
+        "usage",
+    )
+)
+
+LOG_LEVELS = {
+    logging.NOTSET: "sample",
+    logging.DEBUG: "debug",
+    logging.INFO: "info",
+    logging.WARNING: "warning",
+    logging.ERROR: "error",
+    logging.FATAL: "fatal",
+}
+DEFAULT_LOG_LEVEL = "error"
+DEFAULT_LOGGER_NAME = ""
+LOG_LEVELS_MAP = {v: k for k, v in LOG_LEVELS.items()}
+
+# Default alerting threshold values
+DEFAULT_ALERT_PROJECT_THRESHOLD = (500, 25)  # 500%, 25 events
+DEFAULT_ALERT_GROUP_THRESHOLD = (1000, 25)  # 1000%, 25 events
+
+# Default sort option for the group stream
+DEFAULT_SORT_OPTION = "date"
+
+# Setup languages for only available locales
+# _language_map = dict(settings.LANGUAGES)
+# LANGUAGES = [(k, _language_map[k]) for k in get_all_languages() if k in _language_map]
+# del _language_map
+
+# TODO(dcramer): We eventually want to make this user-editable
+TAG_LABELS = {
+    "exc_type": "Exception Type",
+    "sentry:user": "User",
+    "sentry:release": "Release",
+    "sentry:dist": "Distribution",
+    "os": "OS",
+    "url": "URL",
+    "server_name": "Server",
+}
+
+PROTECTED_TAG_KEYS = frozenset(["environment", "release", "sentry:release"])
+
+# TODO(dcramer): once this is more fleshed out we want this to be extendable
+SENTRY_RULES = (
+    "sentry.rules.actions.notify_event.NotifyEventAction",
+    "sentry.rules.actions.notify_event_service.NotifyEventServiceAction",
+    "sentry.rules.conditions.every_event.EveryEventCondition",
+    "sentry.rules.conditions.first_seen_event.FirstSeenEventCondition",
+    "sentry.rules.conditions.regression_event.RegressionEventCondition",
+    "sentry.rules.conditions.reappeared_event.ReappearedEventCondition",
+    "sentry.rules.conditions.tagged_event.TaggedEventCondition",
+    "sentry.rules.conditions.event_frequency.EventFrequencyCondition",
+    "sentry.rules.conditions.event_frequency.EventUniqueUserFrequencyCondition",
+    "sentry.rules.conditions.event_attribute.EventAttributeCondition",
+    "sentry.rules.conditions.level.LevelCondition",
+)
+
+# methods as defined by http://www.w3.org/Protocols/rfc2616/rfc2616-sec9.html + PATCH
+HTTP_METHODS = (
+    "GET",
+    "POST",
+    "PUT",
+    "OPTIONS",
+    "HEAD",
+    "DELETE",
+    "TRACE",
+    "CONNECT",
+    "PATCH",
+)
+
+# See https://github.com/getsentry/semaphore/blob/master/general/src/protocol/constants.rs
+# VALID_PLATFORMS = semaphore.VALID_PLATFORMS
+VALID_PLATFORMS = [
+    "as3",
+    "c",
+    "cfml",
+    "cocoa",
+    "csharp",
+    "elixir",
+    "go",
+    "groovy",
+    "haskell",
+    "java",
+    "javascript",
+    "native",
+    "node",
+    "objc",
+    "other",
+    "perl",
+    "php",
+    "python",
+    "ruby",
+]
+
+OK_PLUGIN_ENABLED = _("The {name} integration has been enabled.")
+
+OK_PLUGIN_DISABLED = _("The {name} integration has been disabled.")
+
+OK_PLUGIN_SAVED = _("Configuration for the {name} integration has been saved.")
+
+WARN_SESSION_EXPIRED = "Your session has expired."  # TODO: translate this
+
+# Maximum length of a symbol
+MAX_SYM = 256
+
+# Known debug information file mimetypes
+KNOWN_DIF_FORMATS = {
+    "text/x-breakpad": "breakpad",
+    "application/x-mach-binary": "macho",
+    "application/x-elf-binary": "elf",
+    "application/x-dosexec": "pe",
+    "application/x-ms-pdb": "pdb",
+    "text/x-proguard+plain": "proguard",
+    "application/x-sentry-bundle+zip": "sourcebundle",
+}
+
+NATIVE_UNKNOWN_STRING = "<unknown>"
+
+# Maximum number of release files that can be "skipped" (i.e., maximum paginator offset)
+# inside release files API endpoints.
+# If this number is too large, it may cause problems because of inefficient
+# LIMIT-OFFSET database queries.
+# These problems should be solved after we implement artifact bundles workflow.
+MAX_RELEASE_FILES_OFFSET = 20000
+
+# to go from an integration id (in _platforms.json) to the platform
+# data, such as documentation url or humanized name.
+# example: java-logback -> {"type": "framework",
+#                           "link": "https://docs.getsentry.com/hosted/clients/java/modules/logback/",
+#                           "id": "java-logback",
+#                           "name": "Logback"}
+INTEGRATION_ID_TO_PLATFORM_DATA = {}
+
+
+# def _load_platform_data():
+#     INTEGRATION_ID_TO_PLATFORM_DATA.clear()
+#     data = load_doc("_platforms")
+
+#     if not data:
+#         return
+
+#     for platform in data["platforms"]:
+#         integrations = platform.pop("integrations")
+#         if integrations:
+#             for integration in integrations:
+#                 integration_id = integration.pop("id")
+#                 if integration["type"] != "language":
+#                     integration["language"] = platform["id"]
+#                 INTEGRATION_ID_TO_PLATFORM_DATA[integration_id] = integration
+
+
+# _load_platform_data()
+
+# special cases where the marketing slug differs from the integration id
+# (in _platforms.json). missing values (for example: "java") should assume
+# the marketing slug is the same as the integration id:
+# javascript, node, python, php, ruby, go, swift, objc, java, perl, elixir
+MARKETING_SLUG_TO_INTEGRATION_ID = {
+    "kotlin": "java",
+    "scala": "java",
+    "spring": "java",
+    "android": "java-android",
+    "react": "javascript-react",
+    "angular": "javascript-angular",
+    "angular2": "javascript-angular2",
+    "ember": "javascript-ember",
+    "backbone": "javascript-backbone",
+    "vue": "javascript-vue",
+    "express": "node-express",
+    "koa": "node-koa",
+    "django": "python-django",
+    "flask": "python-flask",
+    "sanic": "python-sanic",
+    "tornado": "python-tornado",
+    "celery": "python-celery",
+    "rq": "python-rq",
+    "bottle": "python-bottle",
+    "pythonawslambda": "python-awslambda",
+    "pyramid": "python-pyramid",
+    "pylons": "python-pylons",
+    "laravel": "php-laravel",
+    "symfony": "php-symfony2",
+    "rails": "ruby-rails",
+    "sinatra": "ruby-sinatra",
+    "dotnet": "csharp",
+}
+
+
+# to go from a marketing page slug like /for/android/ to the integration id
+# (in _platforms.json), for looking up documentation urls, etc.
+def get_integration_id_for_marketing_slug(slug):
+    """Return the integration id for a marketing slug, or None if unknown."""
+    try:
+        return MARKETING_SLUG_TO_INTEGRATION_ID[slug]
+    except KeyError:
+        # Not a special case: the slug may itself be a known integration id.
+        return slug if slug in INTEGRATION_ID_TO_PLATFORM_DATA else None
+
+
+# special cases where the integration sent with the SDK differs from
+# the integration id (in _platforms.json)
+# {PLATFORM: {INTEGRATION_SENT: integration_id, ...}, ...}
+PLATFORM_INTEGRATION_TO_INTEGRATION_ID = {
+    "java": {"java.util.logging": "java-logging"},
+    # TODO: add more special cases...
+}
+
+
+# to go from event data to the integration id (in _platforms.json),
+# for example an event like:
+# {"platform": "java",
+#  "sdk": {"name": "sentry-java",
+#          "integrations": ["java.util.logging"]}} -> java-logging
+def get_integration_id_for_event(platform, sdk_name, integrations):
+    """Resolve the integration id (as in _platforms.json) for an event.
+
+    Candidates are tried in order: every entry of ``integrations`` (first as a
+    special case registered for ``platform``, then as
+    "<platform>-<integration>"), the normalized SDK name, and finally the bare
+    platform. Returns None when nothing matches. ``integrations`` and
+    ``sdk_name`` may be empty or None.
+    """
+    special_cases = PLATFORM_INTEGRATION_TO_INTEGRATION_ID.get(platform, {})
+
+    for integration in integrations or ():
+        # check special cases
+        if integration in special_cases:
+            return special_cases[integration]
+
+        # try <platform>-<integration>, for example "java-log4j"
+        integration_id = "%s-%s" % (platform, integration)
+        if integration_id in INTEGRATION_ID_TO_PLATFORM_DATA:
+            return integration_id
+
+    # try sdk name, for example "sentry-java" -> "java" or "raven-java:log4j" -> "java-log4j"
+    # An event without an SDK name skips this step instead of failing on
+    # ``None.lower()``.
+    if sdk_name:
+        sdk_name = (
+            sdk_name.lower()
+            .replace("sentry-", "")
+            .replace("raven-", "")
+            .replace(":", "-")
+        )
+        if sdk_name in INTEGRATION_ID_TO_PLATFORM_DATA:
+            return sdk_name
+
+    # try platform name, for example "java"
+    if platform in INTEGRATION_ID_TO_PLATFORM_DATA:
+        return platform
+
+    return None
+
+
+class ObjectStatus:
+    """Integer status codes for stored objects."""
+
+    VISIBLE = 0
+    HIDDEN = 1
+    PENDING_DELETION = 2
+    DELETION_IN_PROGRESS = 3
+
+    # Same numeric values as VISIBLE and HIDDEN, under different names.
+    ACTIVE = 0
+    DISABLED = 1
+
+    @classmethod
+    def as_choices(cls):
+        """Return the ``(value, label)`` pairs, ordered by value."""
+        values = (
+            cls.ACTIVE,
+            cls.DISABLED,
+            cls.PENDING_DELETION,
+            cls.DELETION_IN_PROGRESS,
+        )
+        labels = ("active", "disabled", "pending_deletion", "deletion_in_progress")
+        return tuple(zip(values, labels))
+
+
+class SentryAppStatus:
+    """Status codes of a Sentry app together with their string labels."""
+
+    UNPUBLISHED = 0
+    PUBLISHED = 1
+    INTERNAL = 2
+    UNPUBLISHED_STR = "unpublished"
+    PUBLISHED_STR = "published"
+    INTERNAL_STR = "internal"
+
+    @classmethod
+    def as_choices(cls):
+        """Return the ``(value, label)`` pairs for every status."""
+        values = (cls.UNPUBLISHED, cls.PUBLISHED, cls.INTERNAL)
+        labels = (cls.UNPUBLISHED_STR, cls.PUBLISHED_STR, cls.INTERNAL_STR)
+        return tuple(zip(values, labels))
+
+    @classmethod
+    def as_str(cls, status):
+        """Return the label for ``status``, or None for an unknown status."""
+        known = (
+            (cls.UNPUBLISHED, cls.UNPUBLISHED_STR),
+            (cls.PUBLISHED, cls.PUBLISHED_STR),
+            (cls.INTERNAL, cls.INTERNAL_STR),
+        )
+        for value, label in known:
+            if status == value:
+                return label
+        return None
+
+
+class SentryAppInstallationStatus:
+    """Status codes of a Sentry app installation with their string labels."""
+
+    PENDING = 0
+    INSTALLED = 1
+    PENDING_STR = "pending"
+    INSTALLED_STR = "installed"
+
+    @classmethod
+    def as_choices(cls):
+        """Return the ``(value, label)`` pairs for every status."""
+        pending = (cls.PENDING, cls.PENDING_STR)
+        installed = (cls.INSTALLED, cls.INSTALLED_STR)
+        return (pending, installed)
+
+    @classmethod
+    def as_str(cls, status):
+        """Return the label for ``status``, or None for an unknown status."""
+        for value, label in (
+            (cls.PENDING, cls.PENDING_STR),
+            (cls.INSTALLED, cls.INSTALLED_STR),
+        ):
+            if status == value:
+                return label
+        return None
+
+
+StatsPeriod = namedtuple("StatsPeriod", ("segments", "interval"))
+
+LEGACY_RATE_LIMIT_OPTIONS = frozenset(
+    ("sentry:project-rate-limit", "sentry:account-rate-limit")
+)
+
+
+# We need to limit the range of valid timestamps of an event because that
+# timestamp is used to control data retention.
+MAX_SECS_IN_FUTURE = 60
+MAX_SECS_IN_PAST = 2592000  # 30 days
+ALLOWED_FUTURE_DELTA = timedelta(seconds=MAX_SECS_IN_FUTURE)
+
+# DEFAULT_STORE_NORMALIZER_ARGS = dict(
+#     geoip_lookup=rust_geoip,
+#     stacktrace_frames_hard_limit=settings.SENTRY_STACKTRACE_FRAMES_HARD_LIMIT,
+#     max_stacktrace_frames=settings.SENTRY_MAX_STACKTRACE_FRAMES,
+#     max_secs_in_future=MAX_SECS_IN_FUTURE,
+#     max_secs_in_past=MAX_SECS_IN_PAST,
+#     enable_trimming=True,
+# )
+
+INTERNAL_INTEGRATION_TOKEN_COUNT_MAX = 20
+
+ALL_ACCESS_PROJECTS = {-1}
--- a/sentry/culprit.py
+++ b/sentry/culprit.py
@@ -0,0 +1,74 @@
+"""
+This file implements the legacy culprit system.  The culprit at this point is
+just used as a fallback if no transaction is set.  When a transaction is set
+the culprit is overridden by the transaction value.
+
+Over time we want to fully phase out the culprit.  Until then this is the
+code that generates it.
+"""
+
+
+from sentry.constants import MAX_CULPRIT_LENGTH
+from sentry.utils.safe import get_path
+from sentry.utils.strings import truncatechars
+
+
+def generate_culprit(data):
+    """Derive the legacy culprit string for an event payload.
+
+    The culprit comes from the last stacktrace that has frames (taken from the
+    exceptions when the event has any, otherwise from the top-level
+    "stacktrace"). If that yields nothing, the request URL is used. The result
+    is truncated with ``MAX_CULPRIT_LENGTH``; an event whose last exception is
+    marked synthetic gets an empty culprit.
+    """
+    platform = data.get("platform")
+    exceptions = get_path(data, "exception", "values", filter=True)
+
+    if not exceptions:
+        stacktrace = data.get("stacktrace")
+        has_frames = stacktrace and stacktrace.get("frames")
+        stacktraces = [stacktrace] if has_frames else None
+    else:
+        # Synthetic events no longer get a culprit
+        last_exception = get_path(exceptions, -1)
+        if get_path(last_exception, "mechanism", "synthetic"):
+            return ""
+
+        stacktraces = [
+            exc["stacktrace"]
+            for exc in exceptions
+            if get_path(exc, "stacktrace", "frames")
+        ]
+
+    culprit = None
+    if stacktraces:
+        culprit = get_stacktrace_culprit(get_path(stacktraces, -1), platform=platform)
+
+    # Fall back to the request URL when the stacktrace gave nothing.
+    if not culprit and data.get("request"):
+        culprit = get_path(data, "request", "url")
+
+    return truncatechars(culprit or "", MAX_CULPRIT_LENGTH)
+
+
+def get_stacktrace_culprit(stacktrace, platform):
+    """Pick the culprit for a stacktrace, scanning from the last frame backwards.
+
+    The first in-app frame that produces a non-empty culprit wins. Otherwise
+    the culprit of the first non-in-app frame met during the scan is returned
+    (None when there is no such frame). Empty or None frames are skipped.
+    """
+    fallback = None
+    for frame in reversed(stacktrace["frames"]):
+        if not frame:
+            continue
+        if not frame.get("in_app"):
+            if fallback is None:
+                fallback = get_frame_culprit(frame, platform=platform)
+            continue
+        culprit = get_frame_culprit(frame, platform=platform)
+        if culprit:
+            return culprit
+    return fallback
+
+
+def get_frame_culprit(frame, platform):
+    """Format the culprit string for a single stack frame.
+
+    * objc / cocoa / native: the function name (or "?").
+    * javascript / node: "<function>(<module or filename>)".
+    * anything else: "<module or filename> in <function>".
+
+    Returns "" when a non-native frame has neither module nor filename.
+    """
+    # If this frame has a platform, we use it instead of the one that
+    # was passed in (as that one comes from the exception which might
+    # not necessarily be the same platform).
+    effective_platform = frame.get("platform") or platform
+    function = frame.get("function") or "?"
+
+    if effective_platform in ("objc", "cocoa", "native"):
+        return function
+
+    fileloc = frame.get("module") or frame.get("filename")
+    if not fileloc:
+        return ""
+
+    if effective_platform in ("javascript", "node"):
+        # function and fileloc might be unicode here, so let it coerce
+        # to a unicode string if needed.
+        return "%s(%s)" % (function, fileloc)
+
+    return "%s in %s" % (fileloc, function)
--- a/sentry/eventtypes/__init__.py
+++ b/sentry/eventtypes/__init__.py
--- a/sentry/eventtypes/base.py
+++ b/sentry/eventtypes/base.py
@@ -0,0 +1,48 @@
+from django.utils.encoding import force_str
+
+from sentry.culprit import generate_culprit
+from sentry.utils.safe import get_path
+from sentry.utils.strings import strip, truncatechars
+
+
+class BaseEvent:
+    """Interface of an event type: metadata, title and location of an event.
+
+    Subclasses must implement ``get_metadata`` and ``get_title``;
+    ``get_location`` defaults to "no location".
+    """
+
+    id = None
+
+    def get_metadata(self, data):
+        """Extract the metadata for event ``data``; must be overridden."""
+        raise NotImplementedError()
+
+    def get_title(self, metadata):
+        """Build the title from ``metadata``; must be overridden."""
+        raise NotImplementedError()
+
+    def get_location(self, data):
+        """Return the location for event ``data``; None by default."""
+        return None
+
+
+class DefaultEvent(BaseEvent):
+    """Event type whose title comes from the event's log message."""
+
+    key = "default"
+
+    def get_metadata(self, data):
+        """Return ``{"title": ...}`` built from the first line of the message.
+
+        The message is the first non-empty value among logentry.formatted,
+        logentry.message, message.formatted and message. The title is capped
+        via ``truncatechars(..., 100)`` and is "<unlabeled event>" when there
+        is no message.
+        """
+        raw_message = (
+            get_path(data, "logentry", "formatted")
+            or get_path(data, "logentry", "message")
+            or get_path(data, "message", "formatted")
+            or get_path(data, "message")
+        )
+        message = strip(raw_message)
+
+        if not message:
+            return {"title": "<unlabeled event>"}
+
+        first_line = message.splitlines()[0]
+        return {"title": truncatechars(first_line, 100)}
+
+    def get_title(self, metadata):
+        """Return the metadata's title, or "<untitled>" when it has none."""
+        title = metadata.get("title")
+        return title if title else "<untitled>"
+
+    def get_location(self, data):
+        """Return culprit, transaction or a generated culprit (first non-empty)."""
+        location = (
+            data.get("culprit")
+            or data.get("transaction")
+            or generate_culprit(data)
+            or ""
+        )
+        return force_str(location)
--- a/sentry/eventtypes/error.py
+++ b/sentry/eventtypes/error.py
@@ -0,0 +1,75 @@
+from django.utils.encoding import force_str
+
+from sentry.culprit import generate_culprit
+from sentry.stacktraces.functions import get_function_name_for_frame
+from sentry.stacktraces.processing import get_crash_frame_from_event_data
+from sentry.utils.safe import get_path, trim, truncatechars
+
+from .base import BaseEvent
+
+
+def get_crash_location(data):
+    """Return ``(filename or abs_path, function name)`` of the crash frame.
+
+    Frames whose "function" is missing, "<redacted>" or "<unknown>" are not
+    considered. Returns None when no frame qualifies.
+    """
+
+    def has_usable_function(frame):
+        return frame.get("function") not in (None, "<redacted>", "<unknown>")
+
+    frame = get_crash_frame_from_event_data(data, frame_filter=has_usable_function)
+    if frame is None:
+        return None
+
+    function = get_function_name_for_frame(frame, data.get("platform"))
+    location = frame.get("filename") or frame.get("abs_path")
+    return location, function
+
+
+class ErrorEvent(BaseEvent):
+    """Event type whose title comes from the event's last exception."""
+
+    key = "error"
+
+    def get_metadata(self, data):
+        """Collect value, type and crash location of the last exception.
+
+        Returns ``{}`` when the event has no usable exception. Note that a
+        bare list under ``data["exception"]`` is rewritten in place to the
+        documented ``{"values": [...]}`` form.
+        """
+        # Check for undocumented interface where exception has no values. Go SDK does this.
+        # https://docs.sentry.io/development/sdk-dev/event-payloads/exception/
+        # exception can be a list instead of a dictionary
+        raw_exception = data.get("exception")
+        if isinstance(raw_exception, list):
+            if not raw_exception:
+                return {}
+            # Force documented interface
+            data["exception"] = {"values": raw_exception}
+
+        exception = get_path(data, "exception", "values", -1)
+        if not exception:
+            return {}
+
+        loc = get_crash_location(data)
+        metadata = {"value": trim(get_path(exception, "value", default=""), 1024)}
+
+        # If the exception mechanism indicates a synthetic exception we do not
+        # want to record the type and value into the metadata.
+        is_synthetic = get_path(exception, "mechanism", "synthetic")
+        if not is_synthetic:
+            metadata["type"] = trim(get_path(exception, "type", default="Error"), 128)
+
+        # Attach crash location if available
+        if loc is not None:
+            for key, found in zip(("filename", "function"), loc):
+                if found:
+                    metadata[key] = found
+
+        return metadata
+
+    def get_title(self, metadata):
+        """Return "<type>: <first line of value>", degrading gracefully.
+
+        Without a type the function name (or "<unknown>") is used; without a
+        value the bare type is returned.
+        """
+        ty = metadata.get("type")
+        if ty is None:
+            return metadata.get("function") or "<unknown>"
+
+        value = metadata.get("value")
+        if not value:
+            return ty
+
+        try:
+            summary = truncatechars(value.splitlines()[0])
+        except AttributeError:
+            # GlitchTip modification
+            # Exception value is specified as a string, sometimes it isn't. This is a fallback.
+            summary = str(value)
+        return "{}: {}".format(ty, summary)
+
+    def get_location(self, data):
+        """Return culprit, transaction or a generated culprit (first non-empty)."""
+        location = (
+            data.get("culprit")
+            or data.get("transaction")
+            or generate_culprit(data)
+            or ""
+        )
+        return force_str(location)
--- a/sentry/stacktraces/functions.py
+++ b/sentry/stacktraces/functions.py
@@ -0,0 +1,228 @@
+import re
+
+from .platform import get_behavior_family_for_platform
+
+_windecl_hash = re.compile(r"^@?(.*?)@[0-9]+$")
+_rust_hash = re.compile(r"::h[a-z0-9]{16}$")
+_cpp_trailer_re = re.compile(r"(\bconst\b|&)$")
+_rust_blanket_re = re.compile(r"^([A-Z] as )")
+_lambda_re = re.compile(
+    r"""(?x)
+    # gcc
+    (?:
+        \{
+            lambda\(.*?\)\#\d+
+        \}
+    ) |
+    # msvc
+    (?:
+        \blambda_[a-f0-9]{32}\b
+    ) |
+    # clang
+    (?:
+        \$_\d+\b
+    )
+    """
+)
+_anon_namespace_re = re.compile(
+    r"""(?x)
+    \?A0x[a-f0-9]{8}::
+    """
+)
+
+PAIRS = {"(": ")", "{": "}", "[": "]", "<": ">"}
+
+
+def replace_enclosed_string(s, start, end, replacement=None):
+    """Remove or replace every outermost ``start``...``end`` group in ``s``.
+
+    Text outside the groups is kept. With ``replacement=None`` the groups are
+    dropped; a string replacement is inserted for each group; a callable is
+    called as ``replacement(inner_text, index_of_start)`` and its result is
+    inserted. ``s`` is returned unchanged when it does not contain ``start``.
+    """
+    if start not in s:
+        return s
+
+    depth = 0
+    opened_at = None
+    out = []
+
+    for pos, ch in enumerate(s):
+        if ch == start:
+            if depth == 0:
+                opened_at = pos
+            depth += 1
+            continue
+
+        if ch == end:
+            depth -= 1
+            if depth == 0 and replacement is not None:
+                if callable(replacement):
+                    out.append(replacement(s[opened_at + 1 : pos], opened_at))
+                else:
+                    out.append(replacement)
+            continue
+
+        # Ordinary characters survive only outside of any group.
+        if depth == 0:
+            out.append(ch)
+
+    return "".join(out)
+
+
+def split_func_tokens(s):
+    """Split ``s`` on whitespace that is not inside a bracket pair.
+
+    Bracketed groups (see ``PAIRS``) stay attached to the token they occur
+    in, including any whitespace inside them; for example ``"f(a b) c"``
+    yields ``["f(a b)", "c"]``. Returns the list of token strings.
+    """
+    buf = []  # pieces of the token currently being assembled
+    rv = []  # finished tokens, each still a list of pieces
+    stack = []  # closing brackets still expected, innermost last
+    end = 0  # index in ``s`` up to which text has been consumed
+
+    for idx, char in enumerate(s):
+        if char in PAIRS:
+            # Opening bracket: remember which closer ends this group.
+            stack.append(PAIRS[char])
+        elif stack and char == stack[-1]:
+            stack.pop()
+            if not stack:
+                # Outermost group closed: add it to the token in one piece.
+                buf.append(s[end : idx + 1])
+                end = idx + 1
+        elif not stack:
+            if char.isspace():
+                # Top-level whitespace ends the current token (if any).
+                if buf:
+                    rv.append(buf)
+                buf = []
+            else:
+                buf.append(s[end : idx + 1])
+            end = idx + 1
+
+    if buf:
+        rv.append(buf)
+
+    return ["".join(x) for x in rv]
+
+
+def trim_function_name(function, platform, normalize_lambdas=True):
+    """Given a function value from the frame's function attribute this returns
+    a trimmed version that can be stored in `function_name`.  This is only used
+    if the client did not supply a value itself already.
+
+    Only platforms of the "native" behavior family are trimmed; for every
+    other platform, and for the placeholders "<redacted>" / "<unknown>", the
+    value is returned unchanged.  ``normalize_lambdas`` controls whether
+    compiler-specific lambda names and MSVC anonymous-namespace hashes are
+    normalized.
+    """
+    if get_behavior_family_for_platform(platform) != "native":
+        return function
+    if function in ("<redacted>", "<unknown>"):
+        return function
+
+    original_function = function
+    function = function.strip()
+
+    # Ensure we don't operate on objc functions
+    if function.startswith(("[", "+[", "-[")):
+        return function
+
+    # Chop off C++ trailers
+    while True:
+        match = _cpp_trailer_re.search(function)
+        if match is None:
+            break
+        function = function[: match.start()].rstrip()
+
+    # Because operator<< really screws with our balancing, so let's work
+    # around that by replacing it with a character we do not observe in
+    # `split_func_tokens` or `replace_enclosed_string`.
+    function = (
+        function.replace("operator<<", "operator⟨⟨")
+        .replace("operator<", "operator⟨")
+        .replace("operator()", "operator◯")
+        .replace(" -> ", " ⟿ ")
+        .replace("`anonymous namespace'", "〔anonymousnamespace〕")
+    )
+
+    # normalize C++ lambdas.  This is necessary because different
+    # compilers use different rules for how to name a lambda and they are
+    # all quite inconsistent.  This does not give us perfect answers to
+    # this problem but closer.  In particular msvc will call a lambda
+    # something like `lambda_deadbeefeefffeeffeeff` whereas clang for
+    # instance will name it `main::$_0` which will tell us in which outer
+    # function it was declared.
+    if normalize_lambdas:
+        function = _lambda_re.sub("lambda", function)
+
+    # Normalize MSVC anonymous namespaces from inline functions.  For inline
+    # functions, the compiler inconsistently renders anonymous namespaces with
+    # their hash.  For regular functions,  "`anonymous namespace'" is used.
+    # The regular expression matches the trailing "::" to avoid accidental
+    # replacement in mangled function names.
+    if normalize_lambdas:
+        function = _anon_namespace_re.sub("〔anonymousnamespace〕::", function)
+
+    # Remove the arguments if there is one.
+    # (``start`` is part of the replacement-callback signature; unused here.)
+    def process_args(value, start):
+        value = value.strip()
+        if value in ("anonymous namespace", "operator"):
+            return "(%s)" % value
+        return ""
+
+    function = replace_enclosed_string(function, "(", ")", process_args)
+
+    # Resolve generic types, but special case rust which uses things like
+    # <Foo as Bar>::baz to denote traits.
+    def process_generics(value, start):
+        # Special case for lambdas
+        if value == "lambda" or _lambda_re.match(value):
+            return "<%s>" % value
+
+        # A group that does not open the string is collapsed to a placeholder.
+        if start > 0:
+            return "<T>"
+
+        # Rust special cases
+        value = _rust_blanket_re.sub("", value)  # prefer trait for blanket impls
+        value = replace_enclosed_string(value, "<", ">", process_generics)
+        return value.split(" as ", 1)[0]
+
+    function = replace_enclosed_string(function, "<", ">", process_generics)
+
+    tokens = split_func_tokens(function)
+
+    # MSVC demangles generic operator functions with a space between the
+    # function name and the generics. Ensure that those two components both end
+    # up in the function name.
+    if len(tokens) > 1 and tokens[-1] == "<T>":
+        tokens.pop()
+        tokens[-1] += " <T>"
+
+    # find the token which is the function name.  Since we chopped off C++
+    # trailers there are only two cases we care about: the token left to
+    # the -> return marker which is for instance used in Swift and if that
+    # is not found, the last token in the list.
+    #
+    # ["unsigned", "int", "whatever"] -> whatever
+    # ["@objc", "whatever", "->", "int"] -> whatever
+    try:
+        func_token = tokens[tokens.index("⟿") - 1]
+    except ValueError:
+        if tokens:
+            func_token = tokens[-1]
+        else:
+            func_token = None
+
+    if func_token:
+        # Undo the placeholder substitutions made before tokenizing.
+        function = (
+            func_token.replace("⟨", "<")
+            .replace("◯", "()")
+            .replace(" ⟿ ", " -> ")
+            .replace("〔anonymousnamespace〕", "`anonymous namespace'")
+        )
+
+    # This really should never happen
+    else:
+        function = original_function
+
+    # trim off rust markers
+    function = _rust_hash.sub("", function)
+
+    # trim off windows decl markers
+    return _windecl_hash.sub("\\1", function)
+
+
+def get_function_name_for_frame(frame, platform=None):
+    """Return the function name to display for a frame.
+
+    ``frame`` is a dictionary or an object exposing ``get_raw_data()``. When
+    the frame has a "raw_function", its "function" is returned untouched;
+    otherwise "function" is trimmed for the frame's platform (falling back to
+    ``platform``). Returns None when the frame has no function.
+    """
+    if hasattr(frame, "get_raw_data"):
+        frame = frame.get_raw_data()
+
+    function = frame.get("function")
+
+    # if there is a raw function, prioritize the function unchanged
+    if frame.get("raw_function"):
+        return function
+
+    # otherwise trim the function on demand
+    if not function:
+        return None
+    return trim_function_name(function, frame.get("platform") or platform)
--- a/sentry/stacktraces/platform.py
+++ b/sentry/stacktraces/platform.py
@@ -0,0 +1,10 @@
+NATIVE_PLATFORMS = frozenset(("objc", "cocoa", "swift", "native", "c"))
+JAVASCRIPT_PLATFORMS = frozenset(("javascript", "node"))
+
+
+def get_behavior_family_for_platform(platform):
+    """Classify ``platform`` as "native", "javascript" or "other"."""
+    families = (
+        ("native", NATIVE_PLATFORMS),
+        ("javascript", JAVASCRIPT_PLATFORMS),
+    )
+    for family, platforms in families:
+        if platform in platforms:
+            return family
+    return "other"
--- a/sentry/stacktraces/processing.py
+++ b/sentry/stacktraces/processing.py
@@ -0,0 +1,26 @@
+from sentry.utils.safe import get_path
+
+
+def get_crash_frame_from_event_data(data, frame_filter=None):
+    """Pick the frame that best represents where the event crashed.
+
+    Frames are taken from the last exception's stacktrace, else the top-level
+    stacktrace, else the stacktrace of the only thread (when there is exactly
+    one). Scanning from the last frame backwards, the first in-app frame that
+    passes ``frame_filter`` is returned; failing that, the first frame that
+    passed the filter. Returns None when nothing qualifies.
+    """
+    frames = get_path(data, "exception", "values", -1, "stacktrace", "frames")
+    if not frames:
+        frames = get_path(data, "stacktrace", "frames")
+    if not frames:
+        threads = get_path(data, "threads", "values")
+        if threads and len(threads) == 1:
+            frames = get_path(threads, 0, "stacktrace", "frames")
+
+    fallback = None
+    for frame in reversed(frames or ()):
+        if frame is None:
+            continue
+        if frame_filter is not None and not frame_filter(frame):
+            continue
+        if frame.get("in_app"):
+            return frame
+        if fallback is None:
+            fallback = frame
+
+    return fallback if fallback else None
--- a/sentry/utils/safe.py
+++ b/sentry/utils/safe.py
@@ -0,0 +1,95 @@
+import collections
+import json
+
+from django.utils.encoding import force_str
+
+from sentry.utils.strings import truncatechars
+
+SENTRY_MAX_VARIABLE_SIZE = 512
+
+
+def get_path(data, *path, **kwargs):
+    """
+    Safely resolves data from a recursive data structure. A value is only
+    returned if the full path exists, otherwise ``None`` is returned.
+    If the ``default`` argument is specified, it is returned instead of ``None``.
+    If the ``filter`` argument is specified and the value is a list, it is
+    filtered with the given callback. Alternatively, pass ``True`` as filter to
+    only filter ``None`` values.
+
+    Mappings are traversed by key and lists/tuples by integer index (negative
+    indexes count from the end). Any other combination, such as a string key
+    applied to a list, counts as a missing path. Raises ``TypeError`` for
+    keyword arguments other than ``default`` and ``filter``.
+    """
+    default = kwargs.pop("default", None)
+    f = kwargs.pop("filter", None)
+    for k in kwargs:
+        raise TypeError("get_path() got an undefined keyword argument '%s'" % k)
+
+    for p in path:
+        if isinstance(data, collections.abc.Mapping) and p in data:
+            data = data[p]
+        elif (
+            isinstance(data, (list, tuple))
+            # Only integers can index a sequence; without this check a string
+            # key would make the range comparison below raise TypeError.
+            and isinstance(p, int)
+            and -len(data) <= p < len(data)
+        ):
+            data = data[p]
+        else:
+            return default
+
+    if f and data and isinstance(data, (list, tuple)):
+        data = list(filter((lambda x: x is not None) if f is True else f, data))
+
+    return data if data is not None else default
+
+
+def trim(
+    value,
+    max_size=SENTRY_MAX_VARIABLE_SIZE,
+    max_depth=6,
+    object_hook=None,
+    _depth=0,
+    _size=0,
+    **kwargs
+):
+    """
+    Truncates a value to roughly ``max_size`` characters.
+    The method of truncation depends on the type of value:
+
+    * ``dict``: values are trimmed recursively in sorted key order; no more
+      keys are added once the running size reaches ``max_size``.
+    * ``list`` / ``tuple``: items are trimmed recursively with the same
+      cut-off; a tuple stays a tuple.
+    * ``str``: cut with ``truncatechars`` to the remaining size budget.
+    * anything else is returned as is.
+
+    Values nested deeper than ``max_depth`` are serialized with ``json.dumps``
+    (unless already a string) and trimmed as a string. ``object_hook``, when
+    given, is applied to each trimmed result, except for results produced by
+    that depth-limit branch. ``_depth`` and ``_size`` carry the recursion
+    state; additional keyword arguments are accepted and ignored.
+    """
+    # Arguments passed along to the recursive call for every nested value.
+    options = {
+        "max_depth": max_depth,
+        "max_size": max_size,
+        "object_hook": object_hook,
+        "_depth": _depth + 1,
+    }
+
+    if _depth > max_depth:
+        if not isinstance(value, str):
+            value = json.dumps(value)
+        # Trimmed as a plain string; this call does not pass object_hook on.
+        return trim(value, _size=_size, max_size=max_size)
+
+    elif isinstance(value, dict):
+        result = {}
+        _size += 2
+        for k in sorted(value.keys()):
+            v = value[k]
+            trim_v = trim(v, _size=_size, **options)
+            result[k] = trim_v
+            # Size is estimated from the text form of the trimmed value.
+            _size += len(force_str(trim_v)) + 1
+            if _size >= max_size:
+                break
+
+    elif isinstance(value, (list, tuple)):
+        result = []
+        _size += 2
+        for v in value:
+            trim_v = trim(v, _size=_size, **options)
+            result.append(trim_v)
+            _size += len(force_str(trim_v))
+            if _size >= max_size:
+                break
+        if isinstance(value, tuple):
+            result = tuple(result)
+
+    elif isinstance(value, str):
+        # Only the budget left after what has already been counted is used.
+        result = truncatechars(value, max_size - _size)
+
+    else:
+        result = value
+
+    if object_hook is None:
+        return result
+    return object_hook(result)
--- a/sentry/utils/strings.py
+++ b/sentry/utils/strings.py
@@ -0,0 +1,12 @@
+from django.utils.encoding import smart_str
+
+
+def truncatechars(value: str, chars=100):
+    """Truncate string and append …
+
+    Strings of at most ``chars`` characters are returned unchanged; longer
+    ones are cut to ``chars`` characters and then get the ellipsis appended.
+    """
+    if len(value) > chars:
+        return value[:chars] + "…"
+    return value
+
+
+def strip(value):
+    """Return ``value`` as text without surrounding whitespace ("" if falsy)."""
+    return smart_str(value).strip() if value else ""