Mirror of https://github.com/bugsink/bugsink.git (synced 2025-12-19 19:49:44 -06:00)
@@ -42,7 +42,6 @@ DEFAULTS = {
|
||||
"TEAM_CREATION": CB_MEMBERS, # who can create new teams. default: members, which means "any member of the site"
|
||||
|
||||
# System inner workings:
|
||||
"DIGEST_IMMEDIATELY": True,
|
||||
"VALIDATE_ON_DIGEST": "none", # other legal values are "warn" and "strict"
|
||||
"KEEP_ENVELOPES": 0, # set to a number to store that many; 0 means "store none". This is for debugging.
|
||||
"API_LOG_UNIMPLEMENTED_CALLS": False, # if True, log unimplemented API calls; see #153
|
||||
@@ -50,6 +49,7 @@ DEFAULTS = {
|
||||
|
||||
# MAX* below mirror the (current) values for the Sentry Relay
|
||||
"MAX_EVENT_SIZE": _MEBIBYTE,
|
||||
"MAX_ATTACHMENT_SIZE": 100 * _MEBIBYTE,
|
||||
"MAX_EVENT_COMPRESSED_SIZE": 200 * _KIBIBYTE, # Note: this only applies to the deprecated "store" endpoint.
|
||||
"MAX_ENVELOPE_SIZE": 100 * _MEBIBYTE,
|
||||
"MAX_ENVELOPE_COMPRESSED_SIZE": 20 * _MEBIBYTE,
|
||||
@@ -73,6 +73,9 @@ DEFAULTS = {
|
||||
# Security:
|
||||
"MINIMIZE_INFORMATION_EXPOSURE": False,
|
||||
"PHONEHOME": True,
|
||||
|
||||
# Feature flags:
|
||||
"FEATURE_MINIDUMPS": False, # minidumps are experimental/early-stage and likely a DOS-magnet; disabled by default
|
||||
}
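For orientation, a minimal sketch of how these defaults are consumed elsewhere (both access styles appear later in this diff, in bugsink/streams.py and ingest/views.py); the wrapper function itself is illustrative only:

from bugsink.app_settings import get_settings

def example_settings_lookup():
    settings = get_settings()
    # attribute access for flags, key access for size limits; both styles occur below
    return settings.FEATURE_MINIDUMPS, settings["MAX_EVENT_SIZE"]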
@@ -52,7 +52,7 @@ eat_your_own_dogfood(SENTRY_DSN)
|
||||
|
||||
|
||||
# Our Docker image is hard-coded to run with snappea in the background; this means we hard-code (as opposed to reading
|
||||
# them from the env) certain variables: TASK_ALWAYS_EAGER, WORKAHOLIC and DIGEST_IMMEDIATELY.
# them from the env) certain variables: TASK_ALWAYS_EAGER and WORKAHOLIC
|
||||
SNAPPEA = {
|
||||
"TASK_ALWAYS_EAGER": False, # hard-coded, corresponds to Docker setup
|
||||
"WORKAHOLIC": True, # hard-coded, corresponds to Docker setup
|
||||
@@ -136,8 +136,6 @@ CB_NOBODY = "CB_NOBODY"
|
||||
|
||||
|
||||
BUGSINK = {
|
||||
"DIGEST_IMMEDIATELY": False, # hard-coded, corresponds to Docker setup
|
||||
|
||||
# The URL where the Bugsink instance is hosted. This is used in the email notifications and to construct DSNs.
|
||||
"BASE_URL": os.getenv("BASE_URL", f"http://localhost:{_PORT}"), # no trailing slash
|
||||
|
||||
@@ -181,6 +179,9 @@ BUGSINK = {
|
||||
os.getenv("MINIMIZE_INFORMATION_EXPOSURE", "false").lower() in ("true", "1", "yes"),
|
||||
|
||||
"PHONEHOME": os.getenv("PHONEHOME", "true").lower() in ("true", "1", "yes"),
|
||||
|
||||
# Feature flags
|
||||
"FEATURE_MINIDUMPS": os.getenv("PHONEHOME", "false").lower() in ("true", "1", "yes"),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -68,11 +68,6 @@ BUGSINK = {
|
||||
# you can customize this as e.g. "My Bugsink" or "Bugsink for My Company"
|
||||
# "SITE_TITLE": "Bugsink",
|
||||
|
||||
# When running locally, it is recommended to configure Bugsink to digest events immediately. (This is basically
|
||||
# implied by the "TASK_ALWAYS_EAGER" setting above, but setting DIGEST_IMMEDIATELY to True removes one more step
|
||||
# from the process.)
|
||||
"DIGEST_IMMEDIATELY": True,
|
||||
|
||||
# You are licenced to run Bugsink locally in single-user mode. By changing the settings below, you may open the door
|
||||
# to more uses; make sure to buy a licence if you do.
|
||||
"SINGLE_USER": True,
|
||||
|
||||
@@ -102,10 +102,6 @@ BUGSINK = {
|
||||
"SINGLE_TEAM": False,
|
||||
"TEAM_CREATION": CB_MEMBERS, # who can create new teams. default: members, which means "any member of the site"
|
||||
|
||||
# In the single-server production setup, we do not digest events immediately, but instead offload this to Snappea.
# This ensures a more responsive and reliable server when there are peaks in event load.
|
||||
"DIGEST_IMMEDIATELY": False,
|
||||
|
||||
# "MAX_EVENT_SIZE": _MEBIBYTE,
|
||||
# "MAX_EVENT_COMPRESSED_SIZE": 200 * _KIBIBYTE,
|
||||
# "MAX_ENVELOPE_SIZE": 100 * _MEBIBYTE,
|
||||
|
||||
@@ -9,11 +9,51 @@ from django.utils.translation import get_supported_language_variant
|
||||
from django.utils.translation.trans_real import parse_accept_lang_header
|
||||
from django.utils import translation
|
||||
from django.urls import get_script_prefix
|
||||
from django.http import HttpResponseBadRequest
|
||||
|
||||
|
||||
performance_logger = logging.getLogger("bugsink.performance.views")
|
||||
|
||||
|
||||
class ContentEncodingCheckMiddleware:
|
||||
"""
|
||||
We don't just globally interpret Content-Encoding for all views since:
|
||||
|
||||
1. it increases our attack surface (or forces us to reason about how it doesn't)
2. it forces us to think about the interplay of Django's POST/FILES handling and maximums (DATA_UPLOAD_MAX_MEMORY_SIZE)
   and our own maximums and handling.
3. the various maximums for reading from streaming requests are per-view (underlying data-type) anyway.
|
||||
|
||||
Instead, the only global thing we do is "fail explicitly".
|
||||
"""
|
||||
|
||||
# NOTE: once this list becomes long, we could switch to a per-view decorator (with the maximum bytes as a value)
|
||||
SUPPORTED_VIEWS = [
|
||||
"ingest-store",
|
||||
"ingest-envelope",
|
||||
"ingest-minidump",
|
||||
|
||||
"api_catch_all",
|
||||
]
|
||||
|
||||
def __init__(self, get_response):
|
||||
self.get_response = get_response
|
||||
|
||||
def __call__(self, request):
|
||||
return self.get_response(request)
|
||||
|
||||
def process_view(self, request, view_func, view_args, view_kwargs):
|
||||
if request.resolver_match:
|
||||
view_name = request.resolver_match.view_name
|
||||
else:
|
||||
view_name = "[unknown]"
|
||||
|
||||
if "HTTP_CONTENT_ENCODING" in request.META and view_name not in self.SUPPORTED_VIEWS:
|
||||
return HttpResponseBadRequest(f"Content-Encoding handling is not supported for endpoint `{view_name}`")
|
||||
|
||||
return None # proceed normally
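The NOTE above mentions switching to a per-view decorator once SUPPORTED_VIEWS grows; a purely hypothetical sketch of that idea (none of these names exist in the codebase):

def accepts_content_encoding(view_func):
    # hypothetical marker; process_view() could then check
    # getattr(view_func, "_accepts_content_encoding", False) instead of SUPPORTED_VIEWS
    view_func._accepts_content_encoding = True
    return view_func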
class DisallowChunkedMiddleware:
|
||||
def __init__(self, get_response):
|
||||
self.get_response = get_response
|
||||
|
||||
@@ -129,6 +129,7 @@ AUTH_USER_MODEL = "users.User"
|
||||
TAILWIND_APP_NAME = 'theme'
|
||||
|
||||
MIDDLEWARE = [
|
||||
"bugsink.middleware.ContentEncodingCheckMiddleware",
|
||||
'bugsink.middleware.SetRemoteAddrMiddleware',
|
||||
'bugsink.middleware.DisallowChunkedMiddleware',
|
||||
'django.middleware.security.SecurityMiddleware',
|
||||
|
||||
@@ -83,8 +83,6 @@ SERVER_EMAIL = DEFAULT_FROM_EMAIL = 'Klaas van Schelven <klaas@bugsink.com>'
|
||||
|
||||
|
||||
BUGSINK = {
|
||||
"DIGEST_IMMEDIATELY": False,
|
||||
|
||||
# "MAX_EVENT_SIZE": _MEBIBYTE,
|
||||
# "MAX_EVENT_COMPRESSED_SIZE": 200 * _KIBIBYTE,
|
||||
# "MAX_ENVELOPE_SIZE": 100 * _MEBIBYTE,
|
||||
@@ -111,6 +109,9 @@ BUGSINK = {
|
||||
"MAX_EMAILS_PER_MONTH": 10, # for development: a thing to tune if you want to the the quota system
|
||||
|
||||
"KEEP_ARTIFACT_BUNDLES": True, # in development: useful to preserve sourcemap uploads
|
||||
|
||||
# in development we want optional features enabled to [1] play with them and [2] have the tests work
|
||||
"FEATURE_MINIDUMPS": True,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import io
|
||||
import brotli
|
||||
|
||||
from bugsink.app_settings import get_settings
|
||||
from bugsink.utils import assert_
|
||||
|
||||
|
||||
DEFAULT_CHUNK_SIZE = 8 * 1024
|
||||
@@ -119,22 +120,65 @@ class GeneratorReader:
|
||||
del self.buffer[:size]
|
||||
return result
|
||||
|
||||
def readline(self, size=-1):
|
||||
newline_index = self.buffer.find(b"\n")
|
||||
while newline_index == -1:
|
||||
chunk = self.read(DEFAULT_CHUNK_SIZE)
|
||||
if not chunk:
|
||||
break
|
||||
self.buffer.extend(chunk)
|
||||
newline_index = self.buffer.find(b"\n")
|
||||
|
||||
if newline_index != -1:
|
||||
end = newline_index + 1
|
||||
else:
|
||||
end = len(self.buffer)
|
||||
|
||||
if size >= 0:
|
||||
end = min(end, size)
|
||||
|
||||
result = bytes(self.buffer[:end])
|
||||
del self.buffer[:end]
|
||||
return result
|
||||
|
||||
|
||||
def content_encoding_reader(request):
|
||||
encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower()
|
||||
|
||||
if encoding == "gzip":
|
||||
return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_GZIP), bad_request_exceptions=(zlib.error,))
|
||||
return GeneratorReader(
|
||||
zlib_generator(request._stream, WBITS_PARAM_FOR_GZIP),
|
||||
bad_request_exceptions=(zlib.error,),
|
||||
)
|
||||
|
||||
if encoding == "deflate":
|
||||
return GeneratorReader(zlib_generator(request, WBITS_PARAM_FOR_DEFLATE), bad_request_exceptions=(zlib.error,))
|
||||
return GeneratorReader(
|
||||
zlib_generator(request._stream, WBITS_PARAM_FOR_DEFLATE),
|
||||
bad_request_exceptions=(zlib.error,)
|
||||
)
|
||||
|
||||
if encoding == "br":
|
||||
return GeneratorReader(brotli_generator(request), bad_request_exceptions=(brotli.error, BrotliError))
|
||||
return GeneratorReader(
|
||||
brotli_generator(request._stream),
|
||||
bad_request_exceptions=(brotli.error, BrotliError)
|
||||
)
|
||||
|
||||
return request
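Typical chaining, exactly as used by IngestEventAPIView later in this diff: cap the compressed size first, wrap with the decompressing reader, then cap the decompressed size (the string arguments are settings names resolved by MaxDataReader below):

event_data_bytes = MaxDataReader(
    "MAX_EVENT_SIZE", content_encoding_reader(MaxDataReader("MAX_EVENT_COMPRESSED_SIZE", request))).read()
event_data = json.loads(event_data_bytes)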
def handle_request_content_encoding(request, max_length):
|
||||
"""Turns a request w/ Content-Encoding into an unpacked equivalent; for further "regular" (POST, FILES) handling
|
||||
by Django.
|
||||
"""
|
||||
|
||||
encoding = request.META.get("HTTP_CONTENT_ENCODING", "").lower()
|
||||
if encoding in ["gzip", "deflate", "br"]:
|
||||
assert_(not request._read_started)
|
||||
request._stream = MaxDataReader(max_length, content_encoding_reader(request))
|
||||
|
||||
request.META["CONTENT_LENGTH"] = str(pow(2, 32) - 1) # large enough (we can't predict the decompressed value)
|
||||
request.META.pop("HTTP_CONTENT_ENCODING") # the resulting request is no longer encoded
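Usage as seen in files/views.py later in this diff; after the call, Django's regular POST/FILES machinery parses the decompressed stream:

MAX_API_CATCH_ALL_SIZE = 1_000_000
handle_request_content_encoding(request, MAX_API_CATCH_ALL_SIZE)
# request.POST, request.FILES and request.body now see the unpacked payload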
def compress_with_zlib(input_stream, wbits, chunk_size=DEFAULT_CHUNK_SIZE):
|
||||
# mostly useful for testing (compress-decompress cycles)
|
||||
|
||||
@@ -158,7 +202,7 @@ class MaxDataReader:
|
||||
self.bytes_read = 0
|
||||
self.stream = stream
|
||||
|
||||
if isinstance(max_length, str): # reusing this is a bit of a hack, but leads to readable code at usage
|
||||
if isinstance(max_length, str): # support for settings-name max_length makes both the code and errors better
|
||||
self.max_length = get_settings()[max_length]
|
||||
self.reason = "%s: %s" % (max_length, self.max_length)
|
||||
else:
|
||||
@@ -187,7 +231,7 @@ class MaxDataWriter:
|
||||
self.bytes_written = 0
|
||||
self.stream = stream
|
||||
|
||||
if isinstance(max_length, str): # reusing this is a bit of a hack, but leads to readable code at usage
|
||||
if isinstance(max_length, str): # support for settings-name max_length makes both the code and errors better
|
||||
self.max_length = get_settings()[max_length]
|
||||
self.reason = "%s: %s" % (max_length, self.max_length)
|
||||
else:
|
||||
@@ -212,3 +256,12 @@ class NullWriter:
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
|
||||
class UnclosableBytesIO(io.BytesIO):
|
||||
"""Intentionally does nothing on-close: BytesIO normally discards its buffer on .close(), breaking .getvalue(); this
|
||||
overrides it so that we can use it in code that usually deals with real files (and calls .close()) while still using
|
||||
the in-memory data afterwards. We just rely on the garbage collector for the actual cleanup."""
|
||||
|
||||
def close(self):
|
||||
pass
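A small illustration of why this exists (get_items_directly() in ingest/parsers.py relies on it: the parser closes each item stream, but the buffered value is still needed afterwards):

buf = UnclosableBytesIO()
buf.write(b"payload")
buf.close()                          # no-op by design
assert buf.getvalue() == b"payload"  # a plain io.BytesIO would raise ValueError here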
@@ -401,6 +401,13 @@ class SetRemoteAddrMiddlewareTestCase(RegularTestCase):
|
||||
SetRemoteAddrMiddleware.parse_x_forwarded_for("123.123.123.123,1.2.3.4")
|
||||
|
||||
|
||||
class ContentEncodingCheckMiddlewareTestCase(DjangoTestCase):
|
||||
|
||||
def test_speak_brotli_with_arbitrary_view_fails(self):
|
||||
response = self.client.post("/", headers={"Content-Encoding": "br"})
|
||||
self.assertTrue(b"Content-Encoding handling is not supported for endpoint `home`" in response.content)
|
||||
|
||||
|
||||
class AllowedHostsMsgTestCase(DjangoTestCase):
|
||||
|
||||
def test_allowed_hosts_error_message(self):
|
||||
|
||||
@@ -14,7 +14,7 @@ from teams.views import debug_email as debug_teams_email
|
||||
from bugsink.app_settings import get_settings
|
||||
from users.views import signup, confirm_email, resend_confirmation, request_reset_password, reset_password, preferences
|
||||
from ingest.views import download_envelope
|
||||
from files.views import chunk_upload, artifact_bundle_assemble, api_root, api_catch_all
|
||||
from files.views import chunk_upload, artifact_bundle_assemble, difs_assemble, api_root, api_catch_all
|
||||
from bugsink.decorators import login_exempt
|
||||
|
||||
from events.api_views import EventViewSet
|
||||
@@ -71,6 +71,9 @@ urlpatterns = [
|
||||
path("api/0/organizations/<slug:organization_slug>/artifactbundle/assemble/", artifact_bundle_assemble,
|
||||
name="artifact_bundle_assemble"),
|
||||
|
||||
path("api/0/projects/<slug:organization_slug>/<slug:project_slug>/files/difs/assemble/", difs_assemble,
|
||||
name="difs_assemble"),
|
||||
|
||||
path('api/', include('ingest.urls')),
|
||||
path('api/0/', api_root, name='api_root'),
|
||||
|
||||
|
||||
@@ -38,6 +38,29 @@ def allowed_hosts_error_message(domain, allowed_hosts):
|
||||
return msg + "Add '%s' to ALLOWED_HOSTS or configure proxy to use 'Host: %s'." % (domain, proxy_suggestion)
|
||||
|
||||
|
||||
class NoopClose:
|
||||
"""Delegator: Gunicorn's Body doesn't implement .close(); Django calls that it in request.body's finally clause.
|
||||
That .close() call in itself is slightly surprising to me (and I have not copied it in my own streaming reads) b/c
|
||||
the [WSGI spec](https://peps.python.org/pep-3333/#input-and-error-streams) says:
|
||||
|
||||
> Applications conforming to this specification must not use any other methods or attributes of the input or errors
|
||||
> objects. In particular, applications must not attempt to close these streams, even if they possess close()
|
||||
> methods.
|
||||
|
||||
In the end, Django conforms to spec because LimitedStream _also_ drops the .close() (it's subclassing `io.IOBase`),
|
||||
but one wonders why they call it in the first place. Anyway, just stub it and we're good.
|
||||
"""
|
||||
|
||||
def __init__(self, stream):
|
||||
self._stream = stream
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self._stream, name)
|
||||
|
||||
def close(self):
|
||||
return None
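A minimal sketch of the wrapping; io.BytesIO stands in for gunicorn's Body object here:

import io

raw = io.BytesIO(b"chunked request body")  # stand-in for the wsgi.input stream
stream = NoopClose(raw)
data = stream.read()   # delegated to the wrapped stream via __getattr__
stream.close()         # swallowed; the underlying stream is left alone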
class CustomWSGIRequest(WSGIRequest):
|
||||
"""
|
||||
Custom WSGIRequest subclass with 3 fixes/changes:
|
||||
@@ -62,7 +85,7 @@ class CustomWSGIRequest(WSGIRequest):
|
||||
super().__init__(environ)
|
||||
|
||||
if "CONTENT_LENGTH" not in environ and "HTTP_TRANSFER_ENCODING" in environ:
|
||||
self._stream = self.environ["wsgi.input"]
|
||||
self._stream = NoopClose(self.environ["wsgi.input"])
|
||||
|
||||
def get_host(self):
|
||||
"""
|
||||
@@ -75,7 +98,7 @@ class CustomWSGIRequest(WSGIRequest):
|
||||
# For /health/ endpoints, we skip the ALLOWED_HOSTS validation (see #140).
|
||||
return self._get_raw_host()
|
||||
|
||||
# copied from HttpRequest.get_host() in Django 4.2, with modifications.
|
||||
# copied from HttpRequest.get_host() in Django 5.2, with modifications.
|
||||
|
||||
host = self._get_raw_host()
files/minidump.py (new file, 137 lines)
@@ -0,0 +1,137 @@
|
||||
import io
|
||||
import zipfile
|
||||
import symbolic
|
||||
from sentry_sdk_extensions import capture_or_log_exception
|
||||
|
||||
from bugsink.utils import assert_
|
||||
from .models import FileMetadata
|
||||
|
||||
|
||||
def get_single_object(archive):
|
||||
# our understanding: sentry-cli uploads single-object archives; we need to get the single object out of it...
|
||||
# ...but this does raise the question of why archives exist at all... hence the assert
|
||||
objects = list(archive.iter_objects())
|
||||
assert_(len(objects) == 1)
|
||||
return objects[0]
|
||||
|
||||
|
||||
def build_cfi_map_from_minidump_bytes(minidump_bytes):
|
||||
process_state = symbolic.minidump.ProcessState.from_minidump_buffer(minidump_bytes)
|
||||
|
||||
frame_info_map = symbolic.minidump.FrameInfoMap.new()
|
||||
|
||||
for module in process_state.modules():
|
||||
if not module.debug_id:
|
||||
continue
|
||||
|
||||
dashed_debug_id = symbolic.debuginfo.id_from_breakpad(module.debug_id)
|
||||
if FileMetadata.objects.filter(debug_id=dashed_debug_id, file_type="dbg").count() == 0:
|
||||
continue
|
||||
|
||||
dif_bytes = FileMetadata.objects.get(debug_id=dashed_debug_id, file_type="dbg").file.data
|
||||
archive = symbolic.debuginfo.Archive.from_bytes(dif_bytes)
|
||||
|
||||
debug_object = get_single_object(archive)
|
||||
|
||||
cfi = symbolic.minidump.CfiCache.from_object(debug_object)
|
||||
frame_info_map.add(module.debug_id, cfi)
|
||||
|
||||
return frame_info_map
|
||||
|
||||
|
||||
def extract_dif_metadata(dif_bytes):
|
||||
try:
|
||||
archive = symbolic.debuginfo.Archive.from_bytes(dif_bytes)
|
||||
debug_object = get_single_object(archive)
|
||||
return {
|
||||
"kind": debug_object.kind, # "dbg", "lib", "src"
|
||||
"code_id": debug_object.code_id,
|
||||
"debug_id": debug_object.debug_id,
|
||||
# "file_format": debug_object.file_format, # "elf", "macho", "pe", "sourcebundle"
|
||||
}
|
||||
except Exception as e:
|
||||
raise  # TODO stabilize what we do later
|
||||
capture_or_log_exception(e)
|
||||
return {}
|
||||
|
||||
|
||||
def extract_source_context(src_bytes, filename, center_line, context=5):
|
||||
|
||||
# TODO the usual worries about zip bombs/memory usage apply here.
|
||||
with zipfile.ZipFile(io.BytesIO(src_bytes)) as zf:
|
||||
# sourcebundle entries use relative paths like "src/main.c" or so says ChatGPT
|
||||
candidates = [n for n in zf.namelist() if n.endswith(filename)]
|
||||
|
||||
if not candidates:
|
||||
return [], None, []
|
||||
|
||||
with zf.open(candidates[0]) as f:
|
||||
lines = f.read().decode("utf-8").splitlines()
|
||||
|
||||
# Clamp line range to valid indices
|
||||
start = max(center_line - context - 1, 0)
|
||||
end = min(center_line + context, len(lines))
|
||||
|
||||
pre_context = lines[start:center_line - 1]
|
||||
context_line = lines[center_line - 1] if 0 <= center_line - 1 < len(lines) else None
|
||||
post_context = lines[center_line:end]
|
||||
|
||||
return pre_context, context_line, post_context
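A self-contained illustration of the lookup and the clamping behaviour (the bundle layout is assumed, per the comment above):

import io
import zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
    zf.writestr("src/main.c", "\n".join("line %d" % i for i in range(1, 11)))

pre, line, post = extract_source_context(buf.getvalue(), "main.c", center_line=3)
# pre == ["line 1", "line 2"], line == "line 3", post == ["line 4", ..., "line 8"]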
def _find_module_for_address(process_state, abs_addr: int):
|
||||
for m in process_state.modules():
|
||||
if m.addr and m.size and m.addr <= abs_addr < (m.addr + m.size):
|
||||
return m
|
||||
return None
|
||||
|
||||
|
||||
def event_threads_for_process_state(process_state):
|
||||
threads = []
|
||||
for thread_index, symbolic_thread in enumerate(process_state.threads()):
|
||||
frames = []
|
||||
|
||||
for symbolic_frame in symbolic_thread.frames():
|
||||
module = _find_module_for_address(process_state, symbolic_frame.instruction)
|
||||
|
||||
frame = {"instruction_addr": f"0x{symbolic_frame.instruction:x}"}
|
||||
|
||||
if module and module.debug_id:
|
||||
dashed_debug_id = symbolic.debuginfo.id_from_breakpad(module.debug_id)
|
||||
|
||||
file_metadata = FileMetadata.objects.filter(debug_id=dashed_debug_id, file_type="dbg").first()
|
||||
if file_metadata:
|
||||
dif_bytes = file_metadata.file.data
|
||||
|
||||
archive = symbolic.debuginfo.Archive.from_bytes(dif_bytes)
|
||||
|
||||
obj = get_single_object(archive)
|
||||
|
||||
symcache = obj.make_symcache()
|
||||
|
||||
rel = symbolic_frame.instruction - module.addr
|
||||
infos = symcache.lookup(rel)
|
||||
if infos:
|
||||
# tentative understanding: lookup may give multiple results (e.g. inlined code). we just pick
|
||||
# the first arbitrarily which is "good enough for a PoC until proven otherwise"
|
||||
line_info = infos[0]
|
||||
|
||||
frame["function"] = line_info.function_name
|
||||
if line_info.filename:
|
||||
frame["filename"] = line_info.filename
|
||||
frame["lineno"] = line_info.line
|
||||
|
||||
src_meta = FileMetadata.objects.filter(debug_id=dashed_debug_id, file_type="src").first()
|
||||
if src_meta and line_info.filename and line_info.line:
|
||||
frame["pre_context"], frame["context_line"], frame["post_context"] = extract_source_context(
|
||||
src_meta.file.data, line_info.filename, line_info.line)
|
||||
|
||||
frames.append(frame)
|
||||
|
||||
threads.append({
|
||||
"id": symbolic_thread.thread_id,
|
||||
"crashed": thread_index == process_state.requesting_thread,
|
||||
"stacktrace": {"frames": frames},
|
||||
})
|
||||
|
||||
return threads
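For reference, the shape of the structure built above (values illustrative only; "function", "filename" and the line info are present only when the symcache and source-bundle lookups succeed):

example_threads = [{
    "id": 1,
    "crashed": True,
    "stacktrace": {"frames": [{
        "instruction_addr": "0x7f3a0012c4",
        "function": "main",
        "filename": "main.c",
        "lineno": 42,
    }]},
}]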
@@ -105,8 +105,8 @@ def assemble_file(checksum, chunk_checksums, filename):
|
||||
|
||||
# NOTE: unimplemented checks/tricks
|
||||
# * total file-size v.s. some max
|
||||
# * explicit check chunk availability (as it stands, our processing is synchronous, so no need)
|
||||
# * skip-on-checksum-exists
|
||||
# * explicit check chunk availability
|
||||
# * skip this whole thing when the (whole-file) checksum exists
|
||||
|
||||
chunks = Chunk.objects.filter(checksum__in=chunk_checksums)
|
||||
chunks_dicts = {chunk.checksum: chunk for chunk in chunks}
|
||||
@@ -117,7 +117,7 @@ def assemble_file(checksum, chunk_checksums, filename):
|
||||
if sha1(data, usedforsecurity=False).hexdigest() != checksum:
|
||||
raise Exception("checksum mismatch")
|
||||
|
||||
result = File.objects.get_or_create(
|
||||
file, created = File.objects.get_or_create(
|
||||
checksum=checksum,
|
||||
defaults={
|
||||
"size": len(data),
|
||||
@@ -129,7 +129,7 @@ def assemble_file(checksum, chunk_checksums, filename):
|
||||
# be used in multiple files (which are still being assembled) but with chunksizes in the order of 1MiB, I'd say this
|
||||
# is unlikely.
|
||||
chunks.delete()
|
||||
return result
|
||||
return file, created
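Callers now unpack the returned tuple; difs_assemble in files/views.py below does exactly this:

file, _created = assemble_file(file_checksum, file_chunks, filename=file_info["name"])
symbolic_metadata = extract_dif_metadata(file.data)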
@shared_task
files/views.py (133 lines)
@@ -13,10 +13,12 @@ from sentry.assemble import ChunkFileState
|
||||
|
||||
from bugsink.app_settings import get_settings
|
||||
from bugsink.transaction import durable_atomic, immediate_atomic
|
||||
from bugsink.streams import handle_request_content_encoding
|
||||
from bsmain.models import AuthToken
|
||||
|
||||
from .models import Chunk, File
|
||||
from .tasks import assemble_artifact_bundle
|
||||
from .models import Chunk, File, FileMetadata
|
||||
from .tasks import assemble_artifact_bundle, assemble_file
|
||||
from .minidump import extract_dif_metadata
|
||||
|
||||
logger = logging.getLogger("bugsink.api")
|
||||
|
||||
@@ -86,7 +88,8 @@ def get_chunk_upload_settings(request, organization_slug):
|
||||
# yet.
|
||||
"release_files",
|
||||
|
||||
# this would seem to be the "javascript sourcemaps" thing, but how exactly I did not check yet.
|
||||
# on second reading I would say: this is "actual source code", but I did not check yet and "don't touch it"
|
||||
# (even though we don't actually have an implementation for sources yet)
|
||||
"sources",
|
||||
|
||||
# https://github.com/getsentry/sentry/discussions/46967
|
||||
@@ -100,7 +103,7 @@ def get_chunk_upload_settings(request, organization_slug):
|
||||
# "artifact_bundles_v2",
|
||||
|
||||
# the rest of the options are below:
|
||||
# "debug_files",
|
||||
"debug_files",
|
||||
# "release_files",
|
||||
# "pdbs",
|
||||
# "bcsymbolmaps",
|
||||
@@ -151,6 +154,7 @@ def chunk_upload(request, organization_slug):
|
||||
# POST: upload (full-size) "chunks" and store them as Chunk objects; file.name should be the sha1 of the content.
|
||||
chunks = []
|
||||
if request.FILES:
|
||||
# "file" and "file_gzip" are both possible multi-value keys for uploading (with associated semantics each)
|
||||
chunks = request.FILES.getlist("file")
|
||||
|
||||
# NOTE: we read the whole unzipped file into memory; we _could_ take an approach like bugsink/streams.py.
|
||||
@@ -198,6 +202,86 @@ def artifact_bundle_assemble(request, organization_slug):
|
||||
return JsonResponse({"state": ChunkFileState.CREATED, "missingChunks": []})
|
||||
|
||||
|
||||
@csrf_exempt # we're in API context here; this could potentially be pulled up to a higher level though
|
||||
@requires_auth_token
|
||||
def difs_assemble(request, organization_slug, project_slug):
|
||||
if not get_settings().FEATURE_MINIDUMPS:
|
||||
return JsonResponse({"detail": "minidumps not enabled"}, status=404)
|
||||
|
||||
# TODO move to tasks.something.delay
|
||||
# TODO think about the right transaction around this
|
||||
data = json.loads(request.body)
|
||||
|
||||
file_checksums = set(data.keys())
|
||||
|
||||
existing_files = {
|
||||
file.checksum: file
|
||||
for file in File.objects.filter(checksum__in=file_checksums)
|
||||
}
|
||||
|
||||
all_requested_chunks = {
|
||||
chunk
|
||||
for file_info in data.values()
|
||||
for chunk in file_info.get("chunks", [])
|
||||
}
|
||||
|
||||
available_chunks = set(
|
||||
Chunk.objects.filter(checksum__in=all_requested_chunks).values_list("checksum", flat=True)
|
||||
)
|
||||
|
||||
response = {}
|
||||
|
||||
for file_checksum, file_info in data.items():
|
||||
if file_checksum in existing_files:
|
||||
response[file_checksum] = {
|
||||
"state": ChunkFileState.OK,
|
||||
"missingChunks": [],
|
||||
# if it is ever needed, we could add something akin to the below, but so far we've not seen client-side
|
||||
# actually using this; let's add it on-demand.
|
||||
# "dif": json_repr_with_key_info_about(existing_files[file_checksum]),
|
||||
}
|
||||
continue
|
||||
|
||||
file_chunks = file_info.get("chunks", [])
|
||||
|
||||
# the sentry-cli sends an empty "chunks" list when just polling for file existence; since we already handled the
|
||||
# case of existing files above, we can simply return NOT_FOUND here.
|
||||
if not file_chunks:
|
||||
response[file_checksum] = {
|
||||
"state": ChunkFileState.NOT_FOUND,
|
||||
"missingChunks": [],
|
||||
}
|
||||
continue
|
||||
|
||||
missing_chunks = [c for c in file_chunks if c not in available_chunks]
|
||||
if missing_chunks:
|
||||
response[file_checksum] = {
|
||||
"state": ChunkFileState.NOT_FOUND,
|
||||
"missingChunks": missing_chunks,
|
||||
}
|
||||
continue
|
||||
|
||||
file, _ = assemble_file(file_checksum, file_chunks, filename=file_info["name"])
|
||||
|
||||
symbolic_metadata = extract_dif_metadata(file.data)
|
||||
|
||||
FileMetadata.objects.get_or_create(
|
||||
debug_id=file_info.get("debug_id"), # TODO : .get implies "no debug_id", but in that case it's useless
|
||||
file_type=symbolic_metadata["kind"], # NOTE: symbolic's kind goes into file_type...
|
||||
defaults={
|
||||
"file": file,
|
||||
"data": "{}", # this is the "catch all" field but I don't think we have anything in this case.
|
||||
}
|
||||
)
|
||||
|
||||
response[file_checksum] = {
|
||||
"state": ChunkFileState.OK,
|
||||
"missingChunks": [],
|
||||
}
|
||||
|
||||
return JsonResponse(response)
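For reference, the request/response shape this view implements (keys are SHA-1 hex digests; the values below are illustrative placeholders, and the state strings come from sentry.assemble.ChunkFileState rather than being spelled out here):

example_request = {
    "<file_sha1>": {"name": "app.debug", "debug_id": "<debug-id>", "chunks": ["<chunk_sha1>"]},
}
example_response = {
    "<file_sha1>": {"state": ChunkFileState.NOT_FOUND, "missingChunks": ["<chunk_sha1>"]},
}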
@user_passes_test(lambda u: u.is_superuser)
|
||||
@durable_atomic
|
||||
def download_file(request, checksum):
|
||||
@@ -218,6 +302,8 @@ def api_catch_all(request, subpath):
|
||||
# the existence of this view (and the associated URL pattern) has the effect of `APPEND_SLASH=False` for our API
# endpoints, which is a good thing: for API endpoints you generally don't want this kind of magic (explicit breakage
# is desirable for APIs, and redirects don't even work for POST/PUT data)
|
||||
MAX_API_CATCH_ALL_SIZE = 1_000_000 # security and usability meet at this value (or below)
|
||||
handle_request_content_encoding(request, MAX_API_CATCH_ALL_SIZE)
|
||||
|
||||
if not get_settings().API_LOG_UNIMPLEMENTED_CALLS:
|
||||
raise Http404("Unimplemented API endpoint: /api/" + subpath)
|
||||
@@ -228,27 +314,44 @@ def api_catch_all(request, subpath):
|
||||
f" Method: {request.method}",
|
||||
]
|
||||
|
||||
interesting_meta_keys = ["CONTENT_TYPE", "CONTENT_LENGTH", "HTTP_TRANSFER_ENCODING"]
|
||||
interesting_headers = {
|
||||
k: request.META[k] for k in interesting_meta_keys if k in request.META
|
||||
}
|
||||
|
||||
if interesting_headers:
|
||||
lines.append(" Headers:")
|
||||
for k, v in interesting_headers.items():
|
||||
lines.append(f" {k}: {v}")
|
||||
|
||||
if request.GET:
|
||||
lines.append(f" GET: {request.GET.dict()}")
|
||||
|
||||
if request.POST:
|
||||
lines.append(f" POST: {request.POST.dict()}")
|
||||
content_type = request.META.get("CONTENT_TYPE", "")
|
||||
if content_type == "application/x-www-form-urlencoded" or content_type.startswith("multipart/form-data"):
|
||||
if request.POST:
|
||||
lines.append(f" POST: {request.POST.dict()}")
|
||||
if request.FILES:
|
||||
lines.append(f" FILES: {[f.name for f in request.FILES.values()]}")
|
||||
|
||||
body = request.body
|
||||
if body:
|
||||
try:
|
||||
decoded = body.decode("utf-8", errors="replace").strip()
|
||||
lines.append(" Body:")
|
||||
lines.append(f" {decoded[:500]}")
|
||||
else:
|
||||
body = request.read(MAX_API_CATCH_ALL_SIZE)
|
||||
decoded = body.decode("utf-8", errors="replace").strip()
|
||||
|
||||
if content_type == "application/json":
|
||||
shown_pretty = False
|
||||
try:
|
||||
parsed = json.loads(decoded)
|
||||
pretty = json.dumps(parsed, indent=2)[:10_000]
|
||||
pretty = json.dumps(parsed, indent=2)
|
||||
lines.append(" JSON body:")
|
||||
lines.extend(f" {line}" for line in pretty.splitlines())
|
||||
shown_pretty = True
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
lines.append(f" Body: <decode error: {e}>")
|
||||
|
||||
if not shown_pretty:
|
||||
lines.append(" Body:")
|
||||
lines.append(f" {body}")
|
||||
|
||||
logger.info("\n".join(lines))
|
||||
raise Http404("Unimplemented API endpoint: /api/" + subpath)
|
||||
|
||||
@@ -4,7 +4,7 @@ from django.utils._os import safe_join
|
||||
from bugsink.app_settings import get_settings
|
||||
|
||||
|
||||
def get_filename_for_event_id(event_id):
|
||||
def get_filename_for_event_id(event_id, filetype="event"):
|
||||
# The idea of having some levels of directories here (to avoid too many files in a single dir) is not yet
|
||||
# implemented. However, counterpoint: when doing stress tests, it was quite hard to get a serious backlog going
|
||||
# (snappea was very well able to play catch-up). So this might not be necessary.
|
||||
@@ -15,4 +15,8 @@ def get_filename_for_event_id(event_id):
|
||||
# without needing to inspect all call-sites)
|
||||
event_id_normalized = uuid.UUID(event_id).hex
|
||||
|
||||
return safe_join(get_settings().INGEST_STORE_BASE_DIR, event_id_normalized)
|
||||
basename = event_id_normalized
|
||||
if filetype == "minidump":
|
||||
basename += ".dmp"
|
||||
|
||||
return safe_join(get_settings().INGEST_STORE_BASE_DIR, basename)
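Illustration of the resulting paths (the base directory comes from settings; the event id is hypothetical):

event_id = "0123456789abcdef0123456789abcdef"
get_filename_for_event_id(event_id)                       # <INGEST_STORE_BASE_DIR>/0123...cdef
get_filename_for_event_id(event_id, filetype="minidump")  # <INGEST_STORE_BASE_DIR>/0123...cdef.dmp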
@@ -1,7 +1,7 @@
|
||||
import json
|
||||
import io
|
||||
|
||||
from bugsink.streams import MaxDataWriter
|
||||
from bugsink.streams import MaxDataWriter, UnclosableBytesIO
|
||||
|
||||
from .exceptions import ParseError
|
||||
from .header_validators import filter_valid_envelope_headers, filter_valid_item_headers
|
||||
@@ -155,7 +155,6 @@ class StreamingEnvelopeParser:
|
||||
|
||||
def get_items(self, output_stream_factory):
|
||||
# yields the item_headers and item_output_streams (with the content of the items written into them)
|
||||
# closing the item_output_stream is the responsibility of the caller
|
||||
|
||||
self.get_envelope_headers()
|
||||
|
||||
@@ -175,23 +174,27 @@ class StreamingEnvelopeParser:
|
||||
finder = NewlineFinder()
|
||||
|
||||
item_output_stream = output_stream_factory(item_headers)
|
||||
self.remainder, self.at_eof = readuntil(
|
||||
self.input_stream, self.remainder, finder, item_output_stream, self.chunk_size)
|
||||
|
||||
if "length" in item_headers:
|
||||
# items with an explicit length are terminated by a newline (if at EOF, this is optional as per the set
|
||||
# of examples in the docs)
|
||||
should_be_empty = io.BytesIO()
|
||||
try:
|
||||
self.remainder, self.at_eof = readuntil(
|
||||
self.input_stream, self.remainder, NewlineFinder(), should_be_empty, self.chunk_size)
|
||||
should_be_empty_value = should_be_empty.getvalue()
|
||||
if should_be_empty_value != b"":
|
||||
raise ParseError("Item with explicit length not terminated by newline/EOF")
|
||||
self.input_stream, self.remainder, finder, item_output_stream, self.chunk_size)
|
||||
|
||||
if "length" in item_headers:
|
||||
# items with an explicit length are terminated by a newline (if at EOF, this is optional as per the
|
||||
# set of examples in the docs)
|
||||
should_be_empty = io.BytesIO()
|
||||
self.remainder, self.at_eof = readuntil(
|
||||
self.input_stream, self.remainder, NewlineFinder(), should_be_empty, self.chunk_size)
|
||||
should_be_empty_value = should_be_empty.getvalue()
|
||||
if should_be_empty_value != b"":
|
||||
raise ParseError("Item with explicit length not terminated by newline/EOF")
|
||||
finally:
|
||||
item_output_stream.close()
|
||||
|
||||
yield item_headers, item_output_stream
|
||||
|
||||
def get_items_directly(self):
|
||||
# this method is just convenience for testing
|
||||
|
||||
for item_headers, output_stream in self.get_items(lambda item_headers: io.BytesIO()):
|
||||
for item_headers, output_stream in self.get_items(lambda item_headers: UnclosableBytesIO()):
|
||||
yield item_headers, output_stream.getvalue()
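The wire format being parsed here, as also hand-built in ingest/tests.py later in this diff: one envelope-headers line, then per item a headers line followed by its payload (newline-terminated, or sized by an explicit "length" header for binary attachments such as minidumps):

envelope = (
    b'{"event_id": "9ec79c33ec9942ab8353589fcb2e04dc"}\n'  # envelope headers
    b'{"type": "event"}\n'                                  # item headers, no explicit length
    b'{"message": "hello"}\n'                               # item payload, terminated by newline
)
# an attachment item would instead send, e.g.:
#   b'{"type": "attachment", "attachment_type": "event.minidump", "length": %d}\n' % len(minidump_bytes)
#   followed by exactly that many raw bytes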
@@ -1,3 +1,4 @@
|
||||
import contextlib
|
||||
import os
|
||||
import logging
|
||||
import json
|
||||
@@ -17,17 +18,24 @@ def digest(event_id, event_metadata):
|
||||
|
||||
with open(get_filename_for_event_id(event_id), "rb") as f:
|
||||
event_data = json.loads(f.read().decode("utf-8"))
|
||||
opened = [get_filename_for_event_id(event_id)]
|
||||
|
||||
if event_metadata.get("has_minidump"):
|
||||
with open(get_filename_for_event_id(event_id, filetype="minidump"), "rb") as f:
|
||||
minidump_bytes = f.read()
|
||||
opened += [get_filename_for_event_id(event_id, filetype="minidump")]
|
||||
else:
|
||||
minidump_bytes = None
|
||||
|
||||
try:
|
||||
BaseIngestAPIView.digest_event(event_metadata, event_data)
|
||||
BaseIngestAPIView.digest_event(event_metadata, event_data, minidump_bytes=minidump_bytes)
|
||||
except ValidationError as e:
|
||||
logger.warning("ValidationError in digest_event", exc_info=e)
|
||||
finally:
|
||||
# NOTE: if an SDK misbehaves, and sends the same event_id multiple times in quick succession, the line below
|
||||
# will trigger a FileNotFoundError on the second attempt to delete the file (the files also overwrite each other
|
||||
# on-ingest). In that case your logs will also show a "ValidationError in digest_event". Although that means an error
|
||||
# bubbles up from the below, at least for now I'm OK with that. (next steps _could_ be: [a] catching the error
|
||||
# as expected [b] refusing to "just overwrite and doubly enqueue on-ingest" [c] reporting about this particular
|
||||
# problem to the end-user etc... but at least "getting it really right" might actually be quite hard (race
|
||||
# conditions) and I'm not so sure it's worth it.
|
||||
os.unlink(get_filename_for_event_id(event_id))
|
||||
# NOTE: if an SDK misbehaves, and sends the same event_id multiple times in quick succession, the os.unlink
|
||||
# below will trigger a FileNotFoundError on the second attempt to delete the file (the events also overwrite
|
||||
# each other on-ingest, but that's separately dealt with, showing a "ValidationError in digest_event"). We're
|
||||
# just catching those and ignoring them (bubble-up is not desirable because it hinders follow-up cleanups)
|
||||
for filename in opened:
|
||||
with contextlib.suppress(FileNotFoundError):
|
||||
os.unlink(filename)
ingest/tests.py (248 lines)
@@ -21,10 +21,12 @@ from projects.models import Project
|
||||
from events.factories import create_event_data, create_event
|
||||
from events.retention import evict_for_max_events
|
||||
from events.storage_registry import override_event_storages
|
||||
from events.models import Event
|
||||
from issues.factories import get_or_create_issue
|
||||
from issues.models import IssueStateManager, Issue, TurningPoint, TurningPointKind
|
||||
from issues.utils import get_values
|
||||
from bugsink.app_settings import override_settings
|
||||
from bugsink.streams import UnclosableBytesIO
|
||||
from compat.timestamp import format_timestamp
|
||||
from compat.dsn import get_header_value
|
||||
from bsmain.management.commands.send_json import Command as SendJsonCommand
|
||||
@@ -293,38 +295,158 @@ class IngestViewTestCase(TransactionTestCase):
|
||||
|
||||
sentry_auth_header = get_header_value(f"http://{ project.sentry_key }@hostisignored/{ project.id }")
|
||||
|
||||
# first, we ingest many issues
|
||||
command = SendJsonCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples")
|
||||
|
||||
filename = glob(SAMPLES_DIR + "/bugsink/contexts.json")[0] # pick a fixed one for reproducibility
|
||||
|
||||
for i, include_event_id in enumerate([True, False]):
|
||||
with open(filename) as f:
|
||||
data = json.loads(f.read())
|
||||
|
||||
data["event_id"] = uuid.uuid4().hex # for good measure we reset this to avoid duplicates.
|
||||
|
||||
if "timestamp" not in data:
|
||||
# as per send_json command ("weirdly enough a large numer of sentry test data don't actually...")
|
||||
data["timestamp"] = time.time()
|
||||
|
||||
event_id = data["event_id"]
|
||||
if not include_event_id:
|
||||
del data["event_id"]
|
||||
|
||||
data_bytes = json.dumps(data).encode("utf-8")
|
||||
data_bytes = (
|
||||
b'{"event_id": "%s"}\n{"type": "event"}\n' % event_id.encode("utf-8") + data_bytes)
|
||||
|
||||
response = self.client.post(
|
||||
f"/api/{ project.id }/envelope/",
|
||||
content_type="application/json",
|
||||
headers={
|
||||
"X-Sentry-Auth": sentry_auth_header,
|
||||
},
|
||||
data=data_bytes,
|
||||
)
|
||||
self.assertEqual(
|
||||
200, response.status_code, response.content if response.status_code != 302 else response.url)
|
||||
|
||||
self.assertEqual(1 + i, Event.objects.count())
|
||||
|
||||
@tag("samples")
|
||||
def test_envelope_endpoint_event_and_minidump(self):
|
||||
# dirty copy/paste from the integration test, let's start with "something", we can always clean it later.
|
||||
project = Project.objects.create(name="test")
|
||||
|
||||
sentry_auth_header = get_header_value(f"http://{ project.sentry_key }@hostisignored/{ project.id }")
|
||||
|
||||
SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples")
|
||||
|
||||
event_samples = glob(SAMPLES_DIR + "/sentry/mobile1-xen.json") # pick a fixed one for reproducibility
|
||||
known_broken = [SAMPLES_DIR + "/" + s.strip() for s in _readlines(SAMPLES_DIR + "/KNOWN-BROKEN")]
|
||||
filename = glob(SAMPLES_DIR + "/bugsink/contexts.json")[0] # pick a fixed one for reproducibility
|
||||
with open(filename) as f:
|
||||
data = json.loads(f.read())
|
||||
|
||||
if len(event_samples) == 0:
|
||||
raise Exception(f"No event samples found in {SAMPLES_DIR}; I insist on having some to test with.")
|
||||
data["event_id"] = uuid.uuid4().hex # for good measure we reset this to avoid duplicates.
|
||||
|
||||
for include_event_id in [True, False]:
|
||||
for filename in [sample for sample in event_samples if sample not in known_broken][:1]: # one is enough
|
||||
with open(filename) as f:
|
||||
data = json.loads(f.read())
|
||||
if "timestamp" not in data:
|
||||
# as per send_json command ("weirdly enough a large numer of sentry test data don't actually...")
|
||||
data["timestamp"] = time.time()
|
||||
|
||||
data["event_id"] = uuid.uuid4().hex # for good measure we reset this to avoid duplicates.
|
||||
filename = glob(SAMPLES_DIR + "/minidumps/linux_overflow.dmp")[0] # pick a fixed one for reproducibility
|
||||
with open(filename, 'rb') as f:
|
||||
minidump_bytes = f.read()
|
||||
|
||||
if "timestamp" not in data:
|
||||
# as per send_json command ("weirdly enough a large numer of sentry test data don't actually...")
|
||||
data["timestamp"] = time.time()
|
||||
event_id = data["event_id"]
|
||||
|
||||
event_id = data["event_id"]
|
||||
if not include_event_id:
|
||||
del data["event_id"]
|
||||
event_bytes = json.dumps(data).encode("utf-8")
|
||||
data_bytes = (
|
||||
b'{"event_id": "%s"}\n' % event_id.encode("utf-8") +
|
||||
b'{"type": "event"}\n' + event_bytes + b"\n" +
|
||||
b'{"type": "attachment", "attachment_type": "event.minidump", "length": %d}\n' % len(minidump_bytes) +
|
||||
minidump_bytes
|
||||
)
|
||||
|
||||
data_bytes = json.dumps(data).encode("utf-8")
|
||||
data_bytes = (
|
||||
b'{"event_id": "%s"}\n{"type": "event"}\n' % event_id.encode("utf-8") + data_bytes)
|
||||
response = self.client.post(
|
||||
f"/api/{ project.id }/envelope/",
|
||||
content_type="application/json",
|
||||
headers={
|
||||
"X-Sentry-Auth": sentry_auth_header,
|
||||
},
|
||||
data=data_bytes,
|
||||
)
|
||||
self.assertEqual(
|
||||
200, response.status_code, response.content if response.status_code != 302 else response.url)
|
||||
|
||||
self.assertEqual(1, Event.objects.count())
|
||||
event = Event.objects.get()
|
||||
self.assertTrue("prod" in ([tag.value.value for tag in event.tags.all()])) # from the sample event
|
||||
|
||||
self.assertEqual('SIGABRT: Fatal Error: SIGABRT', Event.objects.get().title())
|
||||
|
||||
@tag("samples")
|
||||
def test_envelope_endpoint_minidump_only(self):
|
||||
# dirty copy/paste from the integration test, let's start with "something", we can always clean it later.
|
||||
project = Project.objects.create(name="test")
|
||||
|
||||
sentry_auth_header = get_header_value(f"http://{ project.sentry_key }@hostisignored/{ project.id }")
|
||||
|
||||
SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples")
|
||||
|
||||
filename = glob(SAMPLES_DIR + "/minidumps/linux_overflow.dmp")[0] # pick a fixed one for reproducibility
|
||||
with open(filename, 'rb') as f:
|
||||
minidump_bytes = f.read()
|
||||
|
||||
event_id = uuid.uuid4().hex # required at the envelope level so we provide it.
|
||||
|
||||
data_bytes = (
|
||||
b'{"event_id": "%s"}\n' % event_id.encode("utf-8") +
|
||||
b'{"type": "attachment", "attachment_type": "event.minidump", "length": %d}\n' % len(minidump_bytes) +
|
||||
minidump_bytes
|
||||
)
|
||||
|
||||
response = self.client.post(
|
||||
f"/api/{ project.id }/envelope/",
|
||||
content_type="application/json",
|
||||
headers={
|
||||
"X-Sentry-Auth": sentry_auth_header,
|
||||
},
|
||||
data=data_bytes,
|
||||
)
|
||||
self.assertEqual(
|
||||
200, response.status_code, response.content if response.status_code != 302 else response.url)
|
||||
|
||||
self.assertEqual(1, Event.objects.count())
|
||||
event = Event.objects.get()
|
||||
self.assertFalse("prod" in ([tag.value.value for tag in event.tags.all()])) # no sample event, so False
|
||||
|
||||
self.assertEqual('SIGABRT: Fatal Error: SIGABRT', Event.objects.get().title())
|
||||
|
||||
@tag("samples")
|
||||
def test_envelope_endpoint_reused_ids_different_exceptions(self):
|
||||
# dirty copy/paste from test_envelope_endpoint,
|
||||
project = Project.objects.create(name="test")
|
||||
|
||||
sentry_auth_header = get_header_value(f"http://{ project.sentry_key }@hostisignored/{ project.id }")
|
||||
|
||||
SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples")
|
||||
|
||||
filename = glob(SAMPLES_DIR + "/sentry/mobile1-xen.json")[0] # this one has 'exception.values[0].type'
|
||||
|
||||
with open(filename) as f:
|
||||
data = json.loads(f.read())
|
||||
data["event_id"] = uuid.uuid4().hex # we set it once, before the loop.
|
||||
|
||||
for type_ in ["Foo", "Bar"]: # forces different groupers, leading to separate Issue objects
|
||||
data['exception']['values'][0]['type'] = type_
|
||||
|
||||
if "timestamp" not in data:
|
||||
# as per send_json command ("weirdly enough a large numer of sentry test data don't actually...")
|
||||
data["timestamp"] = time.time()
|
||||
|
||||
event_id = data["event_id"]
|
||||
|
||||
data_bytes = json.dumps(data).encode("utf-8")
|
||||
data_bytes = (
|
||||
b'{"event_id": "%s"}\n{"type": "event"}\n' % event_id.encode("utf-8") + data_bytes)
|
||||
|
||||
def check():
|
||||
response = self.client.post(
|
||||
f"/api/{ project.id }/envelope/",
|
||||
content_type="application/json",
|
||||
@@ -336,66 +458,11 @@ class IngestViewTestCase(TransactionTestCase):
|
||||
self.assertEqual(
|
||||
200, response.status_code, response.content if response.status_code != 302 else response.url)
|
||||
|
||||
@tag("samples")
|
||||
def test_envelope_endpoint_reused_ids_different_exceptions(self):
|
||||
# dirty copy/paste from test_envelope_endpoint,
|
||||
project = Project.objects.create(name="test")
|
||||
|
||||
sentry_auth_header = get_header_value(f"http://{ project.sentry_key }@hostisignored/{ project.id }")
|
||||
|
||||
# first, we ingest many issues
|
||||
command = SendJsonCommand()
|
||||
command.stdout = io.StringIO()
|
||||
command.stderr = io.StringIO()
|
||||
|
||||
SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples")
|
||||
|
||||
event_samples = glob(SAMPLES_DIR + "/sentry/mobile1-xen.json") # this one has 'exception.values[0].type'
|
||||
known_broken = [SAMPLES_DIR + "/" + s.strip() for s in _readlines(SAMPLES_DIR + "/KNOWN-BROKEN")]
|
||||
|
||||
if len(event_samples) == 0:
|
||||
raise Exception(f"No event samples found in {SAMPLES_DIR}; I insist on having some to test with.")
|
||||
|
||||
for filename in [sample for sample in event_samples if sample not in known_broken][:1]: # one is enough
|
||||
with open(filename) as f:
|
||||
data = json.loads(f.read())
|
||||
data["event_id"] = uuid.uuid4().hex # we set it once, before the loop.
|
||||
|
||||
for type_ in ["Foo", "Bar"]: # forces different groupers, leading to separate Issue objects
|
||||
data['exception']['values'][0]['type'] = type_
|
||||
|
||||
if "timestamp" not in data:
|
||||
# as per send_json command ("weirdly enough a large numer of sentry test data don't actually...")
|
||||
data["timestamp"] = time.time()
|
||||
|
||||
event_id = data["event_id"]
|
||||
|
||||
data_bytes = json.dumps(data).encode("utf-8")
|
||||
data_bytes = (
|
||||
b'{"event_id": "%s"}\n{"type": "event"}\n' % event_id.encode("utf-8") + data_bytes)
|
||||
|
||||
def check():
|
||||
response = self.client.post(
|
||||
f"/api/{ project.id }/envelope/",
|
||||
content_type="application/json",
|
||||
headers={
|
||||
"X-Sentry-Auth": sentry_auth_header,
|
||||
},
|
||||
data=data_bytes,
|
||||
)
|
||||
self.assertEqual(
|
||||
200, response.status_code, response.content if response.status_code != 302 else response.url)
|
||||
|
||||
if type_ == "Foo":
|
||||
if type_ == "Foo":
|
||||
check()
|
||||
else:
|
||||
with self.assertRaises(ViolatedExpectation):
|
||||
check()
|
||||
else:
|
||||
with self.assertRaises(ViolatedExpectation):
|
||||
check()
|
||||
|
||||
@tag("samples")
|
||||
def test_envelope_endpoint_digest_non_immediate(self):
|
||||
with override_settings(DIGEST_IMMEDIATELY=False):
|
||||
self.test_envelope_endpoint()
|
||||
|
||||
def test_envelope_endpoint_brotli_bomb(self):
|
||||
project = Project.objects.create(name="test")
|
||||
@@ -404,7 +471,7 @@ class IngestViewTestCase(TransactionTestCase):
|
||||
data_bytes = BROTLI_BOMB_4G
|
||||
|
||||
t0 = time.time()
|
||||
self.client.post(
|
||||
response = self.client.post(
|
||||
f"/api/{ project.id }/envelope/",
|
||||
content_type="application/json",
|
||||
headers={
|
||||
@@ -419,6 +486,9 @@ class IngestViewTestCase(TransactionTestCase):
|
||||
# the failing version is well above 5s (I just stopped the process after ~30s)
|
||||
self.fail("Brotli bomb caused excessive processing time: %d seconds" % (time.time() - t0))
|
||||
|
||||
self.assertTrue(b"Max length" in response.content, response.content)
|
||||
self.assertTrue(b"exceeded" in response.content, response.content)
|
||||
|
||||
@tag("samples")
|
||||
def test_filestore(self):
|
||||
# quick & dirty way to test the filestore; in absence of a proper test for it, we just run a more-or-less
|
||||
@@ -651,7 +721,7 @@ class TestParser(RegularTestCase):
|
||||
initial_chunk = b"line 0\nline 1\n"
|
||||
input_stream.seek(0)
|
||||
|
||||
output_stream = io.BytesIO()
|
||||
output_stream = UnclosableBytesIO()
|
||||
remainder, at_eof = readuntil(input_stream, initial_chunk, NewlineFinder(), output_stream, 3)
|
||||
|
||||
self.assertFalse(at_eof)
|
||||
@@ -664,7 +734,7 @@ class TestParser(RegularTestCase):
|
||||
initial_chunk = b"lin"
|
||||
input_stream.seek(0)
|
||||
|
||||
output_stream = io.BytesIO()
|
||||
output_stream = UnclosableBytesIO()
|
||||
remainder, at_eof = readuntil(input_stream, initial_chunk, NewlineFinder(), output_stream, 3)
|
||||
|
||||
self.assertFalse(at_eof)
|
||||
@@ -677,7 +747,7 @@ class TestParser(RegularTestCase):
|
||||
initial_chunk = b""
|
||||
input_stream.seek(0)
|
||||
|
||||
output_stream = io.BytesIO()
|
||||
output_stream = UnclosableBytesIO()
|
||||
remainder, at_eof = readuntil(input_stream, initial_chunk, NewlineFinder(), output_stream, 3)
|
||||
|
||||
self.assertFalse(at_eof)
|
||||
@@ -690,7 +760,7 @@ class TestParser(RegularTestCase):
|
||||
initial_chunk = b""
|
||||
input_stream.seek(0)
|
||||
|
||||
output_stream = io.BytesIO()
|
||||
output_stream = UnclosableBytesIO()
|
||||
remainder, at_eof = readuntil(input_stream, initial_chunk, NewlineFinder(), output_stream, 3)
|
||||
|
||||
self.assertTrue(at_eof)
|
||||
@@ -703,7 +773,7 @@ class TestParser(RegularTestCase):
|
||||
initial_chunk = b"lin"
|
||||
input_stream.seek(0)
|
||||
|
||||
output_stream = io.BytesIO()
|
||||
output_stream = UnclosableBytesIO()
|
||||
remainder, at_eof = readuntil(input_stream, initial_chunk, NewlineFinder(), output_stream, 1024)
|
||||
|
||||
self.assertFalse(at_eof)
|
||||
@@ -716,7 +786,7 @@ class TestParser(RegularTestCase):
|
||||
initial_chunk = b"lin"
|
||||
input_stream.seek(0)
|
||||
|
||||
output_stream = io.BytesIO()
|
||||
output_stream = UnclosableBytesIO()
|
||||
remainder, at_eof = readuntil(input_stream, initial_chunk, LengthFinder(10, "eof not ok"), output_stream, 3)
|
||||
|
||||
self.assertFalse(at_eof)
|
||||
@@ -729,7 +799,7 @@ class TestParser(RegularTestCase):
|
||||
initial_chunk = b"lin"
|
||||
input_stream.seek(0)
|
||||
|
||||
output_stream = io.BytesIO()
|
||||
output_stream = UnclosableBytesIO()
|
||||
with self.assertRaises(ParseError):
|
||||
remainder, at_eof = readuntil(input_stream, initial_chunk, LengthFinder(100, "EOF"), output_stream, 1000)
|
||||
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
from django.urls import path
|
||||
|
||||
from .views import IngestEventAPIView, IngestEnvelopeAPIView
|
||||
from .views import IngestEventAPIView, IngestEnvelopeAPIView, MinidumpAPIView
|
||||
|
||||
urlpatterns = [
|
||||
# project_pk has to be an int per Sentry Client expectations.
|
||||
path("<int:project_pk>/store/", IngestEventAPIView.as_view()),
|
||||
path("<int:project_pk>/envelope/", IngestEnvelopeAPIView.as_view()),
|
||||
path("<int:project_pk>/store/", IngestEventAPIView.as_view(), name="ingest-store"),
|
||||
path("<int:project_pk>/envelope/", IngestEnvelopeAPIView.as_view(), name="ingest-envelope"),
|
||||
|
||||
# is this "ingest"? it is at least in the sense that it matches the API schema and downstream auth etc.
|
||||
path("<int:project_pk>/minidump/", MinidumpAPIView.as_view(), name="ingest-minidump"),
|
||||
]
ingest/views.py (215 lines)
@@ -1,8 +1,8 @@
|
||||
from collections import defaultdict
|
||||
import uuid
|
||||
import hashlib
|
||||
import os
|
||||
import logging
|
||||
import io
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
import jsonschema
|
||||
@@ -29,7 +29,9 @@ from issues.regressions import issue_is_regression
|
||||
|
||||
from bugsink.transaction import immediate_atomic, delay_on_commit
|
||||
from bugsink.exceptions import ViolatedExpectation
|
||||
from bugsink.streams import content_encoding_reader, MaxDataReader, MaxDataWriter, NullWriter, MaxLengthExceeded
|
||||
from bugsink.streams import (
|
||||
content_encoding_reader, MaxDataReader, MaxDataWriter, NullWriter, MaxLengthExceeded,
|
||||
handle_request_content_encoding)
|
||||
from bugsink.app_settings import get_settings
|
||||
|
||||
from events.models import Event
|
||||
@@ -39,6 +41,8 @@ from alerts.tasks import send_new_issue_alert, send_regression_alert
|
||||
from compat.timestamp import format_timestamp, parse_timestamp
|
||||
from tags.models import digest_tags
|
||||
from bsmain.utils import b108_makedirs
|
||||
from sentry_sdk_extensions import capture_or_log_exception
|
||||
from sentry.minidump import merge_minidump_event
|
||||
|
||||
from .parsers import StreamingEnvelopeParser, ParseError
|
||||
from .filestore import get_filename_for_event_id
|
||||
@@ -49,6 +53,7 @@ from .models import StoreEnvelope, DontStoreEnvelope, Envelope
|
||||
|
||||
HTTP_429_TOO_MANY_REQUESTS = 429
|
||||
HTTP_400_BAD_REQUEST = 400
|
||||
HTTP_404_NOT_FOUND = 404
|
||||
HTTP_501_NOT_IMPLEMENTED = 501
|
||||
|
||||
|
||||
@@ -151,22 +156,32 @@ class BaseIngestAPIView(View):
|
||||
return cls.get_project(project_pk, sentry_key)
|
||||
|
||||
@classmethod
|
||||
def process_event(cls, ingested_at, event_id, event_data_stream, project, request):
|
||||
event_metadata = cls.get_event_meta(event_id, ingested_at, request, project)
|
||||
def process_minidump(cls, ingested_at, minidump_bytes, project, request):
|
||||
# This is for the "pure" minidump case, i.e. no associated event data. TSTTCPW: convert the minidump data to an
|
||||
# event and then proceed as usual.
|
||||
|
||||
if get_settings().DIGEST_IMMEDIATELY:
|
||||
# in this case the stream will be an BytesIO object, so we can actually call .get_value() on it.
|
||||
event_data_bytes = event_data_stream.getvalue()
|
||||
event_data = json.loads(event_data_bytes.decode("utf-8"))
|
||||
performance_logger.info("ingested event with %s bytes", len(event_data_bytes))
|
||||
cls.digest_event(event_metadata, event_data)
|
||||
else:
|
||||
# In this case the stream will be a file that has been written the event's content to it.
|
||||
# To ensure that the (possibly EAGER) handling of the digest has the file available, we flush it here:
|
||||
event_data_stream.flush()
|
||||
performance_logger.info("ingested minidump with %s bytes", len(minidump_bytes))
|
||||
|
||||
performance_logger.info("ingested event with %s bytes", event_data_stream.bytes_written)
|
||||
digest.delay(event_id, event_metadata)
|
||||
event_id = uuid.uuid4().hex
|
||||
event_data = {
|
||||
"event_id": event_id,
|
||||
"platform": "native",
|
||||
"extra": {},
|
||||
"errors": [],
|
||||
}
|
||||
|
||||
merge_minidump_event(event_data, minidump_bytes)
|
||||
|
||||
# write the event data to disk:
|
||||
filename = get_filename_for_event_id(event_data["event_id"])
|
||||
b108_makedirs(os.path.dirname(filename))
|
||||
with open(filename, 'w') as f:
|
||||
json.dump(event_data, f)
|
||||
|
||||
event_metadata = cls.get_event_meta(event_data["event_id"], ingested_at, request, project)
|
||||
digest.delay(event_data["event_id"], event_metadata)
|
||||
|
||||
return event_id
|
||||
|
||||
@classmethod
|
||||
def get_event_meta(cls, event_id, ingested_at, request, project):
|
||||
@@ -237,7 +252,7 @@ class BaseIngestAPIView(View):
|
||||
|
||||
@classmethod
|
||||
@immediate_atomic()
|
||||
def digest_event(cls, event_metadata, event_data, digested_at=None):
|
||||
def digest_event(cls, event_metadata, event_data, digested_at=None, minidump_bytes=None):
|
||||
# ingested_at is passed from the point-of-ingestion; digested_at is determined here. Because this happens inside
|
||||
# `immediate_atomic`, we know digestions are serialized, and assuming non-decreasing server clocks, not decrea-
|
||||
# sing. (not so for ingestion times: clock-watching happens outside the snappea transaction, and threading in the
|
||||
@@ -267,6 +282,12 @@ class BaseIngestAPIView(View):
if get_settings().VALIDATE_ON_DIGEST in ["warn", "strict"]:
cls.validate_event_data(event_data, get_settings().VALIDATE_ON_DIGEST)

if minidump_bytes is not None:
# we merge after validation: validation is about what's provided _externally_, not our own merging.
# TODO error handling
# TODO should not be inside immediate_atomic if it turns out to be slow
merge_minidump_event(event_data, minidump_bytes)

# I resisted the temptation to put `get_denormalized_fields_for_data` in an if-statement: you basically "always"
# need this info... except when duplicate event-ids are sent. But the latter is the exception, and putting this
# in an if-statement would require more rework (and possibly extra queries) than it's worth.
@@ -510,30 +531,31 @@ class BaseIngestAPIView(View):
class IngestEventAPIView(BaseIngestAPIView):

def _post(self, request, project_pk=None):
# This endpoint is deprecated. Personally, I think it's the simpler (and given my goals therefore better) of the
# two, but fighting windmills and all... given that it's deprecated, I'm not going to give it quite as much love
# (at least for now).
#
# The main point of "inefficiency" is that the event data is parsed twice: once here (to get the event_id), and
# once in the actual digest.delay()
ingested_at = datetime.now(timezone.utc)
project = self.get_project_for_request(project_pk, request)
if project.quota_exceeded_until is not None and ingested_at < project.quota_exceeded_until:
return HttpResponse(status=HTTP_429_TOO_MANY_REQUESTS)

# This endpoint is deprecated. Personally, I think it's the simpler (and given my goals therefore better) of the
# two, but fighting windmills and all... given that it's deprecated, I'm not going to give it quite as much love
# (at least for now). Interfaces between the internal methods changed quite a bit recently, and this one did not
# keep up.
#
# In particular I'd like to just call process_event() here, but that takes both an event_id and an unparsed data
# stream, and we don't have an event_id here before parsing (and we don't want to parse twice). Similarly,
# event_metadata construction requires the event_id.
#
# Instead, we just copy/pasted the relevant parts of process_event() here, and take only one branch (the one
# that digests immediately); i.e. we always digest immediately, independent of the setting.
event_data_bytes = MaxDataReader(
"MAX_EVENT_SIZE", content_encoding_reader(MaxDataReader("MAX_EVENT_COMPRESSED_SIZE", request))).read()

event_data = json.loads(
MaxDataReader("MAX_EVENT_SIZE", content_encoding_reader(
MaxDataReader("MAX_EVENT_COMPRESSED_SIZE", request))).read())
performance_logger.info("ingested event with %s bytes", len(event_data_bytes))

event_data = json.loads(event_data_bytes)
filename = get_filename_for_event_id(event_data["event_id"])
b108_makedirs(os.path.dirname(filename))
with open(filename, 'w') as f:
json.dump(event_data, f)

event_metadata = self.get_event_meta(event_data["event_id"], ingested_at, request, project)

self.digest_event(event_metadata, event_data)
digest.delay(event_data["event_id"], event_metadata)

return HttpResponse()

@@ -613,47 +635,81 @@ class IngestEnvelopeAPIView(BaseIngestAPIView):
return HttpResponse(status=HTTP_429_TOO_MANY_REQUESTS)

def factory(item_headers):
if item_headers.get("type") == "event":
if get_settings().DIGEST_IMMEDIATELY:
return MaxDataWriter("MAX_EVENT_SIZE", io.BytesIO())
type_ = item_headers.get("type")

# envelope_headers["event_id"] is required when type=event per the spec (and takes precedence over the
# payload's event_id), so we can rely on it having been set.
if "event_id" not in envelope_headers:
raise ParseError("event_id not found in envelope headers")
if ((type_ not in ["event", "attachment"]) or
(item_headers.get("type") == "attachment" and
item_headers.get("attachment_type") != "event.minidump") or
(item_headers.get("type") == "attachment" and
item_headers.get("attachment_type") == "event.minidump" and
not get_settings().FEATURE_MINIDUMPS)):

try:
# validate that the event_id is a valid UUID as per the spec (validate at the edge)
uuid.UUID(envelope_headers["event_id"])
except ValueError:
raise ParseError("event_id in envelope headers is not a valid UUID")
# items that are neither events nor minidumps can be discarded; (we don't check for individual size limits,
# because these differ per item type, we have the envelope limit to protect us, and we incur almost no cost (NullWriter))
return NullWriter()

filename = get_filename_for_event_id(envelope_headers["event_id"])
b108_makedirs(os.path.dirname(filename))
return MaxDataWriter("MAX_EVENT_SIZE", open(filename, 'wb'))
# envelope_headers["event_id"] is required when type in ["event", "attachment"] per the spec (and takes
# precedence over the payload's event_id), so we can rely on it having been set.
if "event_id" not in envelope_headers:
raise ParseError("event_id not found in envelope headers")

# everything else can be discarded; (we don't check for individual size limits, because these differ
# per item type, we have the envelope limit to protect us, and we incur almost no cost (NullWriter) anyway.
return NullWriter()

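NullWriter itself is not shown in this diff. A minimal sketch of what such a discard-writer plausibly looks like (the interface is assumed from how it is used alongside MaxDataWriter; names are not confirmed by this commit):

class NullWriter:
    # Accepts writes and throws the data away, so the envelope parser can stream over
    # items we do not care about without buffering them. (Sketch; interface assumed.)
    def __init__(self):
        self.bytes_written = 0

    def write(self, chunk):
        self.bytes_written += len(chunk)  # count only; discard the payload

    def close(self):
        pass
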
for item_headers, event_output_stream in parser.get_items(factory):
try:
if item_headers.get("type") != "event":
logger.info("skipping non-event item: %s", item_headers.get("type"))
# validate that the event_id is a valid UUID as per the spec (validate at the edge)
uuid.UUID(envelope_headers["event_id"])
except ValueError:
raise ParseError("event_id in envelope headers is not a valid UUID")

if item_headers.get("type") == "transaction":
# From the spec of type=event: This Item is mutually exclusive with `"transaction"` Items.
# i.e. when we see a transaction, a regular event will not be present and we can stop.
logger.info("discarding the rest of the envelope")
break
filetype = "event" if type_ == "event" else "minidump"
filename = get_filename_for_event_id(envelope_headers["event_id"], filetype=filetype)
b108_makedirs(os.path.dirname(filename))

continue
size_conf = "MAX_EVENT_SIZE" if type_ == "event" else "MAX_ATTACHMENT_SIZE"
return MaxDataWriter(size_conf, open(filename, 'wb'))

self.process_event(ingested_at, envelope_headers["event_id"], event_output_stream, project, request)
break # From the spec of type=event: This Item may occur at most once per Envelope. once seen: done
# We ingest the whole envelope first and organize by type; this enables "digest once" across envelope-parts
items_by_type = defaultdict(list)
for item_headers, output_stream in parser.get_items(factory):
type_ = item_headers.get("type")
if type_ not in ["event", "attachment"]:
logger.info("skipping non-supported envelope item: %s", item_headers.get("type"))
continue

finally:
event_output_stream.close()
if type_ == "attachment" and item_headers.get("attachment_type") != "event.minidump":
logger.info("skipping non-supported attachment type: %s", item_headers.get("attachment_type"))
continue

performance_logger.info("ingested %s with %s bytes", type_, output_stream.bytes_written)
items_by_type[type_].append(output_stream)

event_count = len(items_by_type.get("event", []))
minidump_count = len(items_by_type.get("attachment", []))

if event_count > 1 or minidump_count > 1:
# TODO: we do 2 passes (one for storing, one for calling the right task), and we check certain conditions
# only on the second pass; this means that we may not clean up after ourselves yet.
# TODO we don't do any minidump files cleanup yet in any of the cases.

logger.info(
"can only deal with one event/minidump per envelope but found %s/%s, ignoring this envelope.",
event_count, minidump_count)
return HttpResponse()

event_metadata = self.get_event_meta(envelope_headers["event_id"], ingested_at, request, project)

if event_count == 1:
if minidump_count == 1:
event_metadata["has_minidump"] = True
digest.delay(envelope_headers["event_id"], event_metadata)

else:
# as it stands, we implement the minidump->event path for the minidump-only case on-ingest; we could push
# this to a task too if needed or for reasons of symmetry.
with open(get_filename_for_event_id(envelope_headers["event_id"], filetype="minidump"), 'rb') as f:
minidump_bytes = f.read()

# TODO: error handling
# NOTE "The file should start with the MDMP magic bytes." is not checked yet.
self.process_minidump(ingested_at, minidump_bytes, project, request)

return HttpResponse()

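The NOTE above says the MDMP magic bytes are not checked yet. A minimal sketch of such a guard (a hypothetical helper, not part of this commit) could look like this:

def looks_like_minidump(minidump_bytes):
    # Minidump files start with the ASCII signature "MDMP"; rejecting anything else early
    # avoids handing arbitrary uploads to the minidump parser.
    return len(minidump_bytes) >= 4 and minidump_bytes[:4] == b"MDMP"
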
@@ -675,6 +731,37 @@ class IngestEnvelopeAPIView(BaseIngestAPIView):
#


class MinidumpAPIView(BaseIngestAPIView):
# A Base "Ingest" APIView in the sense that it reuses some key building blocks (auth).
# I'm not 100% sure whether "philosophically" the minidump endpoint is also "ingesting"; we'll see.

def post(self, request, project_pk=None):
if not get_settings().FEATURE_MINIDUMPS:
return JsonResponse({"detail": "minidumps not enabled"}, status=HTTP_404_NOT_FOUND)

# not reusing the CORS stuff here; minidump-from-browser doesn't make sense.

# TODO: actually pick/configure max
handle_request_content_encoding(request, 50 * 1024 * 1024)

ingested_at = datetime.now(timezone.utc)
project = self.get_project_for_request(project_pk, request)

try:
if "upload_file_minidump" not in request.FILES:
return JsonResponse({"detail": "upload_file_minidump not found"}, status=HTTP_400_BAD_REQUEST)

minidump_bytes = request.FILES["upload_file_minidump"].read()
event_id = self.process_minidump(ingested_at, minidump_bytes, project, request)

return JsonResponse({"id": event_id})
except Exception as e:
# we're still figuring out what this endpoint should do; so we log errors to learn from them while saying
# to the client "400 Bad Request" if we can't handle their stuff.
capture_or_log_exception(e, logger)
return JsonResponse({"detail": str(e)}, status=HTTP_400_BAD_REQUEST)

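As a usage sketch: a client would POST the dump as a multipart field named upload_file_minidump, which is the field the view checks. The URL path and the sentry_key query parameter below are assumptions based on the Sentry-style minidump endpoint and are not confirmed by this diff:

import requests

# Hypothetical URL: the route for MinidumpAPIView is not shown in this commit; the
# "?sentry_key=..." DSN-key parameter is assumed from Sentry conventions.
url = "https://bugsink.example.com/api/123/minidump/?sentry_key=YOUR_PUBLIC_KEY"

with open("crash.dmp", "rb") as f:
    # the view reads request.FILES["upload_file_minidump"], so that is the field name to use
    response = requests.post(url, files={"upload_file_minidump": f})

print(response.status_code, response.json())  # e.g. {"id": "<event_id>"} on success
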
@user_passes_test(lambda u: u.is_superuser)
def download_envelope(request, envelope_id=None):
envelope = get_object_or_404(Envelope, pk=envelope_id)

@@ -66,9 +66,9 @@

<div class="text-ellipsis overflow-hidden"> {# filename, function, lineno #}
{% if frame.in_app %}
<span class="font-bold">{{ frame.filename }}</span>{% if frame.function %} in <span class="font-bold">{{ frame.function }}</span>{% endif %}{% if frame.lineno %} line <span class="font-bold">{{ frame.lineno }}</span>{% endif %}.
<span class="font-bold">{{ frame.filename }}</span>{% if frame.function %} in <span class="font-bold">{{ frame.function }}</span>{% endif %}{% if frame.lineno %} line <span class="font-bold">{{ frame.lineno }}</span>{% endif %}{% if frame.instruction_addr %} {{ frame.instruction_addr }}{% endif %}.
{% else %}
<span class="italic">{{ frame.filename }}{% if frame.function %} in {{ frame.function }}{% endif %}{% if frame.lineno%} line {{ frame.lineno }}{% endif %}.</span>
<span class="italic">{{ frame.filename }}{% if frame.function %} in {{ frame.function }}{% endif %}{% if frame.lineno%} line {{ frame.lineno }}{% endif %}{% if frame.instruction_addr %} {{ frame.instruction_addr }}{% endif %}.</span>
{% endif %}
</div>

@@ -120,7 +120,7 @@ def get_exception_type_and_value_for_exception(data):
# From the sentry docs:
# > An optional flag indicating that this error is synthetic. Synthetic errors are errors that carry little
# > meaning by themselves.
# If this flag is set, we ignored the Exception's type and used the function name instead (if available).
# If this flag is set, we ignore the Exception's type and use the function name instead (if available).
if get_path(exception, "mechanism", "synthetic"):
_, function = get_crash_location(data)
if function:

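For context, a minimal sketch of the kind of payload this branch reacts to; the values are illustrative, and only the mechanism.synthetic flag matters for the check above:

# illustrative only: when "synthetic" is set, the reported type ("Error" here) carries
# little meaning, so the function name at the crash location is preferred instead.
event_data = {
    "exception": {
        "values": [{
            "type": "Error",
            "value": "Fatal Error: SIGSEGV",
            "mechanism": {"synthetic": True},
        }],
    },
}
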
@@ -92,7 +92,6 @@ def _make_message_body():
"SINGLE_TEAM": get_settings().SINGLE_TEAM,
"EMAIL_BACKEND": settings.EMAIL_BACKEND,
"TASK_ALWAYS_EAGER": get_snappea_settings().TASK_ALWAYS_EAGER,
"DIGEST_IMMEDIATELY": get_settings().DIGEST_IMMEDIATELY,
"IS_DOCKER": settings.IS_DOCKER,
"DATABASE_ENGINE": settings.DATABASES["default"]["ENGINE"],
},

@@ -18,3 +18,4 @@ verbose_csrf_middleware==1.0.*
ecma426>=0.2.0
djangorestframework==3.16.*
drf-spectacular[sidecar]
symbolic==8.7.2 # the last version to support ProcessState (i.e. minidump parsing)

sentry/minidump.py (new file, 68 lines)
@@ -0,0 +1,68 @@
# copied from:
# https://github.com/getsentry/sentry/blob/f0ac91f2ec6b45ad18e5eea6df72c5c72573e964/src/sentry/models/minidump.py#L53
# with (as it stands) minor modifications.

import symbolic
from files.minidump import build_cfi_map_from_minidump_bytes, event_threads_for_process_state


def merge_minidump_event(data, minidump_bytes):
frame_info_map = build_cfi_map_from_minidump_bytes(minidump_bytes)
process_state = symbolic.ProcessState.from_minidump_buffer(minidump_bytes, frame_infos=frame_info_map)

data['level'] = 'fatal' if process_state.crashed else 'info'

if process_state.timestamp:
data['timestamp'] = float(process_state.timestamp)

# Extract as much system information as we can. TODO: We should create
# a custom context and implement a specific minidump view in the event
# UI.
info = process_state.system_info
context = data.setdefault('contexts', {})
os = context.setdefault('os', {})
device = context.setdefault('device', {})
os['name'] = info.os_name
os['version'] = info.os_version
device['arch'] = info.cpu_family

threads = event_threads_for_process_state(process_state)
data.setdefault("threads", {})["values"] = threads

if process_state.requesting_thread > -1:
crashed_thread = threads[process_state.requesting_thread]

exception_value = 'Assertion Error: %s' % process_state.assertion if process_state.assertion \
else 'Fatal Error: %s' % process_state.crash_reason

exception = {
'value': exception_value,
'thread_id': crashed_thread['id'],
'type': process_state.crash_reason,
'stacktrace': crashed_thread.pop('stacktrace'),
}

for frame in exception['stacktrace']['frames']:
frame['in_app'] = True # minidumps don't distinguish in_app frames; assume all are in_app

exception['stacktrace']['frames'].reverse() # "Frames should be sorted from oldest to newest."
# TODO we don't have display-info for threads yet, I think?
# we may need to reverse the per-thread stacktraces above as well then

data.setdefault('exception', {}) \
.setdefault('values', []) \
.append(exception)

# Extract referenced (not all loaded) images
images = [{
'type': 'elf', # TODO not sure what this should _actually_ be
'image_addr': module.addr,
'image_size': module.size,
'code_file': module.code_file,
'code_id': module.code_id,
'debug_file': module.debug_file,
'debug_id': symbolic.debuginfo.id_from_breakpad(module.debug_id) if module.debug_id else None,
} for module in process_state.modules()]

data.setdefault('debug_meta', {})['images'] = images
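To summarize what merge_minidump_event produces, this is roughly the shape of the resulting event; a sketch derived from the code above, with placeholder values:

# roughly the keys merge_minidump_event fills in on top of the caller's skeleton event
merged_event = {
    "level": "fatal",                        # or "info" if the process did not crash
    "timestamp": 1700000000.0,               # only set if the minidump carries a timestamp
    "contexts": {"os": {"name": "...", "version": "..."}, "device": {"arch": "..."}},
    "threads": {"values": ["..."]},          # one entry per thread in the dump
    "exception": {"values": ["..."]},        # crashed thread's stacktrace, frames reversed
    "debug_meta": {"images": ["..."]},       # referenced modules, for later symbolication
}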