Storing of envelopes: added as a debugging tool

This commit is contained in:
Klaas van Schelven
2024-10-08 13:19:07 +02:00
parent 0e48f346d1
commit 98776ebdfb
7 changed files with 167 additions and 3 deletions

View File

@@ -39,6 +39,7 @@ DEFAULTS = {
# System inner workings:
"DIGEST_IMMEDIATELY": True,
"VALIDATE_ON_DIGEST": "none", # other legal values are "warn" and "strict"
"KEEP_ENVELOPES": 0, # set to a number to store that many; 0 means "store none". This is for debugging.
# MAX* below mirror the (current) values for the Sentry Relay
"MAX_EVENT_SIZE": _MEBIBYTE,

View File

@@ -127,6 +127,8 @@ BUGSINK = {
# will fit in the final version, so that's why it's not documented.
"USE_ADMIN": True,
"VALIDATE_ON_DIGEST": "strict",
"KEEP_ENVELOPES": 10,
}

View File

@@ -9,6 +9,7 @@ from users.views import debug_email as debug_users_email
from teams.views import debug_email as debug_teams_email
from bugsink.app_settings import get_settings
from users.views import signup, confirm_email, resend_confirmation, request_reset_password, reset_password, preferences
from ingest.views import download_envelope
from .views import home, trigger_error, favicon, settings_view
from .debug_views import csrf_debug
@@ -39,6 +40,9 @@ urlpatterns = [
path('api/', include('ingest.urls')),
# not in /api/ because it's not part of the ingest API, but still part of the ingest app
path('ingest/envelope/<str:envelope_id>/', download_envelope, name='download_envelope'),
path('projects/', include('projects.urls')),
path('teams/', include('teams.urls')),
path('events/', include('events.urls')),

View File

@@ -0,0 +1,10 @@
from django.contrib import admin
from .models import Envelope
@admin.register(Envelope)
class EnvelopeAdmin(admin.ModelAdmin):
    """Admin for stored envelopes; every field shown on the detail page is read-only."""

    # columns of the changelist
    list_display = ("id", "project_pk", "ingested_at")

    # the detail page shows exactly these fields, and none of them is editable
    fields = ["project_pk", "ingested_at", "data"]
    readonly_fields = list(fields)

View File

@@ -0,0 +1,37 @@
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the `Envelope` model's table, including its index on `ingested_at`."""

    initial = True

    dependencies = [("ingest", "0001_set_sqlite_wal")]

    operations = [
        migrations.CreateModel(
            name="Envelope",
            fields=[
                ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
                ("ingested_at", models.DateTimeField()),
                ("project_pk", models.IntegerField()),
                ("data", models.BinaryField()),
            ],
            options={
                "indexes": [models.Index(fields=["ingested_at"], name="ingest_enve_ingeste_f13790_idx")],
            },
        ),
    ]

View File

@@ -0,0 +1,83 @@
import logging
from django.db import models
from bugsink.transaction import immediate_atomic
from bugsink.app_settings import get_settings
# Module-level logger with the explicit name "bugsink.ingest" (rather than `__name__`).
logger = logging.getLogger("bugsink.ingest")
class StoreEnvelope:
    """Wrap a readable stream and keep a copy of everything read from it, so it can be stored as an `Envelope`.

    Attributes that are not defined on this wrapper are looked up on the wrapped `request`, so an instance can be
    passed wherever the wrapped stream itself is expected.
    """

    def __init__(self, ingested_at, project_pk, request):
        # A bytearray rather than bytes: `+=` on bytes copies the whole buffer for every chunk (quadratic when a big
        # envelope is consumed in many small reads), whereas extending a bytearray is amortized constant per byte.
        self._read = bytearray()
        self._ingested_at = ingested_at
        self._project_pk = project_pk
        self.request = request

    def read(self, size):
        """Call `request.read(size)`, remember whatever came back, and return it unchanged."""
        result = self.request.read(size)
        if result:
            self._read += result
        return result

    def __getattr__(self, attr):
        # only invoked for names not found on this object: delegate those to the wrapped stream
        return getattr(self.request, attr)

    # `immediate_atomic` here, rather than in the calling spot, to avoid its usage on the DontStoreEnvelope case.
    # Also: all the transaction stuff is kinda overkill anyway, for something that's completely unconnected to our real
    # data, i.e. can't really conflict... but in the sqlite world being explicit about where the transactions are is
    # always a good thing, i.e. keeps them small)
    @immediate_atomic()
    def store(self):
        """Store everything that was (and still can be) read as a new `Envelope`, pruning old ones first.

        At most `KEEP_ENVELOPES` envelopes remain after this call, the new one included.
        """
        # read the rest of the request; the regular .ingest() method breaks early by design
        self._read += self.request.read()

        # read the setting once, so that the check and the index below are guaranteed to agree
        keep = get_settings().KEEP_ENVELOPES

        if Envelope.objects.count() >= keep:  # >= b/c about to add
            # -1 because 0-indexed; we delete including the boundary, so we'll have space for the new one
            boundary = Envelope.objects.order_by("-ingested_at")[keep - 1]
            Envelope.objects.filter(ingested_at__lte=boundary.ingested_at).delete()

        envelope = Envelope.objects.create(
            ingested_at=self._ingested_at,
            project_pk=self._project_pk,
            data=bytes(self._read),  # hand the model plain bytes, as before
        )

        # arguably "debug", but if you turned StoreEnvelope on, you probably want to use its results "soon", and I'd
        # rather not have another thing for people to configure.
        logger.info("envelope stored: %s", envelope.pk)
class DontStoreEnvelope:
    """No-op counterpart of StoreEnvelope: offers the same interface, but records and stores nothing."""

    def __init__(self, request):
        self.request = request

    def __getattr__(self, attr):
        # reached only for names not found on this wrapper; hand those to the wrapped stream
        wrapped = self.request
        return getattr(wrapped, attr)

    def store(self):
        """Deliberately do nothing."""
        return None
class Envelope(models.Model):
    """A raw envelope exactly as it was read from the wire, kept around as a debugging aid."""

    # id is implied, which makes it an Integer. Great for sorting

    ingested_at = models.DateTimeField(blank=False, null=False)

    # we just use PK to avoid passing Projects around for debug code, and avoid FK-constraints too.
    project_pk = models.IntegerField(blank=False)

    # binary, because we don't want to make any assumptions about what we get "over the wire" (whether it's even utf-8)
    data = models.BinaryField(blank=False, null=False)

    class Meta:
        indexes = [
            models.Index(fields=["ingested_at"]),
        ]

    def get_absolute_url(self):
        """Return the URL at which this envelope can be downloaded."""
        # Local import: this block only adds the name it needs, without touching the module's import section.
        from django.urls import reverse

        # Resolve through the named route instead of hard-coding the path, so the result keeps following the URL
        # configuration (including any script prefix the site is served under).
        return reverse("download_envelope", kwargs={"envelope_id": self.pk})

View File

@@ -15,6 +15,7 @@ from django.core.exceptions import ValidationError
from django.http import HttpResponse, JsonResponse
from django.views.decorators.csrf import csrf_exempt
from django.utils.decorators import method_decorator
from django.contrib.auth.decorators import user_passes_test
from compat.auth import parse_auth_header_value
from compat.dsn import get_sentry_key
@@ -39,6 +40,7 @@ from .parsers import StreamingEnvelopeParser, ParseError
from .filestore import get_filename_for_event_id
from .tasks import digest
from .event_counter import check_for_thresholds
from .models import StoreEnvelope, DontStoreEnvelope, Envelope
# HTTP status code 429 ("Too Many Requests"), as a named module-level constant.
HTTP_429_TOO_MANY_REQUESTS = 429
@@ -458,12 +460,29 @@ class IngestEnvelopeAPIView(BaseIngestAPIView):
def _post(self, request, project_pk=None):
    """Build the (size-limited, optionally recording) input stream, delegate to `_post2`, then store the envelope.

    `input_stream.store()` always runs, whatever `_post2` returns or raises; it is a no-op unless KEEP_ENVELOPES is
    greater than 0.
    """
    ingested_at = datetime.now(timezone.utc)

    # innermost first: limit on the request as received, then the content-encoding reader, then the outer limit
    compressed_limited = MaxDataReader("MAX_ENVELOPE_COMPRESSED_SIZE", request)
    size_limited = MaxDataReader("MAX_ENVELOPE_SIZE", content_encoding_reader(compressed_limited))

    # note: we use the unvalidated (against DSN) "project_pk"; b/c of the debug-nature we assume "not a problem"
    if get_settings().KEEP_ENVELOPES > 0:
        input_stream = StoreEnvelope(ingested_at, project_pk, size_limited)
    else:
        input_stream = DontStoreEnvelope(size_limited)

    try:
        return self._post2(request, input_stream, ingested_at, project_pk)
    finally:
        # storing stuff in the DB on-ingest (rather than on digest-only) is not "as architected"; it's OK because
        # this is a debug-only thing.
        #
        # note: in finally, so this happens for all paths, including errors and 404 (i.e. wrong DSN). By design,
        # b/c the error-paths are often the interesting ones when debugging. We even store when over quota (429);
        # that's more of a trade-off to avoid adding extra complexity for a debug-tool.
        input_stream.store()
def _post2(self, request, input_stream, ingested_at, project_pk=None):
# Note: wrapping the COMPRESSED_SIZE checks around request makes it so that when clients do not compress their
# requests, they are still subject to the (smaller) maximums that apply pre-uncompress. This is exactly what we
# want.
parser = StreamingEnvelopeParser(
MaxDataReader("MAX_ENVELOPE_SIZE", content_encoding_reader(
MaxDataReader("MAX_ENVELOPE_COMPRESSED_SIZE", request))))
parser = StreamingEnvelopeParser(input_stream)
envelope_headers = parser.get_envelope_headers()
@@ -558,3 +577,11 @@ class IngestEnvelopeAPIView(BaseIngestAPIView):
# more stuff that we don't care about (up to 20MiB compressed) whereas the max event size (uncompressed) is 1MiB.
# Another advantage: this allows us to raise the relevant Header parsing and size limitation Exceptions to the SDKs.
#
@user_passes_test(lambda u: u.is_superuser)
def download_envelope(request, envelope_id=None):
    """Serve the raw bytes of a stored envelope as a file download (superusers only).

    Responds with a 404 when no envelope with the given id exists, including when `envelope_id` is not a number.
    """
    # Local import: this block only adds the name it needs, without touching the module's import section.
    from django.http import Http404

    try:
        envelope = get_object_or_404(Envelope, pk=envelope_id)
    except ValueError:
        # The route accepts any string; a value that cannot be turned into an integer pk makes the lookup itself
        # raise ValueError, which would otherwise surface as a server error rather than "not found".
        raise Http404("No such envelope") from None

    response = HttpResponse(envelope.data, content_type="application/x-sentry-envelope")
    # name the file after the stored pk rather than echoing the raw URL segment into a header
    response["Content-Disposition"] = f'attachment; filename="envelope-{envelope.pk}.json"'
    return response