mirror of
https://github.com/bugsink/bugsink.git
synced 2025-12-21 13:00:13 -06:00
Envelope parsing: validate headers as per the docs
headers means: envelope headers and item headers. Provides more robustness and a layer of defense-in-depth. Only those headers that we might rely on in the near future (event-based) are included. See #173
This commit is contained in:
@@ -58,3 +58,18 @@ def get_header_value(sentry_dsn):
|
||||
def get_sentry_key(sentry_dsn):
    """Return the public key of a Sentry DSN (the userinfo/username part of the URL)."""
    return urllib.parse.urlsplit(sentry_dsn).username
def validate_sentry_dsn(sentry_dsn):
    """Validate the overall shape of a Sentry DSN; raise ValueError when malformed.

    A DSN must have an http/https scheme, a hostname, a public key (the URL's
    username part) and a path whose last segment is the project id.
    Returns True when all checks pass.
    """
    parts = urllib.parse.urlsplit(sentry_dsn)

    has_required_parts = parts.scheme and parts.hostname and parts.username
    if not has_required_parts:
        raise ValueError("Invalid Sentry DSN format. It must contain a scheme, hostname, and public_key.")

    if parts.scheme != "http" and parts.scheme != "https":
        raise ValueError("Invalid Sentry DSN scheme. It must be 'http' or 'https'.")

    # The last path segment is the project id; it must be non-empty.
    path = parts.path
    if not path or "/" not in path or not path.rsplit("/", 1)[1]:
        raise ValueError("Invalid DSN: path must include '/<project_id>'")

    return True
2
ingest/exceptions.py
Normal file
2
ingest/exceptions.py
Normal file
@@ -0,0 +1,2 @@
|
||||
class ParseError(Exception):
    """Raised when an ingested envelope (or its headers) cannot be parsed or validated."""
101
ingest/header_validators.py
Normal file
101
ingest/header_validators.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
from compat.dsn import validate_sentry_dsn
|
||||
from .exceptions import ParseError
|
||||
|
||||
|
||||
# Based on the documentation here:
|
||||
#
|
||||
# https://develop.sentry.dev/sdk/data-model/envelopes/
|
||||
# https://develop.sentry.dev/sdk/data-model/envelope-items/
|
||||
#
|
||||
# From the docs, we deduced validation for
|
||||
#
|
||||
# * envelope headers -> all of them
|
||||
# * item headers -> only those that are relevant for "event" items
|
||||
|
||||
|
||||
# Pre-compiled patterns used by the validators below.

# RFC3339 timestamp restricted to UTC ("Z" suffix), with optional fractional seconds.
_RFC3339_Z = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z$")
# UUID without dashes: exactly 32 hex digits.
_UUID32 = re.compile(r"^[0-9a-fA-F]{32}$")
# Canonical dashed UUID form: 8-4-4-4-12 hex digits.
_UUID36 = re.compile(r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
def validate_dsn(v):
    """Check the envelope "dsn" header; re-raise DSN problems as ParseError."""
    try:
        validate_sentry_dsn(v)
    except ValueError as e:
        message = f'Envelope header "dsn" invalid: {e}'
        raise ParseError(message)
def validate_sdk(v):
    """Check that the envelope "sdk" header is a JSON object (i.e. a dict)."""
    if isinstance(v, dict):
        return
    raise ParseError('Envelope header "sdk" must be an object')
def validate_sent_at(v):
    """Validate the envelope "sent_at" header: an RFC3339 UTC timestamp ending in "Z".

    Raises ParseError when the value is not a string, does not match the
    expected shape, or cannot actually be parsed into a datetime.
    """
    if not isinstance(v, str) or not _RFC3339_Z.match(v):
        raise ParseError(f'Envelope header "sent_at" must be an RFC3339 UTC timestamp ending in Z: {v}')

    try:
        # Fast path: no fractional seconds.
        datetime.strptime(v, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        try:
            # Fractional seconds; fromisoformat needs an explicit offset rather
            # than "Z" on Python < 3.11.
            datetime.fromisoformat(v.replace("Z", "+00:00"))
        except ValueError as e:
            # BUGFIX: the regex allows any number of fractional digits, but on
            # Python < 3.11 fromisoformat only accepts 3 or 6 — previously that
            # leaked a raw ValueError instead of this module's ParseError.
            raise ParseError(f'Envelope header "sent_at" is not a parseable timestamp: {v}') from e
def validate_event_id(v):
    """Check that the "event_id" header is a UUID string (dashed or undashed hex form)."""
    looks_like_uuid = isinstance(v, str) and (_UUID32.match(v) or _UUID36.match(v))
    if not looks_like_uuid:
        raise ParseError(f'Envelope header "event_id" must be a valid UUID string: {v}')
# Dispatch table: envelope-header key -> validator callable. Headers whose key
# is not listed here are left unvalidated (see validate_envelope_headers).
envelope_validators = {
    "dsn": validate_dsn,
    "sdk": validate_sdk,
    "sent_at": validate_sent_at,
    "event_id": validate_event_id,
}
def validate_envelope_headers(headers):
    """Run the known per-key validators over *headers*; unknown keys are ignored."""
    for key, value in headers.items():
        validator = envelope_validators.get(key)
        if validator is not None:
            validator(value)
# Item types known from the Sentry envelope-items documentation. Currently
# informational only: validate_type deliberately accepts unknown types as well.
ALLOWED_TYPES = {
    "event", "transaction", "attachment", "session", "sessions", "feedback", "user_report", "client_report",
    "replay_event", "replay_recording", "profile", "profile_chunk", "check_in", "log", "otel_log"
}
def validate_type(v):
    """Accept any value for the item "type" header — deliberately a no-op.

    Sentry's protocol may introduce new item types over time, so rejecting
    values outside ALLOWED_TYPES would be too strict. Stricter alternatives
    that were considered (and rejected):

    * log a warning when v is not in ALLOWED_TYPES
    * raise ParseError when v is not in ALLOWED_TYPES
    """
    return
def _validate_length(v):
|
||||
if not isinstance(v, int) or v < 0:
|
||||
raise ParseError(f'Item header "length" must be a non-negative integer, got: {v}')
|
||||
|
||||
|
||||
# Dispatch table: item-header key -> validator. Only headers relevant for
# "event" items are listed (see validate_item_headers).
item_validators = {
    "type": validate_type,
    "length": _validate_length,
}
def validate_item_headers(headers):
    """Validate item headers, but only for items of type "event".

    Events are the only item type we actually process; keeping validation in
    sync with parts of the protocol we don't use isn't worth the effort, so
    every other item type is waved through untouched.
    """
    if headers.get("type") != "event":
        return

    for key, value in headers.items():
        validator = item_validators.get(key)
        if validator is not None:
            validator(value)
@@ -3,9 +3,8 @@ import io
|
||||
|
||||
from bugsink.streams import MaxDataWriter
|
||||
|
||||
|
||||
class ParseError(Exception):
|
||||
pass
|
||||
from .exceptions import ParseError
|
||||
from .header_validators import validate_envelope_headers, validate_item_headers
|
||||
|
||||
|
||||
class NewlineFinder:
|
||||
@@ -150,6 +149,7 @@ class StreamingEnvelopeParser:
|
||||
if self.envelope_headers is None:
|
||||
# see test_eof_after_envelope_headers for why we don't error on EOF-after-header here
|
||||
self.envelope_headers = self._parse_headers(empty_is_error=True, eof_after_header_is_error=False)
|
||||
validate_envelope_headers(self.envelope_headers)
|
||||
|
||||
return self.envelope_headers
|
||||
|
||||
@@ -161,10 +161,13 @@ class StreamingEnvelopeParser:
|
||||
|
||||
while not self.at_eof:
|
||||
item_headers = self._parse_headers(empty_is_error=False, eof_after_header_is_error=True)
|
||||
|
||||
if item_headers is None:
|
||||
self.at_eof = True
|
||||
break
|
||||
|
||||
validate_item_headers(item_headers)
|
||||
|
||||
if "length" in item_headers:
|
||||
length = item_headers["length"]
|
||||
finder = LengthFinder(length, error_for_eof="EOF while reading item with explicitly specified length")
|
||||
|
||||
Reference in New Issue
Block a user