Use the envelope's event_id when using the envelope endpoint

* As per the spec, the envelope's event_id 'takes precedence'
* Also fixes the reported bug on Laravel, which apparently doesn't send event_id
  as part of the event payload.
* Fixes the envelope tests (they were doing nothing when I moved the
  data samples around recently)
* Adds a 'no event_id in the event data, but present in the envelope' case to that test.
* Adds handling to send_json such that we can send envelopes when the event_id
  is missing from the event data.
This commit is contained in:
Klaas van Schelven
2024-09-18 11:36:47 +02:00
parent f4bbf0c386
commit f2a78fed9d
5 changed files with 60 additions and 33 deletions

View File

@@ -61,8 +61,11 @@ class Event(models.Model):
# not actually expected to be null, but we want to be able to delete issues without deleting events (cleanup later)
issue = models.ForeignKey("issues.Issue", blank=False, null=True, on_delete=models.SET_NULL)
# The docs say:
# > Required. Hexadecimal string representing a uuid4 value. The length is exactly 32 characters. Dashes are not
# > allowed. Has to be lowercase.
# But event.schema.json has this anyOf [..] null and only speaks of "it is strongly recommended to generate that
# uuid4 clientside". In any case, we just rely on the envelope's event_id (required per the envelope spec).
# Not a primary key: events may be duplicated across projects
event_id = models.UUIDField(primary_key=False, null=False, editable=False, help_text="As per the sent data")
project = models.ForeignKey(Project, blank=False, null=True, on_delete=models.SET_NULL) # SET_NULL: cleanup 'later'
@@ -191,7 +194,7 @@ class Event(models.Model):
try:
event = cls.objects.create(
event_id=parsed_data["event_id"],
event_id=event_metadata["event_id"], # the metadata is the envelope's event_id, which takes precedence
project_id=event_metadata["project_id"],
issue=issue,
ingested_at=event_metadata["ingested_at"],

View File

@@ -129,7 +129,9 @@ class Command(BaseCommand):
data_bytes = json.dumps(data).encode("utf-8")
if use_envelope:
# the smallest possible envelope:
data_bytes = (b'{"event_id": "%s"}\n{"type": "event"}\n' % (data["event_id"]).encode("utf-8") +
event_id = data.get("event_id", uuid.uuid4().hex)
data_bytes = (b'{"event_id": "%s"}\n{"type": "event"}\n' % event_id.encode("utf-8") +
data_bytes)
if compress in ["gzip", "deflate"]:

View File

@@ -1,3 +1,4 @@
import os
from glob import glob
import json
import io
@@ -35,7 +36,12 @@ def _digest_params(event_data, project, request, now=None):
# adapter to quickly reuse existing tests on refactored code. let's see where the code ends up before spending
# considerable time on rewriting the tests
return {
"event_metadata": {"project_id": project.id, "ingested_at": format_timestamp(now), "debug_info": ""},
"event_metadata": {
"event_id": event_data["event_id"],
"project_id": project.id,
"ingested_at": format_timestamp(now),
"debug_info": "",
},
"event_data": event_data,
"project": project,
"digested_at": now,
@@ -275,33 +281,46 @@ class IngestViewTestCase(TransactionTestCase):
command.stdout = io.StringIO()
command.stderr = io.StringIO()
for filename in glob("./ingest/samples/*/*.json")[:1]: # one is enough here
with open(filename) as f:
data = json.loads(f.read())
SAMPLES_DIR = os.getenv("SAMPLES_DIR", "../event-samples")
data["event_id"] = uuid.uuid4().hex
event_samples = glob(SAMPLES_DIR + "/*/*.json")
if "timestamp" not in data:
# as per send_json command ("weirdly enough a large numer of sentry test data don't actually...")
data["timestamp"] = time.time()
if len(event_samples) == 0:
raise Exception(f"No event samples found in {SAMPLES_DIR}; I insist on having some to test with.")
if not command.is_valid(data, filename):
continue
for with_event_id in [True, False]:
for filename in event_samples[:1]: # one is enough here
with open(filename) as f:
data = json.loads(f.read())
data_bytes = json.dumps(data).encode("utf-8")
data_bytes = (b'{"event_id": "%s"}\n{"type": "event"}\n' % (data["event_id"]).encode("utf-8") + data_bytes)
data["event_id"] = uuid.uuid4().hex # for good measure we reset this to avoid duplicates.
response = self.client.post(
f"/api/{ project.id }/envelope/",
content_type="application/json",
headers={
"X-Sentry-Auth": sentry_auth_header,
"X-BugSink-DebugInfo": filename,
},
data=data_bytes,
)
self.assertEqual(
200, response.status_code, response.content if response.status_code != 302 else response.url)
if "timestamp" not in data:
# as per send_json command ("weirdly enough a large numer of sentry test data don't actually...")
data["timestamp"] = time.time()
if not command.is_valid(data, filename):
continue
event_id = data["event_id"]
if not with_event_id:
del data["event_id"]
data_bytes = json.dumps(data).encode("utf-8")
data_bytes = (
b'{"event_id": "%s"}\n{"type": "event"}\n' % event_id.encode("utf-8") + data_bytes)
response = self.client.post(
f"/api/{ project.id }/envelope/",
content_type="application/json",
headers={
"X-Sentry-Auth": sentry_auth_header,
"X-BugSink-DebugInfo": filename,
},
data=data_bytes,
)
self.assertEqual(
200, response.status_code, response.content if response.status_code != 302 else response.url)
def test_envelope_endpoint_digest_non_immediate(self):
with override_settings(DIGEST_IMMEDIATELY=False):

View File

@@ -89,7 +89,7 @@ class BaseIngestAPIView(View):
@classmethod
def process_event(cls, ingested_at, event_id, event_data_stream, project, request):
event_metadata = cls.get_event_meta(ingested_at, request, project)
event_metadata = cls.get_event_meta(event_id, ingested_at, request, project)
if get_settings().DIGEST_IMMEDIATELY:
# in this case the stream will be an BytesIO object, so we can actually call .get_value() on it.
@@ -106,11 +106,12 @@ class BaseIngestAPIView(View):
digest.delay(event_id, event_metadata)
@classmethod
def get_event_meta(cls, ingested_at, request, project):
def get_event_meta(cls, event_id, ingested_at, request, project):
# Meta means: not part of the event data. Basically: information that is available at the time of ingestion, and
# that must be passed to digest() in a serializable form.
debug_info = request.META.get("HTTP_X_BUGSINK_DEBUGINFO", "")
return {
"event_id": event_id,
"project_id": project.id,
"ingested_at": format_timestamp(ingested_at),
"debug_info": debug_info,
@@ -381,19 +382,18 @@ class IngestEventAPIView(BaseIngestAPIView):
# keep up.
#
# In particular I'd like to just call process_event() here, but that takes both an event_id and an unparsed data
# stream, and we don't have an event_id here before parsing (and we don't want to parse twice).
# stream, and we don't have an event_id here before parsing (and we don't want to parse twice). similarly,
# event_metadata construction requires the event_id.
#
# Instead, we just copy/pasted the relevant parts of process_event() here, and take only one branch (the one
# that digests immediately).
# that digests immediately); i.e. we always digest immediately, independent of the setting.
event_metadata = self.get_event_meta(ingested_at, request, project)
# if get_settings().DIGEST_IMMEDIATELY: this is the only branch we implemented here, i.e. we always digest
# immediately, independent of the setting.
event_data = json.loads(
MaxDataReader("MAX_EVENT_SIZE", content_encoding_reader(
MaxDataReader("MAX_EVENT_COMPRESSED_SIZE", request))).read())
event_metadata = self.get_event_meta(event_data["event_id"], ingested_at, request, project)
self.digest_event(event_metadata, event_data, project=project)
return HttpResponse()

View File

@@ -476,6 +476,9 @@ class IntegrationTest(TransactionTestCase):
with open(filename) as f:
data = json.loads(f.read())
# we do this because our samples do not have unique event_ids; additionally this sets the event_id if it's
# not set in the sample (it sometimes isn't); (the fact that we can deal with that case is separately
# tested)
data["event_id"] = uuid.uuid4().hex
if not command.is_valid(data, filename):