Remove DIGEST_IMMEDIATELY option

Although DIGEST_IMMEDIATELY=True is theoretically a nice thing to
have, the upkeep is not worth it now that we're about to introduce
minidump ingestion.

The only thing that you're saving is the round-trip via the filesystem,
but performance of that is negligible, and if you're configuring
DIGEST_IMMEDIATELY you're actually _not_ in the performance-critical path
anyway.

Getting rid of it _also_ harmonizes/reduces the number of paths to test.

It's approximately 1% of our installed base.
This commit is contained in:
Klaas van Schelven
2025-11-05 09:03:17 +01:00
parent de9a37aab6
commit 7f831f52d4
8 changed files with 16 additions and 52 deletions

View File

@@ -42,7 +42,6 @@ DEFAULTS = {
"TEAM_CREATION": CB_MEMBERS, # who can create new teams. default: members, which means "any member of the site"
# System inner workings:
"DIGEST_IMMEDIATELY": True,
"VALIDATE_ON_DIGEST": "none", # other legal values are "warn" and "strict"
"KEEP_ENVELOPES": 0, # set to a number to store that many; 0 means "store none". This is for debugging.
"API_LOG_UNIMPLEMENTED_CALLS": False, # if True, log unimplemented API calls; see #153

View File

@@ -52,7 +52,7 @@ eat_your_own_dogfood(SENTRY_DSN)
# Our Docker image is hard-coded to run with snappea in the background; this means we hard-code (as opposed to reading
# the from the env) certain variables: TASK_ALWAYS_EAGER, WORKAHOLIC and DIGEST_IMMEDIATELY.
# the from the env) certain variables: TASK_ALWAYS_EAGER and WORKAHOLIC
SNAPPEA = {
"TASK_ALWAYS_EAGER": False, # hard-coded, corresponds to Docker setup
"WORKAHOLIC": True, # hard-coded, corresponds to Docker setup
@@ -131,8 +131,6 @@ CB_NOBODY = "CB_NOBODY"
BUGSINK = {
"DIGEST_IMMEDIATELY": False, # hard-coded, corresponds to Docker setup
# The URL where the Bugsink instance is hosted. This is used in the email notifications and to construct DSNs.
"BASE_URL": os.getenv("BASE_URL", f"http://localhost:{_PORT}"), # no trailing slash

View File

@@ -68,11 +68,6 @@ BUGSINK = {
# you can customize this as e.g. "My Bugsink" or "Bugsink for My Company"
# "SITE_TITLE": "Bugsink",
# When running locally, it is recommended to configure the Bugsink to digest events immediately. (This is basically
# implied by the "TASK_ALWAYS_EAGER" setting above, but setting DIGEST_IMMEDIATELY to True removes one more step
# from the process.)
"DIGEST_IMMEDIATELY": True,
# You are licenced to run Bugsink locally in single-user mode. By changing the settings below, you may open the door
# to more uses; make sure to buy a licence if you do.
"SINGLE_USER": True,

View File

@@ -98,10 +98,6 @@ BUGSINK = {
"SINGLE_TEAM": False,
"TEAM_CREATION": CB_MEMBERS, # who can create new teams. default: members, which means "any member of the site"
# In the singleserver production setup, we do not digest events immediately, but instead offload this to Snappea.
# This ensures a more response and reliable server when there are peak loads in the events.
"DIGEST_IMMEDIATELY": False,
# "MAX_EVENT_SIZE": _MEBIBYTE,
# "MAX_EVENT_COMPRESSED_SIZE": 200 * _KIBIBYTE,
# "MAX_ENVELOPE_SIZE": 100 * _MEBIBYTE,

View File

@@ -83,8 +83,6 @@ SERVER_EMAIL = DEFAULT_FROM_EMAIL = 'Klaas van Schelven <klaas@bugsink.com>'
BUGSINK = {
"DIGEST_IMMEDIATELY": False,
# "MAX_EVENT_SIZE": _MEBIBYTE,
# "MAX_EVENT_COMPRESSED_SIZE": 200 * _KIBIBYTE,
# "MAX_ENVELOPE_SIZE": 100 * _MEBIBYTE,

View File

@@ -394,11 +394,6 @@ class IngestViewTestCase(TransactionTestCase):
with self.assertRaises(ViolatedExpectation):
check()
@tag("samples")
def test_envelope_endpoint_digest_non_immediate(self):
with override_settings(DIGEST_IMMEDIATELY=False):
self.test_envelope_endpoint()
@tag("samples")
def test_filestore(self):
# quick & dirty way to test the filestore; in absence of a proper test for it, we just run a more-or-less

View File

@@ -28,8 +28,7 @@ from issues.regressions import issue_is_regression
from bugsink.transaction import immediate_atomic, delay_on_commit
from bugsink.exceptions import ViolatedExpectation
from bugsink.streams import (
content_encoding_reader, MaxDataReader, MaxDataWriter, NullWriter, MaxLengthExceeded, UnclosableBytesIO)
from bugsink.streams import content_encoding_reader, MaxDataReader, MaxDataWriter, NullWriter, MaxLengthExceeded
from bugsink.app_settings import get_settings
from events.models import Event
@@ -154,16 +153,8 @@ class BaseIngestAPIView(View):
@classmethod
def process_event(cls, ingested_at, event_id, event_data_stream, project, request):
event_metadata = cls.get_event_meta(event_id, ingested_at, request, project)
if get_settings().DIGEST_IMMEDIATELY:
# in this case the stream will be an BytesIO object, so we can actually call .get_value() on it.
event_data_bytes = event_data_stream.getvalue()
event_data = json.loads(event_data_bytes.decode("utf-8"))
performance_logger.info("ingested event with %s bytes", len(event_data_bytes))
cls.digest_event(event_metadata, event_data)
else:
performance_logger.info("ingested event with %s bytes", event_data_stream.bytes_written)
digest.delay(event_id, event_metadata)
performance_logger.info("ingested event with %s bytes", event_data_stream.bytes_written)
digest.delay(event_id, event_metadata)
@classmethod
def get_event_meta(cls, event_id, ingested_at, request, project):
@@ -616,9 +607,6 @@ class IngestEnvelopeAPIView(BaseIngestAPIView):
def factory(item_headers):
if item_headers.get("type") == "event":
if get_settings().DIGEST_IMMEDIATELY:
return MaxDataWriter("MAX_EVENT_SIZE", UnclosableBytesIO())
# envelope_headers["event_id"] is required when type=event per the spec (and takes precedence over the
# payload's event_id), so we can rely on it having been set.
if "event_id" not in envelope_headers:
@@ -650,7 +638,10 @@ class IngestEnvelopeAPIView(BaseIngestAPIView):
continue
self.process_event(ingested_at, envelope_headers["event_id"], event_output_stream, project, request)
performance_logger.info("ingested event with %s bytes", event_output_stream.bytes_written)
event_metadata = self.get_event_meta(envelope_headers["event_id"], ingested_at, request, project)
digest.delay(envelope_headers["event_id"], event_metadata)
break # From the spec of type=event: This Item may occur at most once per Envelope. once seen: done
return HttpResponse()
@@ -679,22 +670,15 @@ class MinidumpAPIView(BaseIngestAPIView):
@classmethod
def _ingest(cls, ingested_at, event_data, project, request):
# TSTTCPW: just ingest the invent as normally after we've done the minidump-parsing "immediately". We make
# ready for the expectations of process_event (DIGEST_IMMEDIATELY/event_output_stream) with an if-statement
# TSTTCPW: convert the minidump data to an event and then proceed as usual.
filename = get_filename_for_event_id(event_data["event_id"])
b108_makedirs(os.path.dirname(filename))
with open(filename, 'w') as f:
json.dump(event_data, f)
event_output_stream = MaxDataWriter("MAX_EVENT_SIZE", UnclosableBytesIO())
if get_settings().DIGEST_IMMEDIATELY:
# in this case the stream will be an BytesIO object, so we can actually call .get_value() on it.
event_output_stream.write(json.dumps(event_data).encode("utf-8"))
else:
# no need to actually touch event_output_stream for this case, we just need to write a file
filename = get_filename_for_event_id(event_data["event_id"])
b108_makedirs(os.path.dirname(filename))
with open(filename, 'w') as f:
json.dump(event_data, f)
cls.process_event(ingested_at, event_data["event_id"], event_output_stream, project, request)
# performance_logger.info("ingested event with %s bytes", event_output_stream.bytes_written) TODO for minidump
event_metadata = cls.get_event_meta(event_data["event_id"], ingested_at, request, project)
digest.delay(event_data["event_id"], event_metadata)
def post(self, request, project_pk=None):
# not reusing the CORS stuff here; minidump-from-browser doesn't make sense.

View File

@@ -92,7 +92,6 @@ def _make_message_body():
"SINGLE_TEAM": get_settings().SINGLE_TEAM,
"EMAIL_BACKEND": settings.EMAIL_BACKEND,
"TASK_ALWAYS_EAGER": get_snappea_settings().TASK_ALWAYS_EAGER,
"DIGEST_IMMEDIATELY": get_settings().DIGEST_IMMEDIATELY,
"IS_DOCKER": settings.IS_DOCKER,
"DATABASE_ENGINE": settings.DATABASES["default"]["ENGINE"],
},