Introduce FileEventStorage

An (optional) way to store the `event_data` (full event as JSON)
outside the DB. This is expected to be useful for larger setups,
because it gives you:

* A more portable database (e.g. for backups); depending on event size,
  the impact on your DB size is ~50x.
* Fewer worries about hitting "physical" limits (e.g. disk size, max
  file size) for your DB.

Presumably (more testing will happen going forward) it will also:

* Speed up migrations (especially on sqlite, which does full table
  copies)
* Speed up event ingestion(?)

Further improvements in this commit:

* `delete_with_limit` was removed, dropping one tie-in to MySQL/Sqlite
  (see #21 for this bullet)
Author: Klaas van Schelven
Date:   2025-02-12 17:11:24 +01:00
parent  aa4aba7edf
commit  5559fba754

8 changed files with 207 additions and 21 deletions

.gitignore

@@ -15,6 +15,9 @@ __pycache__
 /bugsink/_version.py
 /dist/
 
+# This matches the filestorage mentioned in development.py
+/filestorage
+
 # sqlite
 /db.sqlite3

@@ -62,6 +62,7 @@ DEFAULTS = {
     # Locations of files & directories:
     "INGEST_STORE_BASE_DIR": "/tmp/bugsink/ingestion",
+    "EVENT_STORAGES": {},
 
     # Security:
     "MINIMIZE_INFORMATION_EXPOSURE": False,

@@ -95,6 +95,15 @@ BUGSINK = {
     "MAX_EVENTS_PER_PROJECT_PER_5_MINUTES": 1_000_000,
     "MAX_EVENTS_PER_PROJECT_PER_HOUR": 50_000_000,
+    "EVENT_STORAGES": {
+        "local_flat_files": {
+            "STORAGE": "events.storage.FileEventStorage",
+            "OPTIONS": {
+                "basepath": os.path.join(BASE_DIR, "filestorage"),
+            },
+            "USE_FOR_WRITE": True,
+        },
+    },
 }

@@ -0,0 +1,20 @@
# Generated by Django 4.2.19 on 2025-02-11 20:48

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("events", "0018_remove_event_has_exception_remove_event_has_logentry"),
    ]

    operations = [
        migrations.AddField(
            model_name="event",
            name="storage_backend",
            field=models.CharField(
                blank=True, default=None, editable=False, max_length=255, null=True
            ),
        ),
    ]

events/models.py

@@ -12,6 +12,7 @@ from compat.timestamp import parse_timestamp
 from issues.utils import get_title_for_exception_type_and_value
 
 from .retention import get_random_irrelevance
+from .storage_registry import get_write_storage, get_storage
 
 
 class Platform(models.TextChoices):

@@ -48,6 +49,15 @@ def maybe_empty(s):
     return "" if not s else s
 
 
+def write_to_storage(event_id, parsed_data):
+    """
+    event_id means event.id, i.e. the internal one. This saves us from thinking about the security implications of
+    using an externally provided ID across storage backends.
+    """
+    with get_write_storage().open(event_id, "w") as f:
+        json.dump(parsed_data, f)
+
+
 class Event(models.Model):
     # Lines quotes with ">" are from the following to resources:
     # https://develop.sentry.dev/sdk/event-payloads/ (supposedly more human-readable)

@@ -139,6 +149,8 @@ class Event(models.Model):
     irrelevance_for_retention = models.PositiveIntegerField(blank=False, null=False)
     never_evict = models.BooleanField(blank=False, null=False, default=False)
 
+    storage_backend = models.CharField(max_length=255, blank=True, null=True, default=None, editable=False)
+
     # The following list of attributes are mentioned in the docs but are not attrs on our model (because we don't need
     # them to be [yet]):
     #

@@ -168,10 +180,20 @@ class Event(models.Model):
     ]
 
     def get_raw_data(self):
-        return self.data
+        if self.storage_backend is None:
+            return self.data
+
+        storage = get_storage(self.storage_backend)
+        with storage.open(self.id, "r") as f:
+            return f.read()
 
     def get_parsed_data(self):
-        return json.loads(self.data)
+        if self.storage_backend is None:
+            return json.loads(self.data)
+
+        storage = get_storage(self.storage_backend)
+        with storage.open(self.id, "r") as f:
+            return json.load(f)
 
     def get_absolute_url(self):
         return f"/issues/issue/{ self.issue_id }/event/{ self.id }/"

@@ -197,6 +219,8 @@ class Event(models.Model):
         irrelevance_for_retention = get_random_irrelevance(stored_event_count)
 
+        write_storage = get_write_storage()
+
         # A note on truncation (max_length): the fields we truncate here are directly from the SDK, so they "should have
         # been" truncated already. But we err on the side of caution: this is the kind of SDK error that we can, and
         # just want to, paper over (it's not worth dropping the event for).

@@ -208,7 +232,8 @@ class Event(models.Model):
                 grouping=grouping,
                 ingested_at=event_metadata["ingested_at"],
                 digested_at=digested_at,
-                data=json.dumps(parsed_data),
+                data=json.dumps(parsed_data) if write_storage is None else "",
+                storage_backend=None if write_storage is None else write_storage.name,
                 timestamp=parse_timestamp(parsed_data["timestamp"]),
                 platform=parsed_data["platform"][:64],

@@ -235,6 +260,9 @@ class Event(models.Model):
             )
             created = True
 
+            if write_storage is not None:
+                write_to_storage(event.id, parsed_data)
+
             return event, created
         except IntegrityError as e:
             if not re.match(
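For readers of events nothing changes at the call site; a minimal sketch of the resulting behaviour (the variable names here are illustrative, not from the diff):

    # get_parsed_data() dispatches on the new storage_backend column:
    #   None           -> json.loads(event.data)                          (the pre-existing, in-database path)
    #   "backend name" -> json.load() on get_storage(name).open(event.id, "r")
    event = Event.objects.get(pk=some_pk)  # some_pk: any existing Event's primary key
    parsed = event.get_parsed_data()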

events/retention.py

@@ -4,12 +4,12 @@ from django.db.models import Q, Min, Max
 from random import random
 from datetime import timezone, datetime
 
-from django.db.models.sql.compiler import SQLDeleteCompiler
-from django.db import connection
-
 from bugsink.moreiterutils import pairwise, map_N_until
 from performance.context_managers import time_and_query_count
+from .storage_registry import get_storage
 
+bugsink_logger = logging.getLogger("bugsink")
 performance_logger = logging.getLogger("bugsink.performance.retention")

@@ -269,20 +269,6 @@
     return evicted
 
 
-def delete_with_limit(qs, limit):
-    # Django does not support this out of the box (i.e. it does not support LIMIT in DELETE queries). Both Sqlite and
-    # MySQL do in fact support it (whereas many other DBs do not), so we just reach down into Django's internals.
-    sql, params = SQLDeleteCompiler(qs.query, connection, 'default').as_sql()
-    limited_sql = sql + " LIMIT %s"
-    limited_params = params + (limit,)
-
-    with connection.cursor() as cursor:
-        cursor.execute(limited_sql, limited_params)
-        nr_of_deletions = cursor.rowcount
-
-    return nr_of_deletions
-
-
 def evict_for_epoch_and_irrelevance(project, max_epoch, max_irrelevance, max_event_count, include_never_evict):
     from issues.models import TurningPoint
     from .models import Event

@@ -317,5 +303,36 @@ def evict_for_epoch_and_irrelevance(project, max_epoch, max_irrelevance, max_eve
     # we need to manually ensure that no FKs to the deleted items exist:
     TurningPoint.objects.filter(triggering_event__in=qs).update(triggering_event=None)
 
-    nr_of_deletions = delete_with_limit(qs, max_event_count)
+    # We generate the list of events-to-delete (including the LIMIT) before proceeding; this allows us:
+    # A. to have a portable delete_with_limit (e.g. Django does not support that, nor does Postgres).
+    # B. to apply both deletion and cleanup_events_on_storage() on the same list.
+    #
+    # Implementation notes:
+    # 1. We force evaluation here with a `list()`; this means subsequent usages do _not_ try to "just use an inner
+    #    query". Although inner queries are attractive in the abstract, the literature suggests that performance may be
+    #    unpredictable (e.g. on MySQL). By using a list, we lift the (max 500) ids-to-match to the actual query, which
+    #    is quite ugly, but predictable and (at least on sqlite where I tested this) lightning-fast.
+    # 2. order_by: "pick something" to ensure the 2 usages of the "subquery" point to the same thing. (somewhat
+    #    superceded by [1] above; but I like to be defensive and predictable). tie-breaking on digest_order seems
+    #    consistent with the semantics of eviction.
+    pks_to_delete = list(qs.order_by("digest_order")[:max_event_count].values_list("pk", flat=True))
+
+    if len(pks_to_delete) > 0:
+        cleanup_events_on_storage(
+            Event.objects.filter(pk__in=pks_to_delete).exclude(storage_backend=None)
+            .values_list("id", "storage_backend")
+        )
+
+        nr_of_deletions = Event.objects.filter(pk__in=pks_to_delete).delete()[1]["events.Event"]
+    else:
+        nr_of_deletions = 0
 
     return nr_of_deletions
 
 
+def cleanup_events_on_storage(todos):
+    for event_id, storage_backend in todos:
+        try:
+            get_storage(storage_backend).delete(event_id)
+        except Exception as e:
+            # in a try/except such that we can continue with the rest of the batch
+            bugsink_logger.error("Error during cleanup of %s on %s: %s", event_id, storage_backend, e)
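The replacement pattern boils down to "evaluate the limited pk list first, then delete by pk"; a minimal standalone sketch (the filter and the limit of 500 are illustrative only):

    # portable stand-in for "DELETE ... LIMIT n", which Django (and e.g. Postgres) does not support
    pks = list(Event.objects.filter(project=project).order_by("digest_order")[:500].values_list("pk", flat=True))
    nr_of_deletions = Event.objects.filter(pk__in=pks).delete()[1].get("events.Event", 0)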

events/storage.py (new file)

@@ -0,0 +1,58 @@
import contextlib
import os.path


class EventStorage(object):

    def __init__(self, name, **options):
        self.name = name

    def save(self):
        raise NotImplementedError()

    def exists(self, event_id):
        raise NotImplementedError()

    def delete(self, event_id):
        raise NotImplementedError()

    def open(self, event_id):
        raise NotImplementedError()

    # one might imagine contexts where something like "url" is useful (e.g. S3, and pointing the end-user straight at
    # the event file) but such a model means you'll need to think about the security implications of that URL, which is
    # not worth it, so we only support "pass through application layer" (where the auth stuff is) models of usage.


class FileEventStorage(EventStorage):

    def __init__(self, name, basepath=None):
        super().__init__(name)

        if basepath is None:
            raise ValueError("Basepath must be provided")

        self.basepath = basepath

    def _event_path(self, event_id):
        # the dashes in uuid are preserved in the filename for readability; since their location is consistent, this is
        # not a problem.
        return os.path.join(self.basepath, str(event_id) + ".json")

    @contextlib.contextmanager
    def open(self, event_id, mode='r'):
        if mode not in ('r', 'w'):
            # EventStorage's API is generally _very_ limited (unique IDs, write-once) so we can (and should) be very
            # strict about what we allow; we further imply "text mode" and "utf-8 encoding" given the JSON context.
            raise ValueError("EventStorage.open() mode must be 'r' or 'w'")

        # We open with utf-8 encoding explicitly to pre-empt the future of pep-0686 (it's also the only thing that
        # makes sense in the context of JSON)
        with open(self._event_path(event_id), mode, encoding="utf-8") as f:
            yield f

    def exists(self, event_id):
        return os.path.exists(self._event_path(event_id))

    def delete(self, event_id):
        os.remove(self._event_path(event_id))
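For illustration, a minimal sketch of driving the backend directly (the basepath and event id below are made up; in normal operation the registry and the Event model make these calls):

    import json
    import os
    import uuid

    from events.storage import FileEventStorage

    basepath = "/tmp/bugsink-example/filestorage"  # illustrative; the backend does not create this directory itself
    os.makedirs(basepath, exist_ok=True)

    storage = FileEventStorage("local_flat_files", basepath=basepath)
    event_id = uuid.uuid4()  # stands in for the internal event.id

    with storage.open(event_id, "w") as f:  # write-once, text mode, utf-8
        json.dump({"platform": "python"}, f)

    with storage.open(event_id, "r") as f:
        print(json.load(f))

    assert storage.exists(event_id)
    storage.delete(event_id)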

events/storage_registry.py (new file)

@@ -0,0 +1,50 @@
import importlib

from bugsink.app_settings import get_settings


_storages = None
_write_storage = None


def _ensure_storages():
    global _storages
    global _write_storage

    if _storages is not None:
        return

    _storages = {name: _resolve(name, conf) for name, conf in get_settings().EVENT_STORAGES.items()}

    matching = [name for name, conf in get_settings().EVENT_STORAGES.items() if conf.get("USE_FOR_WRITE", False)]
    if len(matching) == 1:
        _write_storage = _storages[matching[0]]
    elif len(matching) > 1:
        raise ValueError("Multiple USE_FOR_WRITE storages found.")
    # else len==0 is implied by the initial value of _write_storage (None)


def get_write_storage():
    """
    Gets the USE_FOR_WRITE storage. None means "in-database" (which is not shoe-horned in the EventStorage API).
    """
    _ensure_storages()
    return _write_storage


def get_storage(storage_name):
    _ensure_storages()
    return _storages[storage_name]


def _resolve(name, conf):
    storage_name = conf["STORAGE"]
    module_name, class_name = storage_name.rsplit('.', 1)
    module = importlib.import_module(module_name)
    clazz = getattr(module, class_name)
    return clazz(name, **conf.get("OPTIONS", {}))
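A minimal sketch of how the registry ties settings to backends (assuming the `local_flat_files` entry from the development settings above is active):

    from events.storage_registry import get_write_storage, get_storage

    write_storage = get_write_storage()  # the single USE_FOR_WRITE backend, or None for "in-database"
    if write_storage is not None:
        print(write_storage.name)  # "local_flat_files"

    # reads go through the name that was recorded on the Event row at digest time:
    storage = get_storage("local_flat_files")  # KeyError if that name is no longer configured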