Project.delete_deferred(): first version (WIP)

Implemented using a batch-wise dependency-scanner in delayed
(snappea) style.

* no real point-of-entry in the (regular, non-admin) UI yet.
* no hiding of Projects which are delete-in-progress from the UI

* lack of DRY
* some unnessary work (needed in the Issue-context, but not here)
  is still being done.

See #50
This commit is contained in:
Klaas van Schelven
2025-07-03 21:01:28 +02:00
parent 76dc2ede16
commit 6b9e4d8011
13 changed files with 388 additions and 7 deletions

View File

@@ -252,6 +252,8 @@ class BaseIngestAPIView(View):
digested_at = datetime.now(timezone.utc) if digested_at is None else digested_at # explicit passing: test only
project = Project.objects.get(pk=event_metadata["project_id"])
if project.is_deleted:
return # don't process events for deleted projects
if not cls.count_project_periods_and_act_on_it(project, digested_at):
return # if over-quota: just return (any cleanup is done calling-side)
@@ -360,6 +362,7 @@ class BaseIngestAPIView(View):
if issue_created:
TurningPoint.objects.create(
project=project,
issue=issue, triggering_event=event, timestamp=ingested_at,
kind=TurningPointKind.FIRST_SEEN)
event.never_evict = True
@@ -371,6 +374,7 @@ class BaseIngestAPIView(View):
# new issues cannot be regressions by definition, hence this is in the 'else' branch
if issue_is_regression(issue, event.release):
TurningPoint.objects.create(
project=project,
issue=issue, triggering_event=event, timestamp=ingested_at,
kind=TurningPointKind.REGRESSED)
event.never_evict = True

View File

@@ -0,0 +1,22 @@
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("projects", "0012_project_is_deleted"),
("issues", "0021_alter_do_nothing"),
]
operations = [
migrations.AddField(
model_name="turningpoint",
name="project",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.DO_NOTHING,
to="projects.project",
),
),
]

View File

@@ -0,0 +1,36 @@
from django.db import migrations
def turningpoint_set_project(apps, schema_editor):
TurningPoint = apps.get_model("issues", "TurningPoint")
# TurningPoint.objects.update(project=F("issue__project"))
# fails with 'Joined field references are not permitted in this query"
# This one's elegant and works in sqlite but not in MySQL:
# TurningPoint.objects.update(
# project=Subquery(
# TurningPoint.objects
# .filter(pk=OuterRef('pk'))
# .values('issue__project')[:1]
# )
# )
# django.db.utils.OperationalError: (1093, "You can't specify target table 'issues_turningpoint' for update in FROM
# clause")
# so in the end we'll just loop:
for turningpoint in TurningPoint.objects.all():
turningpoint.project = turningpoint.issue.project
turningpoint.save(update_fields=["project"])
class Migration(migrations.Migration):
dependencies = [
("issues", "0022_turningpoint_project"),
]
operations = [
migrations.RunPython(turningpoint_set_project, migrations.RunPython.noop),
]

View File

@@ -0,0 +1,20 @@
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
("projects", "0012_project_is_deleted"),
("issues", "0023_turningpoint_set_project"),
]
operations = [
migrations.AlterField(
model_name="turningpoint",
name="project",
field=models.ForeignKey(
on_delete=django.db.models.deletion.DO_NOTHING, to="projects.project"
),
),
]

View File

@@ -348,6 +348,7 @@ class IssueStateManager(object):
# path is never reached via UI-based paths (because those are by definition not event-triggered); thus
# the 2 ways of creating TurningPoints do not collide.
TurningPoint.objects.create(
project_id=issue.project_id,
issue=issue, triggering_event=triggering_event, timestamp=triggering_event.ingested_at,
kind=TurningPointKind.UNMUTED, metadata=json.dumps(unmute_metadata))
triggering_event.never_evict = True # .save() will be called by the caller of this function
@@ -491,6 +492,7 @@ class TurningPoint(models.Model):
# basically: an Event, but that name was already taken in our system :-) alternative names I considered:
# "milestone", "state_change", "transition", "annotation", "episode"
project = models.ForeignKey("projects.Project", blank=False, null=False, on_delete=models.DO_NOTHING)
issue = models.ForeignKey("Issue", blank=False, null=False, on_delete=models.DO_NOTHING)
triggering_event = models.ForeignKey("events.Event", blank=True, null=True, on_delete=models.DO_NOTHING)

View File

@@ -679,6 +679,7 @@ class IssueDeletionTestCase(TransactionTestCase):
self.event = create_event(self.project, issue=self.issue)
TurningPoint.objects.create(
project=self.project,
issue=self.issue, triggering_event=self.event, timestamp=self.event.ingested_at,
kind=TurningPointKind.FIRST_SEEN)

View File

@@ -210,10 +210,13 @@ def _make_history(issue_or_qs, action, user):
now = timezone.now()
if isinstance(issue_or_qs, Issue):
TurningPoint.objects.create(
project=issue_or_qs.project,
issue=issue_or_qs, kind=kind, user=user, metadata=json.dumps(metadata), timestamp=now)
else:
TurningPoint.objects.bulk_create([
TurningPoint(issue=issue, kind=kind, user=user, metadata=json.dumps(metadata), timestamp=now)
TurningPoint(
project_id=issue.project_id, issue=issue, kind=kind, user=user, metadata=json.dumps(metadata),
timestamp=now)
for issue in issue_or_qs
])
@@ -767,6 +770,7 @@ def history_comment_new(request, issue):
# think that's amount of magic to have: it still allows one to erase comments (possibly for non-manual
# kinds) but it saves you from what is obviously a mistake (without complaining with a red box or something)
TurningPoint.objects.create(
project=issue.project,
issue=issue, kind=TurningPointKind.MANUAL_ANNOTATION, user=request.user,
comment=form.cleaned_data["comment"],
timestamp=timezone.now())

View File

@@ -1,9 +1,17 @@
from django.contrib import admin
from django.utils.decorators import method_decorator
from django.views.decorators.csrf import csrf_protect
from admin_auto_filters.filters import AutocompleteFilter
from bugsink.transaction import immediate_atomic
from .models import Project, ProjectMembership
csrf_protect_m = method_decorator(csrf_protect)
class ProjectFilter(AutocompleteFilter):
title = 'Project'
field_name = 'project'
@@ -31,9 +39,8 @@ class ProjectAdmin(admin.ModelAdmin):
list_display = [
'name',
'dsn',
'alert_on_new_issue',
'alert_on_regression',
'alert_on_unmute',
'digested_event_count',
'stored_event_count',
]
readonly_fields = [
@@ -47,6 +54,31 @@ class ProjectAdmin(admin.ModelAdmin):
'slug': ['name'],
}
def get_deleted_objects(self, objs, request):
to_delete = list(objs) + ["...all its related objects... (delayed)"]
model_count = {
Project: len(objs),
}
perms_needed = set()
protected = []
return to_delete, model_count, perms_needed, protected
def delete_queryset(self, request, queryset):
# NOTE: not the most efficient; it will do for a first version.
with immediate_atomic():
for obj in queryset:
obj.delete_deferred()
def delete_model(self, request, obj):
with immediate_atomic():
obj.delete_deferred()
@csrf_protect_m
def delete_view(self, request, object_id, extra_context=None):
# the superclass version, but with the transaction.atomic context manager commented out (we do this ourselves)
# with transaction.atomic(using=router.db_for_write(self.model)):
return self._delete_view(request, object_id, extra_context)
# the preferred way to deal with ProjectMembership is actually through the inline above; however, because this may prove
# to not scale well with (very? more than 50?) memberships per project, we've left the separate admin interface here for

View File

@@ -0,0 +1,18 @@
# Generated by Django 4.2.21 on 2025-07-03 13:57
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
("projects", "0011_fill_stored_event_count"),
]
operations = [
migrations.AddField(
model_name="project",
name="is_deleted",
field=models.BooleanField(default=False),
),
]

View File

@@ -5,11 +5,14 @@ from django.conf import settings
from django.utils.text import slugify
from bugsink.app_settings import get_settings
from bugsink.transaction import delay_on_commit
from compat.dsn import build_dsn
from teams.models import TeamMembership
from .tasks import delete_project_deps
# ## Visibility/Access-design
#
@@ -74,6 +77,7 @@ class Project(models.Model):
name = models.CharField(max_length=255, blank=False, null=False, unique=True)
slug = models.SlugField(max_length=50, blank=False, null=False, unique=True)
is_deleted = models.BooleanField(default=False)
# sentry_key mirrors the "public" part of the sentry DSN. As of late 2023 Sentry's docs say the this about DSNs:
#
@@ -143,6 +147,13 @@ class Project(models.Model):
super().save(*args, **kwargs)
def delete_deferred(self):
"""Marks the project as deleted, and schedules deletion of all related objects"""
self.is_deleted = True
self.save(update_fields=["is_deleted"])
delay_on_commit(delete_project_deps, str(self.id))
def is_joinable(self, user=None):
if user is not None:
# take the user's team membership into account

View File

@@ -4,12 +4,13 @@ from snappea.decorators import shared_task
from bugsink.app_settings import get_settings
from bugsink.utils import send_rendered_email
from .models import Project
from bugsink.transaction import immediate_atomic, delay_on_commit
from bugsink.utils import get_model_topography, delete_deps_with_budget
@shared_task
def send_project_invite_email_new_user(email, project_pk, token):
from .models import Project # avoid circular import
project = Project.objects.get(pk=project_pk)
send_rendered_email(
@@ -30,6 +31,7 @@ def send_project_invite_email_new_user(email, project_pk, token):
@shared_task
def send_project_invite_email(email, project_pk):
from .models import Project # avoid circular import
project = Project.objects.get(pk=project_pk)
send_rendered_email(
@@ -45,3 +47,94 @@ def send_project_invite_email(email, project_pk):
}),
},
)
def get_model_topography_with_project_override():
"""
Returns the model topography with ordering adjusted to prefer deletions via .project, when available.
This assumes that Project is not only the root of the dependency graph, but also that if a model has an .project
ForeignKey, deleting it via that path is sufficient, meaning we can safely avoid visiting the same model again
through other ForeignKey routes (e.g. any of the .issue paths).
The preference is encoded via an explicit list of models, which are visited early and only via their .project path.
"""
from issues.models import Issue, TurningPoint, Grouping
from events.models import Event
from tags.models import IssueTag, EventTag, TagValue, TagKey
from alerts.models import MessagingServiceConfig
from releases.models import Release
from projects.models import ProjectMembership
preferred = [
# Tag-related: remove the "depending" models first and the most depended on last.
EventTag, # above Event, to avoid deletions via .event
IssueTag,
TagValue,
TagKey,
TurningPoint, # above Event, to avoid deletions via .triggering_event
Event, # above Grouping, to avoid deletions via .grouping
Grouping,
# these things "could be anywhere" in the ordering; they're not that connected; we put them at the end.
MessagingServiceConfig,
ProjectMembership,
Release,
Issue, # at the bottom, most everything points to this, we'd rather delete those things via .project
]
def as_preferred(lst):
"""
Sorts the list of (model, fk_name) tuples such that the models are in the preferred order as indicated above,
and models which occur with another fk_name are pruned
"""
return sorted(
[(model, fk_name) for model, fk_name in lst if fk_name == "project" or model not in preferred],
key=lambda x: preferred.index(x[0]) if x[0] in preferred else len(preferred),
)
topo = get_model_topography()
for k, lst in topo.items():
topo[k] = as_preferred(lst)
return topo
@shared_task
def delete_project_deps(project_id):
from .models import Project # avoid circular import
with immediate_atomic():
# matches what we do in events/retention.py (and for which argumentation exists); in practive I have seen _much_
# faster deletion times (in the order of .03s per task on my local laptop) when using a budget of 500, _but_
# it's not a given those were for "expensive objects" (e.g. events); and I'd rather err on the side of caution
# (worst case we have a bit of inefficiency; in any case this avoids hogging the global write lock / timeouts).
budget = 500
num_deleted = 0
dep_graph = get_model_topography_with_project_override()
for model_for_recursion, fk_name_for_recursion in dep_graph["projects.Project"]:
this_num_deleted = delete_deps_with_budget(
project_id,
model_for_recursion,
fk_name_for_recursion,
[project_id],
budget - num_deleted,
dep_graph,
)
num_deleted += this_num_deleted
if num_deleted >= budget:
delay_on_commit(delete_project_deps, project_id)
return
if budget - num_deleted <= 0:
# no more budget for the self-delete.
delay_on_commit(delete_project_deps, project_id)
else:
# final step: delete the issue itself
Project.objects.filter(pk=project_id).delete()

View File

@@ -1 +1,138 @@
# from django.test import TestCase as DjangoTestCase
from django.conf import settings
from django.apps import apps
from django.contrib.auth import get_user_model
from bugsink.test_utils import TransactionTestCase25251 as TransactionTestCase
from bugsink.utils import get_model_topography
from projects.models import Project, ProjectMembership
from events.factories import create_event
from issues.factories import get_or_create_issue
from tags.models import store_tags
from issues.models import TurningPoint, TurningPointKind
from alerts.models import MessagingServiceConfig
from releases.models import Release
from .tasks import get_model_topography_with_project_override
User = get_user_model()
class ProjectDeletionTestCase(TransactionTestCase):
def setUp(self):
super().setUp()
self.project = Project.objects.create(name="Test Project", stored_event_count=1) # 1, in prep. of the below
self.issue, _ = get_or_create_issue(self.project)
self.event = create_event(self.project, issue=self.issue)
self.user = User.objects.create_user(username='test', password='test')
TurningPoint.objects.create(
project=self.project,
issue=self.issue, triggering_event=self.event, timestamp=self.event.ingested_at,
kind=TurningPointKind.FIRST_SEEN)
MessagingServiceConfig.objects.create(project=self.project)
ProjectMembership.objects.create(project=self.project, user=self.user)
Release.objects.create(project=self.project, version="1.0.0")
self.event.never_evict = True
self.event.save()
store_tags(self.event, self.issue, {"foo": "bar"})
def test_delete_project(self):
models = [apps.get_model(app_label=s.split('.')[0], model_name=s.split('.')[1].lower()) for s in [
"tags.EventTag",
"tags.IssueTag",
"tags.TagValue",
"tags.TagKey",
"issues.TurningPoint",
"events.Event",
"issues.Grouping",
"alerts.MessagingServiceConfig",
"projects.ProjectMembership",
"releases.Release",
"issues.Issue",
"projects.Project",
]]
for model in models:
# test-the-test: make sure some instances of the models actually exist after setup
self.assertTrue(model.objects.exists(), f"Some {model.__name__} should exist")
# assertNumQueries() is brittle and opaque. But at least the brittle part is quick to fix (a single number) and
# provides a canary for performance regressions.
# correct for bugsink/transaction.py's select_for_update for non-sqlite databases
correct_for_select_for_update = 1 if 'sqlite' not in settings.DATABASES['default']['ENGINE'] else 0
with self.assertNumQueries(40 + correct_for_select_for_update):
self.project.delete_deferred()
# tests run w/ TASK_ALWAYS_EAGER, so in the below we can just check the database directly
for model in models:
self.assertFalse(model.objects.exists(), f"No {model.__name__}s should exist after issue deletion")
def test_dependency_graphs(self):
# tests for an implementation detail of defered deletion, namely 1 test that asserts what the actual
# model-topography is, and one test that shows how we manually override it; this is to trigger a failure when
# the topology changes (and forces us to double-check that the override is still correct).
orig = get_model_topography()
override = get_model_topography_with_project_override()
def walk(topo, model_name):
results = []
for model, fk_name in topo[model_name]:
results.append((model, fk_name))
results.extend(walk(topo, model._meta.label))
return results
self.assertEqual(walk(orig, 'projects.Project'), [
(apps.get_model('projects', 'ProjectMembership'), 'project'),
(apps.get_model('releases', 'Release'), 'project'),
(apps.get_model('issues', 'Issue'), 'project'),
(apps.get_model('issues', 'Grouping'), 'issue'),
(apps.get_model('events', 'Event'), 'grouping'),
(apps.get_model('issues', 'TurningPoint'), 'triggering_event'),
(apps.get_model('tags', 'EventTag'), 'event'),
(apps.get_model('issues', 'TurningPoint'), 'issue'),
(apps.get_model('events', 'Event'), 'issue'),
(apps.get_model('issues', 'TurningPoint'), 'triggering_event'),
(apps.get_model('tags', 'EventTag'), 'event'),
(apps.get_model('tags', 'EventTag'), 'issue'),
(apps.get_model('tags', 'IssueTag'), 'issue'),
(apps.get_model('issues', 'Grouping'), 'project'),
(apps.get_model('events', 'Event'), 'grouping'),
(apps.get_model('issues', 'TurningPoint'), 'triggering_event'),
(apps.get_model('tags', 'EventTag'), 'event'),
(apps.get_model('issues', 'TurningPoint'), 'project'),
(apps.get_model('events', 'Event'), 'project'),
(apps.get_model('issues', 'TurningPoint'), 'triggering_event'),
(apps.get_model('tags', 'EventTag'), 'event'),
(apps.get_model('tags', 'TagKey'), 'project'),
(apps.get_model('tags', 'TagValue'), 'key'),
(apps.get_model('tags', 'EventTag'), 'value'),
(apps.get_model('tags', 'IssueTag'), 'value'),
(apps.get_model('tags', 'IssueTag'), 'key'),
(apps.get_model('tags', 'TagValue'), 'project'),
(apps.get_model('tags', 'EventTag'), 'value'),
(apps.get_model('tags', 'IssueTag'), 'value'),
(apps.get_model('tags', 'EventTag'), 'project'),
(apps.get_model('tags', 'IssueTag'), 'project'),
(apps.get_model('alerts', 'MessagingServiceConfig'), 'project'),
])
self.assertEqual(walk(override, 'projects.Project'), [
(apps.get_model('tags', 'EventTag'), 'project'),
(apps.get_model('tags', 'IssueTag'), 'project'),
(apps.get_model('tags', 'TagValue'), 'project'),
(apps.get_model('tags', 'TagKey'), 'project'),
(apps.get_model('issues', 'TurningPoint'), 'project'),
(apps.get_model('events', 'Event'), 'project'),
(apps.get_model('issues', 'Grouping'), 'project'),
(apps.get_model('alerts', 'MessagingServiceConfig'), 'project'),
(apps.get_model('projects', 'ProjectMembership'), 'project'),
(apps.get_model('releases', 'Release'), 'project'),
(apps.get_model('issues', 'Issue'), 'project')
])

View File

@@ -124,6 +124,7 @@ def create_release_if_needed(project, version, event, issue=None):
# triggering event anymore for our timestamp.
TurningPoint.objects.bulk_create([TurningPoint(
project=project,
issue=issue, kind=TurningPointKind.NEXT_MATERIALIZED, triggering_event=event,
metadata=json.dumps({"actual_release": release.version}), timestamp=event.ingested_at)
for issue in resolved_by_next_qs