From 6b9e4d801164549ea4d526f70a3301c20efa595b Mon Sep 17 00:00:00 2001 From: Klaas van Schelven Date: Thu, 3 Jul 2025 21:01:28 +0200 Subject: [PATCH] Project.delete_deferred(): first version (WIP) Implemented using a batch-wise dependency-scanner in delayed (snappea) style. * no real point-of-entry in the (regular, non-admin) UI yet. * no hiding of Projects which are delete-in-progress from the UI * lack of DRY * some unnessary work (needed in the Issue-context, but not here) is still being done. See #50 --- ingest/views.py | 4 + .../migrations/0022_turningpoint_project.py | 22 +++ .../0023_turningpoint_set_project.py | 36 +++++ ...024_turningpoint_project_alter_not_null.py | 20 +++ issues/models.py | 2 + issues/tests.py | 1 + issues/views.py | 6 +- projects/admin.py | 38 ++++- .../migrations/0012_project_is_deleted.py | 18 +++ projects/models.py | 11 ++ projects/tasks.py | 97 +++++++++++- projects/tests.py | 139 +++++++++++++++++- releases/models.py | 1 + 13 files changed, 388 insertions(+), 7 deletions(-) create mode 100644 issues/migrations/0022_turningpoint_project.py create mode 100644 issues/migrations/0023_turningpoint_set_project.py create mode 100644 issues/migrations/0024_turningpoint_project_alter_not_null.py create mode 100644 projects/migrations/0012_project_is_deleted.py diff --git a/ingest/views.py b/ingest/views.py index e45923c..49c8131 100644 --- a/ingest/views.py +++ b/ingest/views.py @@ -252,6 +252,8 @@ class BaseIngestAPIView(View): digested_at = datetime.now(timezone.utc) if digested_at is None else digested_at # explicit passing: test only project = Project.objects.get(pk=event_metadata["project_id"]) + if project.is_deleted: + return # don't process events for deleted projects if not cls.count_project_periods_and_act_on_it(project, digested_at): return # if over-quota: just return (any cleanup is done calling-side) @@ -360,6 +362,7 @@ class BaseIngestAPIView(View): if issue_created: TurningPoint.objects.create( + project=project, issue=issue, triggering_event=event, timestamp=ingested_at, kind=TurningPointKind.FIRST_SEEN) event.never_evict = True @@ -371,6 +374,7 @@ class BaseIngestAPIView(View): # new issues cannot be regressions by definition, hence this is in the 'else' branch if issue_is_regression(issue, event.release): TurningPoint.objects.create( + project=project, issue=issue, triggering_event=event, timestamp=ingested_at, kind=TurningPointKind.REGRESSED) event.never_evict = True diff --git a/issues/migrations/0022_turningpoint_project.py b/issues/migrations/0022_turningpoint_project.py new file mode 100644 index 0000000..1ce2bc0 --- /dev/null +++ b/issues/migrations/0022_turningpoint_project.py @@ -0,0 +1,22 @@ +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("projects", "0012_project_is_deleted"), + ("issues", "0021_alter_do_nothing"), + ] + + operations = [ + migrations.AddField( + model_name="turningpoint", + name="project", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.DO_NOTHING, + to="projects.project", + ), + ), + ] diff --git a/issues/migrations/0023_turningpoint_set_project.py b/issues/migrations/0023_turningpoint_set_project.py new file mode 100644 index 0000000..c5d6845 --- /dev/null +++ b/issues/migrations/0023_turningpoint_set_project.py @@ -0,0 +1,36 @@ +from django.db import migrations + + +def turningpoint_set_project(apps, schema_editor): + TurningPoint = apps.get_model("issues", "TurningPoint") + + # 
TurningPoint.objects.update(project=F("issue__project")) + # fails with 'Joined field references are not permitted in this query" + + # This one's elegant and works in sqlite but not in MySQL: + # TurningPoint.objects.update( + # project=Subquery( + # TurningPoint.objects + # .filter(pk=OuterRef('pk')) + # .values('issue__project')[:1] + # ) + # ) + # django.db.utils.OperationalError: (1093, "You can't specify target table 'issues_turningpoint' for update in FROM + # clause") + + # so in the end we'll just loop: + + for turningpoint in TurningPoint.objects.all(): + turningpoint.project = turningpoint.issue.project + turningpoint.save(update_fields=["project"]) + + +class Migration(migrations.Migration): + + dependencies = [ + ("issues", "0022_turningpoint_project"), + ] + + operations = [ + migrations.RunPython(turningpoint_set_project, migrations.RunPython.noop), + ] diff --git a/issues/migrations/0024_turningpoint_project_alter_not_null.py b/issues/migrations/0024_turningpoint_project_alter_not_null.py new file mode 100644 index 0000000..a14be18 --- /dev/null +++ b/issues/migrations/0024_turningpoint_project_alter_not_null.py @@ -0,0 +1,20 @@ +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("projects", "0012_project_is_deleted"), + ("issues", "0023_turningpoint_set_project"), + ] + + operations = [ + migrations.AlterField( + model_name="turningpoint", + name="project", + field=models.ForeignKey( + on_delete=django.db.models.deletion.DO_NOTHING, to="projects.project" + ), + ), + ] diff --git a/issues/models.py b/issues/models.py index e0d52eb..634fbe1 100644 --- a/issues/models.py +++ b/issues/models.py @@ -348,6 +348,7 @@ class IssueStateManager(object): # path is never reached via UI-based paths (because those are by definition not event-triggered); thus # the 2 ways of creating TurningPoints do not collide. 
TurningPoint.objects.create( + project_id=issue.project_id, issue=issue, triggering_event=triggering_event, timestamp=triggering_event.ingested_at, kind=TurningPointKind.UNMUTED, metadata=json.dumps(unmute_metadata)) triggering_event.never_evict = True # .save() will be called by the caller of this function @@ -491,6 +492,7 @@ class TurningPoint(models.Model): # basically: an Event, but that name was already taken in our system :-) alternative names I considered: # "milestone", "state_change", "transition", "annotation", "episode" + project = models.ForeignKey("projects.Project", blank=False, null=False, on_delete=models.DO_NOTHING) issue = models.ForeignKey("Issue", blank=False, null=False, on_delete=models.DO_NOTHING) triggering_event = models.ForeignKey("events.Event", blank=True, null=True, on_delete=models.DO_NOTHING) diff --git a/issues/tests.py b/issues/tests.py index 31fe980..0e04e7b 100644 --- a/issues/tests.py +++ b/issues/tests.py @@ -679,6 +679,7 @@ class IssueDeletionTestCase(TransactionTestCase): self.event = create_event(self.project, issue=self.issue) TurningPoint.objects.create( + project=self.project, issue=self.issue, triggering_event=self.event, timestamp=self.event.ingested_at, kind=TurningPointKind.FIRST_SEEN) diff --git a/issues/views.py b/issues/views.py index 40ebfa8..a1ae135 100644 --- a/issues/views.py +++ b/issues/views.py @@ -210,10 +210,13 @@ def _make_history(issue_or_qs, action, user): now = timezone.now() if isinstance(issue_or_qs, Issue): TurningPoint.objects.create( + project=issue_or_qs.project, issue=issue_or_qs, kind=kind, user=user, metadata=json.dumps(metadata), timestamp=now) else: TurningPoint.objects.bulk_create([ - TurningPoint(issue=issue, kind=kind, user=user, metadata=json.dumps(metadata), timestamp=now) + TurningPoint( + project_id=issue.project_id, issue=issue, kind=kind, user=user, metadata=json.dumps(metadata), + timestamp=now) for issue in issue_or_qs ]) @@ -767,6 +770,7 @@ def history_comment_new(request, issue): # think that's amount of magic to have: it still allows one to erase comments (possibly for non-manual # kinds) but it saves you from what is obviously a mistake (without complaining with a red box or something) TurningPoint.objects.create( + project=issue.project, issue=issue, kind=TurningPointKind.MANUAL_ANNOTATION, user=request.user, comment=form.cleaned_data["comment"], timestamp=timezone.now()) diff --git a/projects/admin.py b/projects/admin.py index dccd827..2b823f7 100644 --- a/projects/admin.py +++ b/projects/admin.py @@ -1,9 +1,17 @@ from django.contrib import admin +from django.utils.decorators import method_decorator +from django.views.decorators.csrf import csrf_protect + from admin_auto_filters.filters import AutocompleteFilter +from bugsink.transaction import immediate_atomic + from .models import Project, ProjectMembership +csrf_protect_m = method_decorator(csrf_protect) + + class ProjectFilter(AutocompleteFilter): title = 'Project' field_name = 'project' @@ -31,9 +39,8 @@ class ProjectAdmin(admin.ModelAdmin): list_display = [ 'name', 'dsn', - 'alert_on_new_issue', - 'alert_on_regression', - 'alert_on_unmute', + 'digested_event_count', + 'stored_event_count', ] readonly_fields = [ @@ -47,6 +54,31 @@ class ProjectAdmin(admin.ModelAdmin): 'slug': ['name'], } + def get_deleted_objects(self, objs, request): + to_delete = list(objs) + ["...all its related objects... 
(delayed)"] + model_count = { + Project: len(objs), + } + perms_needed = set() + protected = [] + return to_delete, model_count, perms_needed, protected + + def delete_queryset(self, request, queryset): + # NOTE: not the most efficient; it will do for a first version. + with immediate_atomic(): + for obj in queryset: + obj.delete_deferred() + + def delete_model(self, request, obj): + with immediate_atomic(): + obj.delete_deferred() + + @csrf_protect_m + def delete_view(self, request, object_id, extra_context=None): + # the superclass version, but with the transaction.atomic context manager commented out (we do this ourselves) + # with transaction.atomic(using=router.db_for_write(self.model)): + return self._delete_view(request, object_id, extra_context) + # the preferred way to deal with ProjectMembership is actually through the inline above; however, because this may prove # to not scale well with (very? more than 50?) memberships per project, we've left the separate admin interface here for diff --git a/projects/migrations/0012_project_is_deleted.py b/projects/migrations/0012_project_is_deleted.py new file mode 100644 index 0000000..6e3625a --- /dev/null +++ b/projects/migrations/0012_project_is_deleted.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.21 on 2025-07-03 13:57 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("projects", "0011_fill_stored_event_count"), + ] + + operations = [ + migrations.AddField( + model_name="project", + name="is_deleted", + field=models.BooleanField(default=False), + ), + ] diff --git a/projects/models.py b/projects/models.py index 73f9b71..eb1f0df 100644 --- a/projects/models.py +++ b/projects/models.py @@ -5,11 +5,14 @@ from django.conf import settings from django.utils.text import slugify from bugsink.app_settings import get_settings +from bugsink.transaction import delay_on_commit from compat.dsn import build_dsn from teams.models import TeamMembership +from .tasks import delete_project_deps + # ## Visibility/Access-design # @@ -74,6 +77,7 @@ class Project(models.Model): name = models.CharField(max_length=255, blank=False, null=False, unique=True) slug = models.SlugField(max_length=50, blank=False, null=False, unique=True) + is_deleted = models.BooleanField(default=False) # sentry_key mirrors the "public" part of the sentry DSN. 
As of late 2023 Sentry's docs say the this about DSNs: # @@ -143,6 +147,13 @@ class Project(models.Model): super().save(*args, **kwargs) + def delete_deferred(self): + """Marks the project as deleted, and schedules deletion of all related objects""" + self.is_deleted = True + self.save(update_fields=["is_deleted"]) + + delay_on_commit(delete_project_deps, str(self.id)) + def is_joinable(self, user=None): if user is not None: # take the user's team membership into account diff --git a/projects/tasks.py b/projects/tasks.py index 219320e..e060ea2 100644 --- a/projects/tasks.py +++ b/projects/tasks.py @@ -4,12 +4,13 @@ from snappea.decorators import shared_task from bugsink.app_settings import get_settings from bugsink.utils import send_rendered_email - -from .models import Project +from bugsink.transaction import immediate_atomic, delay_on_commit +from bugsink.utils import get_model_topography, delete_deps_with_budget @shared_task def send_project_invite_email_new_user(email, project_pk, token): + from .models import Project # avoid circular import project = Project.objects.get(pk=project_pk) send_rendered_email( @@ -30,6 +31,7 @@ def send_project_invite_email_new_user(email, project_pk, token): @shared_task def send_project_invite_email(email, project_pk): + from .models import Project # avoid circular import project = Project.objects.get(pk=project_pk) send_rendered_email( @@ -45,3 +47,94 @@ def send_project_invite_email(email, project_pk): }), }, ) + + +def get_model_topography_with_project_override(): + """ + Returns the model topography with ordering adjusted to prefer deletions via .project, when available. + + This assumes that Project is not only the root of the dependency graph, but also that if a model has an .project + ForeignKey, deleting it via that path is sufficient, meaning we can safely avoid visiting the same model again + through other ForeignKey routes (e.g. any of the .issue paths). + + The preference is encoded via an explicit list of models, which are visited early and only via their .project path. + """ + from issues.models import Issue, TurningPoint, Grouping + from events.models import Event + from tags.models import IssueTag, EventTag, TagValue, TagKey + from alerts.models import MessagingServiceConfig + from releases.models import Release + from projects.models import ProjectMembership + + preferred = [ + # Tag-related: remove the "depending" models first and the most depended on last. + EventTag, # above Event, to avoid deletions via .event + IssueTag, + TagValue, + TagKey, + + TurningPoint, # above Event, to avoid deletions via .triggering_event + Event, # above Grouping, to avoid deletions via .grouping + Grouping, + + # these things "could be anywhere" in the ordering; they're not that connected; we put them at the end. 
+        MessagingServiceConfig,
+        ProjectMembership,
+        Release,
+
+        Issue,  # at the bottom, almost everything points to this, we'd rather delete those things via .project
+    ]
+
+    def as_preferred(lst):
+        """
+        Sorts the list of (model, fk_name) tuples such that the models are in the preferred order as indicated above,
+        and, for those preferred models, entries that arrive via a fk_name other than "project" are pruned
+        """
+        return sorted(
+            [(model, fk_name) for model, fk_name in lst if fk_name == "project" or model not in preferred],
+            key=lambda x: preferred.index(x[0]) if x[0] in preferred else len(preferred),
+        )
+
+    topo = get_model_topography()
+    for k, lst in topo.items():
+        topo[k] = as_preferred(lst)
+
+    return topo
+
+
+@shared_task
+def delete_project_deps(project_id):
+    from .models import Project  # avoid circular import
+    with immediate_atomic():
+        # matches what we do in events/retention.py (and for which argumentation exists); in practice I have seen _much_
+        # faster deletion times (on the order of .03s per task on my local laptop) when using a budget of 500, _but_
+        # it's not a given those were for "expensive objects" (e.g. events); and I'd rather err on the side of caution
+        # (worst case we have a bit of inefficiency; in any case this avoids hogging the global write lock / timeouts).
+        budget = 500
+        num_deleted = 0
+
+        dep_graph = get_model_topography_with_project_override()
+
+        for model_for_recursion, fk_name_for_recursion in dep_graph["projects.Project"]:
+            this_num_deleted = delete_deps_with_budget(
+                project_id,
+                model_for_recursion,
+                fk_name_for_recursion,
+                [project_id],
+                budget - num_deleted,
+                dep_graph,
+            )
+
+            num_deleted += this_num_deleted
+
+            if num_deleted >= budget:
+                delay_on_commit(delete_project_deps, project_id)
+                return
+
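+        # after the dependency pass: only when there is budget left do we delete the Project row itself;
+        # otherwise the remainder is handed off to a freshly scheduled task.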
+        if budget - num_deleted <= 0:
+            # no more budget for the self-delete.
+            delay_on_commit(delete_project_deps, project_id)
+
+        else:
+            # final step: delete the project itself
+            Project.objects.filter(pk=project_id).delete()
diff --git a/projects/tests.py b/projects/tests.py
index 38a61ce..f8b3ff5 100644
--- a/projects/tests.py
+++ b/projects/tests.py
@@ -1 +1,138 @@
-# from django.test import TestCase as DjangoTestCase
+from django.conf import settings
+from django.apps import apps
+from django.contrib.auth import get_user_model
+
+from bugsink.test_utils import TransactionTestCase25251 as TransactionTestCase
+from bugsink.utils import get_model_topography
+from projects.models import Project, ProjectMembership
+from events.factories import create_event
+from issues.factories import get_or_create_issue
+from tags.models import store_tags
+from issues.models import TurningPoint, TurningPointKind
+from alerts.models import MessagingServiceConfig
+from releases.models import Release
+
+from .tasks import get_model_topography_with_project_override
+
+User = get_user_model()
+
+
+class ProjectDeletionTestCase(TransactionTestCase):
+
+    def setUp(self):
+        super().setUp()
+        self.project = Project.objects.create(name="Test Project", stored_event_count=1)  # 1, in prep. of the below
+        self.issue, _ = get_or_create_issue(self.project)
+        self.event = create_event(self.project, issue=self.issue)
+        self.user = User.objects.create_user(username='test', password='test')
+
+        TurningPoint.objects.create(
+            project=self.project,
+            issue=self.issue, triggering_event=self.event, timestamp=self.event.ingested_at,
+            kind=TurningPointKind.FIRST_SEEN)
+
+        MessagingServiceConfig.objects.create(project=self.project)
+        ProjectMembership.objects.create(project=self.project, user=self.user)
+        Release.objects.create(project=self.project, version="1.0.0")
+
+        self.event.never_evict = True
+        self.event.save()
+
+        store_tags(self.event, self.issue, {"foo": "bar"})
+
+    def test_delete_project(self):
+        models = [apps.get_model(app_label=s.split('.')[0], model_name=s.split('.')[1].lower()) for s in [
+            "tags.EventTag",
+            "tags.IssueTag",
+            "tags.TagValue",
+            "tags.TagKey",
+            "issues.TurningPoint",
+            "events.Event",
+            "issues.Grouping",
+            "alerts.MessagingServiceConfig",
+            "projects.ProjectMembership",
+            "releases.Release",
+            "issues.Issue",
+            "projects.Project",
+        ]]
+
+        for model in models:
+            # test-the-test: make sure some instances of the models actually exist after setup
+            self.assertTrue(model.objects.exists(), f"Some {model.__name__} should exist")
+
+        # assertNumQueries() is brittle and opaque. But at least the brittle part is quick to fix (a single number) and
+        # provides a canary for performance regressions.
+
+        # correct for bugsink/transaction.py's select_for_update for non-sqlite databases
+        correct_for_select_for_update = 1 if 'sqlite' not in settings.DATABASES['default']['ENGINE'] else 0
+
+        with self.assertNumQueries(40 + correct_for_select_for_update):
+            self.project.delete_deferred()
+
+        # tests run w/ TASK_ALWAYS_EAGER, so in the below we can just check the database directly
+        for model in models:
+            self.assertFalse(model.objects.exists(), f"No {model.__name__}s should exist after project deletion")
+
+    def test_dependency_graphs(self):
+        # tests for an implementation detail of deferred deletion: one assertion pins down the actual
+        # model-topography, and one shows how we manually override it; this is to trigger a failure when
+        # the topology changes (and forces us to double-check that the override is still correct).
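+        # (walk() below flattens a topography into its (model, fk_name) pairs in depth-first order; the two
+        # assertions compare that flattening for the default graph and for the overridden one.)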
+ + orig = get_model_topography() + override = get_model_topography_with_project_override() + + def walk(topo, model_name): + results = [] + for model, fk_name in topo[model_name]: + results.append((model, fk_name)) + results.extend(walk(topo, model._meta.label)) + return results + + self.assertEqual(walk(orig, 'projects.Project'), [ + (apps.get_model('projects', 'ProjectMembership'), 'project'), + (apps.get_model('releases', 'Release'), 'project'), + (apps.get_model('issues', 'Issue'), 'project'), + (apps.get_model('issues', 'Grouping'), 'issue'), + (apps.get_model('events', 'Event'), 'grouping'), + (apps.get_model('issues', 'TurningPoint'), 'triggering_event'), + (apps.get_model('tags', 'EventTag'), 'event'), + (apps.get_model('issues', 'TurningPoint'), 'issue'), + (apps.get_model('events', 'Event'), 'issue'), + (apps.get_model('issues', 'TurningPoint'), 'triggering_event'), + (apps.get_model('tags', 'EventTag'), 'event'), + (apps.get_model('tags', 'EventTag'), 'issue'), + (apps.get_model('tags', 'IssueTag'), 'issue'), + (apps.get_model('issues', 'Grouping'), 'project'), + (apps.get_model('events', 'Event'), 'grouping'), + (apps.get_model('issues', 'TurningPoint'), 'triggering_event'), + (apps.get_model('tags', 'EventTag'), 'event'), + (apps.get_model('issues', 'TurningPoint'), 'project'), + (apps.get_model('events', 'Event'), 'project'), + (apps.get_model('issues', 'TurningPoint'), 'triggering_event'), + (apps.get_model('tags', 'EventTag'), 'event'), + (apps.get_model('tags', 'TagKey'), 'project'), + (apps.get_model('tags', 'TagValue'), 'key'), + (apps.get_model('tags', 'EventTag'), 'value'), + (apps.get_model('tags', 'IssueTag'), 'value'), + (apps.get_model('tags', 'IssueTag'), 'key'), + (apps.get_model('tags', 'TagValue'), 'project'), + (apps.get_model('tags', 'EventTag'), 'value'), + (apps.get_model('tags', 'IssueTag'), 'value'), + (apps.get_model('tags', 'EventTag'), 'project'), + (apps.get_model('tags', 'IssueTag'), 'project'), + (apps.get_model('alerts', 'MessagingServiceConfig'), 'project'), + ]) + + self.assertEqual(walk(override, 'projects.Project'), [ + (apps.get_model('tags', 'EventTag'), 'project'), + (apps.get_model('tags', 'IssueTag'), 'project'), + (apps.get_model('tags', 'TagValue'), 'project'), + (apps.get_model('tags', 'TagKey'), 'project'), + (apps.get_model('issues', 'TurningPoint'), 'project'), + (apps.get_model('events', 'Event'), 'project'), + (apps.get_model('issues', 'Grouping'), 'project'), + (apps.get_model('alerts', 'MessagingServiceConfig'), 'project'), + (apps.get_model('projects', 'ProjectMembership'), 'project'), + (apps.get_model('releases', 'Release'), 'project'), + (apps.get_model('issues', 'Issue'), 'project') + ]) diff --git a/releases/models.py b/releases/models.py index 872f49f..a103db8 100644 --- a/releases/models.py +++ b/releases/models.py @@ -124,6 +124,7 @@ def create_release_if_needed(project, version, event, issue=None): # triggering event anymore for our timestamp. TurningPoint.objects.bulk_create([TurningPoint( + project=project, issue=issue, kind=TurningPointKind.NEXT_MATERIALIZED, triggering_event=event, metadata=json.dumps({"actual_release": release.version}), timestamp=event.ingested_at) for issue in resolved_by_next_qs