Mirror of https://github.com/bugsink/bugsink.git (synced 2026-05-04 22:10:18 -05:00).
+18
-1
@@ -25,6 +25,7 @@ from events.models import Event
|
||||
from ingest.views import BaseIngestAPIView
|
||||
from issues.factories import get_or_create_issue
|
||||
from tags.models import store_tags
|
||||
from tags.tasks import vacuum_tagvalues
|
||||
|
||||
from .models import Issue, IssueStateManager, TurningPoint, TurningPointKind
|
||||
from .regressions import is_regression, is_regression_2, issue_is_regression
|
||||
@@ -692,7 +693,12 @@ class IssueDeletionTestCase(TransactionTestCase):
|
||||
'events.Event', 'tags.EventTag', 'issues.Issue',
|
||||
]]
|
||||
|
||||
for model in models:
|
||||
# 'vacuum' models are those that are not deleted when an issue is deleted, because they are exclusively owned
|
||||
# by any given issue.
|
||||
vacuum_models = [apps.get_model(app_label=s.split('.')[0], model_name=s.split('.')[1].lower())
|
||||
for s in ['tags.TagKey', 'tags.TagValue']]
|
||||
|
||||
for model in models + vacuum_models:
|
||||
# test-the-test: make sure some instances of the models actually exist after setup
|
||||
self.assertTrue(model.objects.exists(), f"Some {model.__name__} should exist")
|
||||
|
||||
@@ -701,3 +707,14 @@ class IssueDeletionTestCase(TransactionTestCase):
|
||||
# tests run w/ TASK_ALWAYS_EAGER, so in the below we can just check the database directly
|
||||
for model in models:
|
||||
self.assertFalse(model.objects.exists(), f"No {model.__name__}s should exist after issue deletion")
|
||||
|
||||
for model in vacuum_models:
|
||||
# 'should' in quotes because this isn't so because we believe it's better if they did, but because the
|
||||
# code currently does not delete them.
|
||||
self.assertTrue(model.objects.exists(), f"Some {model.__name__}s 'should' exist after issue deletion")
|
||||
|
||||
vacuum_tagvalues()
|
||||
# tests run w/ TASK_ALWAYS_EAGER, so any "delayed" (recursive) calls can be expected to have run
|
||||
|
||||
for model in vacuum_models:
|
||||
self.assertFalse(model.objects.exists(), f"No {model.__name__}s should exist after vacuuming")
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from tags.tasks import vacuum_tagvalues
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that kicks off the tag-vacuum task chain.

    Enqueues ``vacuum_tagvalues`` on the task queue (it re-enqueues itself in
    batches and hands off to TagKey cleanup when done); the command itself
    returns immediately after enqueueing.
    """

    help = "Kick off tag cleanup by vacuuming orphaned TagValue and TagKey entries."

    def handle(self, *args, **options):
        vacuum_tagvalues.delay()
        # Use self.style.SUCCESS per Django convention so the confirmation is
        # colorized on a terminal; the message text itself is unchanged.
        self.stdout.write(self.style.SUCCESS("Started tag vacuum via task queue."))
|
||||
@@ -0,0 +1,84 @@
|
||||
from snappea.decorators import shared_task
|
||||
|
||||
from bugsink.transaction import immediate_atomic, delay_on_commit
|
||||
from tags.models import TagValue, TagKey, EventTag, IssueTag
|
||||
|
||||
BATCH_SIZE = 10_000
|
||||
|
||||
|
||||
@shared_task
def vacuum_tagvalues(min_id=0):
    """Delete unused TagValue rows in batches, then hand off to TagKey cleanup.

    A TagValue is unused when no IssueTag or EventTag references it; this can
    happen when IssueTag/EventTag entries are deleted. Cleanup is skipped at
    deletion time to avoid repeated checks, but still needs to happen
    eventually to avoid bloating the database — which is what this task does.

    min_id: exclusive lower bound on TagValue ids for this batch; each batch
    re-enqueues itself with the last id it examined.

    Impl. notes:

    * select ids_to_check first, and then check which of those are used in EventTag or IssueTag. This avoids doing
      TagValue.exclude(some_usage_pattern) which may be slow / for which reasoning about performance is hard.
    * batched to allow for incremental cleanup, using a defer-with-min-id pattern to implement the batching.

    Known limitation:
    with _many_ TagValues (whether used or not) and when running in EAGER mode, this thing overflows the stack.
    Basically: because then the "delayed recursion" is not actually delayed, it just runs immediately. Answer: for
    "big things" (basically: serious setups) set up snappea.
    """

    with immediate_atomic():
        # Select candidate TagValue IDs above min_id
        ids_to_check = list(
            TagValue.objects
            .filter(id__gt=min_id)
            .order_by('id')
            .values_list('id', flat=True)[:BATCH_SIZE]
        )

        if not ids_to_check:
            # Done with TagValues → start TagKey cleanup
            delay_on_commit(vacuum_tagkeys, 0)
            return

        # Determine which ids_to_check are referenced
        used_in_event = set(
            EventTag.objects.filter(value_id__in=ids_to_check).values_list('value_id', flat=True)
        )
        used_in_issue = set(
            IssueTag.objects.filter(value_id__in=ids_to_check).values_list('value_id', flat=True)
        )

        unused = [pk for pk in ids_to_check if pk not in used_in_event and pk not in used_in_issue]

        # Actual deletion
        if unused:
            TagValue.objects.filter(id__in=unused).delete()

        # Defer next batch. Use delay_on_commit (as for the vacuum_tagkeys hand-off above) rather than a plain
        # .delay(): enqueueing inside the open transaction would let a worker pick the task up before this
        # batch's deletions are committed and observe pre-commit state.
        delay_on_commit(vacuum_tagvalues, ids_to_check[-1])
|
||||
|
||||
|
||||
@shared_task
def vacuum_tagkeys(min_id=0):
    """Delete unused TagKey rows in batches.

    A TagKey is unused when no TagValue references it (typically after
    vacuum_tagvalues has removed the orphaned TagValues). Uses the same
    select-candidates-then-check, defer-with-min-id batching pattern as
    vacuum_tagvalues; the same EAGER-mode recursion caveat applies.

    min_id: exclusive lower bound on TagKey ids for this batch.
    """
    with immediate_atomic():
        # Select candidate TagKey IDs above min_id
        ids_to_check = list(
            TagKey.objects
            .filter(id__gt=min_id)
            .order_by('id')
            .values_list('id', flat=True)[:BATCH_SIZE]
        )

        if not ids_to_check:
            return  # done

        # Determine which ids_to_check are referenced
        used = set(
            TagValue.objects.filter(key_id__in=ids_to_check).values_list('key_id', flat=True)
        )

        unused = [pk for pk in ids_to_check if pk not in used]

        # Actual deletion
        if unused:
            TagKey.objects.filter(id__in=unused).delete()

        # Defer next batch. Use delay_on_commit rather than a plain .delay(): enqueueing inside the open
        # transaction would let a worker pick the task up before this batch's deletions are committed and
        # observe pre-commit state (matches the hand-off pattern in vacuum_tagvalues).
        delay_on_commit(vacuum_tagkeys, ids_to_check[-1])
|
||||
Reference in New Issue
Block a user