mirror of
https://github.com/bugsink/bugsink.git
synced 2026-01-06 05:10:15 -06:00
Add failure tracking fields and error handling to alert backends
Co-authored-by: vanschelven <223833+vanschelven@users.noreply.github.com>
This commit is contained in:
committed by
Klaas van Schelven
parent
c6ced06a2f
commit
b564774f21
@@ -0,0 +1,43 @@
|
||||
# Generated by Django 4.2.23 on 2025-07-28 14:23
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the ``last_failure_*`` columns to ``messagingserviceconfig``.

    Every added field is nullable, so no default value is supplied for
    existing rows.
    """

    dependencies = [
        ('alerts', '0002_alter_messagingserviceconfig_project'),
    ]

    operations = [
        migrations.AddField(
            model_name='messagingserviceconfig',
            name='last_failure_error_message',
            field=models.TextField(
                blank=True,
                help_text='Error message from the exception',
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='messagingserviceconfig',
            name='last_failure_error_type',
            field=models.CharField(
                blank=True,
                help_text="Type of error that occurred (e.g., 'requests.HTTPError')",
                max_length=100,
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='messagingserviceconfig',
            name='last_failure_is_json',
            field=models.BooleanField(
                blank=True,
                help_text='Whether the response was valid JSON',
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='messagingserviceconfig',
            name='last_failure_response_text',
            field=models.TextField(
                blank=True,
                help_text='Response text from the failed request',
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='messagingserviceconfig',
            name='last_failure_status_code',
            field=models.IntegerField(
                blank=True,
                help_text='HTTP status code of the failed request',
                null=True,
            ),
        ),
        migrations.AddField(
            model_name='messagingserviceconfig',
            name='last_failure_timestamp',
            field=models.DateTimeField(
                blank=True,
                help_text='When the last failure occurred',
                null=True,
            ),
        ),
    ]
|
||||
@@ -12,7 +12,34 @@ class MessagingServiceConfig(models.Model):
|
||||
kind = models.CharField(choices=[("slack", "Slack (or compatible)"), ], max_length=20, default="slack")
|
||||
|
||||
config = models.TextField(blank=False)
|
||||
|
||||
# Alert backend failure tracking.
# All of these are nullable; clear_failure_status() resets them to None.
last_failure_timestamp = models.DateTimeField(
    null=True,
    blank=True,
    help_text="When the last failure occurred",
)
last_failure_status_code = models.IntegerField(
    null=True,
    blank=True,
    help_text="HTTP status code of the failed request",
)
last_failure_response_text = models.TextField(
    null=True,
    blank=True,
    help_text="Response text from the failed request",
)
last_failure_is_json = models.BooleanField(
    null=True,
    blank=True,
    help_text="Whether the response was valid JSON",
)
last_failure_error_type = models.CharField(
    max_length=100,
    null=True,
    blank=True,
    help_text="Type of error that occurred (e.g., 'requests.HTTPError')",
)
last_failure_error_message = models.TextField(
    null=True,
    blank=True,
    help_text="Error message from the exception",
)
|
||||
|
||||
def get_backend(self):
    """Return a ``SlackBackend`` wrapping this config."""
    # once we have multiple backends: lookup by kind.
    backend = SlackBackend(self)
    return backend
|
||||
|
||||
def clear_failure_status(self):
    """Reset every failure-tracking attribute to ``None``.

    Only the in-memory attributes are changed; this method does not call
    ``save()`` itself.
    """
    failure_attrs = (
        "last_failure_timestamp",
        "last_failure_status_code",
        "last_failure_response_text",
        "last_failure_is_json",
        "last_failure_error_type",
        "last_failure_error_message",
    )
    for attr_name in failure_attrs:
        setattr(self, attr_name, None)
|
||||
|
||||
def has_recent_failure(self):
    """Report whether a failure is currently recorded on this config.

    Despite the name, no time window is applied: the result is ``True``
    whenever ``last_failure_timestamp`` is set, however old it is.
    """
    recorded_at = self.last_failure_timestamp
    return recorded_at is not None
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import json
|
||||
import requests
|
||||
from django.utils import timezone
|
||||
|
||||
from django import forms
|
||||
from django.template.defaultfilters import truncatechars
|
||||
|
||||
from snappea.decorators import shared_task
|
||||
from bugsink.app_settings import get_settings
|
||||
from bugsink.transaction import immediate_atomic
|
||||
|
||||
from issues.models import Issue
|
||||
|
||||
@@ -32,8 +34,57 @@ def _safe_markdown(text):
|
||||
return text.replace("&", "&").replace("<", "<").replace(">", ">").replace("*", "\\*").replace("_", "\\_")
|
||||
|
||||
|
||||
def _store_failure_info(service_config_id, exception, response=None):
    """Record details of a failed delivery on the MessagingServiceConfig row.

    Runs inside ``immediate_atomic(only_if_needed=True)``.

    Args:
        service_config_id: primary key of the MessagingServiceConfig to update.
        exception: the exception that was raised; its class name and ``str()``
            are stored.
        response: optional HTTP response object exposing ``status_code`` and
            ``text``. When omitted, the HTTP-specific fields are reset to
            ``None``.

    If the config no longer exists, nothing is stored and no error is raised.
    """
    # Local import kept from the original; presumably avoids a circular import
    # between alerts.models and this module -- verify before hoisting.
    from alerts.models import MessagingServiceConfig

    with immediate_atomic(only_if_needed=True):
        try:
            config = MessagingServiceConfig.objects.get(id=service_config_id)
        except MessagingServiceConfig.DoesNotExist:
            # Config was deleted while task was running
            return

        config.last_failure_timestamp = timezone.now()

        # last_failure_error_type is a length-limited CharField. Truncate so
        # that an unusually long class name cannot make this save fail and
        # mask the delivery error the caller is about to re-raise.
        type_max_length = MessagingServiceConfig._meta.get_field("last_failure_error_type").max_length
        config.last_failure_error_type = type(exception).__name__[:type_max_length]
        config.last_failure_error_message = str(exception)

        if response is not None:
            # Handle requests-specific errors
            body = response.text
            config.last_failure_status_code = response.status_code
            config.last_failure_response_text = body[:2000]  # Limit response text size

            # Check if response is JSON (json.JSONDecodeError is a ValueError subclass)
            try:
                json.loads(body)
            except ValueError:
                config.last_failure_is_json = False
            else:
                config.last_failure_is_json = True
        else:
            # Non-HTTP errors: make sure no stale HTTP details remain
            config.last_failure_status_code = None
            config.last_failure_response_text = None
            config.last_failure_is_json = None

        config.save()
|
||||
|
||||
|
||||
def _store_success_info(service_config_id):
    """Wipe the stored failure details of a config after a successful send.

    Runs inside ``immediate_atomic(only_if_needed=True)``. If the config no
    longer exists, this is a silent no-op.
    """
    from alerts.models import MessagingServiceConfig

    with immediate_atomic(only_if_needed=True):
        try:
            service_config = MessagingServiceConfig.objects.get(id=service_config_id)
        except MessagingServiceConfig.DoesNotExist:
            # Config was deleted while task was running
            return

        service_config.clear_failure_status()
        service_config.save()
|
||||
|
||||
|
||||
@shared_task
|
||||
def slack_backend_send_test_message(webhook_url, project_name, display_name):
|
||||
def slack_backend_send_test_message(webhook_url, project_name, display_name, service_config_id):
|
||||
# See Slack's Block Kit Builder
|
||||
|
||||
data = {"blocks": [
|
||||
@@ -67,17 +118,35 @@ def slack_backend_send_test_message(webhook_url, project_name, display_name):
|
||||
|
||||
]}
|
||||
|
||||
result = requests.post(
|
||||
webhook_url,
|
||||
data=json.dumps(data),
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
try:
|
||||
result = requests.post(
|
||||
webhook_url,
|
||||
data=json.dumps(data),
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
|
||||
result.raise_for_status()
|
||||
result.raise_for_status()
|
||||
|
||||
# Success - clear any previous failure status
|
||||
_store_success_info(service_config_id)
|
||||
|
||||
except requests.RequestException as e:
|
||||
# Store failure information for requests-related errors
|
||||
# For HTTPError from raise_for_status(), the response is in the exception
|
||||
response = getattr(e, 'response', None)
|
||||
if response is None and 'result' in locals():
|
||||
# Fallback: if no response in exception, try to get it from the result
|
||||
response = result
|
||||
_store_failure_info(service_config_id, e, response)
|
||||
raise
|
||||
except Exception as e:
|
||||
# Store failure information for other errors
|
||||
_store_failure_info(service_config_id, e)
|
||||
raise
|
||||
|
||||
|
||||
@shared_task
|
||||
def slack_backend_send_alert(webhook_url, issue_id, state_description, alert_article, alert_reason, unmute_reason=None):
|
||||
def slack_backend_send_alert(webhook_url, issue_id, state_description, alert_article, alert_reason, service_config_id, unmute_reason=None):
|
||||
issue = Issue.objects.get(id=issue_id)
|
||||
|
||||
issue_url = get_settings().BASE_URL + issue.get_absolute_url()
|
||||
@@ -134,13 +203,31 @@ def slack_backend_send_alert(webhook_url, issue_id, state_description, alert_art
|
||||
},
|
||||
]}
|
||||
|
||||
result = requests.post(
|
||||
webhook_url,
|
||||
data=json.dumps(data),
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
try:
|
||||
result = requests.post(
|
||||
webhook_url,
|
||||
data=json.dumps(data),
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
|
||||
result.raise_for_status()
|
||||
result.raise_for_status()
|
||||
|
||||
# Success - clear any previous failure status
|
||||
_store_success_info(service_config_id)
|
||||
|
||||
except requests.RequestException as e:
|
||||
# Store failure information for requests-related errors
|
||||
# For HTTPError from raise_for_status(), the response is in the exception
|
||||
response = getattr(e, 'response', None)
|
||||
if response is None and 'result' in locals():
|
||||
# Fallback: if no response in exception, try to get it from the result
|
||||
response = result
|
||||
_store_failure_info(service_config_id, e, response)
|
||||
raise
|
||||
except Exception as e:
|
||||
# Store failure information for other errors
|
||||
_store_failure_info(service_config_id, e)
|
||||
raise
|
||||
|
||||
|
||||
class SlackBackend:
|
||||
@@ -156,9 +243,10 @@ class SlackBackend:
|
||||
json.loads(self.service_config.config)["webhook_url"],
|
||||
self.service_config.project.name,
|
||||
self.service_config.display_name,
|
||||
self.service_config.id,
|
||||
)
|
||||
|
||||
def send_alert(self, issue_id, state_description, alert_article, alert_reason, **kwargs):
|
||||
slack_backend_send_alert.delay(
|
||||
json.loads(self.service_config.config)["webhook_url"],
|
||||
issue_id, state_description, alert_article, alert_reason, **kwargs)
|
||||
issue_id, state_description, alert_article, alert_reason, self.service_config.id, **kwargs)
|
||||
|
||||
173
alerts/tests.py
173
alerts/tests.py
@@ -1,14 +1,20 @@
|
||||
from django.test import TestCase as DjangoTestCase
|
||||
from unittest.mock import patch, Mock
|
||||
import json
|
||||
import requests
|
||||
|
||||
from django.core import mail
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.template.loader import get_template
|
||||
from django.utils import timezone
|
||||
|
||||
from issues.factories import get_or_create_issue
|
||||
from projects.models import Project, ProjectMembership
|
||||
from events.factories import create_event
|
||||
from teams.models import Team, TeamMembership
|
||||
|
||||
from .models import MessagingServiceConfig
|
||||
from .service_backends.slack import slack_backend_send_test_message, slack_backend_send_alert
|
||||
from .tasks import send_new_issue_alert, send_regression_alert, send_unmute_alert, _get_users_for_email_alert
|
||||
from .views import DEBUG_CONTEXTS
|
||||
|
||||
@@ -132,3 +138,170 @@ class TestAlertSending(DjangoTestCase):
|
||||
user.send_email_alerts = True
|
||||
user.save()
|
||||
self.assertEqual(list(_get_users_for_email_alert(issue)), [user])
|
||||
|
||||
|
||||
class TestSlackBackendErrorHandling(DjangoTestCase):
    """Failure tracking on MessagingServiceConfig when the Slack tasks are called directly."""

    WEBHOOK_URL = "https://hooks.slack.com/test"

    def setUp(self):
        self.project = Project.objects.create(name="Test project")
        self.config = MessagingServiceConfig.objects.create(
            project=self.project,
            display_name="Test Slack",
            kind="slack",
            config=json.dumps({"webhook_url": self.WEBHOOK_URL}),
        )

    # -- helpers ---------------------------------------------------------

    def _send_test_message(self):
        """Call the test-message task synchronously for ``self.config``."""
        slack_backend_send_test_message(
            self.WEBHOOK_URL,
            "Test project",
            "Test Slack",
            self.config.id,
        )

    @staticmethod
    def _ok_response():
        """Build a mocked 200 response whose ``raise_for_status`` does nothing."""
        response = Mock()
        response.status_code = 200
        response.raise_for_status.return_value = None
        return response

    @staticmethod
    def _failing_response(status_code, text):
        """Build a mocked response whose ``raise_for_status`` raises an HTTPError carrying it."""
        response = Mock()
        response.status_code = status_code
        response.text = text

        error = requests.HTTPError()
        error.response = response
        response.raise_for_status.side_effect = error
        return response

    # -- test-message task -----------------------------------------------

    @patch('alerts.service_backends.slack.requests.post')
    def test_slack_test_message_success_clears_failure_status(self, mock_post):
        # Start from a config that already carries a recorded failure.
        self.config.last_failure_timestamp = timezone.now()
        self.config.last_failure_status_code = 500
        self.config.last_failure_response_text = "Server Error"
        self.config.save()

        mock_post.return_value = self._ok_response()

        self._send_test_message()

        # The successful send must have wiped the failure details.
        self.config.refresh_from_db()
        self.assertIsNone(self.config.last_failure_timestamp)
        self.assertIsNone(self.config.last_failure_status_code)
        self.assertIsNone(self.config.last_failure_response_text)

    @patch('alerts.service_backends.slack.requests.post')
    def test_slack_test_message_http_error_stores_failure(self, mock_post):
        mock_post.return_value = self._failing_response(404, '{"error": "webhook_not_found"}')

        # The task re-raises after recording the failure.
        with self.assertRaises(requests.HTTPError):
            self._send_test_message()

        self.config.refresh_from_db()
        self.assertIsNotNone(self.config.last_failure_timestamp)
        self.assertEqual(self.config.last_failure_status_code, 404)
        self.assertEqual(self.config.last_failure_response_text, '{"error": "webhook_not_found"}')
        self.assertTrue(self.config.last_failure_is_json)
        self.assertEqual(self.config.last_failure_error_type, "HTTPError")

    @patch('alerts.service_backends.slack.requests.post')
    def test_slack_test_message_non_json_error_stores_failure(self, mock_post):
        mock_post.return_value = self._failing_response(500, 'Internal Server Error')

        with self.assertRaises(requests.HTTPError):
            self._send_test_message()

        self.config.refresh_from_db()
        self.assertIsNotNone(self.config.last_failure_timestamp)
        self.assertEqual(self.config.last_failure_status_code, 500)
        self.assertEqual(self.config.last_failure_response_text, 'Internal Server Error')
        self.assertFalse(self.config.last_failure_is_json)

    @patch('alerts.service_backends.slack.requests.post')
    def test_slack_test_message_connection_error_stores_failure(self, mock_post):
        # No response object exists at all in this scenario.
        mock_post.side_effect = requests.ConnectionError("Connection failed")

        with self.assertRaises(requests.ConnectionError):
            self._send_test_message()

        self.config.refresh_from_db()
        self.assertIsNotNone(self.config.last_failure_timestamp)
        self.assertIsNone(self.config.last_failure_status_code)  # No HTTP response
        self.assertIsNone(self.config.last_failure_response_text)
        self.assertIsNone(self.config.last_failure_is_json)
        self.assertEqual(self.config.last_failure_error_type, "ConnectionError")
        self.assertEqual(self.config.last_failure_error_message, "Connection failed")

    # -- alert task ------------------------------------------------------

    @patch('alerts.service_backends.slack.requests.post')
    def test_slack_alert_message_success_clears_failure_status(self, mock_post):
        # Start from a config that already carries a recorded failure.
        self.config.last_failure_timestamp = timezone.now()
        self.config.last_failure_status_code = 500
        self.config.save()

        issue, _ = get_or_create_issue(project=self.project)

        mock_post.return_value = self._ok_response()

        slack_backend_send_alert(
            self.WEBHOOK_URL,
            issue.id,
            "New issue",
            "a",
            "NEW",
            self.config.id,
        )

        self.config.refresh_from_db()
        self.assertIsNone(self.config.last_failure_timestamp)

    # -- model helpers ---------------------------------------------------

    def test_has_recent_failure_method(self):
        # Nothing recorded yet.
        self.assertFalse(self.config.has_recent_failure())

        # Record a failure.
        self.config.last_failure_timestamp = timezone.now()
        self.config.save()
        self.assertTrue(self.config.has_recent_failure())

        # Clear it again.
        self.config.clear_failure_status()
        self.config.save()
        self.assertFalse(self.config.has_recent_failure())
|
||||
|
||||
Reference in New Issue
Block a user