mirror of
https://github.com/bluewave-labs/Checkmate.git
synced 2026-01-19 16:19:45 -06:00
Merge pull request #1179 from bluewave-labs/feat/be/threshold-notifications
feat/be/threshold notifications
This commit is contained in:
@@ -132,7 +132,7 @@ const CreateInfrastructureMonitor = () => {
|
||||
Object.keys(form)
|
||||
.filter((k) => k.startsWith(THRESHOLD_FIELD_PREFIX))
|
||||
.map((k) => {
|
||||
if (form[k]) thresholds[k] = form[k];
|
||||
if (form[k]) thresholds[k] = form[k] / 100;
|
||||
delete form[k];
|
||||
delete form[k.substring(THRESHOLD_FIELD_PREFIX.length)];
|
||||
});
|
||||
@@ -158,6 +158,7 @@ const CreateInfrastructureMonitor = () => {
|
||||
: infrastructureMonitor.name,
|
||||
interval: infrastructureMonitor.interval * MS_PER_MINUTE,
|
||||
};
|
||||
|
||||
delete form.notifications;
|
||||
if (hasValidationErrors(form, infrastructureMonitorValidation, setErrors)) {
|
||||
return;
|
||||
|
||||
@@ -16,6 +16,28 @@ const NotificationSchema = mongoose.Schema(
|
||||
phone: {
|
||||
type: String,
|
||||
},
|
||||
alertThreshold: {
|
||||
type: Number,
|
||||
default: 5,
|
||||
},
|
||||
cpuAlertThreshold: {
|
||||
type: Number,
|
||||
default: function () {
|
||||
return this.alertThreshold;
|
||||
},
|
||||
},
|
||||
memoryAlertThreshold: {
|
||||
type: Number,
|
||||
default: function () {
|
||||
return this.alertThreshold;
|
||||
},
|
||||
},
|
||||
diskAlertThreshold: {
|
||||
type: Number,
|
||||
default: function () {
|
||||
return this.alertThreshold;
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
timestamps: true,
|
||||
|
||||
@@ -65,7 +65,7 @@ class EmailService {
|
||||
serverIsDownTemplate: this.loadTemplate("serverIsDown"),
|
||||
serverIsUpTemplate: this.loadTemplate("serverIsUp"),
|
||||
passwordResetTemplate: this.loadTemplate("passwordReset"),
|
||||
thresholdViolatedTemplate: this.loadTemplate("thresholdViolated"),
|
||||
hardwareIncidentTemplate: this.loadTemplate("hardwareIncident"),
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -165,17 +165,12 @@ class JobQueue {
|
||||
// Handle status change
|
||||
const { monitor, statusChanged, prevStatus } =
|
||||
await this.statusService.updateStatus(networkResponse);
|
||||
|
||||
//If status hasn't changed, we're done
|
||||
if (statusChanged === false) return;
|
||||
|
||||
// if prevStatus is undefined, monitor is resuming, we're done
|
||||
if (prevStatus === undefined) return;
|
||||
|
||||
// Handle notifications
|
||||
this.notificationService.handleNotifications({
|
||||
...networkResponse,
|
||||
monitor,
|
||||
prevStatus,
|
||||
statusChanged,
|
||||
});
|
||||
} catch (error) {
|
||||
this.logger.error({
|
||||
|
||||
@@ -14,15 +14,34 @@ class NotificationService {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends an email notification based on the network response.
|
||||
* Sends an email notification for hardware infrastructure alerts
|
||||
*
|
||||
* @param {Object} networkResponse - The response from the network monitor.
|
||||
* @param {Object} networkResponse.monitor - The monitor object containing details about the monitored service.
|
||||
* @param {string} networkResponse.monitor.name - The name of the monitor.
|
||||
* @param {string} networkResponse.monitor.url - The URL of the monitor.
|
||||
* @param {boolean} networkResponse.status - The current status of the monitor (true for up, false for down).
|
||||
* @param {boolean} networkResponse.prevStatus - The previous status of the monitor (true for up, false for down).
|
||||
* @param {string} address - The email address to send the notification to.
|
||||
* @async
|
||||
* @function sendHardwareEmail
|
||||
* @param {Object} networkResponse - Response object containing monitor information
|
||||
* @param {string} address - Email address to send the notification to
|
||||
* @param {Array} [alerts=[]] - List of hardware alerts to include in the email
|
||||
* @returns {Promise<boolean>} - Indicates whether email was sent successfully
|
||||
* @throws {Error}
|
||||
*/
|
||||
async sendHardwareEmail(networkResponse, address, alerts = []) {
|
||||
if (alerts.length === 0) return false;
|
||||
const { monitor, status, prevStatus } = networkResponse;
|
||||
const template = "hardwareIncidentTemplate";
|
||||
const context = { monitor: monitor.name, url: monitor.url, alerts };
|
||||
const subject = `Monitor ${monitor.name} infrastructure alerts`;
|
||||
this.emailService.buildAndSendEmail(template, context, address, subject);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends an email notification about monitor status change
|
||||
*
|
||||
* @async
|
||||
* @function sendEmail
|
||||
* @param {Object} networkResponse - Response object containing monitor status information
|
||||
* @param {string} address - Email address to send the notification to
|
||||
* @returns {Promise<boolean>} - Indicates email was sent successfully
|
||||
*/
|
||||
async sendEmail(networkResponse, address) {
|
||||
const { monitor, status, prevStatus } = networkResponse;
|
||||
@@ -30,25 +49,133 @@ class NotificationService {
|
||||
const context = { monitor: monitor.name, url: monitor.url };
|
||||
const subject = `Monitor ${monitor.name} is ${status === true ? "up" : "down"}`;
|
||||
this.emailService.buildAndSendEmail(template, context, address, subject);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles notifications based on the network response.
|
||||
*
|
||||
* @param {Object} networkResponse - The response from the network monitor.
|
||||
* @param {string} networkResponse.monitorId - The ID of the monitor.
|
||||
*/
|
||||
async handleNotifications(networkResponse) {
|
||||
async handleStatusNotifications(networkResponse) {
|
||||
try {
|
||||
//If status hasn't changed, we're done
|
||||
if (networkResponse.statusChanged === false) return false;
|
||||
|
||||
// if prevStatus is undefined, monitor is resuming, we're done
|
||||
if (networkResponse.prevStatus === undefined) return false;
|
||||
const notifications = await this.db.getNotificationsByMonitorId(
|
||||
networkResponse.monitorId
|
||||
);
|
||||
|
||||
for (const notification of notifications) {
|
||||
if (notification.type === "email") {
|
||||
this.sendEmail(networkResponse, notification.address);
|
||||
}
|
||||
// Handle other types of notifications here
|
||||
}
|
||||
return true;
|
||||
} catch (error) {
|
||||
this.logger.warn({
|
||||
message: error.message,
|
||||
service: this.SERVICE_NAME,
|
||||
method: "handleNotifications",
|
||||
stack: error.stack,
|
||||
});
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Handles status change notifications for a monitor
|
||||
*
|
||||
* @async
|
||||
* @function handleStatusNotifications
|
||||
* @param {Object} networkResponse - Response object containing monitor status information
|
||||
* @returns {Promise<boolean>} - Indicates whether notifications were processed
|
||||
* @throws {Error}
|
||||
*/
|
||||
async handleHardwareNotifications(networkResponse) {
|
||||
const thresholds = networkResponse?.monitor?.thresholds;
|
||||
if (thresholds === undefined) return false; // No thresholds set, we're done
|
||||
|
||||
// Get thresholds from monitor
|
||||
const {
|
||||
usage_cpu: cpuThreshold = -1,
|
||||
usage_memory: memoryThreshold = -1,
|
||||
usage_disk: diskThreshold = -1,
|
||||
} = thresholds;
|
||||
|
||||
// Get metrics from response
|
||||
const metrics = networkResponse?.payload?.data ?? null;
|
||||
if (metrics === null) return false;
|
||||
|
||||
const {
|
||||
cpu: { usage_percent: cpuUsage = -1 } = {},
|
||||
memory: { usage_percent: memoryUsage = -1 } = {},
|
||||
disk = [],
|
||||
} = metrics;
|
||||
|
||||
const alerts = {
|
||||
cpu: cpuThreshold !== -1 && cpuUsage > cpuThreshold ? true : false,
|
||||
memory: memoryThreshold !== -1 && memoryUsage > memoryThreshold ? true : false,
|
||||
disk: disk.some((d) => diskThreshold !== -1 && d.usage_percent > diskThreshold)
|
||||
? true
|
||||
: false,
|
||||
};
|
||||
|
||||
const notifications = await this.db.getNotificationsByMonitorId(
|
||||
networkResponse.monitorId
|
||||
);
|
||||
for (const notification of notifications) {
|
||||
const alertsToSend = [];
|
||||
const alertTypes = ["cpu", "memory", "disk"];
|
||||
|
||||
for (const type of alertTypes) {
|
||||
// Iterate over each alert type to see if any need to be decremented
|
||||
if (alerts[type] === true) {
|
||||
notification[`${type}AlertThreshold`]--; // Decrement threshold if an alert is triggered
|
||||
|
||||
if (notification[`${type}AlertThreshold`] <= 0) {
|
||||
// If threshold drops below 0, reset and send notification
|
||||
notification[`${type}AlertThreshold`] = notification.alertThreshold;
|
||||
|
||||
const formatAlert = {
|
||||
cpu: () =>
|
||||
`Your current CPU usage (${(cpuUsage * 100).toFixed(0)}%) is above your threshold (${(cpuThreshold * 100).toFixed(0)}%)`,
|
||||
memory: () =>
|
||||
`Your current memory usage (${(memoryUsage * 100).toFixed(0)}%) is above your threshold (${(memoryThreshold * 100).toFixed(0)}%)`,
|
||||
disk: () =>
|
||||
`Your current disk usage: ${disk
|
||||
.map((d, idx) => `(Disk${idx}: ${(d.usage_percent * 100).toFixed(0)}%)`)
|
||||
.join(
|
||||
", "
|
||||
)} is above your threshold (${(diskThreshold * 100).toFixed(0)}%)`,
|
||||
};
|
||||
alertsToSend.push(formatAlert[type]());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
await notification.save();
|
||||
|
||||
if (alertsToSend.length === 0) continue; // No alerts to send, we're done
|
||||
|
||||
if (notification.type === "email") {
|
||||
this.sendHardwareEmail(networkResponse, notification.address, alertsToSend);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles notifications for different monitor types
|
||||
*
|
||||
* @async
|
||||
* @function handleNotifications
|
||||
* @param {Object} networkResponse - Response object containing monitor information
|
||||
* @returns {Promise<boolean>} - Indicates whether notifications were processed successfully
|
||||
*/
|
||||
async handleNotifications(networkResponse) {
|
||||
try {
|
||||
if (networkResponse.monitor.type === "hardware") {
|
||||
this.handleHardwareNotifications(networkResponse);
|
||||
}
|
||||
this.handleStatusNotifications(networkResponse);
|
||||
return true;
|
||||
} catch (error) {
|
||||
this.logger.warn({
|
||||
message: error.message,
|
||||
|
||||
@@ -29,7 +29,8 @@ class StatusService {
|
||||
const { monitorId, status } = networkResponse;
|
||||
const monitor = await this.db.getMonitorById(monitorId);
|
||||
// No change in monitor status, return early
|
||||
if (monitor.status === status) return { statusChanged: false };
|
||||
if (monitor.status === status)
|
||||
return { monitor, statusChanged: false, prevStatus: monitor.status };
|
||||
// Monitor status changed, save prev status and update monitor
|
||||
|
||||
this.logger.info({
|
||||
@@ -103,7 +104,7 @@ class StatusService {
|
||||
|
||||
if (type === "hardware") {
|
||||
const { cpu, memory, disk, host } = payload?.data ?? {};
|
||||
const { errors } = payload;
|
||||
// Take the error list itself. Destructuring `errors` out of the list read a
// non-existent `.errors` property and always produced undefined.
const errors = payload?.errors ?? [];
|
||||
check.cpu = cpu ?? {};
|
||||
check.memory = memory ?? {};
|
||||
check.disk = disk ?? {};
|
||||
|
||||
43
Server/templates/hardwareIncident.mjml
Normal file
43
Server/templates/hardwareIncident.mjml
Normal file
@@ -0,0 +1,43 @@
|
||||
<mjml>
|
||||
<mj-head>
|
||||
<mj-font name="Roboto" href="https://fonts.googleapis.com/css?family=Roboto:300,500"></mj-font>
|
||||
<mj-attributes>
|
||||
<mj-all font-family="Roboto, Helvetica, sans-serif"></mj-all>
|
||||
<mj-text font-weight="300" font-size="16px" color="#616161" line-height="24px"></mj-text>
|
||||
<mj-section padding="0px"></mj-section>
|
||||
</mj-attributes>
|
||||
</mj-head>
|
||||
<mj-body>
|
||||
<mj-section padding="20px 0">
|
||||
<mj-column width="100%">
|
||||
<mj-text align="left" font-size="10px">
|
||||
Message from BlueWave Infrastructure Monitoring
|
||||
</mj-text>
|
||||
</mj-column>
|
||||
<mj-column width="45%" padding-top="20px">
|
||||
<mj-text align="center" font-weight="500" padding="0px" font-size="18px" color="red">
|
||||
Infrastructure Alerts
|
||||
</mj-text>
|
||||
<mj-divider border-width="2px" border-color="#616161"></mj-divider>
|
||||
</mj-column>
|
||||
</mj-section>
|
||||
<mj-section>
|
||||
<mj-column width="100%">
|
||||
<mj-text>
|
||||
<p>Hello {{name}}!</p>
|
||||
<p>{{monitor}} at {{url}} has the following infrastructure alerts:</p>
|
||||
{{#each alerts}}
|
||||
<p>• {{this}}</p>
|
||||
{{/each}}
|
||||
</mj-text>
|
||||
</mj-column>
|
||||
<mj-column width="100%">
|
||||
<mj-divider border-width="1px" border-color="#E0E0E0"></mj-divider>
|
||||
<mj-button background-color="#1570EF"> View Infrastructure Details </mj-button>
|
||||
<mj-text font-size="12px">
|
||||
<p>This email was sent by BlueWave Infrastructure Monitoring.</p>
|
||||
</mj-text>
|
||||
</mj-column>
|
||||
</mj-section>
|
||||
</mj-body>
|
||||
</mjml>
|
||||
@@ -1,40 +0,0 @@
|
||||
<mjml>
|
||||
<mj-head>
|
||||
<mj-font name="Roboto" href="https://fonts.googleapis.com/css?family=Roboto:300,500"></mj-font>
|
||||
<mj-attributes>
|
||||
<mj-all font-family="Roboto, Helvetica, sans-serif"></mj-all>
|
||||
<mj-text font-weight="300" font-size="16px" color="#616161" line-height="24px"></mj-text>
|
||||
<mj-section padding="0px"></mj-section>
|
||||
</mj-attributes>
|
||||
</mj-head>
|
||||
<mj-body>
|
||||
<mj-section padding="20px 0">
|
||||
<mj-column width="100%">
|
||||
<mj-text align="left" font-size="10px">
|
||||
Message from BlueWave Uptime Service
|
||||
</mj-text>
|
||||
</mj-column>
|
||||
<mj-column width="45%" padding-top="20px">
|
||||
<mj-text font-weight="500" padding="0px" font-size="18px">
|
||||
{{message}}
|
||||
</mj-text>
|
||||
<mj-text font-weight="500" padding="0px" font-size="18px">
|
||||
{{#if cpu}}
|
||||
{{cpu}}
|
||||
{{/if}}
|
||||
</mj-text>
|
||||
<mj-text font-weight="500" padding="0px" font-size="18px">
|
||||
{{#if disk}}
|
||||
{{disk}}
|
||||
{{/if}}
|
||||
</mj-text>
|
||||
<mj-text font-weight="500" padding="0px" font-size="18px">
|
||||
{{#if memory}}
|
||||
{{memory}}
|
||||
{{/if}}
|
||||
</mj-text>
|
||||
|
||||
</mj-column>
|
||||
</mj-section>
|
||||
</mj-body>
|
||||
</mjml>
|
||||
@@ -78,11 +78,23 @@ describe("NotificationService", () => {
|
||||
describe("handleNotifications", async () => {
|
||||
it("should handle notifications based on the network response", async () => {
|
||||
notificationService.sendEmail = sinon.stub();
|
||||
notificationService.db.getNotificationsByMonitorId.resolves([
|
||||
{ type: "email", address: "www.google.com" },
|
||||
]);
|
||||
await notificationService.handleNotifications({ monitorId: "123" });
|
||||
expect(notificationService.sendEmail.calledOnce).to.be.true;
|
||||
const res = await notificationService.handleNotifications({
|
||||
monitor: {
|
||||
type: "email",
|
||||
address: "www.google.com",
|
||||
},
|
||||
});
|
||||
expect(res).to.be.true;
|
||||
});
|
||||
it("should handle hardware notifications", async () => {
|
||||
notificationService.sendEmail = sinon.stub();
|
||||
const res = await notificationService.handleNotifications({
|
||||
monitor: {
|
||||
type: "hardware",
|
||||
address: "www.google.com",
|
||||
},
|
||||
});
|
||||
expect(res).to.be.true;
|
||||
});
|
||||
|
||||
it("should handle an error when getting notifications", async () => {
|
||||
@@ -92,4 +104,184 @@ describe("NotificationService", () => {
|
||||
expect(notificationService.logger.warn.calledOnce).to.be.true;
|
||||
});
|
||||
});
|
||||
|
||||
// Suite callbacks run synchronously while Mocha collects tests, so the
// describe callback is a plain function rather than an async one.
describe("sendHardwareEmail", () => {
	let networkResponse, address, alerts;

	beforeEach(() => {
		networkResponse = {
			monitor: {
				name: "Test Monitor",
				url: "http://test.com",
			},
			status: true,
			prevStatus: false,
		};
		address = "test@test.com";
		alerts = ["test"];
	});

	afterEach(() => {
		sinon.restore();
	});

	it("should send an email notification with Hardware Template", async () => {
		emailService.buildAndSendEmail.resolves(true);
		// Compare call counts instead of absolute values so the check does not
		// depend on how the shared stub was used by earlier tests.
		const callsBefore = emailService.buildAndSendEmail.callCount;
		const res = await notificationService.sendHardwareEmail(
			networkResponse,
			address,
			alerts
		);
		expect(res).to.be.true;
		expect(emailService.buildAndSendEmail.callCount).to.equal(callsBefore + 1);
	});

	it("should return false if no alerts are provided", async () => {
		alerts = [];
		emailService.buildAndSendEmail.resolves(true);
		const callsBefore = emailService.buildAndSendEmail.callCount;
		const res = await notificationService.sendHardwareEmail(
			networkResponse,
			address,
			alerts
		);
		expect(res).to.be.false;
		// No alerts means no email may be built or sent
		expect(emailService.buildAndSendEmail.callCount).to.equal(callsBefore);
	});
});
|
||||
// Suite callbacks run synchronously while Mocha collects tests, so the
// describe callback is a plain function rather than an async one.
describe("handleStatusNotifications", () => {
	let networkResponse;

	beforeEach(() => {
		networkResponse = {
			monitor: {
				name: "Test Monitor",
				url: "http://test.com",
			},
			statusChanged: true,
			status: true,
			prevStatus: false,
		};
	});

	afterEach(() => {
		sinon.restore();
	});

	it("should handle status notifications", async () => {
		db.getNotificationsByMonitorId.resolves([
			{ type: "email", address: "test@test.com" },
		]);
		const res = await notificationService.handleStatusNotifications(networkResponse);
		expect(res).to.be.true;
	});

	it("should return false if status hasn't changed", async () => {
		networkResponse.statusChanged = false;
		const res = await notificationService.handleStatusNotifications(networkResponse);
		expect(res).to.be.false;
	});

	it("should return false if prevStatus is undefined", async () => {
		networkResponse.prevStatus = undefined;
		const res = await notificationService.handleStatusNotifications(networkResponse);
		expect(res).to.be.false;
	});

	it("should handle an error", async () => {
		const testError = new Error("Test Error");
		db.getNotificationsByMonitorId.rejects(testError);
		const warnCallsBefore = notificationService.logger.warn.callCount;

		// The service catches lookup failures and logs them, so the call must
		// resolve; asserting inside a catch block would never execute.
		const res = await notificationService.handleStatusNotifications(networkResponse);

		expect(res).to.not.equal(true);
		expect(notificationService.logger.warn.callCount).to.equal(warnCallsBefore + 1);
	});
});
|
||||
|
||||
// Suite callbacks run synchronously while Mocha collects tests, so the
// describe callback is a plain function rather than an async one.
describe("handleHardwareNotifications", () => {
	let networkResponse;

	// Builds a stand-in for a notification document. `save` is a stub owned by
	// the test, so persistence can be asserted without shared stub state.
	const buildNotification = (overrides = {}) => ({
		type: "email",
		address: "test@test.com",
		alertThreshold: 1,
		cpuAlertThreshold: 1,
		memoryAlertThreshold: 1,
		diskAlertThreshold: 1,
		save: sinon.stub().resolves(),
		...overrides,
	});

	beforeEach(() => {
		networkResponse = {
			monitor: {
				name: "Test Monitor",
				url: "http://test.com",
				thresholds: {
					usage_cpu: 1,
					usage_memory: 1,
					usage_disk: 1,
				},
			},
			payload: {
				data: {
					cpu: {
						usage_percent: 0.655,
					},
					memory: {
						usage_percent: 0.783,
					},
					disk: [
						{
							name: "/dev/sda1",
							usage_percent: 0.452,
						},
						{
							name: "/dev/sdb1",
							usage_percent: 0.627,
						},
					],
				},
			},
		};
	});

	afterEach(() => {
		sinon.restore();
	});

	it("should return false if no thresholds are set", async () => {
		networkResponse.monitor.thresholds = undefined;
		const res = await notificationService.handleHardwareNotifications(networkResponse);
		expect(res).to.be.false;
	});

	it("should return false if metrics are null", async () => {
		networkResponse.payload.data = null;
		const res = await notificationService.handleHardwareNotifications(networkResponse);
		expect(res).to.be.false;
	});

	it("should return true if request is well formed and thresholds > 0", async () => {
		const notification = buildNotification();
		db.getNotificationsByMonitorId.resolves([notification]);
		const res = await notificationService.handleHardwareNotifications(networkResponse);
		expect(res).to.be.true;
		// Usage is below every threshold, so no countdown may move
		expect(notification.cpuAlertThreshold).to.equal(1);
		expect(notification.memoryAlertThreshold).to.equal(1);
		expect(notification.diskAlertThreshold).to.equal(1);
	});

	it("should return true if thresholds are exceeded", async () => {
		// alertThreshold differs from the countdowns so the reset is observable
		const notification = buildNotification({ alertThreshold: 2 });
		db.getNotificationsByMonitorId.resolves([notification]);
		networkResponse.monitor.thresholds = {
			usage_cpu: 0.01,
			usage_memory: 0.01,
			usage_disk: 0.01,
		};
		const res = await notificationService.handleHardwareNotifications(networkResponse);
		expect(res).to.be.true;
		// Each countdown went 1 -> 0 and was reset to alertThreshold
		expect(notification.cpuAlertThreshold).to.equal(2);
		expect(notification.memoryAlertThreshold).to.equal(2);
		expect(notification.diskAlertThreshold).to.equal(2);
		expect(notification.save.calledOnce).to.be.true;
	});

	it("should only decrement the countdown while it is still above zero", async () => {
		const notification = buildNotification({
			alertThreshold: 3,
			cpuAlertThreshold: 3,
			memoryAlertThreshold: 3,
			diskAlertThreshold: 3,
		});
		db.getNotificationsByMonitorId.resolves([notification]);
		networkResponse.monitor.thresholds = {
			usage_cpu: 0.01,
			usage_memory: 0.01,
			usage_disk: 0.01,
		};
		const res = await notificationService.handleHardwareNotifications(networkResponse);
		expect(res).to.be.true;
		// One violating check: 3 -> 2, no reset yet
		expect(notification.cpuAlertThreshold).to.equal(2);
		expect(notification.memoryAlertThreshold).to.equal(2);
		expect(notification.diskAlertThreshold).to.equal(2);
		expect(notification.save.calledOnce).to.be.true;
	});
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user