Compare commits

...

4 Commits

Author SHA1 Message Date
Piyush Jain
769ed48a86 add observability config roles 2025-03-19 13:12:38 +05:30
Piyush Jain
d14262f804 add observability config roles 2025-03-19 13:11:44 +05:30
Piyush Jain
864ad8ac45 remove dead code 2025-03-18 23:17:55 +05:30
Piyush Jain
f7a9f86693 - move rds and elasticache to specific files
- change successfulJobsHistory to 0
- add cloudwatch alarms for rds, elb, sqs and dynamodb
- change elasticache to serverless and update secrets
2025-03-18 23:11:51 +05:30
7 changed files with 378 additions and 140 deletions

View File

@@ -0,0 +1,195 @@
data "aws_ssm_parameter" "slack_notification_channel" {
name = "/prod/formbricks/slack-webhook-url"
with_decryption = true
}
resource "aws_cloudwatch_log_group" "cloudwatch_cis_benchmark" {
name = "/aws/cis-benchmark-group"
retention_in_days = 365
}
module "notify-slack" {
source = "terraform-aws-modules/notify-slack/aws"
version = "6.6.0"
slack_channel = "kubernetes"
slack_username = "formbricks-cloudwatch"
slack_webhook_url = data.aws_ssm_parameter.slack_notification_channel.value
sns_topic_name = "cloudwatch-alarms"
create_sns_topic = true
}
module "cloudwatch_cis-alarms" {
source = "terraform-aws-modules/cloudwatch/aws//modules/cis-alarms"
version = "5.7.1"
log_group_name = aws_cloudwatch_log_group.cloudwatch_cis_benchmark.name
alarm_actions = [module.notify-slack.slack_topic_arn]
}
locals {
alarms = {
ALB_HTTPCode_Target_5XX_Count = {
alarm_description = "Average API 5XX target group error code count is too high"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 1
period = 60
unit = "Count"
namespace = "AWS/ApplicationELB"
metric_name = "HTTPCode_Target_5XX_Count"
statistic = "Sum"
}
ALB_HTTPCode_ELB_5XX_Count = {
alarm_description = "Average API 5XX load balancer error code count is too high"
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 1
period = 60
unit = "Count"
namespace = "AWS/ApplicationELB"
metric_name = "HTTPCode_ELB_5XX_Count"
statistic = "Sum"
}
ALB_TargetResponseTime = {
alarm_description = format("Average API response time is greater than %s", 0.05)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 0.05
period = 60
unit = "Seconds"
namespace = "AWS/ApplicationELB"
metric_name = "TargetResponseTime"
statistic = "Average"
}
ALB_UnHealthyHostCount = {
alarm_description = format("Unhealthy host count is greater than %s", 1)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 1
period = 60
unit = "Count"
namespace = "AWS/ApplicationELB"
metric_name = "UnHealthyHostCount"
statistic = "Minimum"
}
RDS_CPUUtilization = {
alarm_description = format("Average RDS CPU utilization is greater than %s", 80)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 80
period = 60
unit = "Percent"
namespace = "AWS/RDS"
metric_name = "CPUUtilization"
statistic = "Average"
}
RDS_FreeStorageSpace = {
alarm_description = format("Average RDS free storage space is less than %s", 5)
comparison_operator = "LessThanThreshold"
evaluation_periods = 5
threshold = 5
period = 60
unit = "Gigabytes"
namespace = "AWS/RDS"
metric_name = "FreeStorageSpace"
statistic = "Average"
}
RDS_FreeableMemory = {
alarm_description = format("Average RDS freeable memory is less than %s", 100)
comparison_operator = "LessThanThreshold"
evaluation_periods = 5
threshold = 100
period = 60
unit = "Megabytes"
namespace = "AWS/RDS"
metric_name = "FreeableMemory"
statistic = "Average"
}
RDS_DiskQueueDepth = {
alarm_description = format("Average RDS disk queue depth is greater than %s", 1)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 1
period = 60
unit = "Count"
namespace = "AWS/RDS"
metric_name = "DiskQueueDepth"
statistic = "Average"
}
RDS_ReadIOPS = {
alarm_description = format("Average RDS read IOPS is greater than %s", 1000)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 1000
period = 60
unit = "Count/Second"
namespace = "AWS/RDS"
metric_name = "ReadIOPS"
statistic = "Average"
}
RDS_WriteIOPS = {
alarm_description = format("Average RDS write IOPS is greater than %s", 1000)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 1000
period = 60
unit = "Count/Second"
namespace = "AWS/RDS"
metric_name = "WriteIOPS"
statistic = "Average"
}
SQS_ApproximateAgeOfOldestMessage = {
alarm_description = format("Average SQS approximate age of oldest message is greater than %s", 300)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 300
period = 60
unit = "Seconds"
namespace = "AWS/SQS"
metric_name = "ApproximateAgeOfOldestMessage"
statistic = "Maximum"
}
DynamoDB_ConsumedReadCapacityUnits = {
alarm_description = format("Average DynamoDB consumed read capacity units is greater than %s", 90)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 90
period = 60
unit = "Count"
namespace = "AWS/DynamoDB"
metric_name = "ConsumedReadCapacityUnits"
statistic = "Average"
}
Lambda_Errors = {
alarm_description = format("Average Lambda errors is greater than %s", 1)
comparison_operator = "GreaterThanThreshold"
evaluation_periods = 5
threshold = 1
period = 60
unit = "Count"
namespace = "AWS/Lambda"
metric_name = "Errors"
statistic = "Sum"
}
}
}
module "metric_alarm" {
source = "terraform-aws-modules/cloudwatch/aws//modules/metric-alarm"
version = "5.7.1"
for_each = local.alarms
alarm_name = each.key
alarm_description = each.value.alarm_description
comparison_operator = each.value.comparison_operator
evaluation_periods = each.value.evaluation_periods
threshold = each.value.threshold
period = each.value.period
unit = each.value.unit
namespace = each.value.namespace
metric_name = each.value.metric_name
statistic = each.value.statistic
alarm_actions = [module.notify-slack.slack_topic_arn]
}

View File

@@ -1,27 +0,0 @@
data "aws_ssm_parameter" "slack_notification_channel" {
name = "/prod/formbricks/slack-webhook-url"
with_decryption = true
}
resource "aws_cloudwatch_log_group" "cloudwatch_cis_benchmark" {
name = "/aws/cis-benchmark-group"
retention_in_days = 365
}
module "notify-slack" {
source = "terraform-aws-modules/notify-slack/aws"
version = "6.6.0"
slack_channel = "kubernetes"
slack_username = "formbricks-cloudwatch"
slack_webhook_url = data.aws_ssm_parameter.slack_notification_channel.value
sns_topic_name = "cloudwatch-alarms"
create_sns_topic = true
}
module "cloudwatch_cis-alarms" {
source = "terraform-aws-modules/cloudwatch/aws//modules/cis-alarms"
version = "5.7.1"
log_group_name = aws_cloudwatch_log_group.cloudwatch_cis_benchmark.name
alarm_actions = [module.notify-slack.slack_topic_arn]
}

View File

@@ -0,0 +1,70 @@
################################################################################
# ElastiCache Module
################################################################################
resource "random_password" "valkey" {
length = 20
special = false
}
resource "random_password" "valkey_default_user" {
length = 20
special = false
}
module "valkey_sg" {
source = "terraform-aws-modules/security-group/aws"
version = "~> 5.0"
name = "valkey-sg"
description = "Security group for VPC traffic"
vpc_id = module.vpc.vpc_id
ingress_cidr_blocks = [module.vpc.vpc_cidr_block]
ingress_rules = ["redis-tcp"]
tags = local.tags
}
module "elasticache_user_group" {
source = "terraform-aws-modules/elasticache/aws//modules/user-group"
version = "1.4.1"
user_group_id = "${local.name}-valkey"
create_default_user = false
default_user = {
user_id = "formbricks-default"
passwords = [random_password.valkey_default_user.result]
}
users = {
formbricks = {
access_string = "on ~* +@all"
passwords = [random_password.valkey.result]
}
}
engine = "redis"
tags = merge(local.tags, {
terraform-aws-modules = "elasticache"
})
}
module "valkey_serverless" {
source = "terraform-aws-modules/elasticache/aws//modules/serverless-cache"
version = "1.4.1"
engine = "valkey"
cache_name = "${local.name}-valkey-serverless"
cache_usage_limits = {
data_storage = {
maximum = 2
}
ecpu_per_second = {
maximum = 1000
}
}
major_engine_version = 7
subnet_ids = module.vpc.database_subnets
security_group_ids = [
module.valkey_sg.security_group_id
]
user_group_id = module.elasticache_user_group.group_id
}

View File

@@ -106,116 +106,6 @@ module "vpc_vpc-endpoints" {
tags = local.tags
}
################################################################################
# PostgreSQL Serverless v2
################################################################################
data "aws_rds_engine_version" "postgresql" {
engine = "aurora-postgresql"
version = "16.4"
}
resource "random_password" "postgres" {
length = 20
special = false
}
module "rds-aurora" {
source = "terraform-aws-modules/rds-aurora/aws"
version = "9.12.0"
name = "${local.name}-postgres"
engine = data.aws_rds_engine_version.postgresql.engine
engine_mode = "provisioned"
engine_version = data.aws_rds_engine_version.postgresql.version
storage_encrypted = true
master_username = "formbricks"
master_password = random_password.postgres.result
manage_master_user_password = false
vpc_id = module.vpc.vpc_id
db_subnet_group_name = module.vpc.database_subnet_group_name
security_group_rules = {
vpc_ingress = {
cidr_blocks = module.vpc.private_subnets_cidr_blocks
}
}
performance_insights_enabled = true
apply_immediately = true
skip_final_snapshot = true
enable_http_endpoint = true
serverlessv2_scaling_configuration = {
min_capacity = 0
max_capacity = 10
seconds_until_auto_pause = 3600
}
instance_class = "db.serverless"
instances = {
one = {}
}
tags = local.tags
}
################################################################################
# ElastiCache Module
################################################################################
resource "random_password" "valkey" {
length = 20
special = false
}
module "elasticache" {
source = "terraform-aws-modules/elasticache/aws"
version = "1.4.1"
replication_group_id = "${local.name}-valkey"
engine = "valkey"
engine_version = "7.2"
node_type = "cache.m7g.large"
transit_encryption_enabled = true
auth_token = random_password.valkey.result
maintenance_window = "sun:05:00-sun:09:00"
apply_immediately = true
# Security Group
vpc_id = module.vpc.vpc_id
security_group_rules = {
ingress_vpc = {
# Default type is `ingress`
# Default port is based on the default engine port
description = "VPC traffic"
cidr_ipv4 = module.vpc.vpc_cidr_block
}
}
# Subnet Group
subnet_group_name = "${local.name}-valkey"
subnet_group_description = "${title(local.name)} subnet group"
subnet_ids = module.vpc.database_subnets
# Parameter Group
create_parameter_group = true
parameter_group_name = "${local.name}-valkey"
parameter_group_family = "valkey7"
parameter_group_description = "${title(local.name)} parameter group"
parameters = [
{
name = "latency-tracking"
value = "yes"
}
]
tags = local.tags
}
################################################################################
# EKS Module
################################################################################
@@ -671,6 +561,7 @@ resource "helm_release" "formbricks" {
jobs:
survey-status:
schedule: "0 0 * * *"
successfulJobsHistoryLimit: 0
env:
CRON_SECRET:
valueFrom:
@@ -692,6 +583,7 @@ resource "helm_release" "formbricks" {
- 'curl -X POST -H "content-type: application/json" -H "x-api-key: $CRON_SECRET" "$WEBAPP_URL/api/cron/survey-status"'
weekely-summary:
schedule: "0 8 * * 1"
successfulJobsHistoryLimit: 0
env:
CRON_SECRET:
valueFrom:
@@ -713,6 +605,7 @@ resource "helm_release" "formbricks" {
- 'curl -X POST -H "content-type: application/json" -H "x-api-key: $CRON_SECRET" "$WEBAPP_URL/api/cron/weekly-summary"'
ping:
schedule: "0 9 * * *"
successfulJobsHistoryLimit: 0
env:
CRON_SECRET:
valueFrom:

View File

@@ -0,0 +1,54 @@
module "loki_s3_bucket" {
source = "terraform-aws-modules/s3-bucket/aws"
version = "4.6.0"
bucket_prefix = "loki-"
force_destroy = true
control_object_ownership = true
object_ownership = "BucketOwnerPreferred"
}
module "observability_loki_iam_policy" {
source = "terraform-aws-modules/iam/aws//modules/iam-policy"
version = "5.53.0"
name_prefix = "loki-"
path = "/"
description = "Policy for fombricks observability apps"
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = [
"s3:*",
]
Resource = [
module.loki_s3_bucket.s3_bucket_arn,
"${module.loki_s3_bucket.s3_bucket_arn}/*"
]
}
]
})
}
module "observability_loki_iam_role" {
source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
version = "5.53.0"
role_name_prefix = "loki-"
role_policy_arns = {
"formbricks" = module.observability_loki_iam_policy.arn
}
assume_role_condition_test = "StringLike"
oidc_providers = {
eks = {
provider_arn = module.eks.oidc_provider_arn
namespace_service_accounts = ["monitoring:loki*"]
}
}
}

55
infra/terraform/rds.tf Normal file
View File

@@ -0,0 +1,55 @@
################################################################################
# PostgreSQL Serverless v2
################################################################################
data "aws_rds_engine_version" "postgresql" {
engine = "aurora-postgresql"
version = "16.4"
}
resource "random_password" "postgres" {
length = 20
special = false
}
module "rds-aurora" {
source = "terraform-aws-modules/rds-aurora/aws"
version = "9.12.0"
name = "${local.name}-postgres"
engine = data.aws_rds_engine_version.postgresql.engine
engine_mode = "provisioned"
engine_version = data.aws_rds_engine_version.postgresql.version
storage_encrypted = true
master_username = "formbricks"
master_password = random_password.postgres.result
manage_master_user_password = false
vpc_id = module.vpc.vpc_id
db_subnet_group_name = module.vpc.database_subnet_group_name
security_group_rules = {
vpc_ingress = {
cidr_blocks = module.vpc.private_subnets_cidr_blocks
}
}
performance_insights_enabled = true
apply_immediately = true
skip_final_snapshot = true
enable_http_endpoint = true
serverlessv2_scaling_configuration = {
min_capacity = 0
max_capacity = 10
seconds_until_auto_pause = 3600
}
instance_class = "db.serverless"
instances = {
one = {}
}
tags = local.tags
}

View File

@@ -3,12 +3,10 @@ resource "aws_secretsmanager_secret" "formbricks_app_secrets" {
name = "prod/formbricks/secrets"
}
resource "aws_secretsmanager_secret_version" "formbricks_app_secrets" {
secret_id = aws_secretsmanager_secret.formbricks_app_secrets.id
secret_string = jsonencode({
DATABASE_URL = "postgres://formbricks:${random_password.postgres.result}@${module.rds-aurora.cluster_endpoint}/formbricks"
REDIS_URL = "rediss://:${random_password.valkey.result}@${module.elasticache.replication_group_primary_endpoint_address}:6379"
REDIS_URL = "rediss://formbricks:${random_password.valkey.result}@${module.valkey_serverless.serverless_cache_endpoint[0].address}:6379"
})
}