diff --git a/main.tf b/main.tf
index ae95cf5fa5fa2bfe415ab337d2b72010078448b1..34b64e84c36d7c3721ad527b34a93dc900cd65f3 100644
--- a/main.tf
+++ b/main.tf
@@ -140,3 +140,13 @@ resource "google_cloud_run_domain_mapping" "webapp" {
     route_name = google_cloud_run_service.webapp.name
   }
 }
+
+module "uptime_monitoring" {
+  source           = "./modules/monitoring"
+  project          = var.project
+  email_address    = var.alerting_email_address
+  monitored_domain = var.dns_name
+  uptime_timeout   = var.alerting_uptime_timeout
+  uptime_period    = var.alerting_uptime_period
+  enabled          = var.alerting_enabled
+}
diff --git a/modules/monitoring/README.md b/modules/monitoring/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..df5ab4156599cfe5c6feba3b5d875e2c16e7eb9a
--- /dev/null
+++ b/modules/monitoring/README.md
@@ -0,0 +1,11 @@
+# Basic email uptime alerting
+
+This provides basic uptime alerting via email for failing http polling. See
+[variables.tf](variables.tf) for how to configure this module.
+
+
+Note that the project must be in a Stackdriver monitoring workspace and this
+must be configured manually. At the time of writing there is no terraform
+support for this. This module will error when applying if this is not so.
+
+
diff --git a/modules/monitoring/main.tf b/modules/monitoring/main.tf
new file mode 100644
index 0000000000000000000000000000000000000000..09c8db9b070b04e780e1e82c69d05402756c03b1
--- /dev/null
+++ b/modules/monitoring/main.tf
@@ -0,0 +1,90 @@
+
+
+locals {
+  # this is a hack to allow disabling everything. In tf 0.13 (in beta at the
+  # time of writing) count can be applied to the module inclusion phase so this
+  # won't be needed.
+  #
+  # No resources are created when no alert email address is configured.
+
+  count = var.email_address == "" ? 0 : 1
+}
+
+# HTTPS uptime check polling https://<monitored_domain><polling_path> on port 443.
+resource "google_monitoring_uptime_check_config" "https" {
+  count        = local.count
+  display_name = "https-uptime-check"
+
+  timeout = var.uptime_timeout
+  period  = var.uptime_period
+
+  project = var.project
+
+  http_check {
+    path         = var.polling_path
+    port         = "443"
+    use_ssl      = true
+    validate_ssl = true
+  }
+
+  monitored_resource {
+    type = "uptime_url"
+    labels = {
+      project_id = var.project
+      host       = var.monitored_domain
+    }
+  }
+
+  # workaround - see https://github.com/terraform-providers/terraform-provider-google/issues/3133
+  lifecycle {
+    create_before_destroy = true
+  }
+}
+
+# Email notification channel that receives the uptime alerts.
+resource "google_monitoring_notification_channel" "notification_email" {
+  count        = local.count
+  project      = var.project
+  display_name = "Notifications Email"
+  type         = "email"
+  labels = {
+    email_address = var.email_address
+  }
+}
+
+# Alert policy tying the uptime check to the email notification channel.
+resource "google_monitoring_alert_policy" "uptime_alert" {
+  enabled               = var.enabled
+  count                 = local.count
+  project               = var.project
+  display_name          = "HTTP uptime alert"
+  notification_channels = [google_monitoring_notification_channel.notification_email[count.index].id]
+
+  combiner = "OR"
+  conditions {
+    display_name = "http check failing for ${var.monitored_domain}${var.polling_path}"
+
+    condition_threshold {
+      filter          = <<-EOT
+        metric.type="monitoring.googleapis.com/uptime_check/check_passed" AND
+        metric.label.check_id="${google_monitoring_uptime_check_config.https[count.index].uptime_check_id}" AND
+        resource.type="uptime_url"
+      EOT
+      duration        = "60s"
+      comparison      = "COMPARISON_GT"
+      threshold_value = "1"
+      trigger { count = "1" }
+
+      # check_passed is a boolean metric, so it has to be reduced to a number
+      # before it can be compared with threshold_value: count the checker
+      # locations whose most recent result is a failure.
+      aggregations {
+        alignment_period     = "1200s"
+        per_series_aligner   = "ALIGN_NEXT_OLDER"
+        cross_series_reducer = "REDUCE_COUNT_FALSE"
+        group_by_fields      = ["resource.label.*"]
+      }
+    }
+
+  }
+}
diff --git a/modules/monitoring/variables.tf b/modules/monitoring/variables.tf
new file mode 100644
index 0000000000000000000000000000000000000000..1f54f74852fc17768ae18e026fdb88636889882f
--- /dev/null
+++ b/modules/monitoring/variables.tf
@@ -0,0 +1,39 @@
+variable "email_address" {
+  default     = ""
+  type        = string
+  description = "Email address for alerts"
+}
+
+variable "monitored_domain" {
+  type        = string
+  description = "domain component of url to be monitored"
+}
+
+variable "polling_path" {
+  type        = string
+  default     = "/"
+  description = "path component of url to be monitored"
+}
+
+variable "project" {
+  type        = string
+  description = "project for all resources"
+}
+
+variable "uptime_timeout" {
+  type        = string
+  default     = "30s"
+  description = "timeout for http polling"
+}
+
+variable "uptime_period" {
+  type        = string
+  default     = "300s"
+  description = "Frequency of uptime checks"
+}
+
+variable "enabled" {
+  type        = bool
+  default     = true
+  description = "Whether the alerting policy is enabled"
+}
diff --git a/variables.tf b/variables.tf
index d26da139f33e39ccfed3e15c7ef66b376e962475..3bd4ae6a37d14408d2cb6b976c69d0fd73d7d927 100644
--- a/variables.tf
+++ b/variables.tf
@@ -90,3 +90,29 @@ variable "service_account_display_name" {
 If non-empty, override the default display name of the webapp service account.
 EOI
 }
+
+variable "alerting_email_address" {
+  default     = ""
+  type        = string
+  description = <<EOT
+Email address for basic uptime alerts. If empty (the default) no alerting will be configured. Otherwise note that the project must be in a Stackdriver monitoring workspace and this must be configured manually (no terraform support).
+EOT
+}
+
+variable "alerting_uptime_timeout" {
+  default     = "30s"
+  type        = string
+  description = "timeout for http polling"
+}
+
+variable "alerting_uptime_period" {
+  type        = string
+  default     = "300s"
+  description = "Frequency of uptime checks"
+}
+
+variable "alerting_enabled" {
+  type        = bool
+  default     = true
+  description = "Whether alerting policy is enabled"
+}