diff --git a/modules/monitoring/main.tf b/modules/monitoring/main.tf index 09c8db9b070b04e780e1e82c69d05402756c03b1..909037e903437fa28faa30db97e4dc4d1f09c1f3 100644 --- a/modules/monitoring/main.tf +++ b/modules/monitoring/main.tf @@ -60,16 +60,30 @@ resource "google_monitoring_alert_policy" "uptime_alert" { display_name = "http check failing for ${var.monitored_domain}${var.polling_path}" condition_threshold { - filter = <<-EOT + filter = <<-EOT metric.type="monitoring.googleapis.com/uptime_check/check_passed" AND metric.label.check_id="${google_monitoring_uptime_check_config.https[count.index].uptime_check_id}" AND resource.type="uptime_url" EOT - duration = "60s" - comparison = "COMPARISON_GT" + duration = "60s" + comparison = "COMPARISON_GT" + threshold_value = "1" + trigger { count = "1" } - } + # I don't fully understand this stuff, but leaving this empty doesn't + # work; although it used to (either the API or the terraform provider + # has changed). This config was arrived at by following + # https://cloud.google.com/monitoring/uptime-checks via the dashboard, + # and then examining the differences that terraform wants to apply. It + # seems to work OK. + aggregations { + alignment_period = "120s" + group_by_fields = ["resource.*"] + cross_series_reducer = "REDUCE_COUNT_FALSE" + per_series_aligner = "ALIGN_NEXT_OLDER" + } + } } }