diff --git a/CHANGELOG b/CHANGELOG index 0aabf1a0a30c266e91e86fbbc11202b4676b73d9..c560e5afa31820c7c25c3c7a705ecc32266e59c5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [4.1.3] - 2022-01-24 +### Added + - Add a monitoring dashboard, defined in ./dashboard.json, for the Cloud Run service. + The dashboard contains charts for Request count and latency, CPU and memory utilisation, + and container instance count and billable time. + ## [4.1.2] - 2022-01-10 ### Fixed - Correct logic used to decide if custom domains can be monitored. diff --git a/dashboard.json b/dashboard.json new file mode 100644 index 0000000000000000000000000000000000000000..a470ecad228ad9e14590ebcdb172ad05a611b8fd --- /dev/null +++ b/dashboard.json @@ -0,0 +1,334 @@ +{ + "displayName": "Cloud Run service: ${service_name}", + "mosaicLayout": { + "columns": 12, + "tiles": [ + { + "width": 6, + "height": 4, + "widget": { + "title": "Request Counts", + "xyChart": { + "dataSets": [ + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/request_count\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" metric.label.\"response_code\"!=\"404\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_RATE", + "crossSeriesReducer": "REDUCE_SUM", + "groupByFields": [ + "metric.label.\"response_code_class\"" + ] + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR" + } + } + } + }, + { + "xPos": 6, + "width": 6, + "height": 4, + "widget": { + "title": "Request Latencies", + "xyChart": { + "dataSets": [ + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/request_latencies\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_DELTA", + "crossSeriesReducer": "REDUCE_PERCENTILE_99", + "groupByFields": [ + "resource.label.\"service_name\"" + ] + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + }, + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/request_latencies\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_DELTA", + "crossSeriesReducer": "REDUCE_PERCENTILE_95", + "groupByFields": [ + "resource.label.\"service_name\"" + ] + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + }, + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/request_latencies\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_DELTA", + "crossSeriesReducer": "REDUCE_PERCENTILE_50", + "groupByFields": [ + "resource.label.\"service_name\"" + ] + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR" + } + } + } + }, + { + "yPos": 4, + "width": 6, + "height": 3, + "widget": { + "title": "Container CPU Utilisation", + "xyChart": { + "dataSets": [ + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/container/cpu/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_SUM", + "crossSeriesReducer": "REDUCE_PERCENTILE_99" + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + }, + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/container/cpu/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_SUM", + "crossSeriesReducer": "REDUCE_PERCENTILE_95" + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + }, + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/container/cpu/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_SUM", + "crossSeriesReducer": "REDUCE_PERCENTILE_50" + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR" + } + } + } + }, + { + "xPos": 6, + "yPos": 4, + "width": 6, + "height": 3, + "widget": { + "title": "Container Memory Utiliation", + "xyChart": { + "dataSets": [ + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/container/memory/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_SUM", + "crossSeriesReducer": "REDUCE_PERCENTILE_99", + "groupByFields": [ + "resource.label.\"service_name\"" + ] + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + }, + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/container/memory/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_SUM", + "crossSeriesReducer": "REDUCE_PERCENTILE_95", + "groupByFields": [ + "resource.label.\"service_name\"" + ] + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + }, + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/container/memory/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_SUM", + "crossSeriesReducer": "REDUCE_PERCENTILE_50", + "groupByFields": [ + "resource.label.\"service_name\"" + ] + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + } + ], + "timeshiftDuration": "0s", + "yAxis": { + "label": "y1Axis", + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR" + } + } + } + }, + { + "yPos": 7, + "width": 6, + "height": 3, + "widget": { + "title": "Instance Count", + "xyChart": { + "dataSets": [ + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/container/instance_count\" resource.type=\"cloud_run_revision\" resource.labels.service_name=\"${service_name}\" resource.labels.location=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_MAX", + "crossSeriesReducer": "REDUCE_SUM", + "groupByFields": [ + "resource.label.service_name", + "metric.label.state" + ] + } + }, + "unitOverride": "count" + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + } + ], + "xAxis": { + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR" + } + } + } + }, + { + "xPos": 6, + "yPos": 7, + "width": 6, + "height": 3, + "widget": { + "title": "Billable instance time", + "xyChart": { + "dataSets": [ + { + "timeSeriesQuery": { + "timeSeriesFilter": { + "filter": "metric.type=\"run.googleapis.com/container/billable_instance_time\" resource.type=\"cloud_run_revision\" resource.labels.service_name=\"${service_name}\" resource.labels.location=\"${region}\"", + "aggregation": { + "alignmentPeriod": "60s", + "perSeriesAligner": "ALIGN_RATE", + "crossSeriesReducer": "REDUCE_SUM", + "groupByFields": [ + "resource.label.service_name" + ] + } + } + }, + "plotType": "LINE", + "minAlignmentPeriod": "60s", + "targetAxis": "Y1" + } + ], + "xAxis": { + "scale": "LINEAR" + }, + "chartOptions": { + "mode": "COLOR" + } + } + } + } + ] + } +} diff --git a/dashboard.tf b/dashboard.tf new file mode 100644 index 0000000000000000000000000000000000000000..b639e7c637da63493c73454e6218775040cccde6 --- /dev/null +++ b/dashboard.tf @@ -0,0 +1,22 @@ +# Create a monitoring dashboard for the provisioned Cloud Run service. +# +# The dashboard, defined in ./dashboard.json, contains: +# - Request counts broken down by response code class (ignoring 404s). +# - Request latencies for 50th, 95th and 99th percentile. +# - Container CPU and memory utilisations for 50th, 95th and 99th percentile. +# - Container instance count and billable instance time. + +data "template_file" "dashboard_json" { + count = var.create_monitoring_dashboard ? 1 : 0 + + template = file("${path.module}/dashboard.json") + vars = { + service_name = var.name + region = var.cloud_run_region + } +} + +resource "google_monitoring_dashboard" "dashboard" { + for_each = toset([for template in data.template_file.dashboard_json : template.rendered]) + dashboard_json = each.key +} diff --git a/variables.tf b/variables.tf index 428f3dcd1a226ac210fd224d9e125547da43d2cd..bf744526dee7beec4b2ffae567d7fdc600f765a8 100644 --- a/variables.tf +++ b/variables.tf @@ -260,6 +260,15 @@ variable "disable_monitoring" { EOL } +variable "create_monitoring_dashboard" { + type = bool + default = false + description = <<-EOL + Optional. Determines whether to create the monitoring dashboard defined in ./dashboard.json + for the provisioned Cloud Run service. Defaults to false. +EOL +} + variable "enable_static_egress_ip" { default = false type = bool