From 1d84818b53f66e7850f1d74a68229a59fab92b39 Mon Sep 17 00:00:00 2001
From: Dr Wajdi Hajji <wh330@cam.ac.uk>
Date: Mon, 17 Jan 2022 13:46:51 +0000
Subject: [PATCH] Support creating a monitoring dashboard

The dashboard, defined in ./dashboard.json, contains:
- Request counts broken down by response code class (ignoring 404s).
- Request latencies for the 50th, 95th and 99th percentiles.
- Container CPU and memory utilisations for the 50th, 95th and 99th percentiles.
- Container instance count and billable instance time.
---
 CHANGELOG      |   6 +
 dashboard.json | 334 +++++++++++++++++++++++++++++++++++++++++++++++++
 dashboard.tf   |  22 ++++
 variables.tf   |   9 ++
 4 files changed, 371 insertions(+)
 create mode 100644 dashboard.json
 create mode 100644 dashboard.tf

diff --git a/CHANGELOG b/CHANGELOG
index fba9df1..067463c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [4.1.1] - 2022-01-17
+### Added
+ - Add a monitoring dashboard, defined in ./dashboard.json, for the Cloud Run service.
+   The dashboard contains charts for request count and latency, CPU and memory utilisation,
+   and container instance count and billable time.
+
 ## [4.1.0] - 2021-07-28
 ### Added
  - Support load balancer ingress style alongside Cloud Run domain mapping.
diff --git a/dashboard.json b/dashboard.json
new file mode 100644
index 0000000..a470eca
--- /dev/null
+++ b/dashboard.json
@@ -0,0 +1,334 @@
+{
+  "displayName": "Cloud Run service: ${service_name}",
+  "mosaicLayout": {
+    "columns": 12,
+    "tiles": [
+      {
+        "width": 6,
+        "height": 4,
+        "widget": {
+          "title": "Request Counts",
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/request_count\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\" metric.label.\"response_code\"!=\"404\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "metric.label.\"response_code_class\""
+                      ]
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              }
+            ],
+            "timeshiftDuration": "0s",
+            "yAxis": {
+              "label": "y1Axis",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR"
+            }
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "width": 6,
+        "height": 4,
+        "widget": {
+          "title": "Request Latencies",
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/request_latencies\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_DELTA",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_99",
+                      "groupByFields": [
+                        "resource.label.\"service_name\""
+                      ]
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              },
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/request_latencies\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_DELTA",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_95",
+                      "groupByFields": [
+                        "resource.label.\"service_name\""
+                      ]
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              },
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/request_latencies\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_DELTA",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_50",
+                      "groupByFields": [
+                        "resource.label.\"service_name\""
+                      ]
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              }
+            ],
+            "timeshiftDuration": "0s",
+            "yAxis": {
+              "label": "y1Axis",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR"
+            }
+          }
+        }
+      },
+      {
+        "yPos": 4,
+        "width": 6,
+        "height": 3,
+        "widget": {
+          "title": "Container CPU Utilisation",
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/container/cpu/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_SUM",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_99"
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              },
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/container/cpu/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_SUM",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_95"
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              },
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/container/cpu/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_SUM",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_50"
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              }
+            ],
+            "timeshiftDuration": "0s",
+            "yAxis": {
+              "label": "y1Axis",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR"
+            }
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "yPos": 4,
+        "width": 6,
+        "height": 3,
+        "widget": {
+          "title": "Container Memory Utilisation",
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/container/memory/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_SUM",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_99",
+                      "groupByFields": [
+                        "resource.label.\"service_name\""
+                      ]
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              },
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/container/memory/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_SUM",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_95",
+                      "groupByFields": [
+                        "resource.label.\"service_name\""
+                      ]
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              },
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/container/memory/utilizations\" resource.type=\"cloud_run_revision\" resource.label.\"service_name\"=\"${service_name}\" resource.label.\"location\"=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_SUM",
+                      "crossSeriesReducer": "REDUCE_PERCENTILE_50",
+                      "groupByFields": [
+                        "resource.label.\"service_name\""
+                      ]
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              }
+            ],
+            "timeshiftDuration": "0s",
+            "yAxis": {
+              "label": "y1Axis",
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR"
+            }
+          }
+        }
+      },
+      {
+        "yPos": 7,
+        "width": 6,
+        "height": 3,
+        "widget": {
+          "title": "Instance Count",
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/container/instance_count\" resource.type=\"cloud_run_revision\" resource.labels.service_name=\"${service_name}\" resource.labels.location=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_MAX",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.service_name",
+                        "metric.label.state"
+                      ]
+                    }
+                  },
+                  "unitOverride": "count"
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              }
+            ],
+            "xAxis": {
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR"
+            }
+          }
+        }
+      },
+      {
+        "xPos": 6,
+        "yPos": 7,
+        "width": 6,
+        "height": 3,
+        "widget": {
+          "title": "Billable instance time",
+          "xyChart": {
+            "dataSets": [
+              {
+                "timeSeriesQuery": {
+                  "timeSeriesFilter": {
+                    "filter": "metric.type=\"run.googleapis.com/container/billable_instance_time\" resource.type=\"cloud_run_revision\" resource.labels.service_name=\"${service_name}\" resource.labels.location=\"${region}\"",
+                    "aggregation": {
+                      "alignmentPeriod": "60s",
+                      "perSeriesAligner": "ALIGN_RATE",
+                      "crossSeriesReducer": "REDUCE_SUM",
+                      "groupByFields": [
+                        "resource.label.service_name"
+                      ]
+                    }
+                  }
+                },
+                "plotType": "LINE",
+                "minAlignmentPeriod": "60s",
+                "targetAxis": "Y1"
+              }
+            ],
+            "xAxis": {
+              "scale": "LINEAR"
+            },
+            "chartOptions": {
+              "mode": "COLOR"
+            }
+          }
+        }
+      }
+    ]
+  }
+}
diff --git a/dashboard.tf b/dashboard.tf
new file mode 100644
index 0000000..b639e7c
--- /dev/null
+++ b/dashboard.tf
@@ -0,0 +1,22 @@
+# Create a monitoring dashboard for the provisioned Cloud Run service.
+#
+# The dashboard, defined in ./dashboard.json, contains:
+# - Request counts broken down by response code class (ignoring 404s).
+# - Request latencies for the 50th, 95th and 99th percentiles.
+# - Container CPU and memory utilisations for the 50th, 95th and 99th percentiles.
+# - Container instance count and billable instance time.
+#
+# The template is rendered with the built-in templatefile() function rather than
+# the deprecated "template_file" data source, and the resource is gated with
+# "count" so that its address does not depend on the rendered JSON (keying
+# for_each on the rendered document would force a destroy/recreate on every
+# change to the dashboard definition instead of an in-place update).
+
+resource "google_monitoring_dashboard" "dashboard" {
+  count = var.create_monitoring_dashboard ? 1 : 0
+
+  dashboard_json = templatefile("${path.module}/dashboard.json", {
+    service_name = var.name
+    region       = var.cloud_run_region
+  })
+}
diff --git a/variables.tf b/variables.tf
index 428f3dc..bf74452 100644
--- a/variables.tf
+++ b/variables.tf
@@ -260,6 +260,15 @@ variable "disable_monitoring" {
 EOL
 }
 
+variable "create_monitoring_dashboard" {
+  type        = bool
+  default     = false
+  description = <<-EOL
+    Optional. Determines whether to create the monitoring dashboard defined in ./dashboard.json
+    for the provisioned Cloud Run service. Defaults to false.
+EOL
+}
+
 variable "enable_static_egress_ip" {
   default = false
   type    = bool
-- 
GitLab