From 5713b5f5ba340c41f97995ed440c09ae97f56983 Mon Sep 17 00:00:00 2001 From: Sense T Date: Thu, 21 May 2026 06:02:21 +0000 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E7=9B=91=E6=8E=A7=E7=BB=84?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kube-prometheus-stack/.helmignore | 33 + kube-prometheus-stack/Chart.lock | 18 + kube-prometheus-stack/Chart.yaml | 72 + kube-prometheus-stack/README.md | 382 ++ kube-prometheus-stack/templates/NOTES.txt | 18 + kube-prometheus-stack/templates/_helpers.tpl | 399 ++ .../templates/alertmanager/alertmanager.yaml | 228 + .../templates/alertmanager/extrasecret.yaml | 20 + .../templates/alertmanager/ingress.yaml | 67 + .../alertmanager/ingressperreplica.yaml | 56 + .../templates/alertmanager/networkpolicy.yaml | 76 + .../alertmanager/podDisruptionBudget.yaml | 16 + .../templates/alertmanager/route.yaml | 63 + .../templates/alertmanager/secret.yaml | 29 + .../templates/alertmanager/service.yaml | 72 + .../alertmanager/serviceaccount.yaml | 21 + .../alertmanager/servicemonitor.yaml | 93 + .../alertmanager/serviceperreplica.yaml | 49 + .../alertmanager/verticalpodautoscaler.yaml | 41 + .../templates/exporters/core-dns/service.yaml | 28 + .../exporters/core-dns/servicemonitor.yaml | 54 + .../kube-api-server/servicemonitor.yaml | 51 + .../kube-controller-manager/endpoints.yaml | 22 + .../kube-controller-manager/service.yaml | 33 + .../servicemonitor.yaml | 63 + .../templates/exporters/kube-dns/service.yaml | 32 + .../exporters/kube-dns/servicemonitor.yaml | 69 + .../exporters/kube-etcd/endpoints.yaml | 20 + .../exporters/kube-etcd/service.yaml | 31 + .../exporters/kube-etcd/servicemonitor.yaml | 71 + .../exporters/kube-proxy/endpoints.yaml | 20 + .../exporters/kube-proxy/service.yaml | 31 + .../exporters/kube-proxy/servicemonitor.yaml | 59 + .../exporters/kube-scheduler/endpoints.yaml | 22 + .../exporters/kube-scheduler/service.yaml | 33 + .../kube-scheduler/servicemonitor.yaml | 63 + .../exporters/kubelet/servicemonitor.yaml | 149 + .../templates/extra-objects.yaml | 15 + .../grafana/configmaps-datasources.yaml | 98 + .../alertmanager-overview.yaml | 56 + .../grafana/dashboards-1.14/apiserver.yaml | 57 + .../dashboards-1.14/cluster-total.yaml | 57 + .../dashboards-1.14/controller-manager.yaml | 57 + .../grafana/dashboards-1.14/etcd.yaml | 56 + .../dashboards-1.14/grafana-overview.yaml | 56 + .../grafana/dashboards-1.14/k8s-coredns.yaml | 56 + .../k8s-resources-cluster.yaml | 57 + .../k8s-resources-multicluster.yaml | 57 + .../k8s-resources-namespace.yaml | 57 + .../dashboards-1.14/k8s-resources-node.yaml | 56 + .../dashboards-1.14/k8s-resources-pod.yaml | 57 + .../k8s-resources-windows-cluster.yaml | 56 + .../k8s-resources-windows-namespace.yaml | 56 + .../k8s-resources-windows-pod.yaml | 56 + .../k8s-resources-workload.yaml | 57 + .../k8s-resources-workloads-namespace.yaml | 57 + .../k8s-windows-cluster-rsrc-use.yaml | 56 + .../k8s-windows-node-rsrc-use.yaml | 56 + .../grafana/dashboards-1.14/kubelet.yaml | 57 + .../dashboards-1.14/namespace-by-pod.yaml | 57 + .../namespace-by-workload.yaml | 57 + .../node-cluster-rsrc-use.yaml | 56 + .../dashboards-1.14/node-rsrc-use.yaml | 56 + .../grafana/dashboards-1.14/nodes-aix.yaml | 56 + .../grafana/dashboards-1.14/nodes-darwin.yaml | 56 + .../grafana/dashboards-1.14/nodes.yaml | 56 + .../persistentvolumesusage.yaml | 57 + .../grafana/dashboards-1.14/pod-total.yaml | 57 + .../prometheus-remote-write.yaml | 56 + .../grafana/dashboards-1.14/prometheus.yaml | 56 + .../grafana/dashboards-1.14/proxy.yaml | 57 + .../grafana/dashboards-1.14/scheduler.yaml | 57 + .../dashboards-1.14/workload-total.yaml | 57 + .../_prometheus-operator.tpl | 7 + .../_prometheus-operator-webhook.tpl | 13 + .../deployment/deployment.yaml | 145 + .../admission-webhooks/deployment/pdb.yaml | 15 + .../deployment/service.yaml | 62 + .../deployment/serviceaccount.yaml | 18 + .../ciliumnetworkpolicy-createSecret.yaml | 36 + .../ciliumnetworkpolicy-patchWebhook.yaml | 36 + .../job-patch/clusterrole.yaml | 22 + .../job-patch/clusterrolebinding.yaml | 20 + .../job-patch/job-createSecret.yaml | 74 + .../job-patch/job-patchWebhook.yaml | 83 + .../job-patch/networkpolicy-createSecret.yaml | 33 + .../job-patch/networkpolicy-patchWebhook.yaml | 33 + .../admission-webhooks/job-patch/role.yaml | 21 + .../job-patch/rolebinding.yaml | 21 + .../job-patch/serviceaccount.yaml | 21 + .../mutatingWebhookConfiguration.yaml | 85 + .../validatingWebhookConfiguration.yaml | 155 + .../aggregate-clusterroles.yaml | 29 + .../prometheus-operator/certmanager.yaml | 61 + .../ciliumnetworkpolicy.yaml | 40 + .../prometheus-operator/clusterrole.yaml | 117 + .../clusterrolebinding.yaml | 16 + .../prometheus-operator/deployment.yaml | 249 + .../prometheus-operator/networkpolicy.yaml | 29 + .../templates/prometheus-operator/pdb.yaml | 15 + .../prometheus-operator/service.yaml | 61 + .../prometheus-operator/serviceaccount.yaml | 17 + .../prometheus-operator/servicemonitor.yaml | 47 + .../verticalpodautoscaler.yaml | 40 + .../templates/prometheus/_rules.tpl | 48 + .../additionalAlertRelabelConfigs.yaml | 16 + .../additionalAlertmanagerConfigs.yaml | 16 + .../prometheus/additionalPrometheusRules.yaml | 37 + .../prometheus/additionalScrapeConfigs.yaml | 20 + .../prometheus/ciliumnetworkpolicy.yaml | 26 + .../templates/prometheus/clusterrole.yaml | 48 + .../prometheus/clusterrolebinding.yaml | 18 + .../templates/prometheus/csi-secret.yaml | 12 + .../templates/prometheus/extrasecret.yaml | 20 + .../templates/prometheus/ingress.yaml | 66 + .../prometheus/ingressThanosSidecar.yaml | 59 + .../prometheus/ingressperreplica.yaml | 56 + .../templates/prometheus/networkpolicy.yaml | 34 + .../prometheus/podDisruptionBudget.yaml | 20 + .../templates/prometheus/podmonitors.yaml | 45 + .../templates/prometheus/prometheus.yaml | 538 ++ .../templates/prometheus/route.yaml | 63 + .../rules-1.14/alertmanager.rules.yaml | 303 + .../rules-1.14/config-reloaders.yaml | 57 + .../templates/prometheus/rules-1.14/etcd.yaml | 459 ++ .../prometheus/rules-1.14/general.rules.yaml | 125 + .../k8s.rules.container_cpu_limits.yaml | 63 + .../k8s.rules.container_cpu_requests.yaml | 63 + ...les.container_cpu_usage_seconds_total.yaml | 60 + .../k8s.rules.container_memory_cache.yaml | 43 + .../k8s.rules.container_memory_limits.yaml | 63 + .../k8s.rules.container_memory_requests.yaml | 63 + .../k8s.rules.container_memory_rss.yaml | 43 + .../k8s.rules.container_memory_swap.yaml | 43 + ...es.container_memory_working_set_bytes.yaml | 43 + .../k8s.rules.container_resource.yaml | 168 + .../rules-1.14/k8s.rules.pod_owner.yaml | 220 + .../kube-apiserver-availability.rules.yaml | 266 + .../kube-apiserver-burnrate.rules.yaml | 441 ++ .../kube-apiserver-histogram.rules.yaml | 54 + .../rules-1.14/kube-apiserver-slos.yaml | 159 + .../kube-prometheus-general.rules.yaml | 49 + .../kube-prometheus-node-recording.rules.yaml | 93 + .../rules-1.14/kube-scheduler.rules.yaml | 136 + .../rules-1.14/kube-state-metrics.yaml | 152 + .../prometheus/rules-1.14/kubelet.rules.yaml | 82 + .../rules-1.14/kubernetes-apps.yaml | 609 ++ .../rules-1.14/kubernetes-resources.yaml | 360 ++ .../rules-1.14/kubernetes-storage.yaml | 219 + .../kubernetes-system-apiserver.yaml | 232 + .../kubernetes-system-controller-manager.yaml | 86 + .../kubernetes-system-kube-proxy.yaml | 84 + .../rules-1.14/kubernetes-system-kubelet.yaml | 506 ++ .../kubernetes-system-scheduler.yaml | 86 + .../rules-1.14/kubernetes-system.yaml | 88 + .../rules-1.14/node-exporter.rules.yaml | 244 + .../prometheus/rules-1.14/node-exporter.yaml | 829 +++ .../prometheus/rules-1.14/node-network.yaml | 55 + .../prometheus/rules-1.14/node.rules.yaml | 109 + .../rules-1.14/prometheus-operator.yaml | 253 + .../prometheus/rules-1.14/prometheus.yaml | 735 +++ .../rules-1.14/windows.node.rules.yaml | 302 + .../rules-1.14/windows.pod.rules.yaml | 158 + .../templates/prometheus/secret.yaml | 15 + .../templates/prometheus/service.yaml | 89 + .../prometheus/serviceThanosSidecar.yaml | 45 + .../serviceThanosSidecarExternal.yaml | 46 + .../templates/prometheus/serviceaccount.yaml | 21 + .../templates/prometheus/servicemonitor.yaml | 86 + .../servicemonitorThanosSidecar.yaml | 45 + .../templates/prometheus/servicemonitors.yaml | 46 + .../prometheus/serviceperreplica.yaml | 58 + .../prometheus/verticalpodautoscaler.yaml | 46 + .../templates/thanos-ruler/extrasecret.yaml | 20 + .../templates/thanos-ruler/ingress.yaml | 59 + .../thanos-ruler/podDisruptionBudget.yaml | 16 + .../templates/thanos-ruler/route.yaml | 63 + .../templates/thanos-ruler/ruler.yaml | 219 + .../templates/thanos-ruler/secret.yaml | 26 + .../templates/thanos-ruler/service.yaml | 57 + .../thanos-ruler/serviceaccount.yaml | 20 + .../thanos-ruler/servicemonitor.yaml | 72 + kube-prometheus-stack/values.yaml | 5645 +++++++++++++++++ 183 files changed, 22298 insertions(+) create mode 100644 kube-prometheus-stack/.helmignore create mode 100644 kube-prometheus-stack/Chart.lock create mode 100644 kube-prometheus-stack/Chart.yaml create mode 100644 kube-prometheus-stack/README.md create mode 100644 kube-prometheus-stack/templates/NOTES.txt create mode 100644 kube-prometheus-stack/templates/_helpers.tpl create mode 100644 kube-prometheus-stack/templates/alertmanager/alertmanager.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/extrasecret.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/ingress.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/ingressperreplica.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/networkpolicy.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/podDisruptionBudget.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/route.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/secret.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/service.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/serviceaccount.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/serviceperreplica.yaml create mode 100644 kube-prometheus-stack/templates/alertmanager/verticalpodautoscaler.yaml create mode 100644 kube-prometheus-stack/templates/exporters/core-dns/service.yaml create mode 100644 kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-controller-manager/endpoints.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-dns/service.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-etcd/endpoints.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-proxy/endpoints.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-scheduler/endpoints.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/extra-objects.yaml create mode 100644 kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/controller-manager.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/grafana-overview.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-coredns.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-node.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-windows-cluster-rsrc-use.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-windows-node-rsrc-use.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-aix.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/proxy.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/scheduler.yaml create mode 100644 kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/_prometheus-operator.tpl create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/_prometheus-operator-webhook.tpl create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/deployment.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/pdb.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/service.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/serviceaccount.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-createSecret.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-patchWebhook.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-createSecret.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-patchWebhook.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/aggregate-clusterroles.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/ciliumnetworkpolicy.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/clusterrole.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/clusterrolebinding.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/deployment.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/networkpolicy.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/pdb.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/service.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/serviceaccount.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/prometheus-operator/verticalpodautoscaler.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/_rules.tpl create mode 100644 kube-prometheus-stack/templates/prometheus/additionalAlertRelabelConfigs.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/additionalAlertmanagerConfigs.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/additionalPrometheusRules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/additionalScrapeConfigs.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/ciliumnetworkpolicy.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/clusterrole.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/clusterrolebinding.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/csi-secret.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/extrasecret.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/ingress.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/ingressThanosSidecar.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/ingressperreplica.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/networkpolicy.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/podDisruptionBudget.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/podmonitors.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/prometheus.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/route.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/alertmanager.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/general.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_limits.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_requests.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_limits.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_requests.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/node-network.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus-operator.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/windows.node.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/rules-1.14/windows.pod.rules.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/secret.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/service.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/serviceThanosSidecarExternal.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/serviceaccount.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/servicemonitor.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/servicemonitorThanosSidecar.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/servicemonitors.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/serviceperreplica.yaml create mode 100644 kube-prometheus-stack/templates/prometheus/verticalpodautoscaler.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/extrasecret.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/ingress.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/podDisruptionBudget.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/route.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/ruler.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/secret.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/service.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/serviceaccount.yaml create mode 100644 kube-prometheus-stack/templates/thanos-ruler/servicemonitor.yaml create mode 100644 kube-prometheus-stack/values.yaml diff --git a/kube-prometheus-stack/.helmignore b/kube-prometheus-stack/.helmignore new file mode 100644 index 0000000..925b647 --- /dev/null +++ b/kube-prometheus-stack/.helmignore @@ -0,0 +1,33 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +# helm/charts +OWNERS +hack/ +ci/ +kube-prometheus-*.tgz + +unittests/ +files/dashboards/ + +UPGRADE.md +CONTRIBUTING.md +.editorconfig diff --git a/kube-prometheus-stack/Chart.lock b/kube-prometheus-stack/Chart.lock new file mode 100644 index 0000000..676d86b --- /dev/null +++ b/kube-prometheus-stack/Chart.lock @@ -0,0 +1,18 @@ +dependencies: +- name: crds + repository: "" + version: 0.0.0 +- name: kube-state-metrics + repository: https://prometheus-community.github.io/helm-charts + version: 7.3.0 +- name: prometheus-node-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 4.55.0 +- name: grafana + repository: https://grafana-community.github.io/helm-charts + version: 12.3.3 +- name: prometheus-windows-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 0.12.7 +digest: sha256:c0a8d925127e08bc8476d30ea830cfc55464ea1642fdc54b162aa5a84a6fd563 +generated: "2026-05-19T17:49:27.635862373Z" diff --git a/kube-prometheus-stack/Chart.yaml b/kube-prometheus-stack/Chart.yaml new file mode 100644 index 0000000..75443f7 --- /dev/null +++ b/kube-prometheus-stack/Chart.yaml @@ -0,0 +1,72 @@ +annotations: + artifacthub.io/license: Apache-2.0 + artifacthub.io/links: | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts + - name: Upstream Project + url: https://github.com/prometheus-operator/kube-prometheus + - name: Upgrade Process + url: https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/README.md#upgrading-chart + artifacthub.io/operator: "true" +apiVersion: v2 +appVersion: v0.90.1 +dependencies: +- condition: crds.enabled + name: crds + repository: "" + version: 0.0.0 +- condition: kubeStateMetrics.enabled + name: kube-state-metrics + repository: https://prometheus-community.github.io/helm-charts + version: 7.3.0 +- condition: nodeExporter.enabled + name: prometheus-node-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 4.55.0 +- condition: grafana.enabled + name: grafana + repository: https://grafana-community.github.io/helm-charts + version: 12.3.3 +- condition: windowsMonitoring.enabled + name: prometheus-windows-exporter + repository: https://prometheus-community.github.io/helm-charts + version: 0.12.* +description: kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, + and Prometheus rules combined with documentation and scripts to provide easy to + operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus + Operator. +home: https://github.com/prometheus-operator/kube-prometheus +icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png +keywords: +- operator +- prometheus +- kube-prometheus +kubeVersion: '>=1.25.0-0' +maintainers: +- email: andrew@quadcorps.co.uk + name: andrewgkew + url: https://github.com/andrewgkew +- email: gianrubio@gmail.com + name: gianrubio + url: https://github.com/gianrubio +- email: github.gkarthiks@gmail.com + name: gkarthiks + url: https://github.com/gkarthiks +- email: kube-prometheus-stack@sisti.pt + name: GMartinez-Sisti + url: https://github.com/GMartinez-Sisti +- email: github@jkroepke.de + name: jkroepke + url: https://github.com/jkroepke +- email: miroslav.hadzhiev@gmail.com + name: Xtigyro + url: https://github.com/Xtigyro +- email: quentin.bisson@gmail.com + name: QuentinBisson + url: https://github.com/QuentinBisson +name: kube-prometheus-stack +sources: +- https://github.com/prometheus-community/helm-charts +- https://github.com/prometheus-operator/kube-prometheus +type: application +version: 85.2.0 diff --git a/kube-prometheus-stack/README.md b/kube-prometheus-stack/README.md new file mode 100644 index 0000000..bb4f2fa --- /dev/null +++ b/kube-prometheus-stack/README.md @@ -0,0 +1,382 @@ +# kube-prometheus-stack + +Installs core components of the [kube-prometheus stack](https://github.com/prometheus-operator/kube-prometheus), a collection of Kubernetes manifests, [Grafana](http://grafana.com/) dashboards, and [Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with [Prometheus](https://prometheus.io/) using the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator). + +See the [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) readme for details about components, dashboards, and alerts. + +_Note: This chart was formerly named `prometheus-operator` chart, now renamed to more clearly reflect that it installs the `kube-prometheus` project stack, within which Prometheus Operator is only one component. This chart does not install all components of `kube-prometheus`, notably excluding the Prometheus Adapter and Prometheus black-box exporter._ + +## Prerequisites + +- Kubernetes 1.19+ +- Helm 3+ + +## Usage + +The chart is distributed as an [OCI Artifact](https://helm.sh/docs/topics/registries/) as well as via a traditional [Helm Repository](https://helm.sh/docs/topics/chart_repository/). + +- OCI Artifact: `oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack` +- Helm Repository: `https://prometheus-community.github.io/helm-charts` with chart `kube-prometheus-stack` + +The installation instructions use the OCI registry. Refer to the [`helm repo`]([`helm repo`](https://helm.sh/docs/helm/helm_repo/)) command documentation for information on installing charts via the traditional repository. + +### Install Helm Chart + +```console +helm install [RELEASE_NAME] oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +### Dependencies + +By default this chart installs additional, dependent charts: + +- [prometheus-community/kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics) +- [prometheus-community/prometheus-node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter) +- [grafana/grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana) + +To disable dependencies during installation, see [multiple releases](#multiple-releases) below. + +_See [helm dependency](https://helm.sh/docs/helm/helm_dependency/) for command documentation._ + +#### Grafana Dashboards + +This chart provisions a collection of curated Grafana dashboards that are automatically loaded into Grafana via ConfigMaps. These dashboards are rendered into the Helm chart under [`templates/grafana/`](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/templates/grafana/), but **this is not their source of truth**. + +The dashboards originate from various upstream projects and are gathered and processed using scripts in the [`hack/`](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack) directory. For details on how these dashboards are sourced and kept up to date, refer to the [hack/README.md](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/hack/README.md). + +> **Note:** The dashboards referenced in the `hack` scripts are usually **not the original source** either. Most originate from separate **Prometheus mixin repositories** (e.g., [kubernetes-mixin](https://github.com/kubernetes-monitoring/kubernetes-mixin)) and are processed through `jsonnet` tooling before being included here. To find the original source in case you want to modify it you may have to search even further upstream. + +If you wish to contribute or modify dashboards, please follow the guidance in the `hack/README.md` to ensure consistency and reproducibility. + +### Uninstall Helm Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +CRDs created by this chart are not removed by default and should be manually cleaned up: + +```console +kubectl delete crd alertmanagerconfigs.monitoring.coreos.com +kubectl delete crd alertmanagers.monitoring.coreos.com +kubectl delete crd podmonitors.monitoring.coreos.com +kubectl delete crd probes.monitoring.coreos.com +kubectl delete crd prometheusagents.monitoring.coreos.com +kubectl delete crd prometheuses.monitoring.coreos.com +kubectl delete crd prometheusrules.monitoring.coreos.com +kubectl delete crd scrapeconfigs.monitoring.coreos.com +kubectl delete crd servicemonitors.monitoring.coreos.com +kubectl delete crd thanosrulers.monitoring.coreos.com +``` + +### Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] [CHART] +``` + +With Helm v3, CRDs created by this chart are not updated by default and should be manually updated. +Consult also the [Helm Documentation on CRDs](https://helm.sh/docs/chart_best_practices/custom_resource_definitions). + +CRDs update lead to a major version bump. +The Chart's [appVersion](https://github.com/prometheus-community/helm-charts/blob/13ed7098db2f78c2bbcdab6c1c3c7a95b4b94574/charts/kube-prometheus-stack/Chart.yaml#L36) refers to the [`prometheus-operator`](https://github.com/prometheus-operator/prometheus-operator/tree/main)'s version with matching CRDs. + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +#### Upgrading an existing Release to a new major version + +A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions. + +See [UPGRADE.md](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/UPGRADE.md) +for breaking changes between versions. + +## Configuration + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments: + +```console +helm show values oci://ghcr.io/prometheus-community/charts/kube-prometheus-stack +``` + +You may also `helm show values` on this chart's [dependencies](#dependencies) for additional options. + +For templated Grafana datasource definitions (e.g. when using Helm flow control), use `grafana.additionalDataSourcesString`, which is rendered via `tpl`. + +### Prometheus High Availability (HA) + +For a basic HA setup, run multiple Prometheus replicas: + +```yaml +prometheus: + prometheusSpec: + replicas: 2 + podAntiAffinity: "hard" + externalLabels: + cluster: prod-eu1 +``` + +Important notes: + +1. `replicas` controls how many Prometheus pods are deployed for each shard. +2. Keep anti-affinity enabled (or hardened) to avoid scheduling all replicas on one node. +3. Do not clear replica/instance external labels in HA setups (`replicaExternalLabelNameClear` / `prometheusExternalLabelNameClear`), otherwise deduplication and alert/source identification become harder. +4. Querying replicas through a Kubernetes Service provides availability, but not sample deduplication across replicas by itself. For global/deduplicated querying, use a Thanos Query layer (or another backend that performs deduplication). + +See also Prometheus Operator HA guidance: + +- [Prometheus Operator HA docs](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/platform/high-availability.md#prometheus) + +### Multiple releases + +The same chart can be used to run multiple Prometheus instances in the same cluster if required. To achieve this, it is necessary to run only one instance of prometheus-operator and a pair of alertmanager pods for an HA configuration, while all other components need to be disabled. To disable a dependency during installation, set `kubeStateMetrics.enabled`, `nodeExporter.enabled` and `grafana.enabled` to `false`. + +## Work-Arounds for Known Issues + +### Running on private GKE clusters + +When Google configure the control plane for private clusters, they automatically configure VPC peering between your Kubernetes cluster’s network and a separate Google managed project. In order to restrict what Google are able to access within your cluster, the firewall rules configured restrict access to your Kubernetes pods. This means that in order to use the webhook component with a GKE private cluster, you must configure an additional firewall rule to allow the GKE control plane access to your webhook pod. + +You can read more information on how to add firewall rules for the GKE control plane nodes in the [GKE docs](https://cloud.google.com/kubernetes-engine/docs/how-to/private-clusters#add_firewall_rules) + +Alternatively, you can disable the hooks by setting `prometheusOperator.admissionWebhooks.enabled=false`. + +## PrometheusRules Admission Webhooks + +With Prometheus Operator version 0.30+, the core Prometheus Operator pod exposes an endpoint that will integrate with the `validatingwebhookconfiguration` Kubernetes feature to prevent malformed rules from being added to the cluster. + +### How the Chart Configures the Hooks + +A validating and mutating webhook configuration requires the endpoint to which the request is sent to use TLS. It is possible to set up custom certificates to do this, but in most cases, a self-signed certificate is enough. The setup of this component requires some more complex orchestration when using helm. The steps are created to be idempotent and to allow turning the feature on and off without running into helm quirks. + +1. A pre-install hook provisions a certificate into the same namespace using a format compatible with provisioning using end user certificates. If the certificate already exists, the hook exits. +2. The prometheus operator pod is configured to use a TLS proxy container, which will load that certificate. +3. Validating and Mutating webhook configurations are created in the cluster, with their failure mode set to Ignore. This allows rules to be created by the same chart at the same time, even though the webhook has not yet been fully set up - it does not have the correct CA field set. +4. A post-install hook reads the CA from the secret created by step 1 and patches the Validating and Mutating webhook configurations. This process will allow a custom CA provisioned by some other process to also be patched into the webhook configurations. The chosen failure policy is also patched into the webhook configurations + +### Alternatives + +It should be possible to use [jetstack/cert-manager](https://github.com/jetstack/cert-manager) if a more complete solution is required, but it has not been tested. + +You can enable automatic self-signed TLS certificate provisioning via cert-manager by setting the `prometheusOperator.admissionWebhooks.certManager.enabled` value to true. + +### Limitations + +Because the operator can only run as a single pod, there is potential for this component failure to cause rule deployment failure. Because this risk is outweighed by the benefit of having validation, the feature is enabled by default. + +## Developing Prometheus Rules and Grafana Dashboards + +This chart Grafana Dashboards and Prometheus Rules are just a copy from [prometheus-operator/prometheus-operator](https://github.com/prometheus-operator/prometheus-operator) and other sources, synced (with alterations) by scripts in [hack](hack) folder. In order to introduce any changes you need to first [add them to the original repository](https://github.com/prometheus-operator/kube-prometheus/blob/main/docs/customizations/developing-prometheus-rules-and-grafana-dashboards.md) and then sync there by scripts. + +## Further Information + +For more in-depth documentation of configuration options meanings, please see + +- [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator) +- [Prometheus](https://prometheus.io/docs/introduction/overview/) +- [Grafana](https://github.com/grafana/helm-charts/tree/main/charts/grafana#grafana-helm-chart) + +## prometheus.io/scrape + +The prometheus operator does not support annotation-based discovery of services, using the `PodMonitor` or `ServiceMonitor` CRD in its place as they provide far more configuration options. +For information on how to use PodMonitors/ServiceMonitors, please see the documentation on the `prometheus-operator/prometheus-operator` documentation here: + +- [ServiceMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/developer/getting-started.md#using-servicemonitors) +- [PodMonitors](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/developer/getting-started.md#using-podmonitors) +- [Running Exporters](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/user-guides/running-exporters.md) + +By default, Prometheus discovers PodMonitors and ServiceMonitors within its namespace, that are labeled with the same release tag as the prometheus-operator release. +Sometimes, you may need to discover custom PodMonitors/ServiceMonitors, for example used to scrape data from third-party applications. +An easy way of doing this, without compromising the default PodMonitors/ServiceMonitors discovery, is allowing Prometheus to discover all PodMonitors/ServiceMonitors within its namespace, without applying label filtering. +To do so, you can set `prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues` and `prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues` to `false`. + +## Migrating from stable/prometheus-operator chart + +## Zero downtime + +Since `kube-prometheus-stack` is fully compatible with the `stable/prometheus-operator` chart, a migration without downtime can be achieved. +However, the old name prefix needs to be kept. If you want the new name please follow the step by step guide below (with downtime). + +You can override the name to achieve this: + +```console +helm upgrade prometheus-operator prometheus-community/kube-prometheus-stack -n monitoring --reuse-values --set nameOverride=prometheus-operator +``` + +**Note**: It is recommended to run this first with `--dry-run --debug`. + +## Redeploy with new name (downtime) + +If the **prometheus-operator** values are compatible with the new **kube-prometheus-stack** chart, please follow the below steps for migration: + +> The guide presumes that chart is deployed in `monitoring` namespace and the deployments are running there. If in other namespace, please replace the `monitoring` to the deployed namespace. + +1. Patch the PersistenceVolume created/used by the prometheus-operator chart to `Retain` claim policy: + + ```console + kubectl patch pv/ -p '{"spec":{"persistentVolumeReclaimPolicy":"Retain"}}' + ``` + + **Note:** To execute the above command, the user must have a cluster wide permission. Please refer [Kubernetes RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) + +2. Uninstall the **prometheus-operator** release and delete the existing PersistentVolumeClaim, and verify PV become Released. + + ```console + helm uninstall prometheus-operator -n monitoring + kubectl delete pvc/ -n monitoring + ``` + + Additionally, you have to manually remove the remaining `prometheus-operator-kubelet` service. + + ```console + kubectl delete service/prometheus-operator-kubelet -n kube-system + ``` + + You can choose to remove all your existing CRDs (ServiceMonitors, Podmonitors, etc.) if you want to. + +3. Remove current `spec.claimRef` values to change the PV's status from Released to Available. + + ```console + kubectl patch pv/ --type json -p='[{"op": "remove", "path": "/spec/claimRef"}]' -n monitoring + ``` + +**Note:** To execute the above command, the user must have a cluster wide permission. Please refer to [Kubernetes RBAC](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) + +After these steps, proceed to a fresh **kube-prometheus-stack** installation and make sure the current release of **kube-prometheus-stack** matching the `volumeClaimTemplate` values in the `values.yaml`. + +The binding is done via matching a specific amount of storage requested and with certain access modes. + +For example, if you had storage specified as this with **prometheus-operator**: + +```yaml +volumeClaimTemplate: + spec: + storageClassName: gp2 + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 50Gi +``` + +You have to specify matching `volumeClaimTemplate` with 50Gi storage and `ReadWriteOnce` access mode. + +Additionally, you should check the current AZ of your legacy installation's PV, and configure the fresh release to use the same AZ as the old one. If the pods are in a different AZ than the PV, the release will fail to bind the existing one, hence creating a new PV. + +This can be achieved either by specifying the labels through `values.yaml`, e.g. setting `prometheus.prometheusSpec.nodeSelector` to: + +```yaml +nodeSelector: + failure-domain.beta.kubernetes.io/zone: east-west-1a +``` + +or passing these values as `--set` overrides during installation. + +The new release should now re-attach your previously released PV with its content. + +## Migrating from coreos/prometheus-operator chart + +The multiple charts have been combined into a single chart that installs prometheus operator, prometheus, alertmanager, grafana as well as the multitude of exporters necessary to monitor a cluster. + +There is no simple and direct migration path between the charts as the changes are extensive and intended to make the chart easier to support. + +The capabilities of the old chart are all available in the new chart, including the ability to run multiple prometheus instances on a single cluster - you will need to disable the parts of the chart you do not wish to deploy. + +You can check out the tickets for this change at [prometheus-operator/prometheus-operator #592](https://github.com/prometheus-operator/prometheus-operator/issues/592) and [helm/charts #6765](https://github.com/helm/charts/pull/6765). + +### High-level overview of Changes + +#### Added dependencies + +The chart has added 3 [dependencies](#dependencies). + +- Node-Exporter, Kube-State-Metrics: These components are loaded as dependencies into the chart, and are relatively simple components +- Grafana: The Grafana chart is more feature-rich than this chart - it contains a sidecar that is able to load data sources and dashboards from configmaps deployed into the same cluster. For more information check out the [documentation for the chart](https://github.com/grafana/helm-charts/blob/main/charts/grafana/README.md) + +#### Kubelet Service + +Because the kubelet service has a new name in the chart, make sure to clean up the old kubelet service in the `kube-system` namespace to prevent counting container metrics twice. + +#### Persistent Volumes + +If you would like to keep the data of the current persistent volumes, it should be possible to attach existing volumes to new PVCs and PVs that are created using the conventions in the new chart. For example, in order to use an existing Azure disk for a helm release called `prometheus-migration` the following resources can be created: + +```yaml +apiVersion: v1 +kind: PersistentVolume +metadata: + name: pvc-prometheus-migration-prometheus-0 +spec: + accessModes: + - ReadWriteOnce + azureDisk: + cachingMode: None + diskName: pvc-prometheus-migration-prometheus-0 + diskURI: /subscriptions/f5125d82-2622-4c50-8d25-3f7ba3e9ac4b/resourceGroups/sample-migration-resource-group/providers/Microsoft.Compute/disks/pvc-prometheus-migration-prometheus-0 + fsType: "" + kind: Managed + readOnly: false + capacity: + storage: 1Gi + persistentVolumeReclaimPolicy: Delete + storageClassName: prometheus + volumeMode: Filesystem +``` + +```yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + app.kubernetes.io/name: prometheus + prometheus: prometheus-migration-prometheus + name: prometheus-prometheus-migration-prometheus-db-prometheus-prometheus-migration-prometheus-0 + namespace: monitoring +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: prometheus + volumeMode: Filesystem + volumeName: pvc-prometheus-migration-prometheus-0 +``` + +The PVC will take ownership of the PV and when you create a release using a persistent volume claim template it will use the existing PVCs as they match the naming convention used by the chart. For other cloud providers similar approaches can be used. + +#### KubeProxy + +The metrics bind address of kube-proxy is default to `127.0.0.1:10249` that prometheus instances **cannot** access to. You should expose metrics by changing `metricsBindAddress` field value to `0.0.0.0:10249` if you want to collect them. + +Depending on the cluster, the relevant part `config.conf` will be in ConfigMap `kube-system/kube-proxy` or `kube-system/kube-proxy-config`. For example: + +```console +kubectl -n kube-system edit cm kube-proxy +``` + +```yaml +apiVersion: v1 +data: + config.conf: |- + apiVersion: kubeproxy.config.k8s.io/v1alpha1 + kind: KubeProxyConfiguration + # ... + # metricsBindAddress: 127.0.0.1:10249 + metricsBindAddress: 0.0.0.0:10249 + # ... + kubeconfig.conf: |- + # ... +kind: ConfigMap +metadata: + labels: + app: kube-proxy + name: kube-proxy + namespace: kube-system +``` diff --git a/kube-prometheus-stack/templates/NOTES.txt b/kube-prometheus-stack/templates/NOTES.txt new file mode 100644 index 0000000..a2e0f26 --- /dev/null +++ b/kube-prometheus-stack/templates/NOTES.txt @@ -0,0 +1,18 @@ +{{ $.Chart.Name }} has been installed. Check its status by running: + kubectl --namespace {{ template "kube-prometheus-stack.namespace" . }} get pods -l "release={{ $.Release.Name }}" + +Get Grafana '{{ .Values.grafana.adminUser }}' user password by running: + + kubectl --namespace {{ template "kube-prometheus-stack.namespace" . }} get secrets {{ $.Release.Name }}-grafana -o jsonpath="{.data.admin-password}" | base64 -d ; echo + +Access Grafana local instance: + + export POD_NAME=$(kubectl --namespace {{ template "kube-prometheus-stack.namespace" . }} get pod -l "app.kubernetes.io/name={{ default "grafana" .Values.grafana.name }},app.kubernetes.io/instance={{ $.Release.Name }}" -oname) + kubectl --namespace {{ template "kube-prometheus-stack.namespace" . }} port-forward $POD_NAME 3000 + +Get your grafana admin user password by running: + + kubectl get secret --namespace {{ .Values.grafana.namespaceOverride | default (include "kube-prometheus-stack.namespace" .) }} -l app.kubernetes.io/component=admin-secret -o jsonpath="{.items[0].data.{{ .Values.grafana.admin.passwordKey | default "admin-password" }}}" | base64 --decode ; echo + + +Visit https://github.com/prometheus-operator/kube-prometheus for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator. diff --git a/kube-prometheus-stack/templates/_helpers.tpl b/kube-prometheus-stack/templates/_helpers.tpl new file mode 100644 index 0000000..8fb4041 --- /dev/null +++ b/kube-prometheus-stack/templates/_helpers.tpl @@ -0,0 +1,399 @@ +{{/* vim: set filetype=mustache: */}} +{{/* Expand the name of the chart. This is suffixed with -alertmanager, which means subtract 13 from longest 63 available */}} +{{- define "kube-prometheus-stack.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 50 | trimSuffix "-" -}} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +The components in this chart create additional resources that expand the longest created name strings. +The longest name that gets created adds and extra 37 characters, so truncation should be 63-35=26. +*/}} +{{- define "kube-prometheus-stack.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 26 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 26 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 26 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* Fullname suffixed with -operator */}} +{{/* Adding 9 to 26 truncation of kube-prometheus-stack.fullname */}} +{{- define "kube-prometheus-stack.operator.fullname" -}} +{{- if .Values.prometheusOperator.fullnameOverride -}} +{{- .Values.prometheusOperator.fullnameOverride | trunc 35 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-operator" (include "kube-prometheus-stack.fullname" .) -}} +{{- end }} +{{- end }} + +{{/* Prometheus custom resource instance name */}} +{{- define "kube-prometheus-stack.prometheus.crname" -}} +{{- if .Values.cleanPrometheusOperatorObjectNames }} +{{- include "kube-prometheus-stack.fullname" . }} +{{- else }} +{{- print (include "kube-prometheus-stack.fullname" .) "-prometheus" }} +{{- end }} +{{- end }} + +{{/* Alertmanager custom resource instance name */}} +{{- define "kube-prometheus-stack.alertmanager.crname" -}} +{{- if .Values.cleanPrometheusOperatorObjectNames }} +{{- include "kube-prometheus-stack.fullname" . }} +{{- else }} +{{- print (include "kube-prometheus-stack.fullname" .) "-alertmanager" -}} +{{- end }} +{{- end }} + +{{/* ThanosRuler custom resource instance name */}} +{{/* Subtracting 1 from 26 truncation of kube-prometheus-stack.fullname */}} +{{- define "kube-prometheus-stack.thanosRuler.crname" -}} +{{- if .Values.cleanPrometheusOperatorObjectNames }} +{{- include "kube-prometheus-stack.fullname" . }} +{{- else }} +{{- print (include "kube-prometheus-stack.fullname" . | trunc 25 | trimSuffix "-") "-thanos-ruler" -}} +{{- end }} +{{- end }} + +{{/* Shortened name suffixed with thanos-ruler */}} +{{- define "kube-prometheus-stack.thanosRuler.name" -}} +{{- default (printf "%s-thanos-ruler" (include "kube-prometheus-stack.name" .)) .Values.thanosRuler.name -}} +{{- end }} + +{{/* Create chart name and version as used by the chart label. */}} +{{- define "kube-prometheus-stack.chartref" -}} +{{- replace "+" "_" .Chart.Version | printf "%s-%s" .Chart.Name -}} +{{- end }} + +{{/* Generate basic labels */}} +{{- define "kube-prometheus-stack.labels" }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/instance: {{ .Release.Name }} +app.kubernetes.io/version: "{{ replace "+" "_" .Chart.Version }}" +app.kubernetes.io/part-of: {{ template "kube-prometheus-stack.name" . }} +chart: {{ template "kube-prometheus-stack.chartref" . }} +release: {{ $.Release.Name | quote }} +heritage: {{ $.Release.Service | quote }} +{{- if .Values.commonLabels}} +{{ toYaml .Values.commonLabels }} +{{- end }} +{{- end }} + +{{/* Create the name of kube-prometheus-stack service account to use */}} +{{- define "kube-prometheus-stack.operator.serviceAccountName" -}} +{{- if .Values.prometheusOperator.serviceAccount.create -}} + {{ default (include "kube-prometheus-stack.operator.fullname" .) .Values.prometheusOperator.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.prometheusOperator.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* Create the name of kube-prometheus-stack service account to use */}} +{{- define "kube-prometheus-stack.operator.admissionWebhooks.serviceAccountName" -}} +{{- if .Values.prometheusOperator.serviceAccount.create -}} + {{ default (printf "%s-webhook" (include "kube-prometheus-stack.operator.fullname" .)) .Values.prometheusOperator.admissionWebhooks.deployment.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.prometheusOperator.admissionWebhooks.deployment.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* Create the name of prometheus service account to use */}} +{{- define "kube-prometheus-stack.prometheus.serviceAccountName" -}} +{{- if .Values.prometheus.serviceAccount.create -}} + {{ default (print (include "kube-prometheus-stack.fullname" .) "-prometheus") .Values.prometheus.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.prometheus.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* Create the name of alertmanager service account to use */}} +{{- define "kube-prometheus-stack.alertmanager.serviceAccountName" -}} +{{- if .Values.alertmanager.serviceAccount.create -}} + {{ default (print (include "kube-prometheus-stack.fullname" .) "-alertmanager") .Values.alertmanager.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.alertmanager.serviceAccount.name }} +{{- end -}} + +{{- end -}} + +{{/* Create the name of thanosRuler service account to use */}} +{{- define "kube-prometheus-stack.thanosRuler.serviceAccountName" -}} +{{- if .Values.thanosRuler.serviceAccount.create -}} + {{ default (include "kube-prometheus-stack.thanosRuler.name" .) .Values.thanosRuler.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.thanosRuler.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Use the grafana namespace override for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack-grafana.namespace" -}} + {{- if .Values.grafana.namespaceOverride -}} + {{- .Values.grafana.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Use the Alertmanager namespace override for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack-alertmanager.namespace" -}} + {{- if .Values.alertmanager.namespaceOverride -}} + {{- .Values.alertmanager.namespaceOverride -}} + {{- else -}} + {{- include "kube-prometheus-stack.namespace" . -}} + {{- end -}} +{{- end -}} + +{{/* +Allow kubelet job name to be overridden +*/}} +{{- define "kube-prometheus-stack-kubelet.name" -}} + {{- if index .Values "kubelet" "jobNameOverride" -}} + {{- index .Values "kubelet" "jobNameOverride" -}} + {{- else -}} + {{- print "kubelet" -}} + {{- end -}} +{{- end -}} + + +{{/* +Allow kube-controller-manager job name to be overridden +*/}} +{{- define "kube-prometheus-stack-kube-controller-manager.name" -}} + {{- if index .Values "kubeControllerManager" "jobNameOverride" -}} + {{- index .Values "kubeControllerManager" "jobNameOverride" -}} + {{- else -}} + {{- print "kube-controller-manager" -}} + {{- end -}} +{{- end -}} + + +{{/* +Allow kube-scheduler job name to be overridden +*/}} +{{- define "kube-prometheus-stack-kube-scheduler.name" -}} + {{- if index .Values "kubeScheduler" "jobNameOverride" -}} + {{- index .Values "kubeScheduler" "jobNameOverride" -}} + {{- else -}} + {{- print "kube-scheduler" -}} + {{- end -}} +{{- end -}} + + +{{/* +Allow kube-proxy job name to be overridden +*/}} +{{- define "kube-prometheus-stack-kube-proxy.name" -}} + {{- if index .Values "kubeProxy" "jobNameOverride" -}} + {{- index .Values "kubeProxy" "jobNameOverride" -}} + {{- else -}} + {{- print "kube-proxy" -}} + {{- end -}} +{{- end -}} + +{{/* +Allow kube-apiserver job name to be overridden +*/}} +{{- define "kube-prometheus-stack-kube-apiserver.name" -}} + {{- if index .Values "kubeApiServer" "jobNameOverride" -}} + {{- index .Values "kubeApiServer" "jobNameOverride" -}} + {{- else -}} + {{- print "apiserver" -}} + {{- end -}} +{{- end -}} + +{{/* +Allow kube-state-metrics job name to be overridden +*/}} +{{- define "kube-prometheus-stack-kube-state-metrics.name" -}} + {{- if index .Values "kube-state-metrics" "nameOverride" -}} + {{- index .Values "kube-state-metrics" "nameOverride" -}} + {{- else -}} + {{- print "kube-state-metrics" -}} + {{- end -}} +{{- end -}} + +{{/* +Use the kube-state-metrics namespace override for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack-kube-state-metrics.namespace" -}} + {{- if index .Values "kube-state-metrics" "namespaceOverride" -}} + {{- index .Values "kube-state-metrics" "namespaceOverride" -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Use the prometheus-node-exporter namespace override for multi-namespace deployments in combined charts +*/}} +{{- define "kube-prometheus-stack-prometheus-node-exporter.namespace" -}} + {{- if index .Values "prometheus-node-exporter" "namespaceOverride" -}} + {{- index .Values "prometheus-node-exporter" "namespaceOverride" -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* Allow KubeVersion to be overridden. */}} +{{- define "kube-prometheus-stack.kubeVersion" -}} + {{- default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}} +{{- end -}} + +{{/* Get value based on current Kubernetes version */}} +{{- define "kube-prometheus-stack.kubeVersionDefaultValue" -}} + {{- $values := index . 0 -}} + {{- $kubeVersion := index . 1 -}} + {{- $old := index . 2 -}} + {{- $new := index . 3 -}} + {{- $default := index . 4 -}} + {{- if kindIs "invalid" $default -}} + {{- if semverCompare $kubeVersion (include "kube-prometheus-stack.kubeVersion" $values) -}} + {{- print $new -}} + {{- else -}} + {{- print $old -}} + {{- end -}} + {{- else -}} + {{- print $default }} + {{- end -}} +{{- end -}} + +{{/* Get value for kube-controller-manager depending on insecure scraping availability */}} +{{- define "kube-prometheus-stack.kubeControllerManager.insecureScrape" -}} + {{- $values := index . 0 -}} + {{- $insecure := index . 1 -}} + {{- $secure := index . 2 -}} + {{- $userValue := index . 3 -}} + {{- include "kube-prometheus-stack.kubeVersionDefaultValue" (list $values ">= 1.22-0" $insecure $secure $userValue) -}} +{{- end -}} + +{{/* Get value for kube-scheduler depending on insecure scraping availability */}} +{{- define "kube-prometheus-stack.kubeScheduler.insecureScrape" -}} + {{- $values := index . 0 -}} + {{- $insecure := index . 1 -}} + {{- $secure := index . 2 -}} + {{- $userValue := index . 3 -}} + {{- include "kube-prometheus-stack.kubeVersionDefaultValue" (list $values ">= 1.23-0" $insecure $secure $userValue) -}} +{{- end -}} + +{{/* Sets default scrape limits for servicemonitor */}} +{{- define "servicemonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . }} +{{- end }} +{{- end -}} + +{{/* +To help compatibility with other charts which use global.imagePullSecrets. +Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). +global: + imagePullSecrets: + - name: pullSecret1 + - name: pullSecret2 + +or + +global: + imagePullSecrets: + - pullSecret1 + - pullSecret2 +*/}} +{{- define "kube-prometheus-stack.imagePullSecrets" -}} +{{- range .Values.global.imagePullSecrets }} + {{- if eq (typeOf .) "map[string]interface {}" }} +- {{ toYaml . | trim }} + {{- else }} +- name: {{ . }} + {{- end }} +{{- end }} +{{- end -}} + +{{- define "kube-prometheus-stack.operator.admission-webhook.dnsNames" }} +{{- $fullname := include "kube-prometheus-stack.operator.fullname" . }} +{{- $namespace := include "kube-prometheus-stack.namespace" . }} +{{- $fullname }} +{{ $fullname }}.{{ $namespace }}.svc +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.enabled }} +{{ $fullname }}-webhook +{{ $fullname }}-webhook.{{ $namespace }}.svc +{{- end }} +{{- end }} + +{{/* To help configure the kubelet servicemonitor for http or https. */}} +{{- define "kube-prometheus-stack.kubelet.scheme" }} +{{- if .Values.kubelet.serviceMonitor.https }}https{{ else }}http{{ end }} +{{- end }} +{{- define "kube-prometheus-stack.kubelet.authConfig" }} +{{- if .Values.kubelet.serviceMonitor.https }} +tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecureSkipVerify: {{ .Values.kubelet.serviceMonitor.insecureSkipVerify }} +bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token +{{- end }} +{{- end }} + + +{{/* To help configure anti-affinity rules for Prometheus pods */}} +{{- define "kube-prometheus-stack.prometheus.pod-anti-affinity.matchExpressions" }} +{{- if .Values.prometheus.agentMode }} +- {key: app.kubernetes.io/name, operator: In, values: [prometheus-agent]} +- {key: app.kubernetes.io/instance, operator: In, values: [{{ template "kube-prometheus-stack.prometheus.crname" . }}]} +{{- else }} +- {key: app.kubernetes.io/name, operator: In, values: [prometheus]} +- {key: app.kubernetes.io/instance, operator: In, values: [{{ template "kube-prometheus-stack.prometheus.crname" . }}]} +{{- end }} +{{- end }} + +{{/* To help configure Grafana operator folder settings (folder, folderUID, or folderRef) */}} +{{- define "kube-prometheus-stack.grafana.operator.folder" }} +{{- $folder := .Values.grafana.operator.folder }} +{{- $folderUID := .Values.grafana.operator.folderUID }} +{{- $folderRef := .Values.grafana.operator.folderRef }} +{{- if not (or + (and $folder (not $folderUID) (not $folderRef)) + (and (not $folder) $folderUID (not $folderRef)) + (and (not $folder) (not $folderUID) $folderRef) +)}} +{{- fail "grafana.operator: only one of folder, folderUID, or folderRef must be set" }} +{{- end }} +{{- if $folder }} +folder: {{ $folder | quote }} +{{- else if $folderUID }} +folderUID: {{ $folderUID | quote }} +{{- else if $folderRef }} +folderRef: {{ $folderRef | quote }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml b/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml new file mode 100644 index 0000000..118c17a --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/alertmanager.yaml @@ -0,0 +1,228 @@ +{{- if .Values.alertmanager.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: Alertmanager +metadata: + name: {{ template "kube-prometheus-stack.alertmanager.crname" . }} + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + {{- with .Values.alertmanager.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.alertmanager.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: +{{- if .Values.alertmanager.alertmanagerSpec.image }} + {{- $registry := .Values.global.imageRegistry | default .Values.alertmanager.alertmanagerSpec.image.registry -}} + {{- if and .Values.alertmanager.alertmanagerSpec.image.tag .Values.alertmanager.alertmanagerSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }}@sha256:{{ .Values.alertmanager.alertmanagerSpec.image.sha }}" + {{- else if .Values.alertmanager.alertmanagerSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.alertmanager.alertmanagerSpec.image.repository }}@sha256:{{ .Values.alertmanager.alertmanagerSpec.image.sha }}" + {{- else if .Values.alertmanager.alertmanagerSpec.image.tag }} + image: "{{ $registry }}/{{ .Values.alertmanager.alertmanagerSpec.image.repository }}:{{ .Values.alertmanager.alertmanagerSpec.image.tag }}" + {{- else }} + image: "{{ $registry }}/{{ .Values.alertmanager.alertmanagerSpec.image.repository }}" + {{- end }} + imagePullPolicy: "{{ .Values.alertmanager.alertmanagerSpec.image.pullPolicy }}" + version: "{{ default .Values.alertmanager.alertmanagerSpec.image.tag .Values.alertmanager.alertmanagerSpec.version }}" + {{- if .Values.alertmanager.alertmanagerSpec.image.sha }} + sha: {{ .Values.alertmanager.alertmanagerSpec.image.sha }} + {{- end }} +{{- end }} + replicas: {{ .Values.alertmanager.alertmanagerSpec.replicas }} + listenLocal: {{ .Values.alertmanager.alertmanagerSpec.listenLocal }} + {{- if .Values.alertmanager.alertmanagerSpec.serviceName }} + serviceName: {{ tpl .Values.alertmanager.alertmanagerSpec.serviceName . }} + {{- end }} + serviceAccountName: {{ template "kube-prometheus-stack.alertmanager.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.alertmanager.alertmanagerSpec.automountServiceAccountToken }} +{{- if .Values.alertmanager.alertmanagerSpec.externalUrl }} + externalUrl: "{{ tpl .Values.alertmanager.alertmanagerSpec.externalUrl . }}" +{{- else if and .Values.alertmanager.ingress.enabled .Values.alertmanager.ingress.hosts }} + externalUrl: "http://{{ tpl (index .Values.alertmanager.ingress.hosts 0) . }}{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}" +{{- else }} + externalUrl: http://{{ template "kube-prometheus-stack.fullname" . }}-alertmanager.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.alertmanager.service.port }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.nodeSelector }} + nodeSelector: +{{ toYaml .Values.alertmanager.alertmanagerSpec.nodeSelector | indent 4 }} +{{- end }} + paused: {{ .Values.alertmanager.alertmanagerSpec.paused }} + logFormat: {{ .Values.alertmanager.alertmanagerSpec.logFormat | quote }} + logLevel: {{ .Values.alertmanager.alertmanagerSpec.logLevel | quote }} + retention: {{ .Values.alertmanager.alertmanagerSpec.retention | quote }} + {{- with .Values.alertmanager.enableFeatures }} + enableFeatures: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.secrets }} + secrets: +{{ toYaml .Values.alertmanager.alertmanagerSpec.secrets | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.configSecret }} + configSecret: {{ .Values.alertmanager.alertmanagerSpec.configSecret }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.configMaps }} + configMaps: +{{ toYaml .Values.alertmanager.alertmanagerSpec.configMaps | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfigSelector }} + alertmanagerConfigSelector: +{{ tpl (toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfigSelector | indent 4) . }} +{{ else }} + alertmanagerConfigSelector: {} +{{- end }} + alertmanagerConfigNamespaceSelector: +{{ tpl (toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfigNamespaceSelector | indent 4 | default "null") . }} +{{- if .Values.alertmanager.alertmanagerSpec.web }} + web: +{{ toYaml .Values.alertmanager.alertmanagerSpec.web | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfiguration }} + alertmanagerConfiguration: +{{ toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfiguration | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.alertmanagerConfigMatcherStrategy }} + alertmanagerConfigMatcherStrategy: +{{ toYaml .Values.alertmanager.alertmanagerSpec.alertmanagerConfigMatcherStrategy | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.resources }} + resources: +{{ toYaml .Values.alertmanager.alertmanagerSpec.resources | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.routePrefix }} + routePrefix: "{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}" +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.securityContext }} + securityContext: +{{ toYaml .Values.alertmanager.alertmanagerSpec.securityContext | indent 4 }} +{{- end }} +{{- if kindIs "bool" .Values.alertmanager.alertmanagerSpec.hostUsers }} + hostUsers: {{ .Values.alertmanager.alertmanagerSpec.hostUsers }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.dnsConfig }} + dnsConfig: +{{ toYaml .Values.alertmanager.alertmanagerSpec.dnsConfig | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.dnsPolicy }} + dnsPolicy: {{ .Values.alertmanager.alertmanagerSpec.dnsPolicy }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.storage }} + storage: +{{ tpl (toYaml .Values.alertmanager.alertmanagerSpec.storage | indent 4) . }} +{{- end }} +{{- with .Values.alertmanager.alertmanagerSpec.persistentVolumeClaimRetentionPolicy }} + persistentVolumeClaimRetentionPolicy: + {{- toYaml . | nindent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.podMetadata }} + podMetadata: +{{ toYaml .Values.alertmanager.alertmanagerSpec.podMetadata | indent 4 }} +{{- end }} +{{- if or .Values.alertmanager.alertmanagerSpec.podAntiAffinity .Values.alertmanager.alertmanagerSpec.affinity }} + affinity: +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.affinity }} +{{ toYaml .Values.alertmanager.alertmanagerSpec.affinity | indent 4 }} +{{- end }} +{{- if eq .Values.alertmanager.alertmanagerSpec.podAntiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: {{ .Values.alertmanager.alertmanagerSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [alertmanager]} + - {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.alertmanager.crname" . }}]} +{{- else if eq .Values.alertmanager.alertmanagerSpec.podAntiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: {{ .Values.alertmanager.alertmanagerSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [alertmanager]} + - {key: alertmanager, operator: In, values: [{{ template "kube-prometheus-stack.alertmanager.crname" . }}]} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.tolerations }} + tolerations: +{{ toYaml .Values.alertmanager.alertmanagerSpec.tolerations | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.alertmanager.alertmanagerSpec.topologySpreadConstraints | indent 4 }} +{{- end }} +{{- if .Values.global.imagePullSecrets }} + imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . | trim | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.containers }} + containers: +{{ toYaml .Values.alertmanager.alertmanagerSpec.containers | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.initContainers }} + initContainers: +{{ toYaml .Values.alertmanager.alertmanagerSpec.initContainers | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.priorityClassName }} + priorityClassName: {{.Values.alertmanager.alertmanagerSpec.priorityClassName }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.additionalPeers }} + additionalPeers: +{{ toYaml .Values.alertmanager.alertmanagerSpec.additionalPeers | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.volumes }} + volumes: +{{ toYaml .Values.alertmanager.alertmanagerSpec.volumes | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.volumeMounts }} + volumeMounts: +{{ toYaml .Values.alertmanager.alertmanagerSpec.volumeMounts | indent 4 }} +{{- end }} + portName: {{ .Values.alertmanager.alertmanagerSpec.portName }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterAdvertiseAddress }} + clusterAdvertiseAddress: {{ .Values.alertmanager.alertmanagerSpec.clusterAdvertiseAddress }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterGossipInterval }} + clusterGossipInterval: {{ .Values.alertmanager.alertmanagerSpec.clusterGossipInterval }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterPeerTimeout }} + clusterPeerTimeout: {{ .Values.alertmanager.alertmanagerSpec.clusterPeerTimeout }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterPushpullInterval }} + clusterPushpullInterval: {{ .Values.alertmanager.alertmanagerSpec.clusterPushpullInterval }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.clusterLabel }} + clusterLabel: {{ .Values.alertmanager.alertmanagerSpec.clusterLabel }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.forceEnableClusterMode }} + forceEnableClusterMode: {{ .Values.alertmanager.alertmanagerSpec.forceEnableClusterMode }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.minReadySeconds }} + minReadySeconds: {{ .Values.alertmanager.alertmanagerSpec.minReadySeconds }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.podManagementPolicy }} + podManagementPolicy: {{ .Values.alertmanager.alertmanagerSpec.podManagementPolicy }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.updateStrategy }} + updateStrategy: +{{ toYaml .Values.alertmanager.alertmanagerSpec.updateStrategy | indent 4 }} +{{- end }} + hostNetwork: {{ .Values.alertmanager.alertmanagerSpec.hostNetwork }} +{{- if .Values.alertmanager.alertmanagerSpec.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ .Values.alertmanager.alertmanagerSpec.terminationGracePeriodSeconds }} +{{- end }} +{{- with .Values.alertmanager.alertmanagerSpec.additionalConfig }} + {{- tpl (toYaml .) $ | nindent 2 }} +{{- end }} +{{- with .Values.alertmanager.alertmanagerSpec.additionalConfigString }} + {{- tpl . $ | nindent 2 }} +{{- end }} +{{- if .Values.alertmanager.alertmanagerSpec.additionalArgs }} + additionalArgs: +{{ toYaml .Values.alertmanager.alertmanagerSpec.additionalArgs | indent 4 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/extrasecret.yaml b/kube-prometheus-stack/templates/alertmanager/extrasecret.yaml new file mode 100644 index 0000000..22118bf --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/extrasecret.yaml @@ -0,0 +1,20 @@ +{{- if .Values.alertmanager.extraSecret.data -}} +{{- $secretName := printf "alertmanager-%s-extra" (include "kube-prometheus-stack.fullname" . ) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ default $secretName .Values.alertmanager.extraSecret.name }} + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} +{{- if .Values.alertmanager.extraSecret.annotations }} + annotations: +{{ toYaml .Values.alertmanager.extraSecret.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + app.kubernetes.io/component: alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: +{{- range $key, $val := .Values.alertmanager.extraSecret.data }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/ingress.yaml b/kube-prometheus-stack/templates/alertmanager/ingress.yaml new file mode 100644 index 0000000..e4dd73f --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/ingress.yaml @@ -0,0 +1,67 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.ingress.enabled }} +{{- $pathType := .Values.alertmanager.ingress.pathType | default "ImplementationSpecific" }} +{{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} +{{- $backendServiceName := .Values.alertmanager.ingress.serviceName | default (printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager") }} +{{- $servicePort := .Values.alertmanager.ingress.servicePort | default .Values.alertmanager.service.port -}} +{{- $routePrefix := list .Values.alertmanager.alertmanagerSpec.routePrefix }} +{{- $paths := .Values.alertmanager.ingress.paths | default $routePrefix -}} +{{- $extraPaths := .Values.alertmanager.ingress.extraPaths | default list -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ $serviceName }} + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} +{{- if .Values.alertmanager.ingress.annotations }} + annotations: + {{- tpl (toYaml .Values.alertmanager.ingress.annotations) . | nindent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{- if .Values.alertmanager.ingress.labels }} +{{ toYaml .Values.alertmanager.ingress.labels | indent 4 }} +{{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- if .Values.alertmanager.ingress.ingressClassName }} + ingressClassName: {{ .Values.alertmanager.ingress.ingressClassName }} + {{- end }} + rules: + {{- if .Values.alertmanager.ingress.hosts }} + {{- range $host := .Values.alertmanager.ingress.hosts }} + - host: {{ tpl $host $ | quote }} + http: + paths: + {{- range $p := $paths }} + {{- with $extraPaths }} + {{- toYaml . | nindent 10 }} + {{- end }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $backendServiceName }} + port: + number: {{ $servicePort }} + {{- end -}} + {{- end -}} + {{- else }} + - http: + paths: + {{- range $p := $paths }} + {{- with $extraPaths }} + {{- toYaml . | nindent 10 }} + {{- end }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $backendServiceName }} + port: + number: {{ $servicePort }} + {{- end -}} + {{- end -}} + {{- if .Values.alertmanager.ingress.tls }} + tls: +{{ tpl (toYaml .Values.alertmanager.ingress.tls | indent 4) . }} + {{- end -}} +{{- end -}} diff --git a/kube-prometheus-stack/templates/alertmanager/ingressperreplica.yaml b/kube-prometheus-stack/templates/alertmanager/ingressperreplica.yaml new file mode 100644 index 0000000..6c58f55 --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/ingressperreplica.yaml @@ -0,0 +1,56 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.servicePerReplica.enabled .Values.alertmanager.ingressPerReplica.enabled }} +{{- $pathType := .Values.alertmanager.ingressPerReplica.pathType | default "" }} +{{- $count := .Values.alertmanager.alertmanagerSpec.replicas | int -}} +{{- $servicePort := .Values.alertmanager.service.port -}} +{{- $ingressValues := .Values.alertmanager.ingressPerReplica -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-ingressperreplica + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} +items: +{{ range $i, $e := until $count }} + - kind: Ingress + apiVersion: networking.k8s.io/v1 + metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ include "kube-prometheus-stack.name" $ }}-alertmanager + {{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $ingressValues.labels }} +{{ toYaml $ingressValues.labels | indent 8 }} + {{- end }} + {{- if $ingressValues.annotations }} + annotations: + {{- tpl (toYaml $ingressValues.annotations) $ | nindent 8 }} + {{- end }} + spec: + {{- if $ingressValues.ingressClassName }} + ingressClassName: {{ $ingressValues.ingressClassName }} + {{- end }} + rules: + - host: {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + http: + paths: + {{- range $p := $ingressValues.paths }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }} + port: + number: {{ $servicePort }} + {{- end -}} + {{- if or $ingressValues.tlsSecretName $ingressValues.tlsSecretPerReplica.enabled }} + tls: + - hosts: + - {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + {{- if $ingressValues.tlsSecretPerReplica.enabled }} + secretName: {{ $ingressValues.tlsSecretPerReplica.prefix }}-{{ $i }} + {{- else }} + secretName: {{ $ingressValues.tlsSecretName }} + {{- end }} + {{- end }} +{{- end -}} +{{- end -}} diff --git a/kube-prometheus-stack/templates/alertmanager/networkpolicy.yaml b/kube-prometheus-stack/templates/alertmanager/networkpolicy.yaml new file mode 100644 index 0000000..3d08b70 --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/networkpolicy.yaml @@ -0,0 +1,76 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: alertmanager + policyTypes: + {{- toYaml .Values.alertmanager.networkPolicy.policyTypes | nindent 4 }} + ingress: + {{- if and (.Values.alertmanager.networkPolicy.gateway.namespace) (.Values.alertmanager.networkPolicy.gateway.podLabels) }} + # Allow ingress from gateway + - from: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: {{ .Values.alertmanager.networkPolicy.gateway.namespace }} + {{- if and .Values.alertmanager.networkPolicy.gateway.podLabels (not (empty .Values.alertmanager.networkPolicy.gateway.podLabels)) }} + podSelector: + matchLabels: + {{- toYaml .Values.alertmanager.networkPolicy.gateway.podLabels | nindent 14 }} + {{- end }} + ports: + - port: {{ .Values.alertmanager.service.port }} + protocol: TCP + {{- end }} + {{- if .Values.alertmanager.networkPolicy.monitoringRules.prometheus }} + # Allow ingress from Prometheus + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: prometheus + ports: + - port: {{ .Values.alertmanager.service.port }} + protocol: TCP + {{- if .Values.alertmanager.networkPolicy.monitoringRules.configReloader }} + - port: 8080 + protocol: TCP + {{- end }} + {{- end }} + {{- if and (.Values.alertmanager.networkPolicy.enableClusterRules) (.Values.alertmanager.service.clusterPort) }} + # Allow ingress from other Alertmanager pods (for clustering) + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: alertmanager + ports: + - port: {{ .Values.alertmanager.service.clusterPort }} + protocol: TCP + {{- end }} + {{- if .Values.alertmanager.networkPolicy.monitoringRules.configReloader }} + # Allow ingress for config reloader metrics + - from: + - podSelector: + matchLabels: + app.kubernetes.io/name: alertmanager + component: config-reloader + ports: + - port: 8080 + protocol: TCP + {{- end }} + {{- with .Values.alertmanager.networkPolicy.additionalIngress }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- if .Values.alertmanager.networkPolicy.egress.enabled }} + egress: + {{- with .Values.alertmanager.networkPolicy.egress.rules }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/podDisruptionBudget.yaml b/kube-prometheus-stack/templates/alertmanager/podDisruptionBudget.yaml new file mode 100644 index 0000000..e136f88 --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/podDisruptionBudget.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: +{{- toYaml (omit .Values.alertmanager.podDisruptionBudget "enabled") | nindent 2 }} + selector: + matchLabels: + app.kubernetes.io/name: alertmanager + alertmanager: {{ template "kube-prometheus-stack.alertmanager.crname" . }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/route.yaml b/kube-prometheus-stack/templates/alertmanager/route.yaml new file mode 100644 index 0000000..bc0858d --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/route.yaml @@ -0,0 +1,63 @@ +{{- if .Values.alertmanager.enabled -}} + {{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} + {{- $servicePort := .Values.alertmanager.ingress.servicePort | default .Values.alertmanager.service.port -}} + {{- range $name, $route := .Values.alertmanager.route }} + {{- if $route.enabled }} +--- +apiVersion: {{ $route.apiVersion | default "gateway.networking.k8s.io/v1" }} +kind: {{ $route.kind | default "HTTPRoute" }} +metadata: + {{- with $route.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ $serviceName }}{{ if ne $name "main" }}-{{ $name }}{{ end }} + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-alertmanager + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} + {{- with $route.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with $route.parentRefs }} + parentRefs: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with $route.hostnames }} + hostnames: + {{- tpl (toYaml .) $ | nindent 4 }} + {{- end }} + rules: + {{- if $route.additionalRules }} + {{- tpl (toYaml $route.additionalRules) $ | nindent 4 }} + {{- end }} + {{- if $route.httpsRedirect }} + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + statusCode: 301 + {{- else }} + - backendRefs: + - group: "" + kind: Service + weight: 1 + name: {{ $serviceName }} + port: {{ $servicePort }} + {{- with $route.filters }} + filters: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $route.matches }} + matches: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $route.sessionPersistence }} + sessionPersistence: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/secret.yaml b/kube-prometheus-stack/templates/alertmanager/secret.yaml new file mode 100644 index 0000000..6cf39e9 --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/secret.yaml @@ -0,0 +1,29 @@ +{{- if and (.Values.alertmanager.enabled) (not .Values.alertmanager.alertmanagerSpec.useExistingSecret) }} +apiVersion: v1 +kind: Secret +metadata: + name: alertmanager-{{ template "kube-prometheus-stack.alertmanager.crname" . }} + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} +{{- if .Values.alertmanager.secret.annotations }} + annotations: +{{ toYaml .Values.alertmanager.secret.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: +{{- if .Values.alertmanager.tplConfig }} +{{- if .Values.alertmanager.stringConfig }} + alertmanager.yaml: {{ tpl (.Values.alertmanager.stringConfig) . | b64enc | quote }} +{{- else if eq (typeOf .Values.alertmanager.config) "string" }} + alertmanager.yaml: {{ tpl (.Values.alertmanager.config) . | b64enc | quote }} +{{- else }} + alertmanager.yaml: {{ tpl (toYaml .Values.alertmanager.config) . | b64enc | quote }} +{{- end }} +{{- else }} + alertmanager.yaml: {{ toYaml .Values.alertmanager.config | b64enc | quote }} +{{- end }} +{{- range $key, $val := .Values.alertmanager.templateFiles }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/service.yaml b/kube-prometheus-stack/templates/alertmanager/service.yaml new file mode 100644 index 0000000..e82c517 --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/service.yaml @@ -0,0 +1,72 @@ +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and .Values.alertmanager.enabled .Values.alertmanager.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + self-monitor: {{ .Values.alertmanager.serviceMonitor.selfMonitor | quote }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.alertmanager.service.labels }} +{{ toYaml .Values.alertmanager.service.labels | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.service.annotations }} + annotations: +{{ toYaml .Values.alertmanager.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.alertmanager.service.clusterIP }} + clusterIP: {{ .Values.alertmanager.service.clusterIP }} +{{- end }} +{{- if .Values.alertmanager.service.externalIPs }} + externalIPs: +{{ toYaml .Values.alertmanager.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.alertmanager.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.alertmanager.service.loadBalancerIP }} +{{- end }} +{{- if .Values.alertmanager.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.alertmanager.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.alertmanager.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.alertmanager.service.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.alertmanager.alertmanagerSpec.portName }} + {{- if eq .Values.alertmanager.service.type "NodePort" }} + nodePort: {{ .Values.alertmanager.service.nodePort }} + {{- end }} + port: {{ .Values.alertmanager.service.port }} + targetPort: {{ .Values.alertmanager.service.targetPort }} + protocol: TCP + - name: reloader-web + {{- if semverCompare ">=1.20.0-0" $kubeTargetVersion }} + appProtocol: http + {{- end }} + port: 8080 + targetPort: reloader-web +{{- if .Values.alertmanager.service.additionalPorts }} +{{ toYaml .Values.alertmanager.service.additionalPorts | indent 2 }} +{{- end }} + selector: + app.kubernetes.io/name: alertmanager + alertmanager: {{ template "kube-prometheus-stack.alertmanager.crname" . }} +{{- if .Values.alertmanager.service.sessionAffinity }} + sessionAffinity: {{ .Values.alertmanager.service.sessionAffinity }} +{{- end }} +{{- if eq .Values.alertmanager.service.sessionAffinity "ClientIP" }} + sessionAffinityConfig: + clientIP: + timeoutSeconds: {{ .Values.alertmanager.service.sessionAffinityConfig.clientIP.timeoutSeconds }} +{{- end }} + type: "{{ .Values.alertmanager.service.type }}" +{{- if .Values.alertmanager.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.alertmanager.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.alertmanager.service.ipDualStack.ipFamilyPolicy }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/alertmanager/serviceaccount.yaml b/kube-prometheus-stack/templates/alertmanager/serviceaccount.yaml new file mode 100644 index 0000000..51f7890 --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/serviceaccount.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.alertmanager.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-alertmanager + app.kubernetes.io/component: alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.alertmanager.serviceAccount.annotations }} + annotations: +{{ toYaml .Values.alertmanager.serviceAccount.annotations | indent 4 }} +{{- end }} +automountServiceAccountToken: {{ .Values.alertmanager.serviceAccount.automountServiceAccountToken }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . | trim | indent 2}} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml b/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml new file mode 100644 index 0000000..1660808 --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/servicemonitor.yaml @@ -0,0 +1,93 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.serviceMonitor.selfMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.alertmanager.serviceMonitor.additionalLabels }} +{{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.alertmanager.serviceMonitor | nindent 2 }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + release: {{ $.Release.Name | quote }} + self-monitor: "true" + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }} + endpoints: + - port: {{ .Values.alertmanager.alertmanagerSpec.portName }} + enableHttp2: {{ .Values.alertmanager.serviceMonitor.enableHttp2 }} + {{- if .Values.alertmanager.serviceMonitor.interval }} + interval: {{ .Values.alertmanager.serviceMonitor.interval }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.alertmanager.serviceMonitor.proxyUrl}} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.scheme }} + scheme: {{ .Values.alertmanager.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.alertmanager.serviceMonitor.bearerTokenFile }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.alertmanager.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + path: "{{ trimSuffix "/" .Values.alertmanager.alertmanagerSpec.routePrefix }}/metrics" + {{- if .Values.alertmanager.serviceMonitor.metricRelabelings }} + metricRelabelings: {{- tpl (toYaml .Values.alertmanager.serviceMonitor.metricRelabelings | nindent 6) . }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.alertmanager.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + - port: reloader-web + {{- if .Values.alertmanager.serviceMonitor.interval }} + interval: {{ .Values.alertmanager.serviceMonitor.interval }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.alertmanager.serviceMonitor.proxyUrl}} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.scheme }} + scheme: {{ .Values.alertmanager.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.alertmanager.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + path: "/metrics" + {{- if .Values.alertmanager.serviceMonitor.metricRelabelings }} + metricRelabelings: {{- tpl (toYaml .Values.alertmanager.serviceMonitor.metricRelabelings | nindent 6) . }} + {{- end }} + {{- if .Values.alertmanager.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.alertmanager.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + {{- range .Values.alertmanager.serviceMonitor.additionalEndpoints }} + - port: {{ .port }} + {{- if or $.Values.alertmanager.serviceMonitor.interval .interval }} + interval: {{ default $.Values.alertmanager.serviceMonitor.interval .interval }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.proxyUrl .proxyUrl }} + proxyUrl: {{ default $.Values.alertmanager.serviceMonitor.proxyUrl .proxyUrl }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.scheme .scheme }} + scheme: {{ default $.Values.alertmanager.serviceMonitor.scheme .scheme }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.bearerTokenFile .bearerTokenFile }} + bearerTokenFile: {{ default $.Values.alertmanager.serviceMonitor.bearerTokenFile .bearerTokenFile }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.tlsConfig .tlsConfig }} + tlsConfig: {{- default $.Values.alertmanager.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }} + {{- end }} + path: {{ .path }} + {{- if or $.Values.alertmanager.serviceMonitor.metricRelabelings .metricRelabelings }} + metricRelabelings: {{- tpl (default $.Values.alertmanager.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) . }} + {{- end }} + {{- if or $.Values.alertmanager.serviceMonitor.relabelings .relabelings }} + relabelings: {{- default $.Values.alertmanager.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/serviceperreplica.yaml b/kube-prometheus-stack/templates/alertmanager/serviceperreplica.yaml new file mode 100644 index 0000000..6322df8 --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/serviceperreplica.yaml @@ -0,0 +1,49 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.servicePerReplica.enabled }} +{{- $count := .Values.alertmanager.alertmanagerSpec.replicas | int -}} +{{- $serviceValues := .Values.alertmanager.servicePerReplica -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-serviceperreplica + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} +items: +{{- range $i, $e := until $count }} + - apiVersion: v1 + kind: Service + metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-alertmanager-{{ $i }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ include "kube-prometheus-stack.name" $ }}-alertmanager +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $serviceValues.annotations }} + annotations: +{{ toYaml $serviceValues.annotations | indent 8 }} + {{- end }} + spec: + {{- if $serviceValues.clusterIP }} + clusterIP: {{ $serviceValues.clusterIP }} + {{- end }} + {{- if $serviceValues.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := $serviceValues.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} + {{- end }} + {{- if ne $serviceValues.type "ClusterIP" }} + externalTrafficPolicy: {{ $serviceValues.externalTrafficPolicy }} + {{- end }} + ports: + - name: {{ $.Values.alertmanager.alertmanagerSpec.portName }} + {{- if eq $serviceValues.type "NodePort" }} + nodePort: {{ $serviceValues.nodePort }} + {{- end }} + port: {{ $serviceValues.port }} + targetPort: {{ $serviceValues.targetPort }} + selector: + app.kubernetes.io/name: alertmanager + alertmanager: {{ template "kube-prometheus-stack.alertmanager.crname" $ }} + statefulset.kubernetes.io/pod-name: alertmanager-{{ include "kube-prometheus-stack.alertmanager.crname" $ }}-{{ $i }} + type: "{{ $serviceValues.type }}" +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/alertmanager/verticalpodautoscaler.yaml b/kube-prometheus-stack/templates/alertmanager/verticalpodautoscaler.yaml new file mode 100644 index 0000000..60c665a --- /dev/null +++ b/kube-prometheus-stack/templates/alertmanager/verticalpodautoscaler.yaml @@ -0,0 +1,41 @@ +{{- if and .Values.alertmanager.enabled .Values.alertmanager.verticalPodAutoscaler.enabled }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + namespace: {{ template "kube-prometheus-stack-alertmanager.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-alertmanager + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} +spec: + {{- with .Values.alertmanager.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: alertmanager + {{- with .Values.alertmanager.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.alertmanager.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ .Values.alertmanager.verticalPodAutoscaler.controlledValues }} + {{- end }} + {{- if .Values.alertmanager.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{- toYaml .Values.alertmanager.verticalPodAutoscaler.maxAllowed | nindent 8 }} + {{- end }} + {{- if .Values.alertmanager.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{- toYaml .Values.alertmanager.verticalPodAutoscaler.minAllowed | nindent 8 }} + {{- end }} + targetRef: + apiVersion: monitoring.coreos.com/v1 + kind: Alertmanager + name: {{ template "kube-prometheus-stack.alertmanager.crname" . }} + {{- with .Values.alertmanager.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/core-dns/service.yaml b/kube-prometheus-stack/templates/exporters/core-dns/service.yaml new file mode 100644 index 0000000..5dedc36 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/core-dns/service.yaml @@ -0,0 +1,28 @@ +{{- if and .Values.coreDns.enabled .Values.coreDns.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-coredns + labels: + app: {{ template "kube-prometheus-stack.name" . }}-coredns + jobLabel: coredns +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + {{- if .Values.coreDns.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.coreDns.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.coreDns.service.ipDualStack.ipFamilyPolicy }} + {{- end }} + ports: + - name: {{ .Values.coreDns.serviceMonitor.port }} + port: {{ .Values.coreDns.service.port }} + protocol: TCP + targetPort: {{ .Values.coreDns.service.targetPort }} + selector: + {{- if .Values.coreDns.service.selector }} +{{ toYaml .Values.coreDns.service.selector | indent 4 }} + {{- else}} + k8s-app: kube-dns + {{- end}} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml b/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml new file mode 100644 index 0000000..379783b --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/core-dns/servicemonitor.yaml @@ -0,0 +1,54 @@ +{{- if and .Values.coreDns.enabled .Values.coreDns.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-coredns + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-coredns + {{- with .Values.coreDns.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.coreDns.serviceMonitor.jobLabel }} + {{- with .Values.coreDns.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- include "servicemonitor.scrapeLimits" .Values.coreDns.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.coreDns.serviceMonitor.selector }} + {{ tpl (toYaml .Values.coreDns.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-coredns + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.coreDns.serviceMonitor.port }} + {{- if .Values.coreDns.serviceMonitor.interval}} + interval: {{ .Values.coreDns.serviceMonitor.interval }} + {{- end }} + {{- if .Values.coreDns.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.coreDns.serviceMonitor.proxyUrl}} + {{- end }} + {{- if .Values.coreDns.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.coreDns.serviceMonitor.bearerTokenFile }} + {{- end }} +{{- if .Values.coreDns.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.coreDns.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.coreDns.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.coreDns.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml b/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml new file mode 100644 index 0000000..35780a7 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-api-server/servicemonitor.yaml @@ -0,0 +1,51 @@ +{{- if and .Values.kubeApiServer.enabled .Values.kubeApiServer.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-apiserver + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: default + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-apiserver + {{- with .Values.kubeApiServer.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.kubeApiServer.serviceMonitor | nindent 2 }} + endpoints: + - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeApiServer.serviceMonitor.interval }} + interval: {{ .Values.kubeApiServer.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubeApiServer.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeApiServer.serviceMonitor.proxyUrl }} + {{- end }} + port: https + scheme: https +{{- if .Values.kubeApiServer.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubeApiServer.serviceMonitor.metricRelabelings | indent 6) . }} +{{- end }} +{{- if .Values.kubeApiServer.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeApiServer.serviceMonitor.relabelings | indent 6) . }} +{{- end }} + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + serverName: {{ .Values.kubeApiServer.tlsConfig.serverName }} + insecureSkipVerify: {{ .Values.kubeApiServer.tlsConfig.insecureSkipVerify }} + jobLabel: {{ .Values.kubeApiServer.serviceMonitor.jobLabel }} + {{- with .Values.kubeApiServer.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + namespaceSelector: + matchNames: + - default + selector: +{{ toYaml .Values.kubeApiServer.serviceMonitor.selector | indent 4 }} +{{- end}} diff --git a/kube-prometheus-stack/templates/exporters/kube-controller-manager/endpoints.yaml b/kube-prometheus-stack/templates/exporters/kube-controller-manager/endpoints.yaml new file mode 100644 index 0000000..6a6afa6 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-controller-manager/endpoints.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.endpoints .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-controller-manager + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-controller-manager + k8s-app: kube-controller-manager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +subsets: + - addresses: + {{- range .Values.kubeControllerManager.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.kubeControllerManager.serviceMonitor.port }} + {{- $kubeControllerManagerDefaultInsecurePort := 10252 }} + {{- $kubeControllerManagerDefaultSecurePort := 10257 }} + port: {{ include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . $kubeControllerManagerDefaultInsecurePort $kubeControllerManagerDefaultSecurePort .Values.kubeControllerManager.service.port) }} + protocol: TCP +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml b/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml new file mode 100644 index 0000000..0a901c4 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-controller-manager/service.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-controller-manager + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-controller-manager + jobLabel: kube-controller-manager +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + {{- if .Values.kubeControllerManager.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.kubeControllerManager.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.kubeControllerManager.service.ipDualStack.ipFamilyPolicy }} + {{- end }} + ports: + - name: {{ .Values.kubeControllerManager.serviceMonitor.port }} + {{- $kubeControllerManagerDefaultInsecurePort := 10252 }} + {{- $kubeControllerManagerDefaultSecurePort := 10257 }} + port: {{ include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . $kubeControllerManagerDefaultInsecurePort $kubeControllerManagerDefaultSecurePort .Values.kubeControllerManager.service.port) }} + protocol: TCP + targetPort: {{ include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . $kubeControllerManagerDefaultInsecurePort $kubeControllerManagerDefaultSecurePort .Values.kubeControllerManager.service.targetPort) }} +{{- if .Values.kubeControllerManager.endpoints }}{{- else }} + selector: + {{- if .Values.kubeControllerManager.service.selector }} +{{ toYaml .Values.kubeControllerManager.service.selector | indent 4 }} + {{- else}} + component: kube-controller-manager + {{- end}} +{{- end }} + type: ClusterIP +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml b/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml new file mode 100644 index 0000000..2ee8aff --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-controller-manager/servicemonitor.yaml @@ -0,0 +1,63 @@ +{{- if and .Values.kubeControllerManager.enabled .Values.kubeControllerManager.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-controller-manager + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-controller-manager + {{- with .Values.kubeControllerManager.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeControllerManager.serviceMonitor.jobLabel }} + {{- with .Values.kubeControllerManager.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeControllerManager.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeControllerManager.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeControllerManager.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-controller-manager + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.kubeControllerManager.serviceMonitor.port }} + {{- if .Values.kubeControllerManager.serviceMonitor.interval }} + interval: {{ .Values.kubeControllerManager.serviceMonitor.interval }} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeControllerManager.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeControllerManager.serviceMonitor.proxyUrl}} + {{- end }} + {{- if eq (include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . false true .Values.kubeControllerManager.serviceMonitor.https )) "true" }} + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- if eq (include "kube-prometheus-stack.kubeControllerManager.insecureScrape" (list . nil true .Values.kubeControllerManager.serviceMonitor.insecureSkipVerify)) "true" }} + insecureSkipVerify: true + {{- end }} + {{- if .Values.kubeControllerManager.serviceMonitor.serverName }} + serverName: {{ .Values.kubeControllerManager.serviceMonitor.serverName }} + {{- end }} + {{- end }} +{{- if .Values.kubeControllerManager.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubeControllerManager.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubeControllerManager.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeControllerManager.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-dns/service.yaml b/kube-prometheus-stack/templates/exporters/kube-dns/service.yaml new file mode 100644 index 0000000..478f419 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-dns/service.yaml @@ -0,0 +1,32 @@ +{{- if and .Values.kubeDns.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-dns + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-dns + jobLabel: kube-dns +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + {{- if .Values.kubeDns.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.kubeDns.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.kubeDns.service.ipDualStack.ipFamilyPolicy }} + {{- end }} + ports: + - name: http-metrics-dnsmasq + port: {{ .Values.kubeDns.service.dnsmasq.port }} + protocol: TCP + targetPort: {{ .Values.kubeDns.service.dnsmasq.targetPort }} + - name: http-metrics-skydns + port: {{ .Values.kubeDns.service.skydns.port }} + protocol: TCP + targetPort: {{ .Values.kubeDns.service.skydns.targetPort }} + selector: + {{- if .Values.kubeDns.service.selector }} +{{ toYaml .Values.kubeDns.service.selector | indent 4 }} + {{- else}} + k8s-app: kube-dns + {{- end}} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml b/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml new file mode 100644 index 0000000..8e35819 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-dns/servicemonitor.yaml @@ -0,0 +1,69 @@ +{{- if and .Values.kubeDns.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-dns + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-dns + {{- with .Values.kubeDns.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeDns.serviceMonitor.jobLabel }} + {{- with .Values.kubeDns.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeDns.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeDns.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeDns.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-dns + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: http-metrics-dnsmasq + {{- if .Values.kubeDns.serviceMonitor.interval }} + interval: {{ .Values.kubeDns.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubeDns.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.kubeDns.serviceMonitor.bearerTokenFile }} + {{- end }} + {{- if .Values.kubeDns.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeDns.serviceMonitor.proxyUrl}} + {{- end }} +{{- if .Values.kubeDns.serviceMonitor.dnsmasqMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubeDns.serviceMonitor.dnsmasqMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubeDns.serviceMonitor.dnsmasqRelabelings }} + relabelings: +{{ toYaml .Values.kubeDns.serviceMonitor.dnsmasqRelabelings | indent 4 }} +{{- end }} + - port: http-metrics-skydns + {{- if .Values.kubeDns.serviceMonitor.interval }} + interval: {{ .Values.kubeDns.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubeDns.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.kubeDns.serviceMonitor.bearerTokenFile }} + {{- end }} +{{- if .Values.kubeDns.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubeDns.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubeDns.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeDns.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-etcd/endpoints.yaml b/kube-prometheus-stack/templates/exporters/kube-etcd/endpoints.yaml new file mode 100644 index 0000000..e366447 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-etcd/endpoints.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.endpoints .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-etcd + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-etcd + k8s-app: etcd-server +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +subsets: + - addresses: + {{- range .Values.kubeEtcd.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.kubeEtcd.serviceMonitor.port }} + port: {{ .Values.kubeEtcd.service.port }} + protocol: TCP +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml b/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml new file mode 100644 index 0000000..a62059a --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-etcd/service.yaml @@ -0,0 +1,31 @@ +{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-etcd + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-etcd + jobLabel: kube-etcd +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + {{- if .Values.kubeEtcd.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.kubeEtcd.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.kubeEtcd.service.ipDualStack.ipFamilyPolicy }} + {{- end }} + ports: + - name: {{ .Values.kubeEtcd.serviceMonitor.port }} + port: {{ .Values.kubeEtcd.service.port }} + protocol: TCP + targetPort: {{ .Values.kubeEtcd.service.targetPort }} +{{- if .Values.kubeEtcd.endpoints }}{{- else }} + selector: + {{- if .Values.kubeEtcd.service.selector }} +{{ toYaml .Values.kubeEtcd.service.selector | indent 4 }} + {{- else}} + component: etcd + {{- end}} +{{- end }} + type: ClusterIP +{{- end -}} diff --git a/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml b/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml new file mode 100644 index 0000000..999d9e1 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-etcd/servicemonitor.yaml @@ -0,0 +1,71 @@ +{{- if and .Values.kubeEtcd.enabled .Values.kubeEtcd.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-etcd + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-etcd + {{- with .Values.kubeEtcd.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeEtcd.serviceMonitor.jobLabel }} + {{- with .Values.kubeEtcd.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeEtcd.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeEtcd.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeEtcd.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-etcd + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.kubeEtcd.serviceMonitor.port }} + {{- if .Values.kubeEtcd.serviceMonitor.interval }} + interval: {{ .Values.kubeEtcd.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubeEtcd.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.kubeEtcd.serviceMonitor.bearerTokenFile }} + {{- end }} + {{- if .Values.kubeEtcd.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeEtcd.serviceMonitor.proxyUrl}} + {{- end }} + {{- if eq .Values.kubeEtcd.serviceMonitor.scheme "https" }} + scheme: https + tlsConfig: + {{- if .Values.kubeEtcd.serviceMonitor.serverName }} + serverName: {{ .Values.kubeEtcd.serviceMonitor.serverName }} + {{- end }} + {{- if .Values.kubeEtcd.serviceMonitor.caFile }} + caFile: {{ .Values.kubeEtcd.serviceMonitor.caFile }} + {{- end }} + {{- if .Values.kubeEtcd.serviceMonitor.certFile }} + certFile: {{ .Values.kubeEtcd.serviceMonitor.certFile }} + {{- end }} + {{- if .Values.kubeEtcd.serviceMonitor.keyFile }} + keyFile: {{ .Values.kubeEtcd.serviceMonitor.keyFile }} + {{- end}} + insecureSkipVerify: {{ .Values.kubeEtcd.serviceMonitor.insecureSkipVerify }} + {{- end }} +{{- if .Values.kubeEtcd.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubeEtcd.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubeEtcd.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeEtcd.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-proxy/endpoints.yaml b/kube-prometheus-stack/templates/exporters/kube-proxy/endpoints.yaml new file mode 100644 index 0000000..8613e62 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-proxy/endpoints.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.endpoints .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-proxy + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-proxy + k8s-app: kube-proxy +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +subsets: + - addresses: + {{- range .Values.kubeProxy.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.kubeProxy.serviceMonitor.port }} + port: {{ .Values.kubeProxy.service.port }} + protocol: TCP +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml b/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml new file mode 100644 index 0000000..672f549 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-proxy/service.yaml @@ -0,0 +1,31 @@ +{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-proxy + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-proxy + jobLabel: kube-proxy +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + {{- if .Values.kubeProxy.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.kubeProxy.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.kubeProxy.service.ipDualStack.ipFamilyPolicy }} + {{- end }} + ports: + - name: {{ .Values.kubeProxy.serviceMonitor.port }} + port: {{ .Values.kubeProxy.service.port }} + protocol: TCP + targetPort: {{ .Values.kubeProxy.service.targetPort }} +{{- if .Values.kubeProxy.endpoints }}{{- else }} + selector: + {{- if .Values.kubeProxy.service.selector }} +{{ toYaml .Values.kubeProxy.service.selector | indent 4 }} + {{- else}} + k8s-app: kube-proxy + {{- end}} +{{- end }} + type: ClusterIP +{{- end -}} diff --git a/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml b/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml new file mode 100644 index 0000000..c38d2da --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-proxy/servicemonitor.yaml @@ -0,0 +1,59 @@ +{{- if and .Values.kubeProxy.enabled .Values.kubeProxy.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-proxy + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-proxy + {{- with .Values.kubeProxy.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeProxy.serviceMonitor.jobLabel }} + {{- with .Values.kubeProxy.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeProxy.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeProxy.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeProxy.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-proxy + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.kubeProxy.serviceMonitor.port }} + {{- if .Values.kubeProxy.serviceMonitor.interval }} + interval: {{ .Values.kubeProxy.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubeProxy.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.kubeProxy.serviceMonitor.bearerTokenFile }} + {{- end }} + {{- if .Values.kubeProxy.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeProxy.serviceMonitor.proxyUrl}} + {{- end }} + {{- if .Values.kubeProxy.serviceMonitor.https }} + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- end}} +{{- if .Values.kubeProxy.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubeProxy.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubeProxy.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeProxy.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-scheduler/endpoints.yaml b/kube-prometheus-stack/templates/exporters/kube-scheduler/endpoints.yaml new file mode 100644 index 0000000..6236b42 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-scheduler/endpoints.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.endpoints .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-scheduler + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-scheduler + k8s-app: kube-scheduler +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +subsets: + - addresses: + {{- range .Values.kubeScheduler.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.kubeScheduler.serviceMonitor.port }} + {{- $kubeSchedulerDefaultInsecurePort := 10251 }} + {{- $kubeSchedulerDefaultSecurePort := 10259 }} + port: {{ include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . $kubeSchedulerDefaultInsecurePort $kubeSchedulerDefaultSecurePort .Values.kubeScheduler.service.port) }} + protocol: TCP +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml b/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml new file mode 100644 index 0000000..8663d79 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-scheduler/service.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.service.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-scheduler + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-scheduler + jobLabel: kube-scheduler +{{ include "kube-prometheus-stack.labels" . | indent 4 }} + namespace: kube-system +spec: + clusterIP: None + {{- if .Values.kubeScheduler.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.kubeScheduler.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.kubeScheduler.service.ipDualStack.ipFamilyPolicy }} + {{- end }} + ports: + - name: {{ .Values.kubeScheduler.serviceMonitor.port }} + {{- $kubeSchedulerDefaultInsecurePort := 10251 }} + {{- $kubeSchedulerDefaultSecurePort := 10259 }} + port: {{ include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . $kubeSchedulerDefaultInsecurePort $kubeSchedulerDefaultSecurePort .Values.kubeScheduler.service.port) }} + protocol: TCP + targetPort: {{ include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . $kubeSchedulerDefaultInsecurePort $kubeSchedulerDefaultSecurePort .Values.kubeScheduler.service.targetPort) }} +{{- if .Values.kubeScheduler.endpoints }}{{- else }} + selector: + {{- if .Values.kubeScheduler.service.selector }} +{{ toYaml .Values.kubeScheduler.service.selector | indent 4 }} + {{- else}} + component: kube-scheduler + {{- end}} +{{- end }} + type: ClusterIP +{{- end -}} diff --git a/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml b/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml new file mode 100644 index 0000000..26c2853 --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kube-scheduler/servicemonitor.yaml @@ -0,0 +1,63 @@ +{{- if and .Values.kubeScheduler.enabled .Values.kubeScheduler.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kube-scheduler + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: kube-system + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-scheduler + {{- with .Values.kubeScheduler.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + jobLabel: {{ .Values.kubeScheduler.serviceMonitor.jobLabel }} + {{- with .Values.kubeScheduler.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- include "servicemonitor.scrapeLimits" .Values.kubeScheduler.serviceMonitor | nindent 2 }} + selector: + {{- if .Values.kubeScheduler.serviceMonitor.selector }} + {{ tpl (toYaml .Values.kubeScheduler.serviceMonitor.selector | nindent 4) . }} + {{- else }} + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-kube-scheduler + release: {{ $.Release.Name | quote }} + {{- end }} + namespaceSelector: + matchNames: + - "kube-system" + endpoints: + - port: {{ .Values.kubeScheduler.serviceMonitor.port }} + {{- if .Values.kubeScheduler.serviceMonitor.interval }} + interval: {{ .Values.kubeScheduler.serviceMonitor.interval }} + {{- end }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if .Values.kubeScheduler.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubeScheduler.serviceMonitor.proxyUrl}} + {{- end }} + {{- if eq (include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . false true .Values.kubeScheduler.serviceMonitor.https )) "true" }} + scheme: https + tlsConfig: + caFile: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + {{- if eq (include "kube-prometheus-stack.kubeScheduler.insecureScrape" (list . nil true .Values.kubeScheduler.serviceMonitor.insecureSkipVerify)) "true" }} + insecureSkipVerify: true + {{- end }} + {{- if .Values.kubeScheduler.serviceMonitor.serverName }} + serverName: {{ .Values.kubeScheduler.serviceMonitor.serverName }} + {{- end}} + {{- end}} +{{- if .Values.kubeScheduler.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubeScheduler.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubeScheduler.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubeScheduler.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml b/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml new file mode 100644 index 0000000..41f160d --- /dev/null +++ b/kube-prometheus-stack/templates/exporters/kubelet/servicemonitor.yaml @@ -0,0 +1,149 @@ +{{- if and .Values.kubelet.enabled .Values.kubelet.serviceMonitor.enabled .Values.kubernetesServiceMonitors.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-kubelet + {{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + namespace: {{ .Values.kubelet.namespace }} + {{- else }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-kubelet + {{- with .Values.kubelet.serviceMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.kubelet.serviceMonitor | nindent 2 }} + {{- with .Values.kubelet.serviceMonitor.attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 4 }} + {{- end }} + jobLabel: k8s-app + {{- with .Values.kubelet.serviceMonitor.targetLabels }} + targetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + namespaceSelector: + matchNames: + - {{ .Values.kubelet.namespace }} + selector: + matchLabels: + app.kubernetes.io/name: kubelet + k8s-app: kubelet + endpoints: +{{- if .Values.kubelet.serviceMonitor.kubelet }} + - port: {{ template "kube-prometheus-stack.kubelet.scheme" . }}-metrics + scheme: {{ template "kube-prometheus-stack.kubelet.scheme" . }} + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + {{- include "kube-prometheus-stack.kubelet.authConfig" . | indent 4 }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} +{{- if .Values.kubelet.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.metricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.relabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.relabelings | indent 4) . }} +{{- end }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.cAdvisor }} + - port: {{ template "kube-prometheus-stack.kubelet.scheme" . }}-metrics + scheme: {{ template "kube-prometheus-stack.kubelet.scheme" . }} + path: /metrics/cadvisor + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- else }} + interval: {{ .Values.kubelet.serviceMonitor.cAdvisorInterval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + {{- if .Values.kubelet.serviceMonitor.trackTimestampsStaleness }} + honorTimestamps: true + {{- else }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + {{- end }} + trackTimestampsStaleness: {{ .Values.kubelet.serviceMonitor.trackTimestampsStaleness }} + {{- include "kube-prometheus-stack.kubelet.authConfig" . | indent 4 }} +{{- if .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.cAdvisorRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.cAdvisorRelabelings | indent 4) . }} +{{- end }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.probes }} + - port: {{ template "kube-prometheus-stack.kubelet.scheme" . }}-metrics + scheme: {{ template "kube-prometheus-stack.kubelet.scheme" . }} + path: /metrics/probes + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + {{- include "kube-prometheus-stack.kubelet.authConfig" . | indent 4 }} +{{- if .Values.kubelet.serviceMonitor.probesMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.probesMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.probesRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.probesRelabelings | indent 4) . }} +{{- end }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.resource }} + - port: {{ template "kube-prometheus-stack.kubelet.scheme" . }}-metrics + scheme: {{ template "kube-prometheus-stack.kubelet.scheme" . }} + path: {{ .Values.kubelet.serviceMonitor.resourcePath }} + {{- if .Values.kubelet.serviceMonitor.interval }} + interval: {{ .Values.kubelet.serviceMonitor.interval }} + {{- else }} + interval: {{ .Values.kubelet.serviceMonitor.resourceInterval }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.kubelet.serviceMonitor.proxyUrl }} + {{- end }} + {{- if .Values.kubelet.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.kubelet.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: {{ .Values.kubelet.serviceMonitor.honorLabels }} + {{- if .Values.kubelet.serviceMonitor.trackTimestampsStaleness }} + honorTimestamps: true + {{- else }} + honorTimestamps: {{ .Values.kubelet.serviceMonitor.honorTimestamps }} + {{- end }} + trackTimestampsStaleness: {{ .Values.kubelet.serviceMonitor.trackTimestampsStaleness }} + {{- include "kube-prometheus-stack.kubelet.authConfig" . | indent 4 }} +{{- if .Values.kubelet.serviceMonitor.resourceMetricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceMetricRelabelings | indent 4) . }} +{{- end }} +{{- if .Values.kubelet.serviceMonitor.resourceRelabelings }} + relabelings: +{{ tpl (toYaml .Values.kubelet.serviceMonitor.resourceRelabelings | indent 4) . }} +{{- end }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/extra-objects.yaml b/kube-prometheus-stack/templates/extra-objects.yaml new file mode 100644 index 0000000..b0ec6fa --- /dev/null +++ b/kube-prometheus-stack/templates/extra-objects.yaml @@ -0,0 +1,15 @@ +{{- /* Normalize extraObjects to a list, easier to loop over */ -}} +{{- $extraObjects := .Values.extraManifests | default (list) -}} + +{{- if kindIs "map" $extraObjects -}} + {{- $extraObjects = values $extraObjects -}} +{{- end -}} + +{{- range $extraObjects }} +--- + {{- if kindIs "map" . }} + {{- tpl (toYaml .) $ | nindent 0 }} + {{- else if kindIs "string" . }} + {{- tpl . $ | nindent 0 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml b/kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml new file mode 100644 index 0000000..3b0f328 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/configmaps-datasources.yaml @@ -0,0 +1,98 @@ +{{- if or (and .Values.grafana.enabled .Values.grafana.sidecar.datasources.enabled) .Values.grafana.forceDeployDatasources }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-grafana-datasource + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} +{{- if .Values.grafana.sidecar.datasources.annotations }} + annotations: + {{- toYaml .Values.grafana.sidecar.datasources.annotations | nindent 4 }} +{{- end }} + labels: + {{ tpl $.Values.grafana.sidecar.datasources.label $ }}: {{ (tpl $.Values.grafana.sidecar.datasources.labelValue $) | quote }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana +{{ include "kube-prometheus-stack.labels" $ | indent 4 }} +data: + datasource.yaml: |- + apiVersion: 1 +{{- if .Values.grafana.deleteDatasources }} + deleteDatasources: +{{ tpl (toYaml .Values.grafana.deleteDatasources | indent 6) . }} +{{- end }} +{{- if .Values.grafana.prune }} + prune: {{ .Values.grafana.prune }} +{{- end }} + datasources: +{{- $scrapeInterval := .Values.grafana.sidecar.datasources.defaultDatasourceScrapeInterval | default .Values.prometheus.prometheusSpec.scrapeInterval | default "30s" }} +{{- if .Values.grafana.sidecar.datasources.defaultDatasourceEnabled }} + - name: "{{ .Values.grafana.sidecar.datasources.name }}" + type: prometheus + uid: {{ .Values.grafana.sidecar.datasources.uid }} + {{- if .Values.grafana.sidecar.datasources.url }} + url: {{ .Values.grafana.sidecar.datasources.url }} + {{- else }} + url: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.prometheus.service.port }}/{{ trimPrefix "/" .Values.prometheus.prometheusSpec.routePrefix }} + {{- end }} + access: proxy + isDefault: {{ .Values.grafana.sidecar.datasources.isDefaultDatasource }} + jsonData: + {{- with .Values.grafana.sidecar.datasources.extraJsonData -}} + {{- tpl (toYaml .) $ | nindent 8 }} + {{- end }} + httpMethod: {{ .Values.grafana.sidecar.datasources.httpMethod }} + timeInterval: {{ $scrapeInterval }} + {{- if .Values.grafana.sidecar.datasources.timeout }} + timeout: {{ .Values.grafana.sidecar.datasources.timeout }} + {{- end }} + {{- if .Values.grafana.sidecar.datasources.customQueryParameters }} + customQueryParameters: {{ .Values.grafana.sidecar.datasources.customQueryParameters }} + {{- end }} +{{- if .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations }} + exemplarTraceIdDestinations: + - datasourceUid: {{ .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.datasourceUid }} + name: {{ .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.traceIdLabelName }} + urlDisplayLabel: {{ .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.urlDisplayLabel }} +{{- end }} +{{- if .Values.grafana.sidecar.datasources.createPrometheusReplicasDatasources }} +{{- range until (int .Values.prometheus.prometheusSpec.replicas) }} + - name: "{{ $.Values.grafana.sidecar.datasources.name }}-{{ . }}" + type: prometheus + uid: {{ $.Values.grafana.sidecar.datasources.uid }}-replica-{{ . }} + url: http://prometheus-{{ template "kube-prometheus-stack.prometheus.crname" $ }}-{{ . }}.{{ $.Values.grafana.sidecar.datasources.prometheusServiceName}}:9090/{{ trimPrefix "/" $.Values.prometheus.prometheusSpec.routePrefix }} + access: proxy + isDefault: false + jsonData: + {{- with $.Values.grafana.sidecar.datasources.extraJsonData -}} + {{- tpl (toYaml .) $ | nindent 8 }} + {{- end }} + timeInterval: {{ $scrapeInterval }} +{{- if $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations }} + exemplarTraceIdDestinations: + - datasourceUid: {{ $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.datasourceUid }} + name: {{ $.Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.traceIdLabelName }} + urlDisplayLabel: {{ .Values.grafana.sidecar.datasources.exemplarTraceIdDestinations.urlDisplayLabel }} +{{- end }} +{{- end }} +{{- end }} +{{- if .Values.grafana.sidecar.datasources.alertmanager.enabled }} + - name: "{{ .Values.grafana.sidecar.datasources.alertmanager.name }}" + type: alertmanager + uid: {{ .Values.grafana.sidecar.datasources.alertmanager.uid }} + {{- if .Values.grafana.sidecar.datasources.alertmanager.url }} + url: {{ .Values.grafana.sidecar.datasources.alertmanager.url }} + {{- else }} + url: http://{{ template "kube-prometheus-stack.fullname" . }}-alertmanager.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.alertmanager.service.port }}/{{ trimPrefix "/" .Values.alertmanager.alertmanagerSpec.routePrefix }} + {{- end }} + access: proxy + jsonData: + handleGrafanaManagedAlerts: {{ .Values.grafana.sidecar.datasources.alertmanager.handleGrafanaManagedAlerts }} + implementation: {{ .Values.grafana.sidecar.datasources.alertmanager.implementation }} +{{- end }} +{{- end }} +{{- if .Values.grafana.additionalDataSources }} +{{ tpl (toYaml .Values.grafana.additionalDataSources | indent 4) . }} +{{- end }} +{{- with .Values.grafana.additionalDataSourcesString }} +{{ tpl . $ | indent 4 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml new file mode 100644 index 0000000..220b835 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/alertmanager-overview.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'alertmanager-overview' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.alertmanager.enabled .Values.alertmanager.forceDeployDashboards) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + alertmanager-overview.json: |- + {{`{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"Alerts","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"description":"current set of alerts stored in the Alertmanager","fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"none"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"}) by (namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}}"}],"title":"Alerts","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"description":"rate of successful and invalid alerts received by the Alertmanager","fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}} Received"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval])) by (namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}} Invalid"}],"title":"Alerts receive rate","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"panels":[],"title":"Notifications","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"description":"rate of successful and invalid notifications sent by the Alertmanager","fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":5,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","repeat":"integration","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}} Total"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}} Failed"}],"title":"$integration: Notifications Send Rate","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"description":"latency of notifications sent by the Alertmanager","fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":6,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","repeat":"integration","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n)\n","intervalFactor":2,"legendFormat":"{{instance}} 99th Percentile"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n)\n","intervalFactor":2,"legendFormat":"{{instance}} Median"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n/\nsum(rate(alertmanager_notification_latency_seconds_count{namespace=~\"$namespace\",service=~\"$service\", integration=\"$integration\"}[$__rate_interval])) by (namespace,service,instance)\n","intervalFactor":2,"legendFormat":"{{instance}} Average"}],"title":"$integration: Notification Duration","type":"timeseries"}],"schemaVersion":39,"tags":["alertmanager-mixin"],"templating":{"list":[{"current":{"selected":false,"text":"Prometheus","value":"Prometheus"},"hide":0,"label":"Data Source","name":"datasource","query":"prometheus","type":"datasource"},{"current":{"selected":false,"text":"","value":""},"datasource":{"type":"prometheus","uid":"${datasource}"},"includeAll":false,"label":"namespace","name":"namespace","query":"label_values(alertmanager_alerts, namespace)","refresh":2,"sort":1,"type":"query"},{"current":{"selected":false,"text":"","value":""},"datasource":{"type":"prometheus","uid":"${datasource}"},"includeAll":false,"label":"service","name":"service","query":"label_values(alertmanager_alerts, service)","refresh":2,"sort":1,"type":"query"},{"current":{"selected":false,"text":"$__all","value":"$__all"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":2,"includeAll":true,"name":"integration","query":"label_values(alertmanager_notifications_total{integration=~\".*\"}, integration)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["30s"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Alertmanager / Overview","uid":"alertmanager-overview"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.alertmanager.enabled .Values.alertmanager.forceDeployDashboards) }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "alertmanager-overview" | trunc 63 | trimSuffix "-" }} + key: alertmanager-overview.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml new file mode 100644 index 0000000..a67a8ed --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/apiserver.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'apiserver' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeApiServer.enabled }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + apiserver.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.","gridPos":{"h":2,"w":24,"x":0,"y":0},"id":1,"options":{"content":"The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only."},"pluginVersion":"v11.4.0","title":"Notice","type":"text"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":8,"x":0,"y":2},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}"}],"title":"Availability (30d) > 99.000%","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How much error budget is left looking at our 0.990% availability guarantees?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100},"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":16,"x":8,"y":2},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)","legendFormat":"errorbudget"}],"title":"ErrorBudget (30d) > 99.000%","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":0,"y":9},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}"}],"title":"Read Availability (30d)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many read requests (LIST,GET) per second do the apiservers get by code?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"stacking":{"mode":"normal"}},"unit":"reqps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/2../i"},"properties":[{"id":"color","value":"#56A64B"}]},{"matcher":{"id":"byRegexp","options":"/3../i"},"properties":[{"id":"color","value":"#F2CC0C"}]},{"matcher":{"id":"byRegexp","options":"/4../i"},"properties":[{"id":"color","value":"#3274D9"}]},{"matcher":{"id":"byRegexp","options":"/5../i"},"properties":[{"id":"color","value":"#E02F44"}]}]},"gridPos":{"h":7,"w":6,"x":6,"y":9},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","legendFormat":"{{ code }}"}],"title":"Read SLI - Requests","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?","fieldConfig":{"defaults":{"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":12,"y":9},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})","legendFormat":"{{ resource }}"}],"title":"Read SLI - Errors","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many seconds is the 99th percentile for reading (LIST|GET) a given resource?","fieldConfig":{"defaults":{"unit":"s"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}","legendFormat":"{{ resource }}"}],"title":"Read SLI - Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?","fieldConfig":{"defaults":{"decimals":3,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":0,"y":16},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}"}],"title":"Write Availability (30d)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?","fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"stacking":{"mode":"normal"}},"unit":"reqps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/2../i"},"properties":[{"id":"color","value":"#56A64B"}]},{"matcher":{"id":"byRegexp","options":"/3../i"},"properties":[{"id":"color","value":"#F2CC0C"}]},{"matcher":{"id":"byRegexp","options":"/4../i"},"properties":[{"id":"color","value":"#3274D9"}]},{"matcher":{"id":"byRegexp","options":"/5../i"},"properties":[{"id":"color","value":"#E02F44"}]}]},"gridPos":{"h":7,"w":6,"x":6,"y":16},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","legendFormat":"{{ code }}"}],"title":"Write SLI - Requests","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?","fieldConfig":{"defaults":{"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":6,"x":12,"y":16},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})","legendFormat":"{{ resource }}"}],"title":"Write SLI - Errors","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"description":"How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?","fieldConfig":{"defaults":{"unit":"s"}},"gridPos":{"h":7,"w":6,"x":18,"y":16},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}","legendFormat":"{{ resource }}"}],"title":"Write SLI - Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":23},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":false},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_adds_total{job=\"`}}{{ $kubeApiserverJob }}{{`\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Add Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":23},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":false},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_depth{job=\"`}}{{ $kubeApiserverJob }}{{`\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Depth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":30},"id":14,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"`}}{{ $kubeApiserverJob }}{{`\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))","legendFormat":"{{instance}} {{name}}"}],"title":"Work Queue Latency","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":37},"id":15,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{job=\"`}}{{ $kubeApiserverJob }}{{`\",instance=~\"$instance\", cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"min":0,"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":37},"id":16,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{job=\"`}}{{ $kubeApiserverJob }}{{`\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":37},"id":17,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{job=\"`}}{{ $kubeApiserverJob }}{{`\",instance=~\"$instance\", cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeApiserverJob }}{{`\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"name":"instance","query":"label_values(up{job=\"`}}{{ $kubeApiserverJob }}{{`\", cluster=\"$cluster\"}, instance)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / API server","uid":"09ec8aa1e996d6ffcd6817bbaff4db1b"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeApiServer.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "apiserver" | trunc 63 | trimSuffix "-" }} + key: apiserver.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml new file mode 100644 index 0000000..93449d2 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/cluster-total.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'cluster-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + cluster-total.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Current Rate of Bits Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Current Rate of Bits Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bits/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down","url":"/d/8b7a8b326d7a6f1f04244066368c67af?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":9,"w":24,"x":0,"y":9},"id":3,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","format":"table","instant":true}],"title":"Current Status","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time 7":true,"Time 8":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Time 7":6,"Time 8":7,"Value #A":9,"Value #B":10,"Value #C":11,"Value #D":12,"Value #E":13,"Value #F":14,"Value #G":15,"Value #H":16,"namespace":8},"renameByName":{"Value #A":"Rx Bits","Value #B":"Tx Bits","Value #C":"Rx Bits (Avg)","Value #D":"Tx Bits (Avg)","Value #E":"Rx Packets","Value #F":"Tx Packets","Value #G":"Rx Packets Dropped","Value #H":"Tx Packets Dropped","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Average Rate of Bits Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Average Rate of Bits Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":36},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":36},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":45},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":45},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (namespace) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\",cluster=\"$cluster\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"percentunit"}},"gridPos":{"h":9,"w":12,"x":0,"y":54},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (instance) (\n rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$__rate_interval]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$__rate_interval])\n)\n","legendFormat":"__auto"}],"title":"Rate of TCP Retransmits out of all sent segments","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"percentunit"}},"gridPos":{"h":9,"w":12,"x":12,"y":54},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (instance) (\n rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$__rate_interval]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$__rate_interval])\n)\n","legendFormat":"__auto"}],"title":"Rate of TCP SYN Retransmits out of all retransmits","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Networking / Cluster","uid":"ff635a025bcfea7bc3dd4f508990a3e9"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "cluster-total" | trunc 63 | trimSuffix "-" }} + key: cluster-total.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/controller-manager.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/controller-manager.yaml new file mode 100644 index 0000000..b746def --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/controller-manager.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'controller-manager' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeControllerManager.enabled }} +{{- $kubeControllerManagerJob := include "kube-prometheus-stack-kube-controller-manager.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "controller-manager" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + controller-manager.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(up{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\"})","instant":true}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":20,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)","legendFormat":"{{cluster}} {{instance}} {{name}}"}],"title":"Work Queue Add Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(workqueue_depth{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)","legendFormat":"{{cluster}} {{instance}} {{name}}"}],"title":"Work Queue Depth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))","legendFormat":"{{cluster}} {{instance}} {{name}}"}],"title":"Work Queue Latency","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":8,"x":0,"y":21},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","legendFormat":"2xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","legendFormat":"3xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","legendFormat":"4xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","legendFormat":"5xx"}],"title":"Kube API Request Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":16,"x":8,"y":21},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Post Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Get Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":35},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":35},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\",instance=~\"$instance\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":35},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeControllerManagerJob }}{{`\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"instance","name":"instance","query":"label_values(up{cluster=\"$cluster\", job=\"`}}{{ $kubeControllerManagerJob }}{{`\"}, instance)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Controller Manager","uid":"72e0e05bef5099e5f049b05fdc429ed4"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeControllerManager.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "controller-manager" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "controller-manager" | trunc 63 | trimSuffix "-" }} + key: controller-manager.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml new file mode 100644 index 0000000..61867d1 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/etcd.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'etcd' from https://github.com/etcd-io/etcd.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeEtcd.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + etcd.json: |- + {{`{"description":"etcd sample Grafana dashboard with Prometheus","panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"gridPos":{"h":7,"w":6,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none","graphMode":"none","reduceOptions":{"calcs":["lastNotNull"]}},"pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(etcd_server_has_leader{job=~\".*etcd.*\", job=\"$cluster\"})","legendFormat":"{{cluster}} - {{namespace}}\n"}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"ops"}},"gridPos":{"h":7,"w":10,"x":6,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(grpc_server_started_total{job=~\".*etcd.*\", job=\"$cluster\",grpc_type=\"unary\"}[$__rate_interval]))","legendFormat":"RPC rate"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(grpc_server_handled_total{job=~\".*etcd.*\", job=\"$cluster\",grpc_type=\"unary\",grpc_code=~\"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded\"}[$__rate_interval]))","legendFormat":"RPC failed rate"}],"title":"RPC rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":16,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(grpc_server_started_total{job=~\".*etcd.*\",job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Watch\",grpc_type=\"bidi_stream\"})","legendFormat":"Watch streams"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(grpc_server_started_total{job=~\".*etcd.*\",job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"}) - sum(grpc_server_handled_total{job=\"$cluster\",grpc_service=\"etcdserverpb.Lease\",grpc_type=\"bidi_stream\"})","legendFormat":"Lease streams"}],"title":"Active streams","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":25},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"etcd_mvcc_db_total_size_in_bytes{job=~\".*etcd.*\", job=\"$cluster\"}","legendFormat":"{{instance}} DB size"}],"title":"DB size","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"s"}},"gridPos":{"h":7,"w":8,"x":8,"y":25},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, sum(rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}} WAL fsync"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, sum(rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}} DB fsync"}],"title":"Disk sync duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":16,"y":25},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"process_resident_memory_bytes{job=~\".*etcd.*\", job=\"$cluster\"}","legendFormat":"{{instance}} resident memory"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":0,"y":50},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(etcd_network_client_grpc_received_bytes_total{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}} client traffic in"}],"title":"Client traffic in","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":6,"y":50},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(etcd_network_client_grpc_sent_bytes_total{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])","legendFormat":"{{instance}} client traffic out"}],"title":"Client traffic out","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":12,"y":50},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(etcd_network_peer_received_bytes_total{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} peer traffic in"}],"title":"Peer traffic in","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"Bps"}},"gridPos":{"h":7,"w":6,"x":18,"y":50},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(etcd_network_peer_sent_bytes_total{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} peer traffic out"}],"title":"Peer traffic out","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":0,"y":75},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"changes(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\", job=\"$cluster\"}[1d])","legendFormat":"{{instance}} total leader elections per day"}],"title":"Raft proposals","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"}}},"gridPos":{"h":7,"w":8,"x":8,"y":75},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"changes(etcd_server_leader_changes_seen_total{job=~\".*etcd.*\", job=\"$cluster\"}[1d])","legendFormat":"{{instance}} total leader elections per day"}],"title":"Total leader elections per day","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"lineWidth":2,"showPoints":"never"},"unit":"s"}},"gridPos":{"h":7,"w":8,"x":16,"y":75},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v10.0.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"histogram_quantile(0.99, sum by (instance, le) (rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~\".*etcd.*\", job=\"$cluster\"}[$__rate_interval])))","legendFormat":"{{instance}} peer round trip time"}],"title":"Peer round trip time","type":"timeseries"}],"refresh":"10s","schemaVersion":36,"tags":["etcd-mixin"],"templating":{"list":[{"label":"Data Source","name":"datasource","query":"prometheus","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"cluster","name":"cluster","query":"label_values(etcd_server_has_leader{job=~\".*etcd.*\"}, job)","refresh":2,"type":"query","allValue":".*","hide":`}}{{ if (or .Values.grafana.sidecar.dashboards.multicluster.global.enabled .Values.grafana.sidecar.dashboards.multicluster.etcd.enabled) }}0{{ else }}2{{ end }}{{`}]},"time":{"from":"now-15m","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"etcd","uid":"c2f4e12cdf69feb95caa41a5a1b423d9"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeEtcd.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "etcd" | trunc 63 | trimSuffix "-" }} + key: etcd.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/grafana-overview.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/grafana-overview.yaml new file mode 100644 index 0000000..d5b13aa --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/grafana-overview.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'grafana-overview' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "grafana-overview" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + grafana-overview.json: |- + {{`{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"datasource","uid":"grafana"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","target":{"limit":100,"matchAny":false,"tags":[],"type":"dashboard"},"type":"dashboard"}]},"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"fiscalYearStartMonth":0,"graphTooltip":0,"id":23,"links":[],"panels":[{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"mappings":[],"noValue":"0","thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":6,"x":0,"y":0},"id":6,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showPercentChange":false,"text":{},"textMode":"auto","wideLayout":true},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"expr":"grafana_alerting_result_total{job=~\"$job\", instance=~\"$instance\", state=\"alerting\"}","instant":true,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","legendFormat":"","refId":"A"}],"title":"Firing Alerts","type":"stat"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":6,"x":6,"y":0},"id":8,"options":{"colorMode":"value","graphMode":"area","justifyMode":"auto","orientation":"auto","percentChangeColorMode":"standard","reduceOptions":{"calcs":["mean"],"fields":"","values":false},"showPercentChange":false,"text":{},"textMode":"auto","wideLayout":true},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(grafana_stat_totals_dashboard{job=~\"$job\", instance=~\"$instance\"})","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","legendFormat":"","refId":"A"}],"title":"Dashboards","type":"stat"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"cellOptions":{"type":"auto"},"inspect":false},"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]}},"overrides":[]},"gridPos":{"h":5,"w":12,"x":12,"y":0},"id":10,"options":{"cellHeight":"sm","footer":{"countRows":false,"fields":"","reducer":["sum"],"show":false},"showHeader":true},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"expr":"grafana_build_info{job=~\"$job\", instance=~\"$instance\"}","instant":true,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","legendFormat":"","refId":"A"}],"title":"Build Info","transformations":[{"id":"labelsToFields","options":{}},{"id":"merge","options":{}},{"id":"organize","options":{"excludeByName":{"Time":true,"Value":true,"branch":true,"container":true,"goversion":true,"namespace":true,"pod":true,"revision":true},"indexByName":{"Time":7,"Value":11,"branch":4,"container":8,"edition":2,"goversion":6,"instance":1,"job":0,"namespace":9,"pod":10,"revision":5,"version":3},"renameByName":{}}}],"type":"table"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"reqps"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":0,"y":5},"id":2,"options":{"alertThreshold":true,"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"multi","sort":"none"}},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum by (status_code) (irate(grafana_http_request_duration_seconds_count{job=~\"$job\", instance=~\"$instance\"}[1m])) ","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","legendFormat":"{{status_code}}","refId":"A"}],"title":"RPS","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"barWidthFactor":0.6,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":1,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":false,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"thresholds":{"mode":"absolute","steps":[{"color":"green"},{"color":"red","value":80}]},"unit":"ms"},"overrides":[]},"gridPos":{"h":8,"w":12,"x":12,"y":5},"id":4,"options":{"alertThreshold":true,"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"hideZeros":false,"mode":"multi","sort":"none"}},"pluginVersion":"12.0.2","targets":[{"datasource":{"uid":"$datasource"},"exemplar":true,"expr":"histogram_quantile(0.99, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","legendFormat":"99th Percentile","refId":"A"},{"datasource":{"uid":"$datasource"},"exemplar":true,"expr":"histogram_quantile(0.50, sum(irate(grafana_http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) by (le)) * 1","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","legendFormat":"50th Percentile","refId":"B"},{"datasource":{"uid":"$datasource"},"exemplar":true,"expr":"sum(irate(grafana_http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval])) * 1 / sum(irate(grafana_http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\"}[$__rate_interval]))","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","legendFormat":"Average","refId":"C"}],"title":"Request Latency","type":"timeseries"}],"preload":false,"refresh":"","schemaVersion":41,"tags":[],"templating":{"list":[{"current":{"text":"Prometheus","value":"prometheus"},"includeAll":false,"name":"datasource","options":[],"query":"prometheus","refresh":1,"regex":"","type":"datasource"},{"allValue":".*","current":{"text":"All","value":["$__all"]},"datasource":"$datasource","definition":"label_values(grafana_build_info, job)","includeAll":true,"multi":true,"name":"job","options":[],"query":{"query":"label_values(grafana_build_info, job)","refId":"Billing Admin-job-Variable-Query"},"refresh":1,"regex":"","type":"query"},{"allValue":".*","current":{"text":"All","value":"$__all"},"datasource":"$datasource","definition":"label_values(grafana_build_info, instance)","includeAll":true,"multi":true,"name":"instance","options":[],"query":{"query":"label_values(grafana_build_info, instance)","refId":"Billing Admin-instance-Variable-Query"},"refresh":1,"regex":"","type":"query"}]},"time":{"from":"now-6h","to":"now"},"timepicker":{"refresh_intervals":["10s","30s","1m","5m","15m","30m","1h","2h","1d"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Grafana Overview","uid":"6be0s85Mk","version":1}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "grafana-overview" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "grafana-overview" | trunc 63 | trimSuffix "-" }} + key: grafana-overview.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-coredns.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-coredns.yaml new file mode 100644 index 0000000..92ceac9 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-coredns.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'k8s-coredns' from ../files/dashboards/k8s-coredns.json +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.coreDns.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-coredns.json: |- + {{`{"annotations":{"list":[{"builtIn":1,"datasource":{"type":"datasource","uid":"grafana"},"enable":true,"hide":true,"iconColor":"rgba(0, 211, 255, 1)","name":"Annotations & Alerts","type":"dashboard"}]},"description":"A dashboard for the CoreDNS DNS server with updated metrics for version 1.7.0+. Based on the CoreDNS dashboard by buhay.","editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"fiscalYearStartMonth":0,"gnetId":12539,"graphTooltip":0,"id":7,"links":[{"icon":"external link","tags":[],"targetBlank":true,"title":"CoreDNS.io","type":"link","url":"https://coredns.io"}],"liveNow":false,"panels":[{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":8,"x":0,"y":0},"id":2,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (proto) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (proto)","format":"time_series","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ proto }}","refId":"A","step":60}],"title":"Requests (total)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":8,"x":8,"y":0},"id":4,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_type_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (type) or \nsum(rate(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (type)","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ type }}","refId":"A","step":60}],"title":"Requests (by qtype)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":8,"x":16,"y":0},"id":6,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (zone) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (zone)","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ zone }}","refId":"A","step":60}],"title":"Requests (by zone)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":8,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_do_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_do_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m]))","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"DO","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_request_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) or\nsum(rate(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m]))","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"total","refId":"B","step":40}],"title":"Requests (DO bit)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes","unitScale":true},"overrides":[{"matcher":{"id":"byName","options":"tcp:90"},"properties":[{"id":"unit","value":"short"}]},{"matcher":{"id":"byName","options":"tcp:99 "},"properties":[{"id":"unit","value":"short"}]},{"matcher":{"id":"byName","options":"tcp:50"},"properties":[{"id":"unit","value":"short"}]}]},"gridPos":{"h":7,"w":6,"x":12,"y":7},"id":10,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)))","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ proto }}:99 ","refId":"A","step":60},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)))","intervalFactor":2,"legendFormat":"{{ proto }}:90","refId":"B","step":60},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto)))","intervalFactor":2,"legendFormat":"{{ proto }}:50","refId":"C","step":60}],"title":"Requests (size, udp)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":6,"x":18,"y":7},"id":12,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)))","format":"time_series","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ proto }}:99 ","refId":"A","step":60},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)))","format":"time_series","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ proto }}:90","refId":"B","step":60},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, (sum(rate(coredns_dns_request_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_request_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto)))","format":"time_series","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ proto }}:50","refId":"C","step":60}],"title":"Requests (size,tcp)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":0,"y":14},"id":14,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_dns_response_rcode_count_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (rcode) or\nsum(rate(coredns_dns_responses_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (rcode)","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ rcode }}","refId":"A","step":40}],"title":"Responses (by rcode)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"s","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":12,"y":14},"id":32,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, (sum(rate(coredns_dns_request_duration_seconds{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (job)) or (sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (le, job)))","format":"time_series","intervalFactor":2,"legendFormat":"99%","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, (sum(rate(coredns_dns_request_duration_seconds{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by ()) or (sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (le)))","format":"time_series","intervalFactor":2,"legendFormat":"90%","refId":"B","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, (sum(rate(coredns_dns_request_duration_seconds{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by ()) or (sum(rate(coredns_dns_request_duration_seconds_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (le)))","format":"time_series","intervalFactor":2,"legendFormat":"50%","refId":"C","step":40}],"title":"Responses (duration)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes","unitScale":true},"overrides":[{"matcher":{"id":"byName","options":"tcp:50%"},"properties":[{"id":"unit","value":"short"}]},{"matcher":{"id":"byName","options":"tcp:90%"},"properties":[{"id":"unit","value":"short"}]},{"matcher":{"id":"byName","options":"tcp:99%"},"properties":[{"id":"unit","value":"short"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":21},"id":18,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))) ","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ proto }}:99%","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))) ","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ proto }}:90%","refId":"B","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"udp\"}[5m])) by (le,proto))) ","hide":false,"intervalFactor":2,"legendFormat":"{{ proto }}:50%","metric":"","refId":"C","step":40}],"title":"Responses (size, udp)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"none"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"bytes","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":12,"y":21},"id":20,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"none"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.99, (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:99%","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.90, (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:90%","refId":"B","step":40},{"datasource":{"uid":"$datasource"},"expr":"histogram_quantile(0.50, (sum(rate(coredns_dns_response_size_bytes{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (proto)) or (sum(rate(coredns_dns_response_size_bytes_bucket{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\",proto=\"tcp\"}[5m])) by (le,proto))) ","format":"time_series","intervalFactor":2,"legendFormat":"{{ proto }}:50%","metric":"","refId":"C","step":40}],"title":"Responses (size, tcp)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"decbytes","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":22,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(coredns_cache_size{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}) by (type) or\nsum(coredns_cache_entries{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}) by (type)","interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","intervalFactor":2,"legendFormat":"{{ type }}","refId":"A","step":40}],"title":"Cache (size)","type":"timeseries"},{"datasource":{"uid":"$datasource"},"fieldConfig":{"defaults":{"color":{"mode":"palette-classic"},"custom":{"axisBorderShow":false,"axisCenteredZero":false,"axisColorMode":"text","axisLabel":"","axisPlacement":"auto","barAlignment":0,"drawStyle":"line","fillOpacity":10,"gradientMode":"none","hideFrom":{"legend":false,"tooltip":false,"viz":false},"insertNulls":false,"lineInterpolation":"linear","lineWidth":2,"pointSize":5,"scaleDistribution":{"type":"linear"},"showPoints":"never","spanNulls":true,"stacking":{"group":"A","mode":"normal"},"thresholdsStyle":{"mode":"off"}},"links":[],"mappings":[],"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":80}]},"unit":"pps","unitScale":true},"overrides":[]},"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":24,"links":[],"options":{"legend":{"calcs":[],"displayMode":"list","placement":"bottom","showLegend":true},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"10.3.3","targets":[{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_cache_hits_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (type)","hide":false,"intervalFactor":2,"legendFormat":"hits:{{ type }}","refId":"A","step":40},{"datasource":{"uid":"$datasource"},"expr":"sum(rate(coredns_cache_misses_total{job=~\"$job\",cluster=~\"$cluster\",instance=~\"$instance\"}[5m])) by (type)","hide":false,"intervalFactor":2,"legendFormat":"misses","refId":"B","step":40}],"title":"Cache (hitrate)","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["dns","coredns"],"templating":{"list":[{"current":{},"hide":0,"includeAll":false,"multi":false,"name":"datasource","options":[],"query":"prometheus","queryValue":"","refresh":1,"regex":"","skipUrlSync":false,"type":"datasource"},{"allValue":".*","current":{"selected":false,"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"$datasource"},"definition":"label_values(coredns_dns_requests_total, cluster)","hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":true,"label":"Cluster","multi":false,"name":"cluster","options":[],"query":"label_values(coredns_dns_requests_total, cluster)","refresh":2,"regex":"","skipUrlSync":false,"sort":1,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false},{"allValue":".*","current":{"selected":false,"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"${datasource}"},"definition":"label_values(coredns_dns_requests_total{cluster=~\"$cluster\"},job)","hide":0,"includeAll":true,"label":"Job","multi":false,"name":"job","options":[],"query":{"qryType":1,"query":"label_values(coredns_dns_requests_total{cluster=~\"$cluster\"},job)","refId":"PrometheusVariableQueryEditor-VariableQuery"},"refresh":2,"regex":"","skipUrlSync":false,"sort":1,"type":"query"},{"allValue":".*","current":{"selected":false,"text":"All","value":"$__all"},"datasource":{"type":"prometheus","uid":"$datasource"},"definition":"label_values(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\"}, instance)","hide":0,"includeAll":true,"label":"Instance","multi":false,"name":"instance","options":[],"query":"label_values(coredns_dns_requests_total{job=~\"$job\",cluster=~\"$cluster\"}, instance)","refresh":2,"regex":"","skipUrlSync":false,"sort":3,"tagValuesQuery":"","tagsQuery":"","type":"query","useTags":false}]},"time":{"from":"now-3h","to":"now"},"timepicker":{"refresh_intervals":["10s","30s","1m","5m","15m","30m","1h","2h","1d"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"CoreDNS","uid":"vkQ0UHxik","version":3,"weekStart":""}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.coreDns.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-coredns" | trunc 63 | trimSuffix "-" }} + key: k8s-coredns.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml new file mode 100644 index 0000000..cb1fea0 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-cluster.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'k8s-resources-cluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-cluster.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}","instant":true}],"title":"CPU Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})","instant":true}],"title":"CPU Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":8,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})","instant":true}],"title":"CPU Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":12,"y":0},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})","instant":true}],"title":"Memory Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":16,"y":0},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})","instant":true}],"title":"Memory Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":20,"y":0},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})","instant":true}],"title":"Memory Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":6,"w":24,"x":0,"y":6},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\"})) by (namespace)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/85a562078cdf77779eaa1add43ccec1e?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":12},"id":8,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\"})) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\"})) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\"})) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time 7":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Time 7":6,"Value #A":8,"Value #B":9,"Value #C":10,"Value #D":11,"Value #E":12,"Value #F":13,"Value #G":14,"namespace":7},"renameByName":{"Value #A":"Pods","Value #B":"Workloads","Value #C":"CPU Usage","Value #D":"CPU Requests","Value #E":"CPU Requests %","Value #F":"CPU Limits","Value #G":"CPU Limits %","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":6,"w":24,"x":0,"y":18},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"})) by (namespace)","legendFormat":"__auto"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Memory Usage"},"properties":[{"id":"unit","value":"bytes"}]},{"matcher":{"id":"byName","options":"Memory Requests"},"properties":[{"id":"unit","value":"bytes"}]},{"matcher":{"id":"byName","options":"Memory Limits"},"properties":[{"id":"unit","value":"bytes"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/85a562078cdf77779eaa1add43ccec1e?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":24},"id":10,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"})) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"})) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", container!=\"\"})) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true}],"title":"Memory Requests by Namespace","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time 7":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Time 7":6,"Value #A":8,"Value #B":9,"Value #C":10,"Value #D":11,"Value #E":12,"Value #F":13,"Value #G":14,"namespace":7},"renameByName":{"Value #A":"Pods","Value #B":"Workloads","Value #C":"Memory Usage","Value #D":"Memory Requests","Value #E":"Memory Requests %","Value #F":"Memory Limits","Value #G":"Memory Limits %","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/85a562078cdf77779eaa1add43ccec1e?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":30},"id":11,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","format":"table","instant":true}],"title":"Current Network Usage","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"namespace":6},"renameByName":{"Value #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets Dropped","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":36},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":42},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":48},"id":14,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","legendFormat":"__auto"}],"title":"Average Container Bandwidth by Namespace: Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":54},"id":15,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"avg((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval]))) by (namespace)","legendFormat":"__auto"}],"title":"Average Container Bandwidth by Namespace: Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":6,"w":24,"x":0,"y":60},"id":16,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":6,"w":24,"x":0,"y":66},"id":17,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":6,"w":24,"x":0,"y":72},"id":18,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":6,"w":24,"x":0,"y":78},"id":19,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"iops"}},"gridPos":{"h":6,"w":24,"x":0,"y":84},"id":20,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"IOPS(Reads+Writes)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"Bps"}},"gridPos":{"h":6,"w":24,"x":0,"y":90},"id":21,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","legendFormat":"__auto"}],"title":"ThroughPut(Read+Write)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/IOPS/"},"properties":[{"id":"unit","value":"iops"}]},{"matcher":{"id":"byRegexp","options":"/Throughput/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/85a562078cdf77779eaa1add43ccec1e?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":96},"id":22,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(namespace) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(namespace) (rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(namespace) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))","format":"table","instant":true}],"title":"Current Storage IO","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"namespace":6},"renameByName":{"Value #A":"IOPS(Reads)","Value #B":"IOPS(Writes)","Value #C":"IOPS(Reads + Writes)","Value #D":"Throughput(Read)","Value #E":"Throughput(Write)","Value #F":"Throughput(Read + Write)","namespace":"Namespace"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Cluster","uid":"efa86fd1d0c121a26444b636a3f509a8"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-cluster" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-cluster.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml new file mode 100644 index 0000000..8bc5295 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-multicluster.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'k8s-resources-multicluster' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-multicluster" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-multicluster.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(cluster:node_cpu:ratio_rate5m) / count(cluster:node_cpu:ratio_rate5m)","instant":true}],"title":"CPU Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"cpu\"})","instant":true}],"title":"CPU Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":8,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"cpu\"})","instant":true}],"title":"CPU Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":12,"y":0},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 - sum(:node_memory_MemAvailable_bytes:sum) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\"})","instant":true}],"title":"Memory Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":16,"y":0},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"memory\"})","instant":true}],"title":"Memory Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":20,"y":0},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\", resource=\"memory\"})","instant":true}],"title":"Memory Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"}}},"gridPos":{"h":7,"w":24,"x":0,"y":1},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) by (cluster)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Cluster"},"properties":[{"id":"links","value":[{"title":"Drill down","url":"/d/efa86fd1d0c121a26444b636a3f509a8?${datasource:queryparam}&var-cluster=${__data.fields.Cluster}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":2},"id":8,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m)) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"cpu\"}) by (cluster)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"cluster","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"cluster":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","cluster":"Cluster"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":3},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\"})) by (cluster)","legendFormat":"__auto"}],"title":"Memory Usage (w/o cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Cluster"},"properties":[{"id":"links","value":[{"title":"Drill down","url":"/d/efa86fd1d0c121a26444b636a3f509a8?${datasource:queryparam}&var-cluster=${__data.fields.Cluster}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":4},"id":10,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\"})) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\"})) by (cluster) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\"})) by (cluster) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", resource=\"memory\"}) by (cluster)","format":"table","instant":true}],"title":"Memory Requests by Cluster","transformations":[{"id":"joinByField","options":{"byField":"cluster","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"cluster":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","cluster":"Cluster"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Multi-Cluster","uid":"b59e6c9f2fcbe2e16d77fc492374cc4f"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-multicluster" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-multicluster" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-multicluster.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml new file mode 100644 index 0000000..b4ef1c0 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-namespace.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'k8s-resources-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-namespace.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})","instant":true}],"title":"CPU Utilisation (from requests)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":6,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})","instant":true}],"title":"CPU Utilisation (from limits)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":12,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"})) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","instant":true}],"title":"Memory Utilisation (from requests)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":18,"y":0},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"})) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","instant":true}],"title":"Memory Utilisation (from limits)","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"}))","legendFormat":"quota - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"}))","legendFormat":"quota - limits"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":6,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})) by (pod)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"}))","legendFormat":"quota - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"}))","legendFormat":"quota - limits"}],"title":"Memory Usage (w/o cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":8,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_cache{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_swap{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"})) by (pod)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time 7":true,"Time 8":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Time 7":6,"Time 8":7,"Value #A":9,"Value #B":10,"Value #C":11,"Value #D":12,"Value #E":13,"Value #F":14,"Value #G":15,"Value #H":16,"pod":8},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","Value #F":"Memory Usage (RSS)","Value #G":"Memory Usage (Cache)","Value #H":"Memory Usage (Swap)","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":9,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","format":"table","instant":true}],"title":"Current Network Usage","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"pod":6},"renameByName":{"Value #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets Dropped","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))) by (pod)","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))) by (pod)","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":56},"id":14,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":56},"id":15,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"iops"}},"gridPos":{"h":7,"w":12,"x":0,"y":63},"id":16,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"IOPS(Reads+Writes)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"Bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":63},"id":17,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","legendFormat":"__auto"}],"title":"ThroughPut(Read+Write)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/IOPS/"},"properties":[{"id":"unit","value":"iops"}]},{"matcher":{"id":"byRegexp","options":"/Throughput/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":70},"id":18,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))","format":"table","instant":true}],"title":"Current Storage IO","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"pod":6},"renameByName":{"Value #A":"IOPS(Reads)","Value #B":"IOPS(Writes)","Value #C":"IOPS(Reads + Writes)","Value #D":"Throughput(Read)","Value #E":"Throughput(Write)","Value #F":"Throughput(Read + Write)","pod":"Pod"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Namespace (Pods)","uid":"85a562078cdf77779eaa1add43ccec1e"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-namespace" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-namespace.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-node.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-node.yaml new file mode 100644 index 0000000..42fd63e --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-node.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'k8s-resources-node' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-node.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true,"stacking":{"mode":"normal"}}},"overrides":[{"matcher":{"id":"byName","options":"max capacity"},"properties":[{"id":"color","value":{"fixedColor":"red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}}]},{"matcher":{"id":"byName","options":"max allocatable"},"properties":[{"id":"color","value":{"fixedColor":"super-light-red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}},{"id":"custom.fillOpacity","value":0}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"cpu\"})","legendFormat":"max capacity"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_allocatable{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"cpu\"})","legendFormat":"max allocatable"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","legendFormat":"{{pod}}"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":6},"id":2,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", node=~\"$node\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", node=~\"$node\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true,"stacking":{"mode":"normal"}},"unit":"bytes"},"overrides":[{"matcher":{"id":"byName","options":"max capacity"},"properties":[{"id":"color","value":{"fixedColor":"red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}}]},{"matcher":{"id":"byName","options":"max allocatable"},"properties":[{"id":"color","value":{"fixedColor":"super-light-red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}},{"id":"custom.fillOpacity","value":0}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":12},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})","legendFormat":"max capacity"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_allocatable{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})","legendFormat":"max allocatable"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"})) by (pod)","legendFormat":"{{pod}}"}],"title":"Memory Usage (w/cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true,"stacking":{"mode":"normal"}},"unit":"bytes"},"overrides":[{"matcher":{"id":"byName","options":"max capacity"},"properties":[{"id":"color","value":{"fixedColor":"red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}}]},{"matcher":{"id":"byName","options":"max allocatable"},"properties":[{"id":"color","value":{"fixedColor":"super-light-red","mode":"fixed"}},{"id":"custom.stacking","value":{"mode":"none"}},{"id":"custom.hideFrom","value":{"legend":false,"tooltip":true,"viz":false}},{"id":"custom.lineStyle","value":{"dash":[10,10],"fill":"dash"}},{"id":"custom.fillOpacity","value":0}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":18},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_capacity{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})","legendFormat":"max capacity"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_node_status_allocatable{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})","legendFormat":"max allocatable"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\", container!=\"\"})) by (pod)","legendFormat":"{{pod}}"}],"title":"Memory Usage (w/o cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":6,"w":24,"x":0,"y":24},"id":5,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"})) by (pod) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"})) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", node=~\"$node\",container!=\"\"})) by (pod)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time 7":true,"Time 8":true},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","Value #F":"Memory Usage (RSS)","Value #G":"Memory Usage (Cache)","Value #H":"Memory Usage (Swap)","pod":"Pod"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"node","multi":true,"name":"node","query":"label_values(kube_node_info{cluster=\"$cluster\"}, node)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Node (Pods)","uid":"200ac8fdbfbb74b39aff88118e4d1c2c"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-node" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-node.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml new file mode 100644 index 0000000..c12f03f --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-pod.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'k8s-resources-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-pod.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\", container!=\"\"})) by (container)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n","legendFormat":"requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n","legendFormat":"limits"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"axisColorMode":"thresholds","axisSoftMax":1,"axisSoftMin":0,"fillOpacity":10,"showPoints":"never","spanNulls":true,"thresholdsStyle":{"mode":"dashed+area"}},"unit":"percentunit"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"A"},"properties":[{"id":"thresholds","value":{"mode":"absolute","steps":[{"color":"green","value":null},{"color":"red","value":0.25}]}},{"id":"color","value":{"mode":"thresholds","seriesBy":"lastNotNull"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(increase(container_cpu_cfs_throttled_periods_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) /sum(increase(container_cpu_cfs_periods_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)","legendFormat":"__auto"}],"title":"CPU Throttling","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\", container!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"})) by (container)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"container":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"})) by (container)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n","legendFormat":"requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n","legendFormat":"limits"}],"title":"Memory Usage (WSS)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"})) by (container) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"})) by (container) / sum(max by (cluster, namespace, pod, container)(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_rss{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_cache{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"})) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(max by (cluster, namespace, pod, container)(container_memory_swap{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"})) by (container)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time 7":true,"Time 8":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Time 7":6,"Time 8":7,"Value #A":9,"Value #B":10,"Value #C":11,"Value #D":12,"Value #E":13,"Value #F":14,"Value #G":15,"Value #H":16,"container":8},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","Value #F":"Memory Usage (RSS)","Value #G":"Memory Usage (Cache)","Value #H":"Memory Usage (Swap)","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":35},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * irate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) by (pod)","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":35},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) by (pod)","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"iops"}},"gridPos":{"h":7,"w":12,"x":0,"y":56},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum by(pod) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","legendFormat":"Reads"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum by(pod) (rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\",namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","legendFormat":"Writes"}],"title":"IOPS (Pod)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"Bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":56},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))","legendFormat":"Reads"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(pod) (rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))","legendFormat":"Writes"}],"title":"ThroughPut (Pod)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"iops"}},"gridPos":{"h":7,"w":12,"x":0,"y":63},"id":14,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"ceil(sum by(container) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"IOPS (Containers)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"Bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":63},"id":15,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","legendFormat":"__auto"}],"title":"ThroughPut (Containers)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/IOPS/"},"properties":[{"id":"unit","value":"iops"}]},{"matcher":{"id":"byRegexp","options":"/Throughput/"},"properties":[{"id":"unit","value":"Bps"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":70},"id":16,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(container) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(container) (rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\",device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(container) (rate(container_fs_reads_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(container) (rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by(container) (rate(container_fs_reads_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))","format":"table","instant":true}],"title":"Current Storage IO","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"container":6},"renameByName":{"Value #A":"IOPS(Reads)","Value #B":"IOPS(Writes)","Value #C":"IOPS(Reads + Writes)","Value #D":"Throughput(Read)","Value #E":"Throughput(Write)","Value #F":"Throughput(Read + Write)","container":"Container"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"pod","name":"pod","query":"label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Pod","uid":"6581e46e4e5c7ba40a07646395ef7b23"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-pod" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-pod.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml new file mode 100644 index 0000000..4144d9e --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-cluster.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'k8s-resources-windows-cluster' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-cluster" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-windows-cluster.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 - avg(rate(windows_cpu_time_total{cluster=\"$cluster\", job=\"windows-exporter\", mode=\"idle\"}[$__rate_interval]))","instant":true}],"title":"CPU Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})","instant":true}],"title":"CPU Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":8,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) / sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"})","instant":true}],"title":"CPU Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":12,"y":0},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":16,"y":0},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":20,"y":0},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) / sum(:windows_node_memory_MemTotal_bytes:sum{cluster=\"$cluster\"})","instant":true}],"title":"Memory Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":8,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"namespace":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","namespace":"Namespace"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)","legendFormat":"__auto"}],"title":"Memory Usage (Private Working Set)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Memory Usage"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Memory Requests"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Memory Limits"},"properties":[{"id":"unit","value":"decbytes"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":10,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\"}) by (namespace)","format":"table","instant":true}],"title":"Memory Requests by Namespace","transformations":[{"id":"joinByField","options":{"byField":"namespace","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"namespace":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","namespace":"Namespace"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Cluster(Windows)","uid":"4d08557fd9391b100730f2494bccac68"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-cluster" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-cluster" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-windows-cluster.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml new file mode 100644 index 0000000..a5c099e --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-namespace.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'k8s-resources-windows-namespace' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-namespace" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-windows-namespace.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/40597a704a610e936dc6ed374a7ce023?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","legendFormat":"__auto"}],"title":"Memory Usage (Private Working Set)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/40597a704a610e936dc6ed374a7ce023?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","pod":"Pod"}}}],"type":"table"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Namespace(Windows)","uid":"490b402361724ab1d4c45666c1fa9b6f"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-namespace" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-namespace" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-windows-namespace.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml new file mode 100644 index 0000000..376850b --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-windows-pod.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'k8s-resources-windows-pod' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-pod" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-windows-pod.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Namespace"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/490b402361724ab1d4c45666c1fa9b6f?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"container":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"decbytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","legendFormat":"__auto"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(windows_container_private_working_set_usage{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"container","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"container":5},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","container":"Container"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","legendFormat":"Received : {{ container }}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))","legendFormat":"Transmitted : {{ container }}"}],"title":"Network I/O","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(windows_pod_container_available{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"pod","name":"pod","query":"label_values(windows_pod_container_available{cluster=\"$cluster\",namespace=\"$namespace\"}, pod)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Pod(Windows)","uid":"40597a704a610e936dc6ed374a7ce023"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-pod" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-windows-pod" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-windows-pod.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml new file mode 100644 index 0000000..bef3c7c --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workload.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'k8s-resources-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-workload.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","legendFormat":"__auto"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value #C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"CPU Usage","Value #B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value #E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","legendFormat":"__auto"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":9,"Value #B":10,"Value #C":11,"Value #D":12,"Value #E":13,"pod":8},"renameByName":{"Value #A":"Memory Usage","Value #B":"Memory Requests","Value #C":"Memory Requests %","Value #D":"Memory Limits","Value #E":"Memory Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","format":"table","instant":true}],"title":"Current Network Usage","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"pod":6},"renameByName":{"Value #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets Dropped","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":35},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":35},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(avg((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Average Container Bandwidth by Pod: Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(avg((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Average Container Bandwidth by Pod: Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":56},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":56},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload_type","name":"type","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload","name":"workload","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}, workload)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Workload","uid":"a164a7f0339f99e89cea5cb47e9be617"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workload" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-workload.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml new file mode 100644 index 0000000..c3e4d76 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-resources-workloads-namespace.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'k8s-resources-workloads-namespace' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-resources-workloads-namespace.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","legendFormat":"{{workload}} - {{workload_type}}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.cpu|cpu\"}))","legendFormat":"quota - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.cpu\"}))","legendFormat":"quota - limits"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill down to workloads","url":"/d/a164a7f0339f99e89cea5cb47e9be617?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]},{"matcher":{"id":"byName","options":"Running Pods"},"properties":[{"id":"unit","value":"none"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{cluster=\"$cluster\", namespace=\"$namespace\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true}],"title":"CPU Quota","transformations":[{"id":"joinByField","options":{"byField":"workload","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"workload_type 2":true,"workload_type 3":true,"workload_type 4":true,"workload_type 5":true,"workload_type 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":8,"Value #B":9,"Value #C":10,"Value #D":11,"Value #E":12,"Value #F":13,"workload":6,"workload_type 1":7,"workload_type 2":14,"workload_type 3":15,"workload_type 4":16,"workload_type 5":17,"workload_type 6":18},"renameByName":{"Value #A":"Running Pods","Value #B":"CPU Usage","Value #C":"CPU Requests","Value #D":"CPU Requests %","Value #E":"CPU Limits","Value #F":"CPU Limits %","workload":"Workload","workload_type 1":"Type"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","legendFormat":"{{workload}} - {{workload_type}}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.memory|memory\"}))","legendFormat":"quota - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.memory\"}))","legendFormat":"quota - limits"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill down to workloads","url":"/d/a164a7f0339f99e89cea5cb47e9be617?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]},{"matcher":{"id":"byName","options":"Running Pods"},"properties":[{"id":"unit","value":"none"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n max by (cluster, namespace, pod, container)(container_memory_working_set_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"})\n * on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n max by (cluster, namespace, pod, container)(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})\n* on(cluster, namespace, pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","format":"table","instant":true}],"title":"Memory Quota","transformations":[{"id":"joinByField","options":{"byField":"workload","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"workload_type 2":true,"workload_type 3":true,"workload_type 4":true,"workload_type 5":true,"workload_type 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":8,"Value #B":9,"Value #C":10,"Value #D":11,"Value #E":12,"Value #F":13,"workload":6,"workload_type 1":7,"workload_type 2":14,"workload_type 3":15,"workload_type 4":16,"workload_type 5":17,"workload_type 6":18},"renameByName":{"Value #A":"Running Pods","Value #B":"Memory Usage","Value #C":"Memory Requests","Value #D":"Memory Requests %","Value #E":"Memory Limits","Value #F":"Memory Limits %","workload":"Workload","workload_type 1":"Type"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill down to workloads","url":"/d/a164a7f0339f99e89cea5cb47e9be617?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":5,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n","format":"table","instant":true}],"title":"Current Network Usage","transformations":[{"id":"joinByField","options":{"byField":"workload","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"workload":6},"renameByName":{"Value #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets Dropped","workload":"Workload"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":35},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":35},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(avg((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Average Container Bandwidth by Workload: Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(avg((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Average Container Bandwidth by Workload: Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":0,"y":56},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":7,"w":12,"x":12,"y":56},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"kube-state-metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload_type","name":"type","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Compute Resources / Namespace (Workloads)","uid":"a87fb0d919ec0ea5f6543124e16c42a5"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-resources-workloads-namespace" | trunc 63 | trimSuffix "-" }} + key: k8s-resources-workloads-namespace.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-windows-cluster-rsrc-use.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-windows-cluster-rsrc-use.yaml new file mode 100644 index 0000000..508cf05 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-windows-cluster-rsrc-use.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'k8s-windows-cluster-rsrc-use' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-windows-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-windows-cluster-rsrc-use.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\"} * node:windows_node_num_cpu:sum{cluster=\"$cluster\"} / scalar(sum(node:windows_node_num_cpu:sum{cluster=\"$cluster\"}))","legendFormat":"{{instance}}"}],"title":"CPU Utilisation","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_memory_utilisation:ratio{cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Memory Utilisation","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Memory Saturation (Swap I/O Pages)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\"} / scalar(node:windows_node:sum{cluster=\"$cluster\"})","legendFormat":"{{instance}}"}],"title":"Disk IO Utilisation","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":21},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Net Utilisation (Transmitted)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":21},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\"}","legendFormat":"{{instance}}"}],"title":"Net Utilisation (Dropped)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (instance)(node:windows_node_filesystem_usage:{cluster=\"$cluster\"})","legendFormat":"{{instance}}"}],"title":"Disk Capacity","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / USE Method / Cluster(Windows)","uid":"53a43377ec9aaf2ff64dfc7a1f539334"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-windows-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-windows-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} + key: k8s-windows-cluster-rsrc-use.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-windows-node-rsrc-use.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-windows-node-rsrc-use.yaml new file mode 100644 index 0000000..d6b90f8 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/k8s-windows-node-rsrc-use.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'k8s-windows-node-rsrc-use' from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-windows-node-rsrc-use" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + k8s-windows-node-rsrc-use.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_cpu_utilisation:avg1m{cluster=\"$cluster\", instance=\"$instance\"}","legendFormat":"Utilisation"}],"title":"CPU Utilisation","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (core) (irate(windows_cpu_time_total{cluster=\"$cluster\", job=\"windows-exporter\", mode!=\"idle\", instance=\"$instance\"}[$__rate_interval]))","legendFormat":"{{core}}"}],"title":"CPU Usage Per Core","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":8,"x":0,"y":7},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_memory_utilisation:{cluster=\"$cluster\", instance=\"$instance\"}","legendFormat":"Memory"}],"title":"Memory Utilisation %","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":8,"y":7},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max(\n windows_os_visible_memory_bytes{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}\n - windows_memory_available_bytes{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}\n)\n","legendFormat":"memory used"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max(node:windows_node_memory_totalCached_bytes:sum{cluster=\"$cluster\", instance=\"$instance\"})","legendFormat":"memory cached"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max(windows_memory_available_bytes{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"})","legendFormat":"memory free"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":7},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_memory_swap_io_pages:irate{cluster=\"$cluster\", instance=\"$instance\"}","legendFormat":"Swap IO"}],"title":"Memory Saturation (Swap I/O) Pages","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":14},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_disk_utilisation:avg_irate{cluster=\"$cluster\", instance=\"$instance\"}","legendFormat":"Utilisation"}],"title":"Disk IO Utilisation","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/io time/"},"properties":[{"id":"unit","value":"ms"}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":14},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max(rate(windows_logical_disk_read_bytes_total{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}[$__rate_interval]))","legendFormat":"read"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max(rate(windows_logical_disk_write_bytes_total{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}[$__rate_interval]))","legendFormat":"written"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max(rate(windows_logical_disk_read_seconds_total{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}[$__rate_interval]) + rate(windows_logical_disk_write_seconds_total{cluster=\"$cluster\", job=\"windows-exporter\", instance=\"$instance\"}[$__rate_interval]))","legendFormat":"io time"}],"title":"Disk IO","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"percentunit"}},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_filesystem_usage:{cluster=\"$cluster\", instance=\"$instance\"}","legendFormat":"{{volume}}"}],"title":"Disk Utilisation","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_net_utilisation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}","legendFormat":"Utilisation"}],"title":"Net Utilisation (Transmitted)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"node:windows_node_net_saturation:sum_irate{cluster=\"$cluster\", instance=\"$instance\"}","legendFormat":"Saturation"}],"title":"Net Saturation (Dropped)","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"windows-exporter\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"instance","name":"instance","query":"label_values(windows_system_boot_time_timestamp_seconds{cluster=\"$cluster\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / USE Method / Node(Windows)","uid":"96e7484b0bb53b74fbc2bcb7723cd40b"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.windowsMonitoring.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-windows-node-rsrc-use" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "k8s-windows-node-rsrc-use" | trunc 63 | trimSuffix "-" }} + key: k8s-windows-node-rsrc-use.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml new file mode 100644 index 0000000..40c2e18 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/kubelet.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'kubelet' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubelet.enabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + kubelet.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kubelet_node_name{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\"})","instant":true}],"title":"Running Kubelets","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kubelet_running_pods{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\"})","instant":true}],"title":"Running Pods","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":8,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kubelet_running_containers{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", container_state=\"running\", instance=~\"$instance\"})","instant":true}],"title":"Running Containers","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":12,"y":0},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\", state=\"actual_state_of_world\"})","instant":true}],"title":"Actual Volume Count","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":16,"y":0},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\",state=\"desired_state_of_world\"})","instant":true}],"title":"Desired Volume Count","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":20,"y":0},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval]))","instant":true}],"title":"Config Error Count","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)","legendFormat":"{{instance}} {{operation_type}}"}],"title":"Operation Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)","legendFormat":"{{instance}} {{operation_type}}"}],"title":"Operation Error Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))","legendFormat":"{{instance}} {{operation_type}}"}],"title":"Operation Duration 99th quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":21},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} pod"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}} worker"}],"title":"Pod Start Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":21},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}} pod"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}} worker"}],"title":"Pod Start Duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":28},"id":12,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)","legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}"}],"title":"Storage Operation Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":12,"y":28},"id":13,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)","legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}"}],"title":"Storage Operation Error Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":14,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))","legendFormat":"{{instance}} {{operation_name}} {{volume_plugin}}"}],"title":"Storage Operation Duration 99th quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":42},"id":15,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)","legendFormat":"{{operation_type}}"}],"title":"Cgroup manager operation rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":42},"id":16,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))","legendFormat":"{{instance}} {{operation_type}}"}],"title":"Cgroup manager 99th quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":49},"id":17,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance)","legendFormat":"{{instance}}"}],"title":"PLEG relist rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":49},"id":18,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}}"}],"title":"PLEG relist interval","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":56},"id":19,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}}"}],"title":"PLEG relist duration","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":24,"x":0,"y":63},"id":20,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","legendFormat":"2xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","legendFormat":"3xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","legendFormat":"4xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","legendFormat":"5xx"}],"title":"RPC rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":70},"id":21,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, le))","legendFormat":"{{instance}} {{verb}}"}],"title":"Request duration 99th quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":77},"id":22,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":77},"id":23,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":77},"id":24,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{cluster=\"$cluster\",job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"instance","name":"instance","query":"label_values(up{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\",cluster=\"$cluster\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Kubelet","uid":"3138fa155d5915769fbded898ac09fd9"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubelet.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "kubelet" | trunc 63 | trimSuffix "-" }} + key: kubelet.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml new file mode 100644 index 0000000..7b2f127 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-pod.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'namespace-by-pod' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + namespace-by-pod.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"displayName":"$namespace","max":10000000000,"min":0,"thresholds":{"steps":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}]},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Current Rate of Bits Received","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"displayName":"$namespace","max":10000000000,"min":0,"thresholds":{"steps":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}]},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Current Rate of Bits Transmitted","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bandwidth/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill down","url":"/d/7a18067ce943a40ae25454675c19ff5c?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${namespace}&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":9,"w":24,"x":0,"y":9},"id":3,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","format":"table","instant":true}],"title":"Current Network Usage","transformations":[{"id":"joinByField","options":{"byField":"pod","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Value #A":7,"Value #B":8,"Value #C":9,"Value #D":10,"Value #E":11,"Value #F":12,"pod":6},"renameByName":{"Value #A":"Current Receive Bandwidth","Value #B":"Current Transmit Bandwidth","Value #C":"Rate of Received Packets","Value #D":"Rate of Transmitted Packets","Value #E":"Rate of Received Packets Dropped","Value #F":"Rate of Transmitted Packets Dropped","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":36},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":36},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum by (pod) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"namespace","name":"namespace","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Networking / Namespace (Pods)","uid":"8b7a8b326d7a6f1f04244066368c67af"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-pod" | trunc 63 | trimSuffix "-" }} + key: namespace-by-pod.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml new file mode 100644 index 0000000..e9df3a4 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/namespace-by-workload.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'namespace-by-workload' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + namespace-by-workload.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Current Rate of Bits Received","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Current Rate of Bits Transmitted","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/Bits/"},"properties":[{"id":"unit","value":"bps"}]},{"matcher":{"id":"byRegexp","options":"/Packets/"},"properties":[{"id":"unit","value":"pps"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill down","url":"/d/728bf77cc1166d2f3133bf25846876cc?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${namespace}&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]}]},"gridPos":{"h":9,"w":24,"x":0,"y":9},"id":3,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum by (workload, workload_type) (\n sum by (cluster, namespace, pod) (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * on (cluster, namespace, pod)\n topk by (cluster, namespace, pod) (\n 1,\n max by (cluster, namespace, pod) (kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"})\n )\n * on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum by (workload, workload_type) (\n sum by (cluster, namespace, pod) (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * on (cluster, namespace, pod)\n topk by (cluster, namespace, pod) (\n 1,\n max by (cluster, namespace, pod) (kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"})\n )\n * on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n avg by (workload, workload_type) (\n sum by (cluster, namespace, pod) (8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * on (cluster, namespace, pod)\n topk by (cluster, namespace, pod) (\n 1,\n max by (cluster, namespace, pod) (kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"})\n )\n * on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n avg by (workload, workload_type) (\n sum by (cluster, namespace, pod) (8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * on (cluster, namespace, pod)\n topk by (cluster, namespace, pod) (\n 1,\n max by (cluster, namespace, pod) (kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"})\n )\n * on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum by (workload, workload_type) (\n sum by (cluster, namespace, pod) (1 * rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * on (cluster, namespace, pod)\n topk by (cluster, namespace, pod) (\n 1,\n max by (cluster, namespace, pod) (kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"})\n )\n * on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum by (workload, workload_type) (\n sum by (cluster, namespace, pod) (1 * rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * on (cluster, namespace, pod)\n topk by (cluster, namespace, pod) (\n 1,\n max by (cluster, namespace, pod) (kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"})\n )\n * on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum by (workload, workload_type) (\n sum by (cluster, namespace, pod) (1 * rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * on (cluster, namespace, pod)\n topk by (cluster, namespace, pod) (\n 1,\n max by (cluster, namespace, pod) (kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"})\n )\n * on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(\n sum by (workload, workload_type) (\n sum by (cluster, namespace, pod) (1 * rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n * on (cluster, namespace, pod)\n topk by (cluster, namespace, pod) (\n 1,\n max by (cluster, namespace, pod) (kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"})\n )\n * on (cluster, namespace, pod) group_left (workload, workload_type)\n namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}\n )\n)\n","format":"table","instant":true}],"title":"Current Status","transformations":[{"id":"joinByField","options":{"byField":"workload","mode":"outer"}},{"id":"organize","options":{"excludeByName":{"Time":true,"Time 1":true,"Time 2":true,"Time 3":true,"Time 4":true,"Time 5":true,"Time 6":true,"Time 7":true,"Time 8":true,"workload_type 2":true,"workload_type 3":true,"workload_type 4":true,"workload_type 5":true,"workload_type 6":true,"workload_type 7":true,"workload_type 8":true},"indexByName":{"Time 1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Time 6":5,"Time 7":6,"Time 8":7,"Value #A":10,"Value #B":11,"Value #C":12,"Value #D":13,"Value #E":14,"Value #F":15,"Value #G":16,"Value #H":17,"workload":8,"workload_type 1":9,"workload_type 2":18,"workload_type 3":19,"workload_type 4":20,"workload_type 5":21,"workload_type 6":22,"workload_type 7":23,"workload_type 8":24},"renameByName":{"Value #A":"Rx Bits","Value #B":"Tx Bits","Value #C":"Rx Bits (Avg)","Value #D":"Tx Bits (Avg)","Value #E":"Rx Packets","Value #F":"Tx Packets","Value #G":"Rx Packets Dropped","Value #H":"Tx Packets Dropped","workload":"Workload","workload_type 1":"Type"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(avg((8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Average Container Bandwidth by Workload: Received","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(avg((8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval]))\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Average Container Bandwidth by Workload: Transmitted","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":36},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":36},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":45},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":45},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"namespace","name":"namespace","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload_type","name":"type","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Networking / Namespace (Workload)","uid":"bbb2a765a623ae38130206c7d94a160f"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "namespace-by-workload" | trunc 63 | trimSuffix "-" }} + key: namespace-by-workload.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml new file mode 100644 index 0000000..18c79c2 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-cluster-rsrc-use.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'node-cluster-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + node-cluster-rsrc-use.json: |- + {{`{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"((\n instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"}\n *\n instance:node_num_cpu:sum{job=\"node-exporter\", cluster=~\"$cluster\"}\n) != 0 )\n/ scalar(sum(instance:node_num_cpu:sum{job=\"node-exporter\", cluster=~\"$cluster\"}))\n","legendFormat":"{{ instance }}"}],"title":"CPU Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=~\"$cluster\"}\n / scalar(count(instance:node_load1_per_cpu:ratio{job=\"node-exporter\", cluster=~\"$cluster\"}))\n) != 0\n","legendFormat":"{{ instance }}"}],"title":"CPU Saturation (Load1 per CPU)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"panels":[],"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":5,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=~\"$cluster\"}\n / scalar(count(instance:node_memory_utilisation:ratio{job=\"node-exporter\", cluster=~\"$cluster\"}))\n) != 0\n","legendFormat":"{{ instance }}"}],"title":"Memory Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"rds"}},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":6,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"}","legendFormat":"{{ instance }}"}],"title":"Memory Saturation (Major Page Faults)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":16},"id":7,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"Bps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/Transmit/"},"properties":[{"id":"custom.transform","value":"negative-Y"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":17},"id":8,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_receive_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"} != 0","legendFormat":"{{ instance }} Receive"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_transmit_bytes_excluding_lo:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"} != 0","legendFormat":"{{ instance }} Transmit"}],"title":"Network Utilisation (Bytes Receive/Transmit)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"Bps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/Transmit/"},"properties":[{"id":"custom.transform","value":"negative-Y"}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":17},"id":9,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_receive_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"} != 0","legendFormat":"{{ instance }} Receive"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_transmit_drop_excluding_lo:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"} != 0","legendFormat":"{{ instance }} Transmit"}],"title":"Network Saturation (Drops Receive/Transmit)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":24},"id":10,"panels":[],"title":"Disk IO","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":25},"id":11,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"}\n/ scalar(count(instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"}))\n","legendFormat":"{{ instance }} {{device}}"}],"title":"Disk IO Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":25},"id":12,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"}\n/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", cluster=~\"$cluster\"}))\n","legendFormat":"{{ instance }} {{device}}"}],"title":"Disk IO Saturation","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":13,"panels":[],"title":"Disk Space","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":14,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum without (device) (\n max without (fstype, mountpoint) ((\n node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=~\"$cluster\"}\n -\n node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=~\"$cluster\"}\n ) != 0)\n)\n/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", mountpoint!=\"\", cluster=~\"$cluster\"})))\n","legendFormat":"{{ instance }}"}],"title":"Disk Space Utilisation","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":true,"name":"cluster","query":"label_values(node_time_seconds, cluster)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Node Exporter / USE Method / Cluster","uid":"3e97d1d02672cdd0861f4c97c64f89b2"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-cluster-rsrc-use" | trunc 63 | trimSuffix "-" }} + key: node-cluster-rsrc-use.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml new file mode 100644 index 0000000..ca8dc19 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/node-rsrc-use.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'node-rsrc-use' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + node-rsrc-use.json: |- + {{`{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_cpu_utilisation:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Utilisation"}],"title":"CPU Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_load1_per_cpu:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Saturation"}],"title":"CPU Saturation (Load1 per CPU)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"panels":[],"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":5,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_memory_utilisation:ratio{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Utilisation"}],"title":"Memory Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"rds"}},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":6,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_vmstat_pgmajfault:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Major page Faults"}],"title":"Memory Saturation (Major Page Faults)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":16},"id":7,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"Bps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/Transmit/"},"properties":[{"id":"custom.transform","value":"negative-Y"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":17},"id":8,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_receive_bytes_physical:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Receive"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_transmit_bytes_physical:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Transmit"}],"title":"Network Utilisation (Bytes Receive/Transmit)","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"Bps"},"overrides":[{"matcher":{"id":"byRegexp","options":"/Transmit/"},"properties":[{"id":"custom.transform","value":"negative-Y"}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":17},"id":9,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_receive_drop_physical:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Receive"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance:node_network_transmit_drop_physical:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"Transmit"}],"title":"Network Saturation (Drops Receive/Transmit)","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":24},"id":10,"panels":[],"title":"Disk IO","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":25},"id":11,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance_device:node_disk_io_time_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"{{device}}"}],"title":"Disk IO Utilisation","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":12,"y":25},"id":12,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"instance_device:node_disk_io_time_weighted_seconds:rate5m{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} != 0","legendFormat":"{{device}}"}],"title":"Disk IO Saturation","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":34},"id":13,"panels":[],"title":"Disk Space","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"percentunit"}},"gridPos":{"h":7,"w":24,"x":0,"y":35},"id":14,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sort_desc(1 -\n (\n max without (mountpoint, fstype) (node_filesystem_avail_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=~\"$cluster\"})\n /\n max without (mountpoint, fstype) (node_filesystem_size_bytes{job=\"node-exporter\", fstype!=\"\", instance=\"$instance\", cluster=~\"$cluster\"})\n ) != 0\n)\n","legendFormat":"{{device}}"}],"title":"Disk Space Utilisation","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":true,"name":"cluster","query":"label_values(node_time_seconds, cluster)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"name":"instance","query":"label_values(node_exporter_build_info{job=\"node-exporter\", cluster=~\"$cluster\"}, instance)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Node Exporter / USE Method / Node","uid":"fac67cfbe174d3ef53eb473d73d9212f"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "node-rsrc-use" | trunc 63 | trimSuffix "-" }} + key: node-rsrc-use.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-aix.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-aix.yaml new file mode 100644 index 0000000..79f372b --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-aix.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'nodes-aix' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (and (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) .Values.nodeExporter.operatingSystems.aix.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes-aix" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + nodes-aix.json: |- + {{`{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"max":1,"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\", cluster=~\"$cluster\"})\n)\n","intervalFactor":5,"legendFormat":"{{cpu}}"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load1{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"1m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load5{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"5m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load15{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"15m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", mode=\"idle\"})","legendFormat":"logical cores"}],"title":"Load Average","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"none"}},"min":0,"unit":"bytes"}},"gridPos":{"h":7,"w":18,"x":0,"y":9},"id":5,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"Physical Memory"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} -\n node_memory_available_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}\n)\n","legendFormat":"Memory Used"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"max":100,"min":0,"thresholds":{"steps":[{"color":"rgba(50, 172, 45, 0.97)"},{"color":"rgba(237, 129, 40, 0.89)","value":80},{"color":"rgba(245, 54, 54, 0.9)","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":6,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"100 -\n(\n avg(node_memory_available_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}) /\n avg(node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"})\n * 100\n)\n"}],"title":"Memory Usage","type":"gauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":18},"id":7,"panels":[],"title":"Disk","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0},"overrides":[{"matcher":{"id":"byRegexp","options":"/ read| written/"},"properties":[{"id":"unit","value":"Bps"}]},{"matcher":{"id":"byRegexp","options":"/ io time/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":19},"id":8,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} read"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} written"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} io time"}],"title":"Disk I/O","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"thresholds":{"steps":[{"color":"green"},{"color":"yellow","value":0.8},{"color":"red","value":0.9}]},"unit":"decbytes"},"overrides":[{"matcher":{"id":"byName","options":"Mounted on"},"properties":[{"id":"custom.width","value":260}]},{"matcher":{"id":"byName","options":"Size"},"properties":[{"id":"custom.width","value":93}]},{"matcher":{"id":"byName","options":"Used"},"properties":[{"id":"custom.width","value":72}]},{"matcher":{"id":"byName","options":"Available"},"properties":[{"id":"custom.width","value":88}]},{"matcher":{"id":"byName","options":"Used, %"},"properties":[{"id":"unit","value":"percentunit"},{"id":"custom.cellOptions","value":{"type":"gauge"}},{"id":"max","value":1},{"id":"min","value":0}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":19},"id":9,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""}],"title":"Disk Space Usage","transformations":[{"id":"groupBy","options":{"fields":{"Value #A":{"aggregations":["lastNotNull"],"operation":"aggregate"},"Value #B":{"aggregations":["lastNotNull"],"operation":"aggregate"},"mountpoint":{"aggregations":[],"operation":"groupby"}}}},{"id":"merge"},{"id":"calculateField","options":{"alias":"Used","binary":{"left":"Value #A (lastNotNull)","operator":"-","reducer":"sum","right":"Value #B (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"calculateField","options":{"alias":"Used, %","binary":{"left":"Used","operator":"/","reducer":"sum","right":"Value #A (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"organize","options":{"excludeByName":{},"indexByName":{},"renameByName":{"Value #A (lastNotNull)":"Size","Value #B (lastNotNull)":"Available","mountpoint":"Mounted on"}}},{"id":"sortBy","options":{"fields":{},"sort":[{"field":"Mounted on"}]}}],"type":"table"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":10,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network received (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":27},"id":11,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Received","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network transmitted (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":27},"id":12,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Transmitted","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":true,"label":"Cluster","name":"cluster","query":"label_values(node_uname_info{job=\"node-exporter\", sysname!=\"Darwin\"}, cluster)","refresh":2,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"Instance","name":"instance","query":"label_values(node_uname_info{job=\"node-exporter\", cluster=~\"$cluster\", sysname!=\"Darwin\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Node Exporter / AIX","uid":"7e0a61e486f727d763fb1d86fdd629c2"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (and (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) .Values.nodeExporter.operatingSystems.aix.enabled) }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes-aix" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes-aix" | trunc 63 | trimSuffix "-" }} + key: nodes-aix.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml new file mode 100644 index 0000000..f7935cf --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes-darwin.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'nodes-darwin' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (and (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) .Values.nodeExporter.operatingSystems.darwin.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes-darwin" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + nodes-darwin.json: |- + {{`{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"max":1,"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\", cluster=~\"$cluster\"})\n)\n","intervalFactor":5,"legendFormat":"{{cpu}}"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load1{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"1m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load5{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"5m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load15{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"15m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", mode=\"idle\"})","legendFormat":"logical cores"}],"title":"Load Average","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"none"}},"min":0,"unit":"bytes"}},"gridPos":{"h":7,"w":18,"x":0,"y":9},"id":5,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"Physical Memory"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} +\n node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} +\n node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}\n)\n","legendFormat":"Memory Used"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"} -\n node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}\n)\n","legendFormat":"App Memory"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"Wired Memory"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"Compressed"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"max":100,"min":0,"thresholds":{"steps":[{"color":"rgba(50, 172, 45, 0.97)"},{"color":"rgba(237, 129, 40, 0.89)","value":80},{"color":"rgba(245, 54, 54, 0.9)","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":6,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n (\n avg(node_memory_internal_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}) -\n avg(node_memory_purgeable_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}) +\n avg(node_memory_wired_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}) +\n avg(node_memory_compressed_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"})\n ) /\n avg(node_memory_total_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"})\n)\n*\n100\n"}],"title":"Memory Usage","type":"gauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":18},"id":7,"panels":[],"title":"Disk","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0},"overrides":[{"matcher":{"id":"byRegexp","options":"/ read| written/"},"properties":[{"id":"unit","value":"Bps"}]},{"matcher":{"id":"byRegexp","options":"/ io time/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":19},"id":8,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} read"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} written"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} io time"}],"title":"Disk I/O","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"thresholds":{"steps":[{"color":"green"},{"color":"yellow","value":0.8},{"color":"red","value":0.9}]},"unit":"decbytes"},"overrides":[{"matcher":{"id":"byName","options":"Mounted on"},"properties":[{"id":"custom.width","value":260}]},{"matcher":{"id":"byName","options":"Size"},"properties":[{"id":"custom.width","value":93}]},{"matcher":{"id":"byName","options":"Used"},"properties":[{"id":"custom.width","value":72}]},{"matcher":{"id":"byName","options":"Available"},"properties":[{"id":"custom.width","value":88}]},{"matcher":{"id":"byName","options":"Used, %"},"properties":[{"id":"unit","value":"percentunit"},{"id":"custom.cellOptions","value":{"type":"gauge"}},{"id":"max","value":1},{"id":"min","value":0}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":19},"id":9,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""}],"title":"Disk Space Usage","transformations":[{"id":"groupBy","options":{"fields":{"Value #A":{"aggregations":["lastNotNull"],"operation":"aggregate"},"Value #B":{"aggregations":["lastNotNull"],"operation":"aggregate"},"mountpoint":{"aggregations":[],"operation":"groupby"}}}},{"id":"merge"},{"id":"calculateField","options":{"alias":"Used","binary":{"left":"Value #A (lastNotNull)","operator":"-","reducer":"sum","right":"Value #B (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"calculateField","options":{"alias":"Used, %","binary":{"left":"Used","operator":"/","reducer":"sum","right":"Value #A (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"organize","options":{"excludeByName":{},"indexByName":{},"renameByName":{"Value #A (lastNotNull)":"Size","Value #B (lastNotNull)":"Available","mountpoint":"Mounted on"}}},{"id":"sortBy","options":{"fields":{},"sort":[{"field":"Mounted on"}]}}],"type":"table"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":10,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network received (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":27},"id":11,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Received","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network transmitted (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":27},"id":12,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Transmitted","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":true,"label":"Cluster","name":"cluster","query":"label_values(node_uname_info{job=\"node-exporter\", sysname=\"Darwin\"}, cluster)","refresh":2,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"Instance","name":"instance","query":"label_values(node_uname_info{job=\"node-exporter\", cluster=~\"$cluster\", sysname=\"Darwin\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Node Exporter / MacOS","uid":"629701ea43bf69291922ea45f4a87d37"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (and (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) .Values.nodeExporter.operatingSystems.darwin.enabled) }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes-darwin" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes-darwin" | trunc 63 | trimSuffix "-" }} + key: nodes-darwin.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml new file mode 100644 index 0000000..4528f26 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/nodes.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'nodes' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (and (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) .Values.nodeExporter.operatingSystems.linux.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + nodes.json: |- + {{`{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"CPU","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"max":1,"min":0,"unit":"percentunit"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n (1 - sum without (mode) (rate(node_cpu_seconds_total{job=\"node-exporter\", mode=~\"idle|iowait|steal\", instance=\"$instance\", cluster=~\"$cluster\"}[$__rate_interval])))\n/ ignoring(cpu) group_left\n count without (cpu, mode) (node_cpu_seconds_total{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\", cluster=~\"$cluster\"})\n)\n","intervalFactor":5,"legendFormat":"{{cpu}}"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load1{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"1m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load5{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"5m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_load15{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"15m load average"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"count(node_cpu_seconds_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", mode=\"idle\"})","legendFormat":"logical cores"}],"title":"Load Average","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"title":"Memory","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"bytes"}},"gridPos":{"h":7,"w":18,"x":0,"y":9},"id":5,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}\n-\n node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}\n-\n node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}\n-\n node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}\n)\n","legendFormat":"memory used"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_Buffers_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"memory buffers"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_Cached_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"memory cached"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"node_memory_MemFree_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}","legendFormat":"memory free"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"max":100,"min":0,"thresholds":{"steps":[{"color":"rgba(50, 172, 45, 0.97)"},{"color":"rgba(237, 129, 40, 0.89)","value":80},{"color":"rgba(245, 54, 54, 0.9)","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":9},"id":6,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"100 -\n(\n avg(node_memory_MemAvailable_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"}) /\n avg(node_memory_MemTotal_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\"})\n* 100\n)\n"}],"title":"Memory Usage","type":"gauge"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":18},"id":7,"panels":[],"title":"Disk","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0},"overrides":[{"matcher":{"id":"byRegexp","options":"/ read| written/"},"properties":[{"id":"unit","value":"Bps"}]},{"matcher":{"id":"byRegexp","options":"/ io time/"},"properties":[{"id":"unit","value":"percentunit"}]}]},"gridPos":{"h":7,"w":12,"x":0,"y":19},"id":8,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_read_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} read"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_written_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} written"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_disk_io_time_seconds_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device=~\"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)\"}[$__rate_interval])","intervalFactor":1,"legendFormat":"{{device}} io time"}],"title":"Disk I/O","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"fieldConfig":{"defaults":{"thresholds":{"steps":[{"color":"green"},{"color":"yellow","value":0.8},{"color":"red","value":0.9}]},"unit":"decbytes"},"overrides":[{"matcher":{"id":"byName","options":"Mounted on"},"properties":[{"id":"custom.width","value":260}]},{"matcher":{"id":"byName","options":"Size"},"properties":[{"id":"custom.width","value":93}]},{"matcher":{"id":"byName","options":"Used"},"properties":[{"id":"custom.width","value":72}]},{"matcher":{"id":"byName","options":"Available"},"properties":[{"id":"custom.width","value":88}]},{"matcher":{"id":"byName","options":"Used, %"},"properties":[{"id":"unit","value":"percentunit"},{"id":"custom.cellOptions","value":{"type":"gauge"}},{"id":"max","value":1},{"id":"min","value":0}]}]},"gridPos":{"h":7,"w":12,"x":12,"y":19},"id":9,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max by (mountpoint) (node_filesystem_size_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max by (mountpoint) (node_filesystem_avail_bytes{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", fstype!=\"\", mountpoint!=\"\"})\n","format":"table","instant":true,"legendFormat":""}],"title":"Disk Space Usage","transformations":[{"id":"groupBy","options":{"fields":{"Value #A":{"aggregations":["lastNotNull"],"operation":"aggregate"},"Value #B":{"aggregations":["lastNotNull"],"operation":"aggregate"},"mountpoint":{"aggregations":[],"operation":"groupby"}}}},{"id":"merge"},{"id":"calculateField","options":{"alias":"Used","binary":{"left":"Value #A (lastNotNull)","operator":"-","reducer":"sum","right":"Value #B (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"calculateField","options":{"alias":"Used, %","binary":{"left":"Used","operator":"/","reducer":"sum","right":"Value #A (lastNotNull)"},"mode":"binary","reduce":{"reducer":"sum"}}},{"id":"organize","options":{"excludeByName":{},"indexByName":{},"renameByName":{"Value #A (lastNotNull)":"Size","Value #B (lastNotNull)":"Available","mountpoint":"Mounted on"}}},{"id":"sortBy","options":{"fields":{},"sort":[{"field":"Mounted on"}]}}],"type":"table"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":26},"id":10,"panels":[],"title":"Network","type":"row"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network received (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":0,"y":27},"id":11,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_receive_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Received","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"description":"Network transmitted (bits/s)","fieldConfig":{"defaults":{"custom":{"fillOpacity":0},"min":0,"unit":"bps"}},"gridPos":{"h":7,"w":12,"x":12,"y":27},"id":12,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(node_network_transmit_bytes_total{job=\"node-exporter\", instance=\"$instance\", cluster=~\"$cluster\", device!=\"lo\"}[$__rate_interval]) * 8","intervalFactor":1,"legendFormat":"{{device}}"}],"title":"Network Transmitted","type":"timeseries"}],"refresh":"30s","schemaVersion":39,"tags":["node-exporter-mixin"],"templating":{"list":[{"name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":true,"label":"Cluster","name":"cluster","query":"label_values(node_uname_info{job=\"node-exporter\", sysname!=\"Darwin\"}, cluster)","refresh":2,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"label":"Instance","name":"instance","query":"label_values(node_uname_info{job=\"node-exporter\", cluster=~\"$cluster\", sysname!=\"Darwin\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Node Exporter / Nodes","uid":"7d57716318ee0dddbac5a7f451fb7753"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled (and (or .Values.nodeExporter.enabled .Values.nodeExporter.forceDeployDashboards) .Values.nodeExporter.operatingSystems.linux.enabled) }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "nodes" | trunc 63 | trimSuffix "-" }} + key: nodes.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml new file mode 100644 index 0000000..f30a1e7 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/persistentvolumesusage.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'persistentvolumesusage' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + persistentvolumesusage.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":18,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n","legendFormat":"Used Space"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n","legendFormat":"Free Space"}],"title":"Volume Space Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"max":100,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":0},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n","instant":true}],"title":"Volume Space Usage","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"none"}},"gridPos":{"h":7,"w":18,"y":7},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))","legendFormat":"Used inodes"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n","legendFormat":"Free inodes"}],"title":"Volume inodes Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"color":{"mode":"thresholds"},"max":100,"min":0,"thresholds":{"mode":"absolute","steps":[{"color":"green","value":0},{"color":"orange","value":80},{"color":"red","value":90}]},"unit":"percent"}},"gridPos":{"h":7,"w":6,"x":18,"y":7},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n","instant":true}],"title":"Volume inodes Usage","type":"gauge"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(kubelet_volume_stats_capacity_bytes{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"Namespace","name":"namespace","query":"label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"PersistentVolumeClaim","name":"volume","query":"label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics\", namespace=\"$namespace\"}, persistentvolumeclaim)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Persistent Volumes","uid":"919b92a8e8041bd567af9edab12c840c"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "persistentvolumesusage" | trunc 63 | trimSuffix "-" }} + key: persistentvolumesusage.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml new file mode 100644 index 0000000..1a4fdc7 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/pod-total.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'pod-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + pod-total.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"displayName":"$pod","max":10000000000,"min":0,"thresholds":{"steps":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}]},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"Current Rate of Bits Received","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"displayName":"$pod","max":10000000000,"min":0,"thresholds":{"steps":[{"color":"dark-green","index":0,"value":null},{"color":"dark-yellow","index":1,"value":5000000000},{"color":"dark-red","index":2,"value":7000000000}]},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))","legendFormat":"__auto"}],"title":"Current Rate of Bits Transmitted","type":"gauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":9},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) by (pod)","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":9},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum((8 * rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))) by (pod)","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"showPoints":"never"},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"namespace","name":"namespace","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"pod","name":"pod","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Networking / Pod","uid":"7a18067ce943a40ae25454675c19ff5c"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "pod-total" | trunc 63 | trimSuffix "-" }} + key: pod-total.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml new file mode 100644 index 0000000..4942c69 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus-remote-write.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'prometheus-remote-write' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.prometheus.prometheusSpec.remoteWriteDashboards }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + prometheus-remote-write.json: |- + {{`{"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"Timestamps","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"(\n prometheus_remote_storage_queue_highest_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}\n-\n prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}\n)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}::{{instance}} {{remote_name}}:{{url}}"}],"title":"Highest Enqueued Timestamp vs. Highest Timestamp Sent","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"clamp_min(\n rate(prometheus_remote_storage_queue_highest_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])\n-\n rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])\n, 0)\n","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Rate[5m]","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"panels":[],"title":"Samples","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":24,"x":0,"y":9},"id":5,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n-\n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]))\n-\n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]))\n","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Rate, in vs. succeeded or dropped [5m]","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":16},"id":6,"panels":[],"title":"Shards","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":24,"x":0,"y":16},"id":7,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Current Shards","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":0,"y":23},"id":8,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Max Shards","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":23},"id":9,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}{{instance}} {{remote_name}}:{{url}}"}],"title":"Min Shards","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":23},"id":10,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Desired Shards","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":24},"id":11,"panels":[],"title":"Shard Details","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":12,"x":0,"y":25},"id":12,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_remote_storage_shard_capacity{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Shard Capacity","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":25},"id":13,"options":{"tooltip":{"mode":"multi"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Pending Samples","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":32},"id":14,"panels":[],"title":"Segments","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"unit":"none"}},"gridPos":{"h":7,"w":12,"x":0,"y":33},"id":15,"options":{"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}}"}],"title":"TSDB Current Segment","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"},"unit":"none"}},"gridPos":{"h":7,"w":12,"x":12,"y":33},"id":16,"options":{"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{consumer}}"}],"title":"Remote Write Current Segment","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":40},"id":17,"panels":[],"title":"Misc. Rates","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"}}},"gridPos":{"h":7,"w":6,"x":0,"y":41},"id":18,"options":{"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Dropped Samples","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"}}},"gridPos":{"h":7,"w":6,"x":6,"y":41},"id":19,"options":{"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Failed Samples","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"}}},"gridPos":{"h":7,"w":6,"x":12,"y":41},"id":20,"options":{"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Retried Samples","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":0,"showPoints":"never"}}},"gridPos":{"h":7,"w":6,"x":18,"y":41},"id":21,"options":{"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\", url=~\"$url\"}[5m])","format":"time_series","intervalFactor":2,"legendFormat":"{{cluster}}:{{instance}} {{remote_name}}:{{url}}"}],"title":"Enqueue Retries","type":"timeseries"}],"schemaVersion":39,"tags":["prometheus-mixin"],"templating":{"list":[{"current":{"selected":false,"text":"default","value":"default"},"hide":0,"name":"datasource","query":"prometheus","type":"datasource"},{"current":{"selected":false,"text":"$__all","value":"$__all"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":true,"name":"cluster","query":"label_values(prometheus_build_info, cluster)","refresh":2,"type":"query","allValue":".*"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"includeAll":true,"name":"instance","query":"label_values(prometheus_build_info{cluster=~\"$cluster\"}, instance)","refresh":2,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"includeAll":true,"name":"url","query":"label_values(prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}, url)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["60s"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Prometheus / Remote Write","uid":"cb079f93-fde4-41f0-862b-d4301d7c1c56"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.prometheus.prometheusSpec.remoteWriteDashboards }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus-remote-write" | trunc 63 | trimSuffix "-" }} + key: prometheus-remote-write.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml new file mode 100644 index 0000000..7b6ff96 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/prometheus.yaml @@ -0,0 +1,56 @@ +{{- /* +Generated from 'prometheus' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + prometheus.json: |- + {{`{"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"Prometheus Stats","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"decimals":2,"displayName":"","unit":"short"},"overrides":[{"matcher":{"id":"byName","options":"Time"},"properties":[{"id":"displayName","value":"Time"},{"id":"custom.align","value":null},{"id":"custom.hidden","value":"true"}]},{"matcher":{"id":"byName","options":"cluster"},"properties":[{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2},{"id":"displayName","value":"Cluster"}]},{"matcher":{"id":"byName","options":"job"},"properties":[{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2},{"id":"displayName","value":"Job"}]},{"matcher":{"id":"byName","options":"instance"},"properties":[{"id":"displayName","value":"Instance"},{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2}]},{"matcher":{"id":"byName","options":"version"},"properties":[{"id":"displayName","value":"Version"},{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2}]},{"matcher":{"id":"byName","options":"Value #A"},"properties":[{"id":"displayName","value":"Count"},{"id":"custom.align","value":null},{"id":"unit","value":"short"},{"id":"decimals","value":2},{"id":"custom.hidden","value":"true"}]},{"matcher":{"id":"byName","options":"Value #B"},"properties":[{"id":"displayName","value":"Uptime"},{"id":"custom.align","value":null},{"id":"unit","value":"s"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":1},"id":2,"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"count by (cluster, job, instance, version) (prometheus_build_info{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})","format":"table","instant":true,"legendFormat":""},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~\"$cluster\", job=~\"$job\", instance=~\"$instance\"})","format":"table","instant":true,"legendFormat":""}],"title":"Prometheus Stats","type":"table"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":3,"panels":[],"title":"Discovery","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"min":0,"unit":"ms"}},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":4,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[5m])) by (cluster, job, scrape_job, instance) * 1e3","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}"}],"title":"Target Sync","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":9},"id":5,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"})","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}}"}],"title":"Targets","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":16},"id":6,"panels":[],"title":"Retrieval","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never"},"min":0,"unit":"ms"}},"gridPos":{"h":7,"w":8,"x":0,"y":17},"id":7,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_target_interval_length_seconds_sum{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m]) * 1e3","format":"time_series","legendFormat":"{{cluster}}:{{job}}:{{instance}} {{interval}} configured"}],"title":"Average Scrape Interval Duration","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":17},"id":8,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"exceeded body size limit: {{cluster}} {{job}} {{instance}}"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"exceeded sample limit: {{cluster}} {{job}} {{instance}}"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"duplicate timestamp: {{cluster}} {{job}} {{instance}}"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"out of bounds: {{cluster}} {{job}} {{instance}}"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}[1m]))","format":"time_series","legendFormat":"out of order: {{cluster}} {{job}} {{instance}}"}],"title":"Scrape failures","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":17},"id":9,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_tsdb_head_samples_appended_total{cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}[5m])","format":"time_series","legendFormat":"{{cluster}} {{job}} {{instance}}"}],"title":"Appended Samples","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":24},"id":10,"panels":[],"title":"Storage","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":0,"y":25},"id":11,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_tsdb_head_series{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}","format":"time_series","legendFormat":"{{cluster}} {{job}} {{instance}} head series"}],"title":"Head Series","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":12,"y":25},"id":12,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"prometheus_tsdb_head_chunks{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\"}","format":"time_series","legendFormat":"{{cluster}} {{job}} {{instance}} head chunks"}],"title":"Head Chunks","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":32},"id":13,"panels":[],"title":"Query","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"short"}},"gridPos":{"h":7,"w":12,"x":0,"y":33},"id":14,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"rate(prometheus_engine_query_duration_seconds_count{cluster=~\"$cluster\",job=~\"$job\",instance=~\"$instance\",slice=\"inner_eval\"}[5m])","format":"time_series","legendFormat":"{{cluster}} {{job}} {{instance}}"}],"title":"Query Rate","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":100,"lineWidth":0,"showPoints":"never","stacking":{"mode":"normal"}},"min":0,"unit":"ms"}},"gridPos":{"h":7,"w":12,"x":12,"y":33},"id":15,"options":{"tooltip":{"mode":"multi","sort":"desc"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"max by (slice) (prometheus_engine_query_duration_seconds{quantile=\"0.9\",cluster=~\"$cluster\", job=~\"$job\",instance=~\"$instance\"}) * 1e3","format":"time_series","legendFormat":"{{slice}}"}],"title":"Stage Duration","type":"timeseries"}],"schemaVersion":39,"tags":["prometheus-mixin"],"templating":{"list":[{"current":{"selected":false,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","type":"datasource"},{"allValue":".*","current":{"selected":false,"text":["$__all"],"value":["$__all"]},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"includeAll":true,"label":"cluster","multi":true,"name":"cluster","query":"label_values(prometheus_build_info{}, cluster)","refresh":2,"sort":2,"type":"query"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"includeAll":true,"label":"job","multi":true,"name":"job","query":"label_values(prometheus_build_info{cluster=~\"$cluster\"}, job)","refresh":2,"sort":2,"type":"query"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"includeAll":true,"label":"instance","multi":true,"name":"instance","query":"label_values(prometheus_build_info{cluster=~\"$cluster\", job=~\"$job\"}, instance)","refresh":2,"sort":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timepicker":{"refresh_intervals":["60s"]},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Prometheus / Overview","uid":"9fa0d141-d019-4ad7-8bc5-42196ee308bd"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "prometheus" | trunc 63 | trimSuffix "-" }} + key: prometheus.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/proxy.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/proxy.yaml new file mode 100644 index 0000000..7bd4fb7 --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/proxy.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'proxy' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeProxy.enabled }} +{{- $kubeProxyJob := include "kube-prometheus-stack-kube-proxy.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "proxy" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + proxy.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(up{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\"})","instant":true}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":10,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\"}[$__rate_interval]))","legendFormat":"rate"}],"title":"Rules Sync Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":10,"x":14,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\"}[$__rate_interval]))","legendFormat":"{{instance}}"}],"title":"Rules Sync Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":7},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\"}[$__rate_interval]))","legendFormat":"rate"}],"title":"Network Programming Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":12,"x":12,"y":7},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))","legendFormat":"{{instance}}"}],"title":"Network Programming Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":8,"x":0,"y":14},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","legendFormat":"2xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","legendFormat":"3xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","legendFormat":"4xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","legendFormat":"5xx"}],"title":"Kube API Request Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":16,"x":8,"y":14},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Post Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Get Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":28},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":28},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\",instance=~\"$instance\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":28},"id":11,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeProxyJob }}{{`\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"instance","name":"instance","query":"label_values(up{job=\"`}}{{ $kubeProxyJob }}{{`\", cluster=\"$cluster\", job=\"`}}{{ $kubeProxyJob }}{{`\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Proxy","uid":"632e265de029684c40b21cb76bca4f94"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeProxy.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "proxy" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "proxy" | trunc 63 | trimSuffix "-" }} + key: proxy.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/scheduler.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/scheduler.yaml new file mode 100644 index 0000000..b831b1b --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/scheduler.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'scheduler' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeScheduler.enabled }} +{{- $kubeSchedulerJob := include "kube-prometheus-stack-kube-scheduler.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "scheduler" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + scheduler.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":7,"w":4,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"colorMode":"none"},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(up{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\"})","instant":true}],"title":"Up","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":10,"x":4,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(scheduler_scheduling_attempt_duration_seconds_count{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)","legendFormat":"{{cluster}} {{instance}} e2e"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(scheduler_pod_scheduling_sli_duration_seconds_count{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)","legendFormat":"{{cluster}} {{instance}} binding"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)","legendFormat":"{{cluster}} {{instance}} scheduling algorithm"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)","legendFormat":"{{cluster}} {{instance}} volume"}],"title":"Scheduling Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":10,"x":14,"y":0},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))","legendFormat":"{{cluster}} {{instance}} e2e"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(scheduler_pod_scheduling_sli_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))","legendFormat":"{{cluster}} {{instance}} binding"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))","legendFormat":"{{cluster}} {{instance}} scheduling algorithm"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))","legendFormat":"{{cluster}} {{instance}} volume"}],"title":"Scheduling latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":8,"x":0,"y":7},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))","legendFormat":"2xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))","legendFormat":"3xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))","legendFormat":"4xx"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))","legendFormat":"5xx"}],"title":"Kube API Request Rate","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"ops"}},"gridPos":{"h":7,"w":16,"x":8,"y":7},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Post Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"s"}},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, le))","legendFormat":"{{verb}}"}],"title":"Get Request Latency 99th Quantile","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"}},"gridPos":{"h":7,"w":8,"x":0,"y":21},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"process_resident_memory_bytes{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Memory","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":8,"y":21},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\", instance=~\"$instance\"}[$__rate_interval])","legendFormat":"{{instance}}"}],"title":"CPU usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"short"}},"gridPos":{"h":7,"w":8,"x":16,"y":21},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"go_goroutines{cluster=\"$cluster\", job=\"`}}{{ $kubeSchedulerJob }}{{`\",instance=~\"$instance\"}","legendFormat":"{{instance}}"}],"title":"Goroutines","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(up{job=\"`}}{{ $kubeSchedulerJob }}{{`\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"instance","name":"instance","query":"label_values(up{job=\"`}}{{ $kubeSchedulerJob }}{{`\", cluster=\"$cluster\"}, instance)","refresh":2,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Scheduler","uid":"2e6b6a3b4bddf1427b3a55aa1311c656"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled .Values.kubeScheduler.enabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "scheduler" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "scheduler" | trunc 63 | trimSuffix "-" }} + key: scheduler.json +{{- end }} diff --git a/kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml b/kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml new file mode 100644 index 0000000..12c0bef --- /dev/null +++ b/kube-prometheus-stack/templates/grafana/dashboards-1.14/workload-total.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'workload-total' from https://raw.githubusercontent.com/prometheus-operator/kube-prometheus/a0d4361bf7336609bf3f166db8501a6c3b37c8ce/manifests/grafana-dashboardDefinitions.yaml +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }} + annotations: +{{ toYaml .Values.grafana.sidecar.dashboards.annotations | indent 4 }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +data: + workload-total.json: |- + {{`{"editable":`}}{{ .Values.grafana.defaultDashboardsEditable }}{{`,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":0},"id":1,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Current Rate of Bits Received","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":0},"id":2,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Current Rate of Bits Transmitted","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":9},"id":3,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(avg((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Average Rate of Bits Received","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"color":{"fixedColor":"green","mode":"fixed"},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":9},"id":4,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"displayMode":"basic","showUnfilled":false},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(avg((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Average Rate of Bits Transmitted","type":"bargauge"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":0,"y":18},"id":5,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 * rate(container_network_receive_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Receive Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bps"}},"gridPos":{"h":9,"w":12,"x":12,"y":18},"id":6,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum((8 * rate(container_network_transmit_bytes_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Transmit Bandwidth","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":27},"id":7,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_receive_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":27},"id":8,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_transmit_packets_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":0,"y":36},"id":9,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_receive_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Received Packets Dropped","type":"timeseries"},{"datasource":{"type":"datasource","uid":"-- Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"pps"}},"gridPos":{"h":9,"w":12,"x":12,"y":36},"id":10,"interval":"`}}{{ .Values.grafana.defaultDashboardsInterval }}{{`","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.4.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sort_desc(sum(rate(container_network_transmit_packets_dropped_total{job=\"`}}{{ $kubeletJob }}{{`\", metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (cluster, namespace, pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n","legendFormat":"__auto"}],"title":"Rate of Transmitted Packets Dropped","type":"timeseries"}],"refresh":"10s","schemaVersion":39,"tags":["kubernetes-mixin"],"templating":{"list":[{"current":{"selected":true,"text":"default","value":"default"},"hide":0,"label":"Data source","name":"datasource","query":"prometheus","regex":"","type":"datasource"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":`}}{{ if .Values.grafana.sidecar.dashboards.multicluster.global.enabled }}0{{ else }}2{{ end }}{{`,"label":"cluster","name":"cluster","query":"label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)","refresh":2,"sort":1,"type":"query","allValue":".*"},{"allValue":".+","current":{"selected":false,"text":"kube-system","value":"kube-system"},"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"namespace","name":"namespace","query":"label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)","refresh":2,"sort":1,"type":"query"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"label":"workload","name":"workload","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\"}, workload)","refresh":2,"sort":1,"type":"query"},{"allValue":".+","datasource":{"type":"prometheus","uid":"${datasource}"},"hide":0,"includeAll":true,"label":"workload_type","name":"type","query":"label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)","refresh":2,"sort":1,"type":"query"}]},"time":{"from":"now-1h","to":"now"},"timezone": "`}}{{ .Values.grafana.defaultDashboardsTimezone }}{{`","title":"Kubernetes / Networking / Workload","uid":"728bf77cc1166d2f3133bf25846876cc"}`}} +{{- end }} +--- +{{- if and .Values.grafana.operator.dashboardsConfigMapRefEnabled (or .Values.grafana.enabled .Values.grafana.forceDeployDashboards) (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.grafana.defaultDashboardsEnabled }} +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack-grafana.namespace" . }} + {{ with .Values.grafana.operator.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{ end }} + labels: + {{- if $.Values.grafana.sidecar.dashboards.label }} + {{ tpl $.Values.grafana.sidecar.dashboards.label $ }}: {{ ((tpl $.Values.grafana.sidecar.dashboards.labelValue $) | default 1) | quote }} + {{- end }} + app: {{ template "kube-prometheus-stack.name" $ }}-grafana + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} +spec: + allowCrossNamespaceImport: true + resyncPeriod: {{ .Values.grafana.operator.resyncPeriod | quote | default "10m" }} + {{- include "kube-prometheus-stack.grafana.operator.folder" . | nindent 2 }} + instanceSelector: + matchLabels: + {{- if .Values.grafana.operator.matchLabels }} + {{- toYaml .Values.grafana.operator.matchLabels | nindent 6 }} + {{- else }} + {{- fail "grafana.operator.matchLabels must be specified when grafana.operator.dashboardsConfigMapRefEnabled is true" }} + {{- end }} + configMapRef: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) "workload-total" | trunc 63 | trimSuffix "-" }} + key: workload-total.json +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/_prometheus-operator.tpl b/kube-prometheus-stack/templates/prometheus-operator/_prometheus-operator.tpl new file mode 100644 index 0000000..6ae9dc7 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/_prometheus-operator.tpl @@ -0,0 +1,7 @@ +{{/* Generate basic labels for prometheus-operator */}} +{{- define "kube-prometheus-stack.prometheus-operator.labels" }} +{{- include "kube-prometheus-stack.labels" . }} +app: {{ template "kube-prometheus-stack.name" . }}-operator +app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-prometheus-operator +app.kubernetes.io/component: prometheus-operator +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/_prometheus-operator-webhook.tpl b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/_prometheus-operator-webhook.tpl new file mode 100644 index 0000000..8adafeb --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/_prometheus-operator-webhook.tpl @@ -0,0 +1,13 @@ +{{/* Generate basic labels for prometheus-operator-webhook */}} +{{- define "kube-prometheus-stack.prometheus-operator-webhook.labels" }} +{{- include "kube-prometheus-stack.labels" . }} +app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-prometheus-operator +app.kubernetes.io/component: prometheus-operator-webhook +{{- end }} + +{{- define "kube-prometheus-stack.prometheus-operator-webhook.annotations" }} +{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }} +certmanager.k8s.io/inject-ca-from: {{ printf "%s/%s-admission" (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack.fullname" .) | quote }} +cert-manager.io/inject-ca-from: {{ printf "%s/%s-admission" (include "kube-prometheus-stack.namespace" .) (include "kube-prometheus-stack.fullname" .) | quote }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/deployment.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/deployment.yaml new file mode 100644 index 0000000..f2183c8 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/deployment.yaml @@ -0,0 +1,145 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.deployment.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-webhook + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | nindent 4 }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.labels }} +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.annotations }} + annotations: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.annotations | indent 4 }} +{{- end }} +spec: + replicas: {{ .Values.prometheusOperator.admissionWebhooks.deployment.replicas }} + revisionHistoryLimit: {{ .Values.prometheusOperator.admissionWebhooks.deployment.revisionHistoryLimit }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.strategy }} + strategy: + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + release: {{ $.Release.Name | quote }} + template: + metadata: + labels: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | nindent 8 }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.podLabels }} +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.podLabels | indent 8 }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.podAnnotations }} + annotations: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.podAnnotations | indent 8 }} +{{- end }} + spec: + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.priorityClassName }} + priorityClassName: {{ .Values.prometheusOperator.admissionWebhooks.deployment.priorityClassName }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "kube-prometheus-stack.imagePullSecrets" . | indent 8 }} + {{- end }} + containers: + - name: prometheus-operator-admission-webhook + {{- $operatorRegistry := .Values.global.imageRegistry | default .Values.prometheusOperator.admissionWebhooks.deployment.image.registry -}} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.image.sha }} + image: "{{ $operatorRegistry }}/{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.sha }}" + {{- else }} + image: "{{ $operatorRegistry }}/{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.tag | default .Chart.AppVersion }}" + {{- end }} + imagePullPolicy: "{{ .Values.prometheusOperator.admissionWebhooks.deployment.image.pullPolicy }}" + args: + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.logFormat }} + - --log-format={{ .Values.prometheusOperator.admissionWebhooks.deployment.logFormat }} + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.logLevel }} + - --log-level={{ .Values.prometheusOperator.admissionWebhooks.deployment.logLevel }} + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled }} + - "--web.enable-tls=true" + - "--web.cert-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.crt{{ else }}cert{{ end }}" + - "--web.key-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.key{{ else }}key{{ end }}" + - "--web.listen-address=:{{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.internalPort }}" + - "--web.tls-min-version={{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.tlsMinVersion }}" + ports: + - containerPort: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.internalPort }} + name: https + {{- else }} + - "--web.enable-tls=false" + - "--web.listen-address=:8080" + ports: + - containerPort: 8080 + name: http + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: /healthz + port: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled | ternary "https" "http" }} + scheme: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled | ternary "HTTPS" "HTTP" }} + initialDelaySeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.prometheusOperator.admissionWebhooks.deployment.readinessProbe.failureThreshold }} + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: /healthz + port: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled | ternary "https" "http" }} + scheme: {{ .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled | ternary "HTTPS" "HTTP" }} + initialDelaySeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.prometheusOperator.admissionWebhooks.deployment.livenessProbe.failureThreshold }} + {{- end }} + resources: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.resources | indent 12 }} + securityContext: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.containerSecurityContext | indent 12 }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled }} + volumeMounts: + - name: tls-secret + mountPath: /cert + readOnly: true + volumes: + - name: tls-secret + secret: + defaultMode: 420 + secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission +{{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.dnsConfig }} + dnsConfig: +{{ toYaml . | indent 8 }} + {{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.securityContext }} + securityContext: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.securityContext | indent 8 }} +{{- end }} + serviceAccountName: {{ template "kube-prometheus-stack.operator.admissionWebhooks.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.prometheusOperator.admissionWebhooks.deployment.automountServiceAccountToken }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.hostNetwork }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet +{{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/pdb.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/pdb.yaml new file mode 100644 index 0000000..0559a8e --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/pdb.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.prometheusOperator.admissionWebhooks.deployment.enabled .Values.prometheusOperator.admissionWebhooks.deployment.podDisruptionBudget.enabled -}} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-webhook + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + release: {{ $.Release.Name | quote }} +{{ toYaml (omit .Values.prometheusOperator.admissionWebhooks.deployment.podDisruptionBudget "enabled") | indent 2 }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/service.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/service.yaml new file mode 100644 index 0000000..6de9cbb --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/service.yaml @@ -0,0 +1,62 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.deployment.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }}-webhook + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | nindent 4 }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.labels }} +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.service.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.annotations }} + annotations: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.clusterIP }} + clusterIP: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.clusterIP }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.ipDualStack.ipFamilyPolicy }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.externalIPs }} + externalIPs: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.deployment.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.loadBalancerIP }} +{{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.deployment.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.prometheusOperator.admissionWebhooks.deployment.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.prometheusOperator.admissionWebhooks.deployment.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.externalTrafficPolicy }} +{{- end }} + ports: + {{- if not .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled }} + - name: http + {{- if eq .Values.prometheusOperator.admissionWebhooks.deployment.service.type "NodePort" }} + nodePort: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.nodePort }} + {{- end }} + port: 8080 + targetPort: http + {{- end }} + {{- if .Values.prometheusOperator.admissionWebhooks.deployment.tls.enabled }} + - name: https + {{- if eq .Values.prometheusOperator.admissionWebhooks.deployment.service.type "NodePort"}} + nodePort: {{ .Values.prometheusOperator.admissionWebhooks.deployment.service.nodePortTls }} + {{- end }} + port: 443 + targetPort: https + {{- end }} + selector: + app: {{ template "kube-prometheus-stack.name" . }}-operator-webhook + release: {{ $.Release.Name | quote }} + type: "{{ .Values.prometheusOperator.admissionWebhooks.deployment.service.type }}" +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/serviceaccount.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/serviceaccount.yaml new file mode 100644 index 0000000..15ff86e --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/deployment/serviceaccount.yaml @@ -0,0 +1,18 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.deployment.enabled }} +apiVersion: v1 +kind: ServiceAccount +automountServiceAccountToken: {{ .Values.prometheusOperator.admissionWebhooks.deployment.serviceAccount.automountServiceAccountToken }} +metadata: + name: {{ template "kube-prometheus-stack.operator.admissionWebhooks.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" . | indent 4 }} + {{- with .Values.prometheusOperator.admissionWebhooks.deployment.serviceAccount.annotations }} + annotations: {{- toYaml . | nindent 4 }} + {{- end }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . | trim | indent 2 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-createSecret.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-createSecret.yaml new file mode 100644 index 0000000..f7543b0 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-createSecret.yaml @@ -0,0 +1,36 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "cilium") }} +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + helm.sh/hook: pre-install,pre-upgrade + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + ## Ensure this is run before the job + helm.sh/hook-weight: "-5" + {{- with .Values.prometheusOperator.admissionWebhooks.annotations }} + {{ toYaml . | nindent 4 }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + endpointSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 6 }} + {{- end }} + egress: + {{- if and .Values.prometheusOperator.networkPolicy.cilium .Values.prometheusOperator.networkPolicy.cilium.egress }} + {{ toYaml .Values.prometheusOperator.networkPolicy.cilium.egress | nindent 6 }} + {{- else }} + - toEntities: + - kube-apiserver + {{- end }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-patchWebhook.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-patchWebhook.yaml new file mode 100644 index 0000000..4e3b0d9 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/ciliumnetworkpolicy-patchWebhook.yaml @@ -0,0 +1,36 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "cilium") }} +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-patch + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + helm.sh/hook: post-install,post-upgrade + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + ## Ensure this is run before the job + helm.sh/hook-weight: "-5" + {{- with .Values.prometheusOperator.admissionWebhooks.patch.annotations }} + {{ toYaml . | nindent 4 }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + endpointSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 6 }} + {{- end }} + egress: + {{- if and .Values.prometheusOperator.networkPolicy.cilium .Values.prometheusOperator.networkPolicy.cilium.egress }} + {{ toYaml .Values.prometheusOperator.networkPolicy.cilium.egress | nindent 6 }} + {{- else }} + - toEntities: + - kube-apiserver + {{- end }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml new file mode 100644 index 0000000..a0c1048 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrole.yaml @@ -0,0 +1,22 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +rules: + - apiGroups: + - admissionregistration.k8s.io + resources: + - validatingwebhookconfigurations + - mutatingwebhookconfigurations + verbs: + - get + - update + - patch +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml new file mode 100644 index 0000000..4cf1335 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/clusterrolebinding.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.fullname" . }}-admission +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml new file mode 100644 index 0000000..9865786 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-createSecret.yaml @@ -0,0 +1,74 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +{{- with .Values.prometheusOperator.admissionWebhooks.annotations }} +{{ toYaml . | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + ttlSecondsAfterFinished: {{ .Values.prometheusOperator.admissionWebhooks.patch.ttlSecondsAfterFinished }} + template: + metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create +{{- with .Values.prometheusOperator.admissionWebhooks.patch.podAnnotations }} + annotations: +{{ toYaml . | indent 8 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 8 }} + spec: + {{- if .Values.prometheusOperator.admissionWebhooks.patch.priorityClassName }} + priorityClassName: {{ .Values.prometheusOperator.admissionWebhooks.patch.priorityClassName }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "kube-prometheus-stack.imagePullSecrets" . | indent 8 }} + {{- end }} + containers: + - name: create + {{- $registry := .Values.global.imageRegistry | default .Values.prometheusOperator.admissionWebhooks.patch.image.registry -}} + {{- if .Values.prometheusOperator.admissionWebhooks.patch.image.sha }} + image: {{ $registry }}/{{ .Values.prometheusOperator.admissionWebhooks.patch.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.tag }}@sha256:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.sha }} + {{- else }} + image: {{ $registry }}/{{ .Values.prometheusOperator.admissionWebhooks.patch.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.tag }} + {{- end }} + imagePullPolicy: {{ .Values.prometheusOperator.admissionWebhooks.patch.image.pullPolicy }} + args: + - create + - --host={{- include "kube-prometheus-stack.operator.admission-webhook.dnsNames" . | replace "\n" "," }} + - --namespace={{ template "kube-prometheus-stack.namespace" . }} + - --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission + {{- with .Values.prometheusOperator.admissionWebhooks.createSecretJob }} + securityContext: + {{ toYaml .securityContext | nindent 12 }} + {{- end }} + resources: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }} + restartPolicy: OnFailure + serviceAccountName: {{ template "kube-prometheus-stack.fullname" . }}-admission + {{- with .Values.prometheusOperator.admissionWebhooks.patch.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.patch.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.patch.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.patch.securityContext }} + securityContext: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.securityContext | indent 8 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml new file mode 100644 index 0000000..9a2bbbe --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/job-patchWebhook.yaml @@ -0,0 +1,83 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +{{- $failurePolicy := .Values.prometheusOperator.admissionWebhooks.failurePolicy }} +{{- if eq $failurePolicy "IgnoreOnInstallOnly" }} + {{- if .Release.IsInstall }} + {{- $failurePolicy = "Ignore" }} + {{- else }} + {{- $failurePolicy = "Fail" }} + {{- end }} +{{- end }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-patch + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +{{- with .Values.prometheusOperator.admissionWebhooks.patch.annotations }} +{{ toYaml . | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + ttlSecondsAfterFinished: {{ .Values.prometheusOperator.admissionWebhooks.patch.ttlSecondsAfterFinished }} + template: + metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-patch +{{- with .Values.prometheusOperator.admissionWebhooks.patch.podAnnotations }} + annotations: +{{ toYaml . | indent 8 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 8 }} + spec: + {{- if .Values.prometheusOperator.admissionWebhooks.patch.priorityClassName }} + priorityClassName: {{ .Values.prometheusOperator.admissionWebhooks.patch.priorityClassName }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "kube-prometheus-stack.imagePullSecrets" . | indent 8 }} + {{- end }} + containers: + - name: patch + {{- $registry := .Values.global.imageRegistry | default .Values.prometheusOperator.admissionWebhooks.patch.image.registry -}} + {{- if .Values.prometheusOperator.admissionWebhooks.patch.image.sha }} + image: {{ $registry }}/{{ .Values.prometheusOperator.admissionWebhooks.patch.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.tag }}@sha256:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.sha }} + {{- else }} + image: {{ $registry }}/{{ .Values.prometheusOperator.admissionWebhooks.patch.image.repository }}:{{ .Values.prometheusOperator.admissionWebhooks.patch.image.tag }} + {{- end }} + imagePullPolicy: {{ .Values.prometheusOperator.admissionWebhooks.patch.image.pullPolicy }} + args: + - patch + - --webhook-name={{ template "kube-prometheus-stack.fullname" . }}-admission + - --namespace={{ template "kube-prometheus-stack.namespace" . }} + - --secret-name={{ template "kube-prometheus-stack.fullname" . }}-admission + - --patch-failure-policy={{ $failurePolicy }} + {{- with .Values.prometheusOperator.admissionWebhooks.patchWebhookJob }} + securityContext: + {{ toYaml .securityContext | nindent 12 }} + {{- end }} + resources: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.resources | indent 12 }} + restartPolicy: OnFailure + serviceAccountName: {{ template "kube-prometheus-stack.fullname" . }}-admission + {{- with .Values.prometheusOperator.admissionWebhooks.patch.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.patch.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.patch.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} +{{- if .Values.prometheusOperator.admissionWebhooks.patch.securityContext }} + securityContext: +{{ toYaml .Values.prometheusOperator.admissionWebhooks.patch.securityContext | indent 8 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-createSecret.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-createSecret.yaml new file mode 100644 index 0000000..864deb5 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-createSecret.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "kubernetes") }} +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-create + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + ## Ensure this is run before the job + "helm.sh/hook-weight": "-5" + {{- with .Values.prometheusOperator.admissionWebhooks.annotations }} + {{ toYaml . | nindent 4 }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + podSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-create + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 6 }} + {{- end }} + egress: + - {} + policyTypes: + - Egress +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-patchWebhook.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-patchWebhook.yaml new file mode 100644 index 0000000..076c467 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/networkpolicy-patchWebhook.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "kubernetes") }} +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission-patch + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + ## Ensure this is run before the job + "helm.sh/hook-weight": "-5" + {{- with .Values.prometheusOperator.admissionWebhooks.patch.annotations }} + {{ toYaml . | nindent 4 }} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +spec: + podSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission-patch + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 6 }} + {{- end }} + egress: + - {} + policyTypes: + - Egress +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml new file mode 100644 index 0000000..f15abf4 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/role.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +rules: + - apiGroups: + - "" + resources: + - secrets + verbs: + - get + - create +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml new file mode 100644 index 0000000..30bde92 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/rolebinding.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.global.rbac.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "kube-prometheus-stack.fullname" . }}-admission +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml new file mode 100644 index 0000000..b89d6a6 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/job-patch/serviceaccount.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled .Values.prometheusOperator.admissionWebhooks.patch.enabled .Values.prometheusOperator.admissionWebhooks.patch.serviceAccount.create (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} + {{- with .Values.prometheusOperator.admissionWebhooks.patch.serviceAccount.annotations }} + annotations: {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.prometheusOperator.admissionWebhooks.patch.serviceAccount.automountServiceAccountToken }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . | trim | indent 2 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml new file mode 100644 index 0000000..1661c9a --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/mutatingWebhookConfiguration.yaml @@ -0,0 +1,85 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled }} +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + annotations: + {{- include "kube-prometheus-stack.prometheus-operator-webhook.annotations" $ | trim |nindent 4 }} + {{- with .Values.prometheusOperator.admissionWebhooks.mutatingWebhookConfiguration.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +webhooks: + - name: prometheusrulemutate.monitoring.coreos.com + {{- if eq .Values.prometheusOperator.admissionWebhooks.failurePolicy "IgnoreOnInstallOnly" }} + failurePolicy: {{ .Release.IsInstall | ternary "Ignore" "Fail" }} + {{- else if .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + failurePolicy: {{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + {{- else if .Values.prometheusOperator.admissionWebhooks.patch.enabled }} + failurePolicy: Ignore + {{- else }} + failurePolicy: Fail + {{- end }} + rules: + - apiGroups: + - monitoring.coreos.com + apiVersions: + - "*" + resources: + - prometheusrules + operations: + - CREATE + - UPDATE + clientConfig: + service: + namespace: {{ template "kube-prometheus-stack.namespace" . }} + name: {{ template "kube-prometheus-stack.operator.fullname" $ }}{{ if .Values.prometheusOperator.admissionWebhooks.deployment.enabled }}-webhook{{ end }} + path: /admission-prometheusrules/mutate + {{- if and .Values.prometheusOperator.admissionWebhooks.caBundle (not .Values.prometheusOperator.admissionWebhooks.patch.enabled) (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} + caBundle: {{ .Values.prometheusOperator.admissionWebhooks.caBundle }} + {{- end }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.timeoutSeconds }} + admissionReviewVersions: ["v1", "v1beta1"] + sideEffects: None + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector }} + namespaceSelector: + {{- with (omit .Values.prometheusOperator.admissionWebhooks.namespaceSelector "matchExpressions") }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions }} + matchExpressions: + {{- with (.Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions) }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- if .Values.prometheusOperator.denyNamespaces }} + - key: kubernetes.io/metadata.name + operator: NotIn + values: + {{- range $namespace := mustUniq .Values.prometheusOperator.denyNamespaces }} + - {{ $namespace }} + {{- end }} + {{- else if and .Values.prometheusOperator.namespaces .Values.prometheusOperator.namespaces.additional }} + - key: kubernetes.io/metadata.name + operator: In + values: + {{- if and .Values.prometheusOperator.namespaces.releaseNamespace (default .Values.prometheusOperator.namespaces.releaseNamespace true) }} + {{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} + - {{ $namespace }} + {{- end }} + {{- range $namespace := mustUniq .Values.prometheusOperator.namespaces.additional }} + - {{ $namespace }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.objectSelector }} + objectSelector: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.matchConditions }} + matchConditions: + {{- toYaml . | nindent 6 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml new file mode 100644 index 0000000..62207a8 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/admission-webhooks/validatingWebhookConfiguration.yaml @@ -0,0 +1,155 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.admissionWebhooks.enabled }} +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingWebhookConfiguration +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + annotations: + {{- include "kube-prometheus-stack.prometheus-operator-webhook.annotations" $ | trim | nindent 4 }} + {{- with .Values.prometheusOperator.admissionWebhooks.validatingWebhookConfiguration.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-admission + {{- include "kube-prometheus-stack.prometheus-operator-webhook.labels" $ | nindent 4 }} +webhooks: + - name: prometheusrulevalidate.monitoring.coreos.com + {{- if eq .Values.prometheusOperator.admissionWebhooks.failurePolicy "IgnoreOnInstallOnly" }} + failurePolicy: {{ .Release.IsInstall | ternary "Ignore" "Fail" }} + {{- else if .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + failurePolicy: {{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + {{- else if .Values.prometheusOperator.admissionWebhooks.patch.enabled }} + failurePolicy: Ignore + {{- else }} + failurePolicy: Fail + {{- end }} + rules: + - apiGroups: + - monitoring.coreos.com + apiVersions: + - "*" + resources: + - prometheusrules + operations: + - CREATE + - UPDATE + clientConfig: + service: + namespace: {{ template "kube-prometheus-stack.namespace" . }} + name: {{ template "kube-prometheus-stack.operator.fullname" $ }}{{ if .Values.prometheusOperator.admissionWebhooks.deployment.enabled }}-webhook{{ end }} + path: /admission-prometheusrules/validate + {{- if and .Values.prometheusOperator.admissionWebhooks.caBundle (not .Values.prometheusOperator.admissionWebhooks.patch.enabled) (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} + caBundle: {{ .Values.prometheusOperator.admissionWebhooks.caBundle }} + {{- end }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.timeoutSeconds }} + admissionReviewVersions: ["v1", "v1beta1"] + sideEffects: None + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector }} + namespaceSelector: + {{- with (omit .Values.prometheusOperator.admissionWebhooks.namespaceSelector "matchExpressions") }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions }} + matchExpressions: + {{- with (.Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions) }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.prometheusOperator.denyNamespaces }} + - key: kubernetes.io/metadata.name + operator: NotIn + values: + {{- range $namespace := mustUniq .Values.prometheusOperator.denyNamespaces }} + - {{ $namespace }} + {{- end }} + {{- else if and .Values.prometheusOperator.namespaces .Values.prometheusOperator.namespaces.additional }} + - key: kubernetes.io/metadata.name + operator: In + values: + {{- if and .Values.prometheusOperator.namespaces.releaseNamespace (default .Values.prometheusOperator.namespaces.releaseNamespace true) }} + {{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} + - {{ $namespace }} + {{- end }} + {{- range $namespace := mustUniq .Values.prometheusOperator.namespaces.additional }} + - {{ $namespace }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.objectSelector }} + objectSelector: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.matchConditions }} + matchConditions: + {{- toYaml . | nindent 6 }} + {{- end }} + - name: alertmanagerconfigsvalidate.monitoring.coreos.com + {{- if eq .Values.prometheusOperator.admissionWebhooks.failurePolicy "IgnoreOnInstallOnly" }} + failurePolicy: {{ .Release.IsInstall | ternary "Ignore" "Fail" }} + {{- else if .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + failurePolicy: {{ .Values.prometheusOperator.admissionWebhooks.failurePolicy }} + {{- else if .Values.prometheusOperator.admissionWebhooks.patch.enabled }} + failurePolicy: Ignore + {{- else }} + failurePolicy: Fail + {{- end }} + rules: + - apiGroups: + - monitoring.coreos.com + apiVersions: + - v1alpha1 + resources: + - alertmanagerconfigs + operations: + - CREATE + - UPDATE + clientConfig: + service: + namespace: {{ template "kube-prometheus-stack.namespace" . }} + name: {{ template "kube-prometheus-stack.operator.fullname" $ }}{{ if .Values.prometheusOperator.admissionWebhooks.deployment.enabled }}-webhook{{ end }} + path: /admission-alertmanagerconfigs/validate + {{- if and .Values.prometheusOperator.admissionWebhooks.caBundle (not .Values.prometheusOperator.admissionWebhooks.patch.enabled) (not .Values.prometheusOperator.admissionWebhooks.certManager.enabled) }} + caBundle: {{ .Values.prometheusOperator.admissionWebhooks.caBundle }} + {{- end }} + timeoutSeconds: {{ .Values.prometheusOperator.admissionWebhooks.timeoutSeconds }} + admissionReviewVersions: ["v1", "v1beta1"] + sideEffects: None + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector }} + namespaceSelector: + {{- with (omit .Values.prometheusOperator.admissionWebhooks.namespaceSelector "matchExpressions") }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- if or .Values.prometheusOperator.denyNamespaces .Values.prometheusOperator.namespaces .Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions }} + matchExpressions: + {{- with (.Values.prometheusOperator.admissionWebhooks.namespaceSelector.matchExpressions) }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.prometheusOperator.denyNamespaces }} + - key: kubernetes.io/metadata.name + operator: NotIn + values: + {{- range $namespace := mustUniq .Values.prometheusOperator.denyNamespaces }} + - {{ $namespace }} + {{- end }} + {{- else if and .Values.prometheusOperator.namespaces .Values.prometheusOperator.namespaces.additional }} + - key: kubernetes.io/metadata.name + operator: In + values: + {{- if and .Values.prometheusOperator.namespaces.releaseNamespace (default .Values.prometheusOperator.namespaces.releaseNamespace true) }} + {{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} + - {{ $namespace }} + {{- end }} + {{- range $namespace := mustUniq .Values.prometheusOperator.namespaces.additional }} + - {{ $namespace }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.objectSelector }} + objectSelector: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.prometheusOperator.admissionWebhooks.matchConditions }} + matchConditions: + {{- toYaml . | nindent 6 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/aggregate-clusterroles.yaml b/kube-prometheus-stack/templates/prometheus-operator/aggregate-clusterroles.yaml new file mode 100644 index 0000000..3d8af83 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/aggregate-clusterroles.yaml @@ -0,0 +1,29 @@ +{{/* This file is based on https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/platform/rbac-crd.md */}} +{{- if and .Values.global.rbac.create .Values.global.rbac.createAggregateClusterRoles }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-crd-view + labels: + rbac.authorization.k8s.io/aggregate-to-admin: "true" + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-view: "true" + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +rules: +- apiGroups: ["monitoring.coreos.com"] + resources: ["alertmanagers", "alertmanagerconfigs", "podmonitors", "probes", "prometheuses", "prometheusagents", "prometheusrules", "scrapeconfigs", "servicemonitors"] + verbs: ["get", "list", "watch"] +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-crd-edit + labels: + rbac.authorization.k8s.io/aggregate-to-edit: "true" + rbac.authorization.k8s.io/aggregate-to-admin: "true" + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +rules: +- apiGroups: ["monitoring.coreos.com"] + resources: ["alertmanagers", "alertmanagerconfigs", "podmonitors", "probes", "prometheuses", "prometheusagents", "prometheusrules", "scrapeconfigs", "servicemonitors"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml b/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml new file mode 100644 index 0000000..e3c75ca --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/certmanager.yaml @@ -0,0 +1,61 @@ +{{- if .Values.prometheusOperator.admissionWebhooks.certManager.enabled -}} +{{- if not .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef -}} +# Create a selfsigned Issuer, in order to create a root CA certificate for +# signing webhook serving certificates +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + selfSigned: {} +--- +# Generate a CA Certificate used to sign certificates for the webhook +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-root-cert + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert + duration: {{ .Values.prometheusOperator.admissionWebhooks.certManager.rootCert.duration | default "43800h0m0s" | quote }} + {{- with .Values.prometheusOperator.admissionWebhooks.certManager.rootCert.revisionHistoryLimit }} + revisionHistoryLimit: {{ . }} + {{- end }} + issuerRef: + name: {{ template "kube-prometheus-stack.fullname" . }}-self-signed-issuer + commonName: "ca.webhook.kube-prometheus-stack" + isCA: true +--- +# Create an Issuer that uses the above generated CA certificate to issue certs +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-root-issuer + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + ca: + secretName: {{ template "kube-prometheus-stack.fullname" . }}-root-cert +{{- end }} +--- +# generate a server certificate for the apiservices to use +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + namespace: {{ template "kube-prometheus-stack.namespace" . }} +spec: + secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission + duration: {{ .Values.prometheusOperator.admissionWebhooks.certManager.admissionCert.duration | default "8760h0m0s" | quote }} + {{- with .Values.prometheusOperator.admissionWebhooks.certManager.admissionCert.revisionHistoryLimit }} + revisionHistoryLimit: {{ . }} + {{- end }} + issuerRef: + {{- if .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef }} + {{- toYaml .Values.prometheusOperator.admissionWebhooks.certManager.issuerRef | nindent 4 }} + {{- else }} + name: {{ template "kube-prometheus-stack.fullname" . }}-root-issuer + {{- end }} + dnsNames: + {{- include "kube-prometheus-stack.operator.admission-webhook.dnsNames" . | splitList "\n" | toYaml | nindent 4 }} +{{- end -}} diff --git a/kube-prometheus-stack/templates/prometheus-operator/ciliumnetworkpolicy.yaml b/kube-prometheus-stack/templates/prometheus-operator/ciliumnetworkpolicy.yaml new file mode 100644 index 0000000..07e2e99 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/ciliumnetworkpolicy.yaml @@ -0,0 +1,40 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "cilium") }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +spec: + endpointSelector: + matchLabels: + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + app: {{ template "kube-prometheus-stack.name" . }}-operator + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- else }} + {{- include "kube-prometheus-stack.prometheus-operator.labels" $ | nindent 6 }} + {{- end }} + egress: + {{- if and .Values.prometheusOperator.networkPolicy.cilium .Values.prometheusOperator.networkPolicy.cilium.egress }} + {{ toYaml .Values.prometheusOperator.networkPolicy.cilium.egress | nindent 6 }} + {{- else }} + - toEntities: + - kube-apiserver + {{- end }} + ingress: + - toPorts: + - ports: + {{- if .Values.prometheusOperator.tls.enabled }} + - port: {{ .Values.prometheusOperator.tls.internalPort | quote }} + {{- else }} + - port: "8080" + {{- end }} + protocol: "TCP" + {{- if not .Values.prometheusOperator.tls.enabled }} + rules: + http: + - method: "GET" + path: "/metrics" + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/clusterrole.yaml b/kube-prometheus-stack/templates/prometheus-operator/clusterrole.yaml new file mode 100644 index 0000000..c02aed6 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/clusterrole.yaml @@ -0,0 +1,117 @@ +{{/* This file is based on https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/platform/rbac.md */}} +{{- if and .Values.prometheusOperator.enabled .Values.global.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +rules: +- apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers + - alertmanagers/finalizers + - alertmanagers/status + - alertmanagerconfigs + - prometheuses + - prometheuses/finalizers + - prometheuses/status + - prometheusagents + - prometheusagents/finalizers + - prometheusagents/status + - thanosrulers + - thanosrulers/finalizers + - thanosrulers/status + - scrapeconfigs + - scrapeconfigs/status + - servicemonitors + - servicemonitors/status + - podmonitors + - podmonitors/status + - probes + - probes/status + - prometheusrules + - prometheusrules/status + verbs: + - '*' +- apiGroups: + - apps + resources: + - statefulsets + verbs: + - '*' +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - '*' +- apiGroups: + - "" + resources: + - pods + verbs: + - list + - delete +- apiGroups: + - "" + resources: + - services + - services/finalizers + - endpoints + verbs: + - get + - create + - update + - delete +- apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch +- apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - list + - watch +- apiGroups: + - "" + - events.k8s.io + resources: + - events + verbs: + - patch + - create +- apiGroups: + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch +- apiGroups: + - storage.k8s.io + resources: + - storageclasses + verbs: + - get +- apiGroups: + - discovery.k8s.io + resources: + - endpointslices + verbs: + - get + - create + - list + - watch + - update + - delete +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/clusterrolebinding.yaml b/kube-prometheus-stack/templates/prometheus-operator/clusterrolebinding.yaml new file mode 100644 index 0000000..ad9e3ef --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.prometheusOperator.enabled .Values.global.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.operator.fullname" . }} +subjects: +- kind: ServiceAccount + name: {{ template "kube-prometheus-stack.operator.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/deployment.yaml b/kube-prometheus-stack/templates/prometheus-operator/deployment.yaml new file mode 100644 index 0000000..0125a88 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/deployment.yaml @@ -0,0 +1,249 @@ +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +{{- $defaultKubeletSvcName := printf "%s-kubelet" (include "kube-prometheus-stack.fullname" .) }} +{{- if .Values.prometheusOperator.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +{{- if .Values.prometheusOperator.labels }} +{{ toYaml .Values.prometheusOperator.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.annotations }} + annotations: +{{ toYaml .Values.prometheusOperator.annotations | indent 4 }} +{{- end }} +spec: + replicas: 1 + revisionHistoryLimit: {{ .Values.prometheusOperator.revisionHistoryLimit }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} + {{- with .Values.prometheusOperator.strategy }} + strategy: + {{- toYaml . | nindent 4 }} + {{- end }} + template: + metadata: + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 8 }} +{{- if .Values.prometheusOperator.podLabels }} +{{ toYaml .Values.prometheusOperator.podLabels | indent 8 }} +{{- end }} +{{- if .Values.prometheusOperator.podAnnotations }} + annotations: +{{ toYaml .Values.prometheusOperator.podAnnotations | indent 8 }} +{{- end }} + spec: + {{- if .Values.prometheusOperator.priorityClassName }} + priorityClassName: {{ .Values.prometheusOperator.priorityClassName }} + {{- end }} + {{- if .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "kube-prometheus-stack.imagePullSecrets" . | indent 8 }} + {{- end }} + containers: + - name: {{ template "kube-prometheus-stack.name" . }} + {{- $configReloaderRegistry := .Values.global.imageRegistry | default .Values.prometheusOperator.prometheusConfigReloader.image.registry -}} + {{- $operatorRegistry := .Values.global.imageRegistry | default .Values.prometheusOperator.image.registry -}} + {{- $thanosRegistry := .Values.global.imageRegistry | default .Values.prometheusOperator.thanosImage.registry -}} + {{- if .Values.prometheusOperator.image.sha }} + image: "{{ $operatorRegistry }}/{{ .Values.prometheusOperator.image.repository }}:{{ .Values.prometheusOperator.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.prometheusOperator.image.sha }}" + {{- else }} + image: "{{ $operatorRegistry }}/{{ .Values.prometheusOperator.image.repository }}:{{ .Values.prometheusOperator.image.tag | default .Chart.AppVersion }}" + {{- end }} + imagePullPolicy: "{{ .Values.prometheusOperator.image.pullPolicy }}" + args: + {{- if .Values.prometheusOperator.kubeletService.enabled }} + - --kubelet-service={{ .Values.prometheusOperator.kubeletService.namespace }}/{{ default $defaultKubeletSvcName .Values.prometheusOperator.kubeletService.name }} + {{- if .Values.prometheusOperator.kubeletService.selector }} + - --kubelet-selector={{ .Values.prometheusOperator.kubeletService.selector }} + {{- end }} + {{- end }} + - --kubelet-endpoints={{ .Values.prometheusOperator.kubeletEndpointsEnabled }} + - --kubelet-endpointslice={{ .Values.prometheusOperator.kubeletEndpointSliceEnabled }} + {{- if .Values.prometheusOperator.logFormat }} + - --log-format={{ .Values.prometheusOperator.logFormat }} + {{- end }} + {{- if .Values.prometheusOperator.logLevel }} + - --log-level={{ .Values.prometheusOperator.logLevel }} + {{- end }} + {{- if .Values.prometheusOperator.denyNamespaces }} + - --deny-namespaces={{ tpl (.Values.prometheusOperator.denyNamespaces | join ",") $ }} + {{- end }} + {{- with $.Values.prometheusOperator.namespaces }} + {{- $namespaces := list }} + {{- if .releaseNamespace }} + {{- $namespaces = append $namespaces $namespace }} + {{- end }} + {{- if .additional }} + {{- range $ns := .additional }} + {{- $namespaces = append $namespaces (tpl $ns $) }} + {{- end }} + {{- end }} + - --namespaces={{ $namespaces | mustUniq | join "," }} + {{- end }} + - --localhost=127.0.0.1 + {{- if .Values.prometheusOperator.prometheusDefaultBaseImage }} + - --prometheus-default-base-image={{ .Values.global.imageRegistry | default .Values.prometheusOperator.prometheusDefaultBaseImageRegistry }}/{{ .Values.prometheusOperator.prometheusDefaultBaseImage }} + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerDefaultBaseImage }} + - --alertmanager-default-base-image={{ .Values.global.imageRegistry | default .Values.prometheusOperator.alertmanagerDefaultBaseImageRegistry }}/{{ .Values.prometheusOperator.alertmanagerDefaultBaseImage }} + {{- end }} + {{- if .Values.prometheusOperator.prometheusConfigReloader.image.sha }} + - --prometheus-config-reloader={{ $configReloaderRegistry }}/{{ .Values.prometheusOperator.prometheusConfigReloader.image.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloader.image.tag | default .Chart.AppVersion }}@sha256:{{ .Values.prometheusOperator.prometheusConfigReloader.image.sha }} + {{- else }} + - --prometheus-config-reloader={{ $configReloaderRegistry }}/{{ .Values.prometheusOperator.prometheusConfigReloader.image.repository }}:{{ .Values.prometheusOperator.prometheusConfigReloader.image.tag | default .Chart.AppVersion }} + {{- end }} + - --config-reloader-cpu-request={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).requests).cpu) | default 0 }} + - --config-reloader-cpu-limit={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).limits).cpu) | default 0 }} + - --config-reloader-memory-request={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).requests).memory) | default 0 }} + - --config-reloader-memory-limit={{ (((.Values.prometheusOperator.prometheusConfigReloader.resources).limits).memory) | default 0 }} + {{- if .Values.prometheusOperator.prometheusConfigReloader.enableProbe }} + - --enable-config-reloader-probes=true + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerInstanceNamespaces }} + - --alertmanager-instance-namespaces={{ .Values.prometheusOperator.alertmanagerInstanceNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerInstanceSelector }} + - --alertmanager-instance-selector={{ .Values.prometheusOperator.alertmanagerInstanceSelector }} + {{- end }} + {{- if .Values.prometheusOperator.alertmanagerConfigNamespaces }} + - --alertmanager-config-namespaces={{ .Values.prometheusOperator.alertmanagerConfigNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.prometheusInstanceNamespaces }} + - --prometheus-instance-namespaces={{ .Values.prometheusOperator.prometheusInstanceNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.prometheusInstanceSelector }} + - --prometheus-instance-selector={{ .Values.prometheusOperator.prometheusInstanceSelector }} + {{- end }} + {{- if .Values.prometheusOperator.thanosImage.sha }} + - --thanos-default-base-image={{ $thanosRegistry }}/{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }}@sha256:{{ .Values.prometheusOperator.thanosImage.sha }} + {{- else }} + - --thanos-default-base-image={{ $thanosRegistry }}/{{ .Values.prometheusOperator.thanosImage.repository }}:{{ .Values.prometheusOperator.thanosImage.tag }} + {{- end }} + {{- if .Values.prometheusOperator.thanosRulerInstanceNamespaces }} + - --thanos-ruler-instance-namespaces={{ .Values.prometheusOperator.thanosRulerInstanceNamespaces | join "," }} + {{- end }} + {{- if .Values.prometheusOperator.thanosRulerInstanceSelector }} + - --thanos-ruler-instance-selector={{ .Values.prometheusOperator.thanosRulerInstanceSelector }} + {{- end }} + {{- if .Values.prometheusOperator.secretFieldSelector }} + - --secret-field-selector={{ tpl (.Values.prometheusOperator.secretFieldSelector) $ }} + {{- end }} + {{- if .Values.prometheusOperator.clusterDomain }} + - --cluster-domain={{ .Values.prometheusOperator.clusterDomain }} + {{- end }} + {{- if .Values.prometheusOperator.tls.enabled }} + - --web.enable-tls=true + - --web.cert-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.crt{{ else }}cert{{ end }} + - --web.key-file=/cert/{{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}tls.key{{ else }}key{{ end }} + - --web.listen-address=:{{ .Values.prometheusOperator.tls.internalPort }} + - --web.tls-min-version={{ .Values.prometheusOperator.tls.tlsMinVersion }} + {{- else }} + - --web.enable-tls=false + - --web.listen-address=:8080 + {{- end }} + {{- with .Values.prometheusOperator.extraArgs }} + {{- tpl (toYaml .) $ | nindent 12 }} + {{- end }} + {{- with .Values.prometheusOperator.lifecycle }} + lifecycle: {{ toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.prometheusOperator.tls.enabled }} + ports: + - containerPort: {{ .Values.prometheusOperator.tls.internalPort }} + name: https + {{- else }} + ports: + - containerPort: 8080 + name: http + {{- end }} + env: + {{- range $key, $value := .Values.prometheusOperator.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: +{{ toYaml .Values.prometheusOperator.resources | indent 12 }} + securityContext: +{{ toYaml .Values.prometheusOperator.containerSecurityContext | indent 12 }} + volumeMounts: + {{- if .Values.prometheusOperator.tls.enabled }} + - name: tls-secret + mountPath: /cert + readOnly: true + {{- end }} + {{- with .Values.prometheusOperator.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.prometheusOperator.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: /healthz + port: {{ .Values.prometheusOperator.tls.enabled | ternary "https" "http" }} + scheme: {{ .Values.prometheusOperator.tls.enabled | ternary "HTTPS" "HTTP" }} + initialDelaySeconds: {{ .Values.prometheusOperator.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.prometheusOperator.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.prometheusOperator.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.prometheusOperator.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.prometheusOperator.readinessProbe.failureThreshold }} + {{- end }} + {{- if .Values.prometheusOperator.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: /healthz + port: {{ .Values.prometheusOperator.tls.enabled | ternary "https" "http" }} + scheme: {{ .Values.prometheusOperator.tls.enabled | ternary "HTTPS" "HTTP" }} + initialDelaySeconds: {{ .Values.prometheusOperator.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.prometheusOperator.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.prometheusOperator.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.prometheusOperator.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.prometheusOperator.livenessProbe.failureThreshold }} + {{- end }} + volumes: + {{- if .Values.prometheusOperator.tls.enabled }} + - name: tls-secret + secret: + defaultMode: 420 + secretName: {{ template "kube-prometheus-stack.fullname" . }}-admission + {{- end }} + {{- with .Values.prometheusOperator.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.dnsConfig }} + dnsConfig: +{{ toYaml . | indent 8 }} + {{- end }} +{{- if kindIs "bool" .Values.prometheusOperator.hostUsers }} + hostUsers: {{ .Values.prometheusOperator.hostUsers }} +{{- end }} +{{- if .Values.prometheusOperator.securityContext }} + securityContext: +{{ toYaml .Values.prometheusOperator.securityContext | indent 8 }} +{{- end }} + serviceAccountName: {{ template "kube-prometheus-stack.operator.serviceAccountName" . }} + automountServiceAccountToken: {{ .Values.prometheusOperator.automountServiceAccountToken }} +{{- if .Values.prometheusOperator.hostNetwork }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet +{{- end }} + {{- with .Values.prometheusOperator.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.affinity }} + affinity: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.prometheusOperator.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ . }} + {{- end }} + {{- with .Values.prometheusOperator.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/networkpolicy.yaml b/kube-prometheus-stack/templates/prometheus-operator/networkpolicy.yaml new file mode 100644 index 0000000..cf16a51 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/networkpolicy.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.prometheusOperator.networkPolicy.enabled (eq .Values.prometheusOperator.networkPolicy.flavor "kubernetes") }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +spec: + egress: + - {} + ingress: + - ports: + {{- if .Values.prometheusOperator.tls.enabled }} + - port: {{ .Values.prometheusOperator.tls.internalPort }} + {{- else }} + - port: 8080 + {{- end }} + policyTypes: + - Egress + - Ingress + podSelector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} + {{- if .Values.prometheusOperator.networkPolicy.matchLabels }} + {{ toYaml .Values.prometheusOperator.networkPolicy.matchLabels | nindent 6 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/pdb.yaml b/kube-prometheus-stack/templates/prometheus-operator/pdb.yaml new file mode 100644 index 0000000..4848ee5 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/pdb.yaml @@ -0,0 +1,15 @@ +{{- if .Values.prometheusOperator.podDisruptionBudget.enabled -}} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} +{{- toYaml (omit .Values.prometheusOperator.podDisruptionBudget "enabled") | nindent 2 }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/service.yaml b/kube-prometheus-stack/templates/prometheus-operator/service.yaml new file mode 100644 index 0000000..72e0788 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/service.yaml @@ -0,0 +1,61 @@ +{{- if .Values.prometheusOperator.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +{{- if .Values.prometheusOperator.service.labels }} +{{ toYaml .Values.prometheusOperator.service.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.service.annotations }} + annotations: +{{ toYaml .Values.prometheusOperator.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.prometheusOperator.service.clusterIP }} + clusterIP: {{ .Values.prometheusOperator.service.clusterIP }} +{{- end }} +{{- if .Values.prometheusOperator.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.prometheusOperator.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.prometheusOperator.service.ipDualStack.ipFamilyPolicy }} +{{- end }} +{{- if .Values.prometheusOperator.service.externalIPs }} + externalIPs: +{{ toYaml .Values.prometheusOperator.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.prometheusOperator.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.prometheusOperator.service.loadBalancerIP }} +{{- end }} +{{- if .Values.prometheusOperator.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.prometheusOperator.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.prometheusOperator.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.prometheusOperator.service.externalTrafficPolicy }} +{{- end }} + ports: + {{- if not .Values.prometheusOperator.tls.enabled }} + - name: http + {{- if eq .Values.prometheusOperator.service.type "NodePort" }} + nodePort: {{ .Values.prometheusOperator.service.nodePort }} + {{- end }} + port: 8080 + targetPort: http + {{- end }} + {{- if .Values.prometheusOperator.tls.enabled }} + - name: https + {{- if eq .Values.prometheusOperator.service.type "NodePort"}} + nodePort: {{ .Values.prometheusOperator.service.nodePortTls }} + {{- end }} + port: 443 + targetPort: https + {{- end }} + selector: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} + type: "{{ .Values.prometheusOperator.service.type }}" +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/serviceaccount.yaml b/kube-prometheus-stack/templates/prometheus-operator/serviceaccount.yaml new file mode 100644 index 0000000..0b09798 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.operator.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} + {{- with .Values.prometheusOperator.serviceAccount.annotations }} + annotations: {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.prometheusOperator.serviceAccount.automountServiceAccountToken }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . | trim | indent 2 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml b/kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml new file mode 100644 index 0000000..d5ad94a --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/servicemonitor.yaml @@ -0,0 +1,47 @@ +{{- if and .Values.prometheusOperator.enabled .Values.prometheusOperator.serviceMonitor.selfMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +{{- with .Values.prometheusOperator.serviceMonitor.additionalLabels }} +{{ toYaml . | indent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.prometheusOperator.serviceMonitor | nindent 2 }} + endpoints: + {{- if .Values.prometheusOperator.tls.enabled }} + - port: https + scheme: https + tlsConfig: + serverName: {{ template "kube-prometheus-stack.operator.fullname" . }} + ca: + secret: + name: {{ template "kube-prometheus-stack.fullname" . }}-admission + key: {{ if .Values.prometheusOperator.admissionWebhooks.certManager.enabled }}ca.crt{{ else }}ca{{ end }} + optional: false + {{- else }} + - port: http + {{- end }} + honorLabels: true + {{- if .Values.prometheusOperator.serviceMonitor.interval }} + interval: {{ .Values.prometheusOperator.serviceMonitor.interval }} + {{- end }} +{{- if .Values.prometheusOperator.serviceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.prometheusOperator.serviceMonitor.metricRelabelings | indent 6) . }} +{{- end }} +{{- if .Values.prometheusOperator.serviceMonitor.relabelings }} + relabelings: +{{ toYaml .Values.prometheusOperator.serviceMonitor.relabelings | indent 6 }} +{{- end }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-operator + release: {{ $.Release.Name | quote }} + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus-operator/verticalpodautoscaler.yaml b/kube-prometheus-stack/templates/prometheus-operator/verticalpodautoscaler.yaml new file mode 100644 index 0000000..f225d16 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus-operator/verticalpodautoscaler.yaml @@ -0,0 +1,40 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.prometheusOperator.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + {{- include "kube-prometheus-stack.prometheus-operator.labels" . | nindent 4 }} +spec: + {{- with .Values.prometheusOperator.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: {{ template "kube-prometheus-stack.name" . }} + {{- with .Values.prometheusOperator.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.prometheusOperator.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ .Values.prometheusOperator.verticalPodAutoscaler.controlledValues }} + {{- end }} + {{- if .Values.prometheusOperator.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{- toYaml .Values.prometheusOperator.verticalPodAutoscaler.maxAllowed | nindent 8 }} + {{- end }} + {{- if .Values.prometheusOperator.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{- toYaml .Values.prometheusOperator.verticalPodAutoscaler.minAllowed | nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ template "kube-prometheus-stack.operator.fullname" . }} + {{- with .Values.prometheusOperator.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/_rules.tpl b/kube-prometheus-stack/templates/prometheus/_rules.tpl new file mode 100644 index 0000000..8283308 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/_rules.tpl @@ -0,0 +1,48 @@ +{{- /* +Generated file. Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- define "rules.names" }} +rules: + - "alertmanager.rules" + - "config-reloaders" + - "etcd" + - "general.rules" + - "k8s.rules.container-cpu-limits" + - "k8s.rules.container-cpu-requests" + - "k8s.rules.container-cpu-usage-seconds-total" + - "k8s.rules.container-memory-cache" + - "k8s.rules.container-memory-limits" + - "k8s.rules.container-memory-requests" + - "k8s.rules.container-memory-rss" + - "k8s.rules.container-memory-swap" + - "k8s.rules.container-memory-working-set-bytes" + - "k8s.rules.container-resource" + - "k8s.rules.pod-owner" + - "kube-apiserver-availability.rules" + - "kube-apiserver-burnrate.rules" + - "kube-apiserver-histogram.rules" + - "kube-apiserver-slos" + - "kube-prometheus-general.rules" + - "kube-prometheus-node-recording.rules" + - "kube-scheduler.rules" + - "kube-state-metrics" + - "kubelet.rules" + - "kubernetes-apps" + - "kubernetes-resources" + - "kubernetes-storage" + - "kubernetes-system" + - "kubernetes-system-kube-proxy" + - "kubernetes-system-apiserver" + - "kubernetes-system-kubelet" + - "kubernetes-system-controller-manager" + - "kubernetes-system-scheduler" + - "node-exporter.rules" + - "node-exporter" + - "node.rules" + - "node-network" + - "prometheus-operator" + - "prometheus" + - "windows.node.rules" + - "windows.pod.rules" +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/additionalAlertRelabelConfigs.yaml b/kube-prometheus-stack/templates/prometheus/additionalAlertRelabelConfigs.yaml new file mode 100644 index 0000000..b68c6f7 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/additionalAlertRelabelConfigs.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-relabel-confg + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }} + annotations: +{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus-am-relabel-confg +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: + additional-alert-relabel-configs.yaml: {{ tpl (toYaml .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs) . | b64enc | quote }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/additionalAlertmanagerConfigs.yaml b/kube-prometheus-stack/templates/prometheus/additionalAlertmanagerConfigs.yaml new file mode 100644 index 0000000..2fe8fdb --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/additionalAlertmanagerConfigs.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-confg + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }} + annotations: +{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus-am-confg +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: + additional-alertmanager-configs.yaml: {{ tpl (toYaml .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs) . | b64enc | quote }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/additionalPrometheusRules.yaml b/kube-prometheus-stack/templates/prometheus/additionalPrometheusRules.yaml new file mode 100644 index 0000000..72cbb7d --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/additionalPrometheusRules.yaml @@ -0,0 +1,37 @@ +{{- if .Values.additionalPrometheusRulesMap }} +{{- range $prometheusRuleName, $prometheusRule := .Values.additionalPrometheusRulesMap }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) $prometheusRuleName | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }} + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} + {{- if $prometheusRule.additionalLabels }} + {{- toYaml $prometheusRule.additionalLabels | nindent 4 }} + {{- end }} +spec: + groups: + {{- toYaml $prometheusRule.groups | nindent 4 }} +{{- end }} +{{- else if .Values.additionalPrometheusRules }} +{{- range .Values.additionalPrometheusRules }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) .name | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }} + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} + {{- if .additionalLabels }} + {{- toYaml .additionalLabels | nindent 4 }} + {{- end }} +spec: + groups: + {{- toYaml .groups | nindent 4 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/additionalScrapeConfigs.yaml b/kube-prometheus-stack/templates/prometheus/additionalScrapeConfigs.yaml new file mode 100644 index 0000000..ebdf766 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/additionalScrapeConfigs.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.additionalScrapeConfigs }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-scrape-confg + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations }} + annotations: +{{ toYaml .Values.prometheus.prometheusSpec.additionalPrometheusSecretsAnnotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus-scrape-confg +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: +{{- if eq ( typeOf .Values.prometheus.prometheusSpec.additionalScrapeConfigs ) "string" }} + additional-scrape-configs.yaml: {{ tpl .Values.prometheus.prometheusSpec.additionalScrapeConfigs $ | b64enc | quote }} +{{- else }} + additional-scrape-configs.yaml: {{ tpl (toYaml .Values.prometheus.prometheusSpec.additionalScrapeConfigs) $ | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/ciliumnetworkpolicy.yaml b/kube-prometheus-stack/templates/prometheus/ciliumnetworkpolicy.yaml new file mode 100644 index 0000000..58f02d1 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/ciliumnetworkpolicy.yaml @@ -0,0 +1,26 @@ +{{- if and .Values.prometheus.networkPolicy.enabled (eq .Values.prometheus.networkPolicy.flavor "cilium") .Values.prometheus.networkPolicy.cilium }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} +spec: + endpointSelector: + {{- if .Values.prometheus.networkPolicy.cilium.endpointSelector }} + {{- toYaml .Values.prometheus.networkPolicy.cilium.endpointSelector | nindent 4 }} + {{- else }} + matchExpressions: +{{- include "kube-prometheus-stack.prometheus.pod-anti-affinity.matchExpressions" . | indent 6 }} + {{- end }} + {{- if .Values.prometheus.networkPolicy.cilium.egress }} + egress: + {{ toYaml .Values.prometheus.networkPolicy.cilium.egress | nindent 4 }} + {{- end }} + {{- if .Values.prometheus.networkPolicy.cilium.ingress }} + ingress: + {{ toYaml .Values.prometheus.networkPolicy.cilium.ingress | nindent 4 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/clusterrole.yaml b/kube-prometheus-stack/templates/prometheus/clusterrole.yaml new file mode 100644 index 0000000..1bb1802 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/clusterrole.yaml @@ -0,0 +1,48 @@ +{{- if and .Values.prometheus.enabled .Values.global.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +rules: +# These permissions (to examine all namespaces) are not in the kube-prometheus repo. +# They're grabbed from https://github.com/prometheus/prometheus/blob/master/documentation/examples/rbac-setup.yml +# kube-prometheus deliberately defaults to a more restrictive setup that is not appropriate for our general audience. +- apiGroups: [""] + resources: + - nodes + - nodes/metrics + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] +- apiGroups: ["discovery.k8s.io"] + resources: + - endpointslices + verbs: ["get", "list", "watch"] +- apiGroups: + - "networking.k8s.io" + resources: + - ingresses + verbs: ["get", "list", "watch"] +- nonResourceURLs: ["/metrics", "/metrics/cadvisor"] + verbs: ["get"] +{{/* fix(#3338): add required rules to use node-exporter with the RBAC proxy */}} +{{- if and .Values.nodeExporter.enabled (index .Values "prometheus-node-exporter").kubeRBACProxy.enabled }} +- apiGroups: [ "" ] + resources: + - services/{{ include "prometheus-node-exporter.fullname" (index .Subcharts "prometheus-node-exporter") }} + verbs: [ "get", "list", "watch" ] +{{- end }} +{{- if and .Values.kubeStateMetrics.enabled (index .Values "kube-state-metrics").kubeRBACProxy.enabled }} +- apiGroups: [ "" ] + resources: + - services/{{ include "kube-state-metrics.fullname" (index .Subcharts "kube-state-metrics") }} + verbs: [ "get", "list", "watch" ] +{{- end }} +{{- if .Values.prometheus.additionalRulesForClusterRole }} +{{ toYaml .Values.prometheus.additionalRulesForClusterRole | indent 0 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/clusterrolebinding.yaml b/kube-prometheus-stack/templates/prometheus/clusterrolebinding.yaml new file mode 100644 index 0000000..9fc4f65 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/clusterrolebinding.yaml @@ -0,0 +1,18 @@ +{{- if and .Values.prometheus.enabled .Values.global.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus +subjects: + - kind: ServiceAccount + name: {{ template "kube-prometheus-stack.prometheus.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- end }} + diff --git a/kube-prometheus-stack/templates/prometheus/csi-secret.yaml b/kube-prometheus-stack/templates/prometheus/csi-secret.yaml new file mode 100644 index 0000000..e05382f --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/csi-secret.yaml @@ -0,0 +1,12 @@ +{{- if and .Values.prometheus.prometheusSpec.thanos .Values.prometheus.prometheusSpec.thanos.secretProviderClass }} +--- +apiVersion: secrets-store.csi.x-k8s.io/v1alpha1 +kind: SecretProviderClass +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +spec: +{{ toYaml .Values.prometheus.prometheusSpec.thanos.secretProviderClass | indent 2 }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/extrasecret.yaml b/kube-prometheus-stack/templates/prometheus/extrasecret.yaml new file mode 100644 index 0000000..17f3478 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/extrasecret.yaml @@ -0,0 +1,20 @@ +{{- if .Values.prometheus.extraSecret.data -}} +{{- $secretName := printf "prometheus-%s-extra" (include "kube-prometheus-stack.fullname" . ) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ default $secretName .Values.prometheus.extraSecret.name }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.prometheus.extraSecret.annotations }} + annotations: +{{ toYaml .Values.prometheus.extraSecret.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + app.kubernetes.io/component: prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: +{{- range $key, $val := .Values.prometheus.extraSecret.data }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/ingress.yaml b/kube-prometheus-stack/templates/prometheus/ingress.yaml new file mode 100644 index 0000000..5d21e5a --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/ingress.yaml @@ -0,0 +1,66 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.ingress.enabled -}} + {{- $pathType := .Values.prometheus.ingress.pathType | default "ImplementationSpecific" -}} + {{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" -}} + {{- $servicePort := .Values.prometheus.ingress.servicePort | default .Values.prometheus.service.port -}} + {{- $routePrefix := list .Values.prometheus.prometheusSpec.routePrefix -}} + {{- $paths := .Values.prometheus.ingress.paths | default $routePrefix -}} + {{- $extraPaths := .Values.prometheus.ingress.extraPaths | default list -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: +{{- if .Values.prometheus.ingress.annotations }} + annotations: + {{- tpl (toYaml .Values.prometheus.ingress.annotations) . | nindent 4 }} +{{- end }} + name: {{ $serviceName }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.ingress.labels }} +{{ toYaml .Values.prometheus.ingress.labels | indent 4 }} +{{- end }} +spec: + {{- if .Values.prometheus.ingress.ingressClassName }} + ingressClassName: {{ .Values.prometheus.ingress.ingressClassName }} + {{- end }} + rules: + {{- if .Values.prometheus.ingress.hosts }} + {{- range $host := .Values.prometheus.ingress.hosts }} + - host: {{ tpl $host $ | quote }} + http: + paths: + {{- range $p := $paths }} + {{- with $extraPaths }} + {{- toYaml . | nindent 10 }} + {{- end }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- end -}} + {{- end -}} + {{- else }} + - http: + paths: + {{- range $p := $paths }} + {{- with $extraPaths }} + {{- toYaml . | nindent 10 }} + {{- end }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- end -}} + {{- end -}} + {{- if .Values.prometheus.ingress.tls }} + tls: +{{ tpl (toYaml .Values.prometheus.ingress.tls | indent 4) . }} + {{- end -}} +{{- end -}} diff --git a/kube-prometheus-stack/templates/prometheus/ingressThanosSidecar.yaml b/kube-prometheus-stack/templates/prometheus/ingressThanosSidecar.yaml new file mode 100644 index 0000000..c3a7e99 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/ingressThanosSidecar.yaml @@ -0,0 +1,59 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.thanosIngress.enabled }} +{{- $pathType := .Values.prometheus.thanosIngress.pathType | default "" }} +{{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "thanos-discovery" }} +{{- $thanosPort := .Values.prometheus.thanosIngress.servicePort -}} +{{- $routePrefix := list .Values.prometheus.prometheusSpec.routePrefix }} +{{- $paths := .Values.prometheus.thanosIngress.paths | default $routePrefix -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: +{{- if .Values.prometheus.thanosIngress.annotations }} + annotations: + {{- tpl (toYaml .Values.prometheus.thanosIngress.annotations) . | nindent 4 }} +{{- end }} + name: {{ template "kube-prometheus-stack.fullname" . }}-thanos-gateway + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.thanosIngress.labels }} +{{ toYaml .Values.prometheus.thanosIngress.labels | indent 4 }} +{{- end }} +spec: + {{- if .Values.prometheus.thanosIngress.ingressClassName }} + ingressClassName: {{ .Values.prometheus.thanosIngress.ingressClassName }} + {{- end }} + rules: + {{- if .Values.prometheus.thanosIngress.hosts }} + {{- range $host := .Values.prometheus.thanosIngress.hosts }} + - host: {{ tpl $host $ }} + http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $serviceName }} + port: + number: {{ $thanosPort }} + {{- end -}} + {{- end -}} + {{- else }} + - http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $serviceName }} + port: + number: {{ $thanosPort }} + {{- end -}} + {{- end -}} + {{- if .Values.prometheus.thanosIngress.tls }} + tls: +{{ tpl (toYaml .Values.prometheus.thanosIngress.tls | indent 4) . }} + {{- end -}} +{{- end -}} diff --git a/kube-prometheus-stack/templates/prometheus/ingressperreplica.yaml b/kube-prometheus-stack/templates/prometheus/ingressperreplica.yaml new file mode 100644 index 0000000..dc59b45 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/ingressperreplica.yaml @@ -0,0 +1,56 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.servicePerReplica.enabled .Values.prometheus.ingressPerReplica.enabled }} +{{- $pathType := .Values.prometheus.ingressPerReplica.pathType | default "" }} +{{- $count := .Values.prometheus.prometheusSpec.replicas | int -}} +{{- $servicePort := .Values.prometheus.servicePerReplica.port -}} +{{- $ingressValues := .Values.prometheus.ingressPerReplica -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-ingressperreplica + namespace: {{ template "kube-prometheus-stack.namespace" $ }} +items: +{{ range $i, $e := until $count }} + - kind: Ingress + apiVersion: networking.k8s.io/v1 + metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-{{ $i }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ include "kube-prometheus-stack.name" $ }}-prometheus + {{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $ingressValues.labels }} +{{ toYaml $ingressValues.labels | indent 8 }} + {{- end }} + {{- if $ingressValues.annotations }} + annotations: + {{- tpl (toYaml $ingressValues.annotations) $ | nindent 8 }} + {{- end }} + spec: + {{- if $ingressValues.ingressClassName }} + ingressClassName: {{ $ingressValues.ingressClassName }} + {{- end }} + rules: + - host: {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + http: + paths: + {{- range $p := $ingressValues.paths }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-{{ $i }} + port: + number: {{ $servicePort }} + {{- end -}} + {{- if or $ingressValues.tlsSecretName $ingressValues.tlsSecretPerReplica.enabled }} + tls: + - hosts: + - {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + {{- if $ingressValues.tlsSecretPerReplica.enabled }} + secretName: {{ $ingressValues.tlsSecretPerReplica.prefix }}-{{ $i }} + {{- else }} + secretName: {{ $ingressValues.tlsSecretName }} + {{- end }} + {{- end }} +{{- end -}} +{{- end -}} diff --git a/kube-prometheus-stack/templates/prometheus/networkpolicy.yaml b/kube-prometheus-stack/templates/prometheus/networkpolicy.yaml new file mode 100644 index 0000000..f6c0248 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/networkpolicy.yaml @@ -0,0 +1,34 @@ +{{- if and .Values.prometheus.networkPolicy.enabled (eq .Values.prometheus.networkPolicy.flavor "kubernetes") }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ .Values.prometheus.networkPolicy.namespace | default (include "kube-prometheus-stack.namespace" .) }} +spec: + {{- if .Values.prometheus.networkPolicy.egress }} + egress: + {{- toYaml .Values.prometheus.networkPolicy.egress | nindent 4 }} + {{- end }} + {{- if .Values.prometheus.networkPolicy.ingress }} + ingress: + {{- toYaml .Values.prometheus.networkPolicy.ingress | nindent 4 }} + {{- end }} + policyTypes: + - Egress + - Ingress + podSelector: + {{- if .Values.prometheus.networkPolicy.podSelector }} + {{- toYaml .Values.prometheus.networkPolicy.podSelector | nindent 4 }} + {{- else }} + matchLabels: + {{- if .Values.prometheus.agentMode }} + app.kubernetes.io/name: prometheus-agent + {{- else }} + app.kubernetes.io/name: prometheus + {{- end }} + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/podDisruptionBudget.yaml b/kube-prometheus-stack/templates/prometheus/podDisruptionBudget.yaml new file mode 100644 index 0000000..45e02e0 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/podDisruptionBudget.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: +{{- toYaml (omit .Values.prometheus.podDisruptionBudget "enabled") | nindent 2 }} + selector: + matchLabels: + {{- if .Values.prometheus.agentMode }} + app.kubernetes.io/name: prometheus-agent + {{- else }} + app.kubernetes.io/name: prometheus + {{- end }} + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/podmonitors.yaml b/kube-prometheus-stack/templates/prometheus/podmonitors.yaml new file mode 100644 index 0000000..f98a63a --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/podmonitors.yaml @@ -0,0 +1,45 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.additionalPodMonitors }} +apiVersion: v1 +kind: List +items: +{{- range .Values.prometheus.additionalPodMonitors }} + - apiVersion: monitoring.coreos.com/v1 + kind: PodMonitor + metadata: + name: {{ .name }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-prometheus +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if .additionalLabels }} +{{ toYaml .additionalLabels | indent 8 }} + {{- end }} + spec: + {{- include "servicemonitor.scrapeLimits" . | nindent 6 }} + podMetricsEndpoints: +{{ toYaml .podMetricsEndpoints | indent 8 }} + {{- if .jobLabel }} + jobLabel: {{ .jobLabel }} + {{- end }} + {{- if .namespaceSelector }} + namespaceSelector: +{{ toYaml .namespaceSelector | indent 8 }} + {{- end }} + selector: +{{ toYaml .selector | indent 8 }} + {{- if .podTargetLabels }} + podTargetLabels: +{{ toYaml .podTargetLabels | indent 8 }} + {{- end }} + {{- if .sampleLimit }} + sampleLimit: {{ .sampleLimit }} + {{- end }} + {{- if .fallbackScrapeProtocol }} + fallbackScrapeProtocol: {{ .fallbackScrapeProtocol }} + {{- end }} + {{- with .attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/prometheus.yaml b/kube-prometheus-stack/templates/prometheus/prometheus.yaml new file mode 100644 index 0000000..6e8d648 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/prometheus.yaml @@ -0,0 +1,538 @@ +{{- if .Values.prometheus.enabled }} +{{- if .Values.prometheus.agentMode }} +apiVersion: monitoring.coreos.com/v1alpha1 +kind: PrometheusAgent +{{- else }} +apiVersion: monitoring.coreos.com/v1 +kind: Prometheus +{{- end }} +metadata: + name: {{ template "kube-prometheus-stack.prometheus.crname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} + {{- with .Values.prometheus.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.prometheus.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: +{{- if not (kindIs "invalid" .Values.prometheus.prometheusSpec.automountServiceAccountToken) }} + automountServiceAccountToken: {{ .Values.prometheus.prometheusSpec.automountServiceAccountToken }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) (not .Values.prometheus.prometheusSpec.disableAlerting) (or .Values.prometheus.prometheusSpec.alertingEndpoints .Values.alertmanager.enabled) }} + alerting: + alertmanagers: +{{- if .Values.prometheus.prometheusSpec.alertingEndpoints }} +{{ toYaml .Values.prometheus.prometheusSpec.alertingEndpoints | indent 6 }} +{{- else if .Values.alertmanager.enabled }} + - namespace: {{ template "kube-prometheus-stack.namespace" . }} + name: {{ template "kube-prometheus-stack.fullname" . }}-alertmanager + port: {{ .Values.alertmanager.alertmanagerSpec.portName }} + {{- if .Values.alertmanager.alertmanagerSpec.routePrefix }} + pathPrefix: "{{ .Values.alertmanager.alertmanagerSpec.routePrefix }}" + {{- end }} + {{- if .Values.alertmanager.alertmanagerSpec.scheme }} + scheme: {{ .Values.alertmanager.alertmanagerSpec.scheme }} + {{- end }} + {{- if .Values.alertmanager.alertmanagerSpec.tlsConfig }} + tlsConfig: +{{ toYaml .Values.alertmanager.alertmanagerSpec.tlsConfig | indent 10 }} + {{- end }} + apiVersion: {{ .Values.alertmanager.apiVersion }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.apiserverConfig }} + apiserverConfig: +{{ toYaml .Values.prometheus.prometheusSpec.apiserverConfig | indent 4}} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.image }} + {{- $registry := .Values.global.imageRegistry | default .Values.prometheus.prometheusSpec.image.registry -}} + {{- $tag := (.Values.prometheus.prometheusSpec.image.tag | empty | not) | ternary (printf ":%s" .Values.prometheus.prometheusSpec.image.tag) "" -}} + {{- $sha := (.Values.prometheus.prometheusSpec.image.sha | empty | not) | ternary (printf "@sha256:%s" .Values.prometheus.prometheusSpec.image.sha) "" }} + image: "{{ printf "%s/%s%s%s" $registry .Values.prometheus.prometheusSpec.image.repository $tag $sha }}" + imagePullPolicy: "{{ .Values.prometheus.prometheusSpec.image.pullPolicy }}" + version: "{{ default .Values.prometheus.prometheusSpec.image.tag .Values.prometheus.prometheusSpec.version }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalArgs }} + additionalArgs: +{{ toYaml .Values.prometheus.prometheusSpec.additionalArgs | indent 4}} +{{- end -}} +{{- if .Values.prometheus.prometheusSpec.externalLabels }} + externalLabels: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.externalLabels | indent 4) . }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.prometheusExternalLabelNameClear }} + prometheusExternalLabelName: "" +{{- else if .Values.prometheus.prometheusSpec.prometheusExternalLabelName }} + prometheusExternalLabelName: "{{ .Values.prometheus.prometheusSpec.prometheusExternalLabelName }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.replicaExternalLabelNameClear }} + replicaExternalLabelName: "" +{{- else if .Values.prometheus.prometheusSpec.replicaExternalLabelName }} + replicaExternalLabelName: "{{ .Values.prometheus.prometheusSpec.replicaExternalLabelName }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enableRemoteWriteReceiver }} + enableRemoteWriteReceiver: {{ .Values.prometheus.prometheusSpec.enableRemoteWriteReceiver }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.externalUrl }} + externalUrl: "{{ tpl .Values.prometheus.prometheusSpec.externalUrl . }}" +{{- else if and .Values.prometheus.ingress.enabled .Values.prometheus.ingress.hosts }} + externalUrl: "http://{{ tpl (index .Values.prometheus.ingress.hosts 0) . }}{{ .Values.prometheus.prometheusSpec.routePrefix }}" +{{- else }} + externalUrl: http://{{ template "kube-prometheus-stack.fullname" . }}-prometheus.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.prometheus.service.port }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.nodeSelector }} + nodeSelector: +{{ toYaml .Values.prometheus.prometheusSpec.nodeSelector | indent 4 }} +{{- end }} + paused: {{ .Values.prometheus.prometheusSpec.paused }} + replicas: {{ .Values.prometheus.prometheusSpec.replicas }} + shards: {{ .Values.prometheus.prometheusSpec.shards }} + logLevel: {{ .Values.prometheus.prometheusSpec.logLevel | quote }} + logFormat: {{ .Values.prometheus.prometheusSpec.logFormat }} + listenLocal: {{ .Values.prometheus.prometheusSpec.listenLocal }} + enableOTLPReceiver: {{ .Values.prometheus.prometheusSpec.enableOTLPReceiver }} +{{- if not .Values.prometheus.agentMode }} + enableAdminAPI: {{ .Values.prometheus.prometheusSpec.enableAdminAPI }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.web }} + web: +{{ toYaml .Values.prometheus.prometheusSpec.web | indent 4 }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.exemplars }} + exemplars: + {{- toYaml .Values.prometheus.prometheusSpec.exemplars | nindent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enableFeatures }} + enableFeatures: +{{- range $enableFeatures := .Values.prometheus.prometheusSpec.enableFeatures }} + - {{ tpl $enableFeatures $ }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.otlp }} + otlp: +{{ toYaml .Values.prometheus.prometheusSpec.otlp | indent 4 }} +{{- end }} +{{- with .Values.prometheus.prometheusSpec.scrapeClasses }} + scrapeClasses: + {{- tpl (toYaml . | nindent 4) $ }} +{{- end }} +{{- with .Values.prometheus.prometheusSpec.podTargetLabels }} + podTargetLabels: + {{- tpl (toYaml . | nindent 4) $ }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeFailureLogFile }} + scrapeFailureLogFile: {{ .Values.prometheus.prometheusSpec.scrapeFailureLogFile }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeInterval }} + scrapeInterval: {{ .Values.prometheus.prometheusSpec.scrapeInterval }} +{{- end }} +{{- with .Values.prometheus.prometheusSpec.scrapeProtocols }} + scrapeProtocols: + {{- toYaml . | nindent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeTimeout }} + scrapeTimeout: {{ .Values.prometheus.prometheusSpec.scrapeTimeout }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.convertClassicHistogramsToNHCB }} + convertClassicHistogramsToNHCB: {{ .Values.prometheus.prometheusSpec.convertClassicHistogramsToNHCB }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeClassicHistograms }} + scrapeClassicHistograms: {{ .Values.prometheus.prometheusSpec.scrapeClassicHistograms }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeNativeHistograms }} + scrapeNativeHistograms: {{ .Values.prometheus.prometheusSpec.scrapeNativeHistograms }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.evaluationInterval }} + evaluationInterval: {{ .Values.prometheus.prometheusSpec.evaluationInterval }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.resources }} + resources: +{{ toYaml .Values.prometheus.prometheusSpec.resources | indent 4 }} +{{- end }} +{{- if not .Values.prometheus.agentMode }} + retention: {{ .Values.prometheus.prometheusSpec.retention | quote }} +{{- if .Values.prometheus.prometheusSpec.retentionSize }} + retentionSize: {{ .Values.prometheus.prometheusSpec.retentionSize | quote }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.tsdb }} + tsdb: + {{- if .Values.prometheus.prometheusSpec.tsdb.outOfOrderTimeWindow }} + outOfOrderTimeWindow: {{ .Values.prometheus.prometheusSpec.tsdb.outOfOrderTimeWindow }} + {{- end }} +{{- end }} +{{- if eq .Values.prometheus.prometheusSpec.walCompression false }} + walCompression: false +{{ else }} + walCompression: true +{{- end }} +{{- if .Values.prometheus.prometheusSpec.routePrefix }} + routePrefix: {{ .Values.prometheus.prometheusSpec.routePrefix | quote }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.secrets }} + secrets: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.secrets) . | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.configMaps }} + configMaps: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.configMaps) . | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.serviceName }} + serviceName: {{ tpl .Values.prometheus.prometheusSpec.serviceName .}} +{{- end }} + serviceAccountName: {{ template "kube-prometheus-stack.prometheus.serviceAccountName" . }} +{{- if .Values.prometheus.prometheusSpec.serviceMonitorSelector }} + serviceMonitorSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.serviceMonitorSelector | indent 4) . }} +{{ else if .Values.prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues }} + serviceMonitorSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + serviceMonitorSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.serviceMonitorNamespaceSelector }} + serviceMonitorNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.serviceMonitorNamespaceSelector | indent 4) . }} +{{ else }} + serviceMonitorNamespaceSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.podMonitorSelector }} + podMonitorSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.podMonitorSelector | indent 4) . }} +{{ else if .Values.prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues }} + podMonitorSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + podMonitorSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.podMonitorNamespaceSelector }} + podMonitorNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.podMonitorNamespaceSelector | indent 4) . }} +{{ else }} + podMonitorNamespaceSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.probeSelector }} + probeSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.probeSelector | indent 4) . }} +{{ else if .Values.prometheus.prometheusSpec.probeSelectorNilUsesHelmValues }} + probeSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + probeSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.probeNamespaceSelector }} + probeNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.probeNamespaceSelector | indent 4) . }} +{{ else }} + probeNamespaceSelector: {} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) (or .Values.prometheus.prometheusSpec.remoteRead .Values.prometheus.prometheusSpec.additionalRemoteRead) }} + remoteRead: +{{- if .Values.prometheus.prometheusSpec.remoteRead }} +{{ tpl (toYaml .Values.prometheus.prometheusSpec.remoteRead | indent 4) . }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalRemoteRead }} +{{ toYaml .Values.prometheus.prometheusSpec.additionalRemoteRead | indent 4 }} +{{- end }} +{{- end }} +{{- if (or .Values.prometheus.prometheusSpec.remoteWrite .Values.prometheus.prometheusSpec.additionalRemoteWrite) }} + remoteWrite: +{{- if .Values.prometheus.prometheusSpec.remoteWrite }} +{{ tpl (toYaml .Values.prometheus.prometheusSpec.remoteWrite | indent 4) . }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalRemoteWrite }} +{{ toYaml .Values.prometheus.prometheusSpec.additionalRemoteWrite | indent 4 }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.securityContext }} + securityContext: +{{ toYaml .Values.prometheus.prometheusSpec.securityContext | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.dnsConfig }} + dnsConfig: +{{ toYaml .Values.prometheus.prometheusSpec.dnsConfig | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.dnsPolicy }} + dnsPolicy: {{ .Values.prometheus.prometheusSpec.dnsPolicy }} +{{- end }} +{{- if not .Values.prometheus.agentMode }} +{{- if .Values.prometheus.prometheusSpec.ruleNamespaceSelector }} + ruleNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.ruleNamespaceSelector | indent 4) . }} +{{ else }} + ruleNamespaceSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.ruleSelector }} + ruleSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.ruleSelector | indent 4) . }} +{{- else if .Values.prometheus.prometheusSpec.ruleSelectorNilUsesHelmValues }} + ruleSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else if not (kindIs "invalid" .Values.prometheus.prometheusSpec.ruleSelector) }} + ruleSelector: {} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeConfigSelector }} + scrapeConfigSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.scrapeConfigSelector | indent 4) . }} +{{ else if .Values.prometheus.prometheusSpec.scrapeConfigSelectorNilUsesHelmValues }} + scrapeConfigSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else if not (kindIs "invalid" .Values.prometheus.prometheusSpec.scrapeConfigSelector) }} + scrapeConfigSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.scrapeConfigNamespaceSelector }} + scrapeConfigNamespaceSelector: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.scrapeConfigNamespaceSelector | indent 4) . }} +{{ else if not (kindIs "invalid" .Values.prometheus.prometheusSpec.scrapeConfigNamespaceSelector) }} + scrapeConfigNamespaceSelector: {} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.storageSpec }} + storage: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.storageSpec | indent 4) . }} +{{- end }} +{{- with .Values.prometheus.prometheusSpec.persistentVolumeClaimRetentionPolicy }} + persistentVolumeClaimRetentionPolicy: + {{- toYaml . | nindent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.podMetadata }} + podMetadata: +{{ tpl (toYaml .Values.prometheus.prometheusSpec.podMetadata | indent 4) . }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.query }} + query: +{{ toYaml .Values.prometheus.prometheusSpec.query | indent 4}} +{{- end }} +{{- if or .Values.prometheus.prometheusSpec.podAntiAffinity .Values.prometheus.prometheusSpec.affinity }} + affinity: +{{- if .Values.prometheus.prometheusSpec.affinity }} +{{ toYaml .Values.prometheus.prometheusSpec.affinity | indent 4 }} +{{- end }} +{{- if eq .Values.prometheus.prometheusSpec.podAntiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: {{ .Values.prometheus.prometheusSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: +{{- include "kube-prometheus-stack.prometheus.pod-anti-affinity.matchExpressions" . | indent 12 }} +{{- else if eq .Values.prometheus.prometheusSpec.podAntiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: {{ .Values.prometheus.prometheusSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: +{{- include "kube-prometheus-stack.prometheus.pod-anti-affinity.matchExpressions" . | indent 12 }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.tolerations }} + tolerations: +{{ toYaml .Values.prometheus.prometheusSpec.tolerations | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.prometheus.prometheusSpec.topologySpreadConstraints | indent 4 }} +{{- end }} +{{- if .Values.global.imagePullSecrets }} + imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . | trim | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalScrapeConfigs }} + additionalScrapeConfigs: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-scrape-confg + key: additional-scrape-configs.yaml +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.enabled }} + additionalScrapeConfigs: + name: {{ tpl (.Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.name) . }} + key: {{ .Values.prometheus.prometheusSpec.additionalScrapeConfigsSecret.key }} +{{- end }} +{{- if not .Values.prometheus.agentMode }} +{{- if or .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret }} + additionalAlertManagerConfigs: +{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigs }} + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-confg + key: additional-alertmanager-configs.yaml +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret }} + name: {{ tpl (.Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.name) . }} + key: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.key }} + {{- if hasKey .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret "optional" }} + optional: {{ .Values.prometheus.prometheusSpec.additionalAlertManagerConfigsSecret.optional }} + {{- end }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigs }} + additionalAlertRelabelConfigs: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus-am-relabel-confg + key: additional-alert-relabel-configs.yaml +{{- end }} +{{- if .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigsSecret }} + additionalAlertRelabelConfigs: + name: {{ tpl (.Values.prometheus.prometheusSpec.additionalAlertRelabelConfigsSecret.name) . }} + key: {{ .Values.prometheus.prometheusSpec.additionalAlertRelabelConfigsSecret.key }} +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.containers }} + containers: +{{ toYaml .Values.prometheus.prometheusSpec.containers | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.initContainers }} + initContainers: +{{ toYaml .Values.prometheus.prometheusSpec.initContainers | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.priorityClassName }} + priorityClassName: {{ .Values.prometheus.prometheusSpec.priorityClassName }} +{{- end }} +{{- if not .Values.prometheus.agentMode }} +{{- if .Values.prometheus.prometheusSpec.thanos }} + thanos: +{{- with .Values.prometheus.prometheusSpec.thanos.image }} + image: {{ . }} +{{- end }} +{{- with (omit .Values.prometheus.prometheusSpec.thanos "image" "objectStorageConfig" "secretProviderClass") }} +{{ toYaml . | indent 4 }} +{{- end }} +{{- if ((.Values.prometheus.prometheusSpec.thanos.objectStorageConfig).existingSecret) }} + objectStorageConfig: + key: "{{.Values.prometheus.prometheusSpec.thanos.objectStorageConfig.existingSecret.key }}" + name: "{{.Values.prometheus.prometheusSpec.thanos.objectStorageConfig.existingSecret.name }}" +{{- else if ((.Values.prometheus.prometheusSpec.thanos.objectStorageConfig).secret) }} + objectStorageConfig: + key: object-storage-configs.yaml + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus +{{- end }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.disableCompaction }} + disableCompaction: {{ .Values.prometheus.prometheusSpec.disableCompaction }} +{{- end }} +{{- end }} + portName: {{ .Values.prometheus.prometheusSpec.portName }} +{{- if .Values.prometheus.prometheusSpec.volumes }} + volumes: +{{ toYaml .Values.prometheus.prometheusSpec.volumes | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.volumeMounts }} + volumeMounts: +{{ toYaml .Values.prometheus.prometheusSpec.volumeMounts | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.arbitraryFSAccessThroughSMs }} + arbitraryFSAccessThroughSMs: +{{ toYaml .Values.prometheus.prometheusSpec.arbitraryFSAccessThroughSMs | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.overrideHonorLabels }} + overrideHonorLabels: {{ .Values.prometheus.prometheusSpec.overrideHonorLabels }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.overrideHonorTimestamps }} + overrideHonorTimestamps: {{ .Values.prometheus.prometheusSpec.overrideHonorTimestamps }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} + ignoreNamespaceSelectors: {{ .Values.prometheus.prometheusSpec.ignoreNamespaceSelectors }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedNamespaceLabel }} + enforcedNamespaceLabel: {{ .Values.prometheus.prometheusSpec.enforcedNamespaceLabel }} +{{- $prometheusDefaultRulesExcludedFromEnforce := (include "rules.names" .) | fromYaml }} +{{- if not .Values.prometheus.agentMode }} + prometheusRulesExcludedFromEnforce: +{{- range $prometheusDefaultRulesExcludedFromEnforce.rules }} + - ruleNamespace: "{{ template "kube-prometheus-stack.namespace" $ }}" + ruleName: "{{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) . | trunc 63 | trimSuffix "-" }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.prometheusRulesExcludedFromEnforce }} +{{ toYaml .Values.prometheus.prometheusSpec.prometheusRulesExcludedFromEnforce | indent 4 }} +{{- end }} +{{- end }} + excludedFromEnforcement: +{{- range $prometheusDefaultRulesExcludedFromEnforce.rules }} + - group: monitoring.coreos.com + resource: prometheusrules + namespace: "{{ template "kube-prometheus-stack.namespace" $ }}" + name: "{{ printf "%s-%s" (include "kube-prometheus-stack.fullname" $) . | trunc 63 | trimSuffix "-" }}" +{{- end }} +{{- if .Values.prometheus.prometheusSpec.excludedFromEnforcement }} + {{- if kindIs "string" .Values.prometheus.prometheusSpec.excludedFromEnforcement }} +{{ tpl .Values.prometheus.prometheusSpec.excludedFromEnforcement . | indent 4 }} + {{- else }} +{{ tpl (toYaml .Values.prometheus.prometheusSpec.excludedFromEnforcement | indent 4) . }} + {{- end }} +{{- end }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.queryLogFile }} + queryLogFile: {{ .Values.prometheus.prometheusSpec.queryLogFile }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.sampleLimit }} + sampleLimit: {{ .Values.prometheus.prometheusSpec.sampleLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedKeepDroppedTargets }} + enforcedKeepDroppedTargets: {{ .Values.prometheus.prometheusSpec.enforcedKeepDroppedTargets }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedSampleLimit }} + enforcedSampleLimit: {{ .Values.prometheus.prometheusSpec.enforcedSampleLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedTargetLimit }} + enforcedTargetLimit: {{ .Values.prometheus.prometheusSpec.enforcedTargetLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedLabelLimit }} + enforcedLabelLimit: {{ .Values.prometheus.prometheusSpec.enforcedLabelLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedLabelNameLengthLimit }} + enforcedLabelNameLengthLimit: {{ .Values.prometheus.prometheusSpec.enforcedLabelNameLengthLimit }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.enforcedLabelValueLengthLimit}} + enforcedLabelValueLengthLimit: {{ .Values.prometheus.prometheusSpec.enforcedLabelValueLengthLimit }} +{{- end }} +{{- if and (not .Values.prometheus.agentMode) .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} + allowOverlappingBlocks: {{ .Values.prometheus.prometheusSpec.allowOverlappingBlocks }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.nameValidationScheme }} + nameValidationScheme: {{ .Values.prometheus.prometheusSpec.nameValidationScheme }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.minReadySeconds }} + minReadySeconds: {{ .Values.prometheus.prometheusSpec.minReadySeconds }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.podManagementPolicy }} + podManagementPolicy: {{ .Values.prometheus.prometheusSpec.podManagementPolicy }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.updateStrategy }} + updateStrategy: +{{ toYaml .Values.prometheus.prometheusSpec.updateStrategy | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ .Values.prometheus.prometheusSpec.terminationGracePeriodSeconds }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.maximumStartupDurationSeconds }} + maximumStartupDurationSeconds: {{ .Values.prometheus.prometheusSpec.maximumStartupDurationSeconds }} +{{- end }} + hostNetwork: {{ .Values.prometheus.prometheusSpec.hostNetwork }} +{{- if kindIs "bool" .Values.prometheus.prometheusSpec.hostUsers }} + hostUsers: {{ .Values.prometheus.prometheusSpec.hostUsers }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.hostAliases }} + hostAliases: +{{ toYaml .Values.prometheus.prometheusSpec.hostAliases | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.tracingConfig }} + tracingConfig: +{{ toYaml .Values.prometheus.prometheusSpec.tracingConfig | indent 4 }} +{{- end }} +{{- if .Values.prometheus.prometheusSpec.serviceDiscoveryRole }} + serviceDiscoveryRole: {{ .Values.prometheus.prometheusSpec.serviceDiscoveryRole }} +{{- end }} +{{- with .Values.prometheus.prometheusSpec.additionalConfig }} + {{- tpl (toYaml .) $ | nindent 2 }} +{{- end }} +{{- with .Values.prometheus.prometheusSpec.additionalConfigString }} + {{- tpl . $ | nindent 2 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/route.yaml b/kube-prometheus-stack/templates/prometheus/route.yaml new file mode 100644 index 0000000..1a0ed29 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/route.yaml @@ -0,0 +1,63 @@ +{{- if .Values.prometheus.enabled -}} + {{- $serviceName := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" -}} + {{- $servicePort := .Values.prometheus.ingress.servicePort | default .Values.prometheus.service.port -}} + {{- range $name, $route := .Values.prometheus.route }} + {{- if $route.enabled }} +--- +apiVersion: {{ $route.apiVersion | default "gateway.networking.k8s.io/v1" }} +kind: {{ $route.kind | default "HTTPRoute" }} +metadata: + {{- with $route.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ $serviceName }}{{ if ne $name "main" }}-{{ $name }}{{ end }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-prometheus + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} + {{- with $route.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with $route.parentRefs }} + parentRefs: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with $route.hostnames }} + hostnames: + {{- tpl (toYaml .) $ | nindent 4 }} + {{- end }} + rules: + {{- if $route.additionalRules }} + {{- tpl (toYaml $route.additionalRules) $ | nindent 4 }} + {{- end }} + {{- if $route.httpsRedirect }} + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + statusCode: 301 + {{- else }} + - backendRefs: + - group: "" + kind: Service + weight: 1 + name: {{ $serviceName }} + port: {{ $servicePort }} + {{- with $route.filters }} + filters: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $route.matches }} + matches: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $route.sessionPersistence }} + sessionPersistence: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/alertmanager.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/alertmanager.rules.yaml new file mode 100644 index 0000000..4636fa6 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/alertmanager.rules.yaml @@ -0,0 +1,303 @@ +{{- /* +Generated from 'alertmanager.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.alertmanager }} +{{- $alertmanagerJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager" }} +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "alertmanager.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: alertmanager.rules + rules: +{{- if not (.Values.defaultRules.disabled.AlertmanagerFailedReload | default false) }} + - alert: AlertmanagerFailedReload + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: Configuration has failed to load for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerfailedreload + summary: Reloading an Alertmanager configuration has failed. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_config_last_reload_successful{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"}[5m]) == 0 + for: {{ dig "AlertmanagerFailedReload" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerFailedReload" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerMembersInconsistent | default false) }} + - alert: AlertmanagerMembersInconsistent + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} has only found {{`{{`}} $value {{`}}`}} members of the {{`{{`}}$labels.job{{`}}`}} cluster. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagermembersinconsistent + summary: A member of an Alertmanager cluster has not found all other cluster members. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"}[5m]) + < on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) group_left + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"}[5m])) + for: {{ dig "AlertmanagerMembersInconsistent" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerMembersInconsistent" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerFailedToSendAlerts | default false) }} + - alert: AlertmanagerFailedToSendAlerts + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: Alertmanager {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod{{`}}`}} failed to send {{`{{`}} $value | humanizePercentage {{`}}`}} of notifications to {{`{{`}} $labels.integration {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerfailedtosendalerts + summary: An Alertmanager instance failed to send notifications. + expr: |- + ( + rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"}[15m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"}[15m]) + ) + > 0.01 + for: {{ dig "AlertmanagerFailedToSendAlerts" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerFailedToSendAlerts" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerClusterFailedToSendAlerts | default false) }} + - alert: AlertmanagerClusterFailedToSendAlerts + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerclusterfailedtosendalerts + summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration. + expr: |- + min by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}", integration=~`.*`}[15m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}", integration=~`.*`}[15m]) > 0 + ) + > 0.01 + for: {{ dig "AlertmanagerClusterFailedToSendAlerts" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerClusterFailedToSendAlerts" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerClusterFailedToSendAlerts | default false) }} + - alert: AlertmanagerClusterFailedToSendAlerts + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: The minimum notification failure rate to {{`{{`}} $labels.integration {{`}}`}} sent from any instance in the {{`{{`}}$labels.job{{`}}`}} cluster is {{`{{`}} $value | humanizePercentage {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerclusterfailedtosendalerts + summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration. + expr: |- + min by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service, integration) ( + rate(alertmanager_notifications_failed_total{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}", integration!~`.*`}[15m]) + / + ignoring (reason) group_left rate(alertmanager_notifications_total{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}", integration!~`.*`}[15m]) > 0 + ) + > 0.01 + for: {{ dig "AlertmanagerClusterFailedToSendAlerts" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerClusterFailedToSendAlerts" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerConfigInconsistent | default false) }} + - alert: AlertmanagerConfigInconsistent + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have different configurations. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerconfiginconsistent + summary: Alertmanager instances within the same cluster have different configurations. + expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ( + count_values by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"}) + ) + != 1 + for: {{ dig "AlertmanagerConfigInconsistent" "for" "20m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerConfigInconsistent" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerClusterDown | default false) }} + - alert: AlertmanagerClusterDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have been up for less than half of the last 5m.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerclusterdown + summary: Half or more of the Alertmanager instances within the same cluster are down. + expr: |- + ( + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ( + avg_over_time(up{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"}[5m]) < 0.5 + ) + / + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ( + up{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"} + ) + ) + >= 0.5 + for: {{ dig "AlertmanagerClusterDown" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerClusterDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.AlertmanagerClusterCrashlooping | default false) }} + - alert: AlertmanagerClusterCrashlooping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.alertmanager | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of Alertmanager instances within the {{`{{`}}$labels.job{{`}}`}} cluster have restarted at least 5 times in the last 10m.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/alertmanager/alertmanagerclustercrashlooping + summary: Half or more of the Alertmanager instances within the same cluster are crashlooping. + expr: |- + ( + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ( + changes(process_start_time_seconds{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"}[10m]) > 4 + ) + / + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace,service,cluster) ( + up{job="{{ $alertmanagerJob }}",container="alertmanager",namespace="{{ $namespace }}"} + ) + ) + >= 0.5 + for: {{ dig "AlertmanagerClusterCrashlooping" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "AlertmanagerClusterCrashlooping" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.alertmanager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml new file mode 100644 index 0000000..9f554c0 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/config-reloaders.yaml @@ -0,0 +1,57 @@ +{{- /* +Generated from 'config-reloaders' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.configReloaders }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "config-reloaders" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: config-reloaders + rules: +{{- if not (.Values.defaultRules.disabled.ConfigReloaderSidecarErrors | default false) }} + - alert: ConfigReloaderSidecarErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.configReloaders }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.configReloaders | indent 8 }} +{{- end }} + description: 'Errors encountered while the {{`{{`}}$labels.pod{{`}}`}} config-reloader sidecar attempts to sync config in {{`{{`}}$labels.namespace{{`}}`}} namespace. + + As a result, configuration for service running in {{`{{`}}$labels.pod{{`}}`}} may be stale and cannot be updated anymore.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/configreloadersidecarerrors + summary: config-reloader sidecar has not had a successful reload for 10m + expr: max_over_time(reloader_last_reload_successful{namespace=~".+"}[5m]) == 0 + for: {{ dig "ConfigReloaderSidecarErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "ConfigReloaderSidecarErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.configReloaders }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.configReloaders }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml new file mode 100644 index 0000000..852b5e2 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/etcd.yaml @@ -0,0 +1,459 @@ +{{- /* +Generated from 'etcd' group from https://github.com/etcd-io/etcd.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeEtcd.enabled .Values.defaultRules.rules.etcd }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "etcd" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: etcd + rules: +{{- if not (.Values.defaultRules.disabled.etcdMembersDown | default false) }} + - alert: etcdMembersDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": members are down ({{`{{`}} $value {{`}}`}}).' + summary: etcd cluster members are down. + expr: |- + max without (endpoint) ( + sum without (instance, pod) (up{job=~".*etcd.*"} == bool 0) + or + count without (To) ( + sum without (instance, pod) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01 + ) + ) + > 0 + for: {{ dig "etcdMembersDown" "for" "20m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdMembersDown" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdInsufficientMembers | default false) }} + - alert: etcdInsufficientMembers + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": insufficient members ({{`{{`}} $value {{`}}`}}).' + summary: etcd cluster has insufficient number of members. + expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance, pod) < ((count(up{job=~".*etcd.*"}) without (instance, pod) + 1) / 2) + for: {{ dig "etcdInsufficientMembers" "for" "3m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdInsufficientMembers" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdNoLeader | default false) }} + - alert: etcdNoLeader + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member {{`{{`}} $labels.instance {{`}}`}} has no leader.' + summary: etcd cluster has no leader. + expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 + for: {{ dig "etcdNoLeader" "for" "1m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdNoLeader" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighNumberOfLeaderChanges | default false) }} + - alert: etcdHighNumberOfLeaderChanges + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} leader changes within the last 15 minutes. Frequent elections may be a sign of insufficient resources, high network latency, or disruptions by other components and should be investigated.' + summary: etcd cluster has high number of leader changes. + expr: increase((max without (instance, pod) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4 + for: {{ dig "etcdHighNumberOfLeaderChanges" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighNumberOfLeaderChanges" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighNumberOfFailedGRPCRequests | default false) }} + - alert: etcdHighNumberOfFailedGRPCRequests + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster has high number of failed grpc requests. + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 1 + for: {{ dig "etcdHighNumberOfFailedGRPCRequests" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighNumberOfFailedGRPCRequests" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighNumberOfFailedGRPCRequests | default false) }} + - alert: etcdHighNumberOfFailedGRPCRequests + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}}% of requests for {{`{{`}} $labels.grpc_method {{`}}`}} failed on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster has high number of failed grpc requests. + expr: |- + 100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code) + / + sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code) + > 5 + for: {{ dig "etcdHighNumberOfFailedGRPCRequests" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighNumberOfFailedGRPCRequests" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdGRPCRequestsSlow | default false) }} + - alert: etcdGRPCRequestsSlow + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile of gRPC requests is {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}} for {{`{{`}} $labels.grpc_method {{`}}`}} method.' + summary: etcd grpc requests are slow + expr: |- + histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type)) + > 0.15 + for: {{ dig "etcdGRPCRequestsSlow" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdGRPCRequestsSlow" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdMemberCommunicationSlow | default false) }} + - alert: etcdMemberCommunicationSlow + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": member communication with {{`{{`}} $labels.To {{`}}`}} is taking {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster member communication is slow. + expr: |- + histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.15 + for: {{ dig "etcdMemberCommunicationSlow" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdMemberCommunicationSlow" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighNumberOfFailedProposals | default false) }} + - alert: etcdHighNumberOfFailedProposals + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": {{`{{`}} $value {{`}}`}} proposal failures within the last 30 minutes on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster has high number of proposal failures. + expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 + for: {{ dig "etcdHighNumberOfFailedProposals" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighNumberOfFailedProposals" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighFsyncDurations | default false) }} + - alert: etcdHighFsyncDurations + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster 99th percentile fsync durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.5 + for: {{ dig "etcdHighFsyncDurations" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighFsyncDurations" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighFsyncDurations | default false) }} + - alert: etcdHighFsyncDurations + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile fsync durations are {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster 99th percentile fsync durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 1 + for: {{ dig "etcdHighFsyncDurations" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighFsyncDurations" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdHighCommitDurations | default false) }} + - alert: etcdHighCommitDurations + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": 99th percentile commit durations {{`{{`}} $value {{`}}`}}s on etcd instance {{`{{`}} $labels.instance {{`}}`}}.' + summary: etcd cluster 99th percentile commit durations are too high. + expr: |- + histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m])) + > 0.25 + for: {{ dig "etcdHighCommitDurations" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdHighCommitDurations" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdDatabaseQuotaLowSpace | default false) }} + - alert: etcdDatabaseQuotaLowSpace + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size exceeds the defined quota on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please defrag or increase the quota as the writes to etcd will be disabled when it is full.' + summary: etcd cluster database is running full. + expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95 + for: {{ dig "etcdDatabaseQuotaLowSpace" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdDatabaseQuotaLowSpace" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdExcessiveDatabaseGrowth | default false) }} + - alert: etcdExcessiveDatabaseGrowth + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": Predicting running out of disk space in the next four hours, based on write observations within the past four hours on etcd instance {{`{{`}} $labels.instance {{`}}`}}, please check as it might be disruptive.' + summary: etcd cluster database growing very fast. + expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60) > etcd_server_quota_backend_bytes{job=~".*etcd.*"} + for: {{ dig "etcdExcessiveDatabaseGrowth" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdExcessiveDatabaseGrowth" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.etcdDatabaseHighFragmentationRatio | default false) }} + - alert: etcdDatabaseHighFragmentationRatio + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.etcd }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.etcd | indent 8 }} +{{- end }} + description: 'etcd cluster "{{`{{`}} $labels.job {{`}}`}}": database size in use on instance {{`{{`}} $labels.instance {{`}}`}} is {{`{{`}} $value | humanizePercentage {{`}}`}} of the actual allocated disk space, please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.' + runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation + summary: etcd database size in use is less than 50% of the actual allocated storage. + expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600 + for: {{ dig "etcdDatabaseHighFragmentationRatio" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "etcdDatabaseHighFragmentationRatio" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.etcd }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/general.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/general.rules.yaml new file mode 100644 index 0000000..e6d6870 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/general.rules.yaml @@ -0,0 +1,125 @@ +{{- /* +Generated from 'general.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.general }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "general.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: general.rules + rules: +{{- if not (.Values.defaultRules.disabled.TargetDown | default false) }} + - alert: TargetDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.general }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.general | indent 8 }} +{{- end }} + description: '{{`{{`}} printf "%.4g" $value {{`}}`}}% of the {{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.service {{`}}`}} targets in {{`{{`}} $labels.namespace {{`}}`}} namespace are down.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/general/targetdown + summary: One or more targets are unreachable. + expr: 100 * (count(up == 0) BY ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, job, namespace, service) / count(up) BY ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, job, namespace, service)) > 10 + for: {{ dig "TargetDown" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "TargetDown" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.Watchdog | default false) }} + - alert: Watchdog + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.general }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.general | indent 8 }} +{{- end }} + description: 'This is an alert meant to ensure that the entire alerting pipeline is functional. + + This alert is always firing, therefore it should always be firing in Alertmanager + + and always fire against a receiver. There are integrations with various notification + + mechanisms that send a notification when this alert is not firing. For example the + + "DeadMansSnitch" integration in PagerDuty. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/general/watchdog + summary: An alert that should always be firing to certify that Alertmanager is working properly. + expr: vector(1) + labels: + severity: {{ dig "Watchdog" "severity" "none" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.InfoInhibitor | default false) }} + - alert: InfoInhibitor + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.general }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.general | indent 8 }} +{{- end }} + description: 'This is an alert that is used to inhibit info alerts. + + By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with + + other alerts. + + This alert fires whenever there''s a severity="info" alert, and stops firing when another alert with a + + severity of ''warning'' or ''critical'' starts firing on the same namespace. + + This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/general/infoinhibitor + summary: Info-level alert inhibition. + expr: group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace) (ALERTS{severity = "info"} == 1) unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace) group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace) (ALERTS{alertname != "InfoInhibitor", alertstate = "firing", severity =~ "warning|critical"} == 1) + labels: + severity: {{ dig "InfoInhibitor" "severity" "none" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.general }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_limits.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_limits.yaml new file mode 100644 index 0000000..228ecde --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_limits.yaml @@ -0,0 +1,63 @@ +{{- /* +Generated from 'k8s.rules.container-cpu-limits' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerCpuLimits }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-cpu-limits" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_cpu_limits + rules: + - expr: |- + kube_pod_container_resource_limits{resource="cpu",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuLimits }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuLimits }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="cpu",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_limits:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuLimits }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuLimits }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_requests.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_requests.yaml new file mode 100644 index 0000000..710cef8 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_requests.yaml @@ -0,0 +1,63 @@ +{{- /* +Generated from 'k8s.rules.container-cpu-requests' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerCpuRequests }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-cpu-requests" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_cpu_requests + rules: + - expr: |- + kube_pod_container_resource_requests{resource="cpu",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuRequests }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuRequests }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="cpu",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_requests:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuRequests }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuRequests }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml new file mode 100644 index 0000000..209315e --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_cpu_usage_seconds_total.yaml @@ -0,0 +1,60 @@ +{{- /* +Generated from 'k8s.rules.container-cpu-usage-seconds-total' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerCpuUsageSecondsTotal }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-cpu-usage-seconds-total" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_cpu_usage_seconds_total + rules: + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + rate(container_cpu_usage_seconds_total{job="{{ $kubeletJob }}", metrics_path="/metrics/cadvisor", image!=""}[5m]) + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) ( + 1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuUsageSecondsTotal }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuUsageSecondsTotal }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + irate(container_cpu_usage_seconds_total{job="{{ $kubeletJob }}", metrics_path="/metrics/cadvisor", image!=""}[5m]) + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) ( + 1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuUsageSecondsTotal }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerCpuUsageSecondsTotal }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml new file mode 100644 index 0000000..0924b0c --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_cache.yaml @@ -0,0 +1,43 @@ +{{- /* +Generated from 'k8s.rules.container-memory-cache' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemoryCache }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-cache" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_cache + rules: + - expr: |- + container_memory_cache{job="{{ $kubeletJob }}", metrics_path="/metrics/cadvisor", image!=""} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_cache + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryCache }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryCache }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_limits.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_limits.yaml new file mode 100644 index 0000000..d4a0bcb --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_limits.yaml @@ -0,0 +1,63 @@ +{{- /* +Generated from 'k8s.rules.container-memory-limits' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemoryLimits }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-limits" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_limits + rules: + - expr: |- + kube_pod_container_resource_limits{resource="memory",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryLimits }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryLimits }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="memory",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_limits:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryLimits }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryLimits }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_requests.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_requests.yaml new file mode 100644 index 0000000..3894b69 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_requests.yaml @@ -0,0 +1,63 @@ +{{- /* +Generated from 'k8s.rules.container-memory-requests' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemoryRequests }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-requests" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_requests + rules: + - expr: |- + kube_pod_container_resource_requests{resource="memory",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryRequests }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryRequests }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="memory",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_requests:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryRequests }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryRequests }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml new file mode 100644 index 0000000..099218f --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_rss.yaml @@ -0,0 +1,43 @@ +{{- /* +Generated from 'k8s.rules.container-memory-rss' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemoryRss }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-rss" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_rss + rules: + - expr: |- + container_memory_rss{job="{{ $kubeletJob }}", metrics_path="/metrics/cadvisor", image!=""} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_rss + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryRss }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryRss }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml new file mode 100644 index 0000000..99a719f --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_swap.yaml @@ -0,0 +1,43 @@ +{{- /* +Generated from 'k8s.rules.container-memory-swap' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemorySwap }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-swap" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_swap + rules: + - expr: |- + container_memory_swap{job="{{ $kubeletJob }}", metrics_path="/metrics/cadvisor", image!=""} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_swap + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemorySwap }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemorySwap }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml new file mode 100644 index 0000000..9010d20 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_memory_working_set_bytes.yaml @@ -0,0 +1,43 @@ +{{- /* +Generated from 'k8s.rules.container-memory-working-set-bytes' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerMemoryWorkingSetBytes }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-memory-working-set-bytes" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_memory_working_set_bytes + rules: + - expr: |- + container_memory_working_set_bytes{job="{{ $kubeletJob }}", metrics_path="/metrics/cadvisor", image!=""} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, node) (kube_pod_info{node!=""}) + ) + record: node_namespace_pod_container:container_memory_working_set_bytes + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryWorkingSetBytes }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerMemoryWorkingSetBytes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml new file mode 100644 index 0000000..edba0c2 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.container_resource.yaml @@ -0,0 +1,168 @@ +{{- /* +Generated from 'k8s.rules.container-resource' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sContainerResource }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.container-resource" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.container_resource + rules: + - expr: |- + kube_pod_container_resource_requests{resource="memory",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="memory",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_requests:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + kube_pod_container_resource_requests{resource="cpu",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_requests{resource="cpu",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_requests:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + kube_pod_container_resource_limits{resource="memory",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="memory",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_memory:kube_pod_container_resource_limits:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + kube_pod_container_resource_limits{resource="cpu",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) + group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + (kube_pod_status_phase{phase=~"Pending|Running"} == 1) + ) + record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, cluster) ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, container, cluster) ( + kube_pod_container_resource_limits{resource="cpu",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + kube_pod_status_phase{phase=~"Pending|Running"} == 1 + ) + ) + ) + record: namespace_cpu:kube_pod_container_resource_limits:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sContainerResource }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml new file mode 100644 index 0000000..568b67f --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/k8s.rules.pod_owner.yaml @@ -0,0 +1,220 @@ +{{- /* +Generated from 'k8s.rules.pod-owner' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.k8sPodOwner }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "k8s.rules.pod-owner" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: k8s.rules.pod_owner + rules: + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, replicaset, namespace) group_left(owner_name) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, replicaset, namespace) ( + 1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, replicaset, namespace, owner_name) ( + kube_replicaset_owner{job="{{ $kubeStateMetricsJob }}", owner_kind=""} + ) + ), + "workload", "$1", "replicaset", "(.*)" + ) + ) + labels: + workload_type: replicaset + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="ReplicaSet"}, + "replicaset", "$1", "owner_name", "(.*)" + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}replicaset, namespace, cluster) group_left(owner_name) topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, replicaset, namespace) ( + 1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, replicaset, namespace, owner_name) ( + kube_replicaset_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="Deployment"} + ) + ), + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: deployment + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="DaemonSet"}, + "workload", "$1", "owner_name", "(.*)" + ) + ) + labels: + workload_type: daemonset + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="StatefulSet"}, + "workload", "$1", "owner_name", "(.*)") + ) + labels: + workload_type: statefulset + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_join( + group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, job_name, pod, owner_name) ( + label_join( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="Job"} + , "job_name", "", "owner_name") + ) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, job_name) group_left() + group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, job_name) ( + kube_job_owner{job="{{ $kubeStateMetricsJob }}", owner_kind=~"Pod|"} + ) + , "workload", "", "owner_name") + ) + labels: + workload_type: job + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="", owner_name=""}, + "workload", "$1", "pod", "(.+)") + ) + labels: + workload_type: barepod + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, pod) ( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="Node"}, + "workload", "$1", "pod", "(.+)") + ) + labels: + workload_type: staticpod + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: namespace_workload_pod:kube_pod_owner:relabel + - expr: |- + group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, workload, workload_type, pod) ( + label_join( + label_join( + group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, job_name, pod) ( + label_join( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="Job"} + , "job_name", "", "owner_name") + ) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, job_name) group_left(owner_kind, owner_name) + group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, job_name, owner_kind, owner_name) ( + kube_job_owner{job="{{ $kubeStateMetricsJob }}", owner_kind!="Pod", owner_kind!=""} + ) + , "workload", "", "owner_name") + , "workload_type", "", "owner_kind") + + OR + + label_replace( + label_replace( + label_replace( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind="ReplicaSet"} + , "replicaset", "$1", "owner_name", "(.+)" + ) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, replicaset) group_left(owner_kind, owner_name) + group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, replicaset, owner_kind, owner_name) ( + kube_replicaset_owner{job="{{ $kubeStateMetricsJob }}", owner_kind!="Deployment", owner_kind!=""} + ) + , "workload", "$1", "owner_name", "(.+)") + OR + label_replace( + group by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, owner_name, owner_kind) ( + kube_pod_owner{job="{{ $kubeStateMetricsJob }}", owner_kind!="ReplicaSet", owner_kind!="DaemonSet", owner_kind!="StatefulSet", owner_kind!="Job", owner_kind!="Node", owner_kind!=""} + ) + , "workload", "$1", "owner_name", "(.+)" + ) + , "workload_type", "$1", "owner_kind", "(.+)") + ) + record: namespace_workload_pod:kube_pod_owner:relabel + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.k8sPodOwner }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml new file mode 100644 index 0000000..d36b10e --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-availability.rules.yaml @@ -0,0 +1,266 @@ +{{- /* +Generated from 'kube-apiserver-availability.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverAvailability }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-availability.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - interval: 3m + name: kube-apiserver-availability.rules + rules: + - expr: avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 + record: code_verb:apiserver_request_total:increase30d + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: code:apiserver_request_total:increase30d + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: code:apiserver_request_total:increase30d + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, verb, scope, le) (increase(apiserver_request_sli_duration_seconds_bucket[1h])) + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d]) * 24 * 30) + record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h{le="+Inf"}) + record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"}) + record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - ( + ( + # write too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"} or vector(0)) + ) + + ( + # read too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"} or vector(0)) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"} or vector(0)) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"} or vector(0)) + ) + ) + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d) + labels: + verb: all + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:availability30d + - expr: |- + 1 - ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"LIST|GET"}) + - + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope=~"resource|",le=~"1(\\.0)?"} or vector(0)) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="namespace",le=~"5(\\.0)?"} or vector(0)) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"LIST|GET",scope="cluster",le=~"30(\\.0)?"} or vector(0)) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{verb="read"}) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:availability30d + - expr: |- + 1 - ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{verb=~"POST|PUT|PATCH|DELETE",le=~"1(\\.0)?"} or vector(0)) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (code:apiserver_request_total:increase30d{verb="write"}) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:availability30d + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,code,resource) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[5m])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: code_resource:apiserver_request_total:rate5m + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,code,resource) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: code_resource:apiserver_request_total:rate5m + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"2.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"3.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"4.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, code, verb) (increase(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET|POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + record: code_verb:apiserver_request_total:increase1h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverAvailability }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml new file mode 100644 index 0000000..4777a80 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-burnrate.rules.yaml @@ -0,0 +1,441 @@ +{{- /* +Generated from 'kube-apiserver-burnrate.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverBurnrate }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-burnrate.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-apiserver-burnrate.rules + rules: + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[1d])) + or + vector(0) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[1d])) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[1d])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[1d])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[1d])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate1d + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[1h])) + or + vector(0) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[1h])) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[1h])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[1h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[1h])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate1h + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[2h])) + or + vector(0) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[2h])) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[2h])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[2h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[2h])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate2h + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[30m])) + or + vector(0) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[30m])) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[30m])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[30m])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[30m])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate30m + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[3d])) + or + vector(0) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[3d])) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[3d])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[3d])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[3d])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate3d + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[5m])) + or + vector(0) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[5m])) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[5m])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[5m])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[5m])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate5m + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope=~"resource|",le=~"1(\\.0)?"}[6h])) + or + vector(0) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="namespace",le=~"5(\\.0)?"}[6h])) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward",scope="cluster",le=~"30(\\.0)?"}[6h])) + ) + ) + + + # errors + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",code=~"5.."}[6h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET"}[6h])) + labels: + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate6h + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1d])) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[1d])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[1d])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate1d + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[1h])) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[1h])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[1h])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate1h + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[2h])) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[2h])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[2h])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate2h + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[30m])) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[30m])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[30m])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate30m + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[3d])) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[3d])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[3d])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate3d + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m])) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[5m])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[5m])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate5m + - expr: |- + ( + ( + # too slow + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_count{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[6h])) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward",le=~"1(\\.0)?"}[6h])) + ) + + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE"}[6h])) + labels: + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverBurnrate }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: apiserver_request:burnrate6h +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml new file mode 100644 index 0000000..ed5df43 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-histogram.rules.yaml @@ -0,0 +1,54 @@ +{{- /* +Generated from 'kube-apiserver-histogram.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverHistogram }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-histogram.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-apiserver-histogram.rules + rules: + - expr: histogram_quantile(0.99, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"LIST|GET",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + labels: + quantile: '0.99' + verb: read + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverHistogram }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverHistogram }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, le, resource) (rate(apiserver_request_sli_duration_seconds_bucket{job="{{ $kubeApiserverJob }}",verb=~"POST|PUT|PATCH|DELETE",subresource!~"proxy|attach|log|exec|portforward"}[5m]))) > 0 + labels: + quantile: '0.99' + verb: write + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverHistogram }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverHistogram }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml new file mode 100644 index 0000000..f4de035 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-apiserver-slos.yaml @@ -0,0 +1,159 @@ +{{- /* +Generated from 'kube-apiserver-slos' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeApiServer.enabled .Values.defaultRules.rules.kubeApiserverSlos }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-apiserver-slos" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-apiserver-slos + rules: +{{- if not (.Values.defaultRules.disabled.KubeAPIErrorBudgetBurn | default false) }} + - alert: KubeAPIErrorBudgetBurn + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos | indent 8 }} +{{- end }} + description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (apiserver_request:burnrate1h) > (14.40 * 0.01000) + and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (apiserver_request:burnrate5m) > (14.40 * 0.01000) + for: {{ dig "KubeAPIErrorBudgetBurn" "for" "2m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + long: 1h + severity: {{ dig "KubeAPIErrorBudgetBurn" "severity" "critical" .Values.customRules }} + short: 5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPIErrorBudgetBurn | default false) }} + - alert: KubeAPIErrorBudgetBurn + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos | indent 8 }} +{{- end }} + description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (apiserver_request:burnrate6h) > (6.00 * 0.01000) + and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (apiserver_request:burnrate30m) > (6.00 * 0.01000) + for: {{ dig "KubeAPIErrorBudgetBurn" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + long: 6h + severity: {{ dig "KubeAPIErrorBudgetBurn" "severity" "critical" .Values.customRules }} + short: 30m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPIErrorBudgetBurn | default false) }} + - alert: KubeAPIErrorBudgetBurn + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos | indent 8 }} +{{- end }} + description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (apiserver_request:burnrate1d) > (3.00 * 0.01000) + and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (apiserver_request:burnrate2h) > (3.00 * 0.01000) + for: {{ dig "KubeAPIErrorBudgetBurn" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + long: 1d + severity: {{ dig "KubeAPIErrorBudgetBurn" "severity" "warning" .Values.customRules }} + short: 2h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPIErrorBudgetBurn | default false) }} + - alert: KubeAPIErrorBudgetBurn + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeApiserverSlos | indent 8 }} +{{- end }} + description: The API server is burning too much error budget on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapierrorbudgetburn + summary: The API server is burning too much error budget. + expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (apiserver_request:burnrate3d) > (1.00 * 0.01000) + and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (apiserver_request:burnrate6h) > (1.00 * 0.01000) + for: {{ dig "KubeAPIErrorBudgetBurn" "for" "3h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + long: 3d + severity: {{ dig "KubeAPIErrorBudgetBurn" "severity" "warning" .Values.customRules }} + short: 6h + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeApiserverSlos }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml new file mode 100644 index 0000000..fc199f1 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-general.rules.yaml @@ -0,0 +1,49 @@ +{{- /* +Generated from 'kube-prometheus-general.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusGeneral }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-general.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-prometheus-general.rules + rules: + - expr: count without(instance, pod, node) (up == 1) + record: count:up1 + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusGeneral }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusGeneral }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: count without(instance, pod, node) (up == 0) + record: count:up0 + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusGeneral }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusGeneral }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml new file mode 100644 index 0000000..63f721f --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-prometheus-node-recording.rules.yaml @@ -0,0 +1,93 @@ +{{- /* +Generated from 'kube-prometheus-node-recording.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubePrometheusNodeRecording }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-prometheus-node-recording.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-prometheus-node-recording.rules + rules: + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) BY ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance) + record: instance:node_cpu:rate:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum(rate(node_network_receive_bytes_total[3m])) BY ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance) + record: instance:node_network_receive_bytes:rate:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum(rate(node_network_transmit_bytes_total[3m])) BY ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance) + record: instance:node_network_transmit_bytes:rate:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) WITHOUT (cpu, mode) / ON ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) BY ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance, cpu)) BY ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance) + record: instance:node_cpu:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) + record: cluster:node_cpu:sum_rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: cluster:node_cpu:sum_rate5m / count(sum(node_cpu_seconds_total) BY ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}instance, cpu)) + record: cluster:node_cpu:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubePrometheusNodeRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml new file mode 100644 index 0000000..b349f0d --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-scheduler.rules.yaml @@ -0,0 +1,136 @@ +{{- /* +Generated from 'kube-scheduler.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeSchedulerRecording }} +{{- $kubeSchedulerJob := include "kube-prometheus-stack-kube-scheduler.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-scheduler.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-scheduler.rules + rules: + - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.99, sum(rate(scheduler_pod_scheduling_sli_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.99' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_pod_scheduling_sli_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.9, sum(rate(scheduler_pod_scheduling_sli_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.9' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_pod_scheduling_sli_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_attempt_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_attempt_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_scheduling_algorithm_duration_seconds:histogram_quantile + - expr: histogram_quantile(0.5, sum(rate(scheduler_pod_scheduling_sli_duration_seconds_bucket{job="{{ $kubeSchedulerJob }}"}[5m])) without(instance, pod)) + labels: + quantile: '0.5' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: cluster_quantile:scheduler_pod_scheduling_sli_duration_seconds:histogram_quantile +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml new file mode 100644 index 0000000..7f3600f --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kube-state-metrics.yaml @@ -0,0 +1,152 @@ +{{- /* +Generated from 'kube-state-metrics' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubeStateMetrics }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kube-state-metrics" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kube-state-metrics + rules: +{{- if not (.Values.defaultRules.disabled.KubeStateMetricsListErrors | default false) }} + - alert: KubeStateMetricsListErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics | indent 8 }} +{{- end }} + description: kube-state-metrics is experiencing errors at an elevated rate in list operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricslisterrors + summary: kube-state-metrics is experiencing errors in list operations. + expr: |- + (sum(rate(kube_state_metrics_list_total{job="{{ $kubeStateMetricsJob }}",result="error"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + / + sum(rate(kube_state_metrics_list_total{job="{{ $kubeStateMetricsJob }}"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) + > 0.01 + for: {{ dig "KubeStateMetricsListErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStateMetricsListErrors" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStateMetricsWatchErrors | default false) }} + - alert: KubeStateMetricsWatchErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics | indent 8 }} +{{- end }} + description: kube-state-metrics is experiencing errors at an elevated rate in watch operations. This is likely causing it to not be able to expose metrics about Kubernetes objects correctly or at all. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricswatcherrors + summary: kube-state-metrics is experiencing errors in watch operations. + expr: |- + (sum(rate(kube_state_metrics_watch_total{job="{{ $kubeStateMetricsJob }}",result="error"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + / + sum(rate(kube_state_metrics_watch_total{job="{{ $kubeStateMetricsJob }}"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster)) + > 0.01 + for: {{ dig "KubeStateMetricsWatchErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStateMetricsWatchErrors" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStateMetricsShardingMismatch | default false) }} + - alert: KubeStateMetricsShardingMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics | indent 8 }} +{{- end }} + description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricsshardingmismatch + summary: kube-state-metrics sharding is misconfigured. + expr: stdvar (kube_state_metrics_total_shards{job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) != 0 + for: {{ dig "KubeStateMetricsShardingMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStateMetricsShardingMismatch" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStateMetricsShardsMissing | default false) }} + - alert: KubeStateMetricsShardsMissing + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeStateMetrics | indent 8 }} +{{- end }} + description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kube-state-metrics/kubestatemetricsshardsmissing + summary: kube-state-metrics shards are missing. + expr: |- + 2^max(kube_state_metrics_total_shards{job="{{ $kubeStateMetricsJob }}"}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) - 1 + - + sum( 2 ^ max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, shard_ordinal) (kube_state_metrics_shard_ordinal{job="{{ $kubeStateMetricsJob }}"}) ) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + != 0 + for: {{ dig "KubeStateMetricsShardsMissing" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStateMetricsShardsMissing" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeStateMetrics }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml new file mode 100644 index 0000000..14e65c0 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubelet.rules.yaml @@ -0,0 +1,82 @@ +{{- /* +Generated from 'kubelet.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubelet.enabled .Values.defaultRules.rules.kubelet }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubelet.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubelet.rules + rules: + - expr: |- + histogram_quantile( + 0.99, + sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="{{ $kubeletJob }}", metrics_path="/metrics"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left (node) + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, node) (kubelet_node_name{job="{{ $kubeletJob }}", metrics_path="/metrics"}) + ) + labels: + quantile: '0.99' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - expr: |- + histogram_quantile( + 0.9, + sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="{{ $kubeletJob }}", metrics_path="/metrics"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left (node) + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, node) (kubelet_node_name{job="{{ $kubeletJob }}", metrics_path="/metrics"}) + ) + labels: + quantile: '0.9' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile + - expr: |- + histogram_quantile( + 0.5, + sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="{{ $kubeletJob }}", metrics_path="/metrics"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left (node) + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, node) (kubelet_node_name{job="{{ $kubeletJob }}", metrics_path="/metrics"}) + ) + labels: + quantile: '0.5' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubelet }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml new file mode 100644 index 0000000..ccdc636 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-apps.yaml @@ -0,0 +1,609 @@ +{{- /* +Generated from 'kubernetes-apps' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesApps }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }}{{- $namespaceOperator := .Values.defaultRules.appNamespacesOperator | default "=~" }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-apps" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-apps + rules: +{{- if not (.Values.defaultRules.disabled.KubePodCrashLooping | default false) }} + - alert: KubePodCrashLooping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: 'Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} ({{`{{`}} $labels.container {{`}}`}}) is in waiting state (reason: "CrashLoopBackOff") on cluster {{`{{`}} $labels.cluster {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodcrashlooping + summary: Pod is crash looping. + expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}[5m]) >= 1 + for: {{ dig "KubePodCrashLooping" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePodCrashLooping" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePodNotReady | default false) }} + - alert: KubePodNotReady + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Pod {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} has been in a non-ready state for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepodnotready + summary: Pod has been in a non-ready state for more than 15 minutes. + expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, job, cluster) ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, job, cluster) ( + kube_pod_status_phase{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", phase=~"Pending|Unknown"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) group_left() topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, cluster) ( + 1, max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"}) + ) + ) > 0 + for: {{ dig "KubePodNotReady" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePodNotReady" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDeploymentGenerationMismatch | default false) }} + - alert: KubeDeploymentGenerationMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Deployment generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} does not match, this indicates that the Deployment has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentgenerationmismatch + summary: Deployment generation mismatch due to possible roll-back + expr: |- + kube_deployment_status_observed_generation{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_deployment_metadata_generation{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + for: {{ dig "KubeDeploymentGenerationMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDeploymentGenerationMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDeploymentReplicasMismatch | default false) }} + - alert: KubeDeploymentReplicasMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentreplicasmismatch + summary: Deployment has not matched the expected number of replicas. + expr: |- + ( + kube_deployment_spec_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + > + kube_deployment_status_replicas_available{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) and ( + changes(kube_deployment_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}[10m]) + == + 0 + ) + for: {{ dig "KubeDeploymentReplicasMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDeploymentReplicasMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDeploymentRolloutStuck | default false) }} + - alert: KubeDeploymentRolloutStuck + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Rollout of deployment {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.deployment {{`}}`}} is not progressing for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedeploymentrolloutstuck + summary: Deployment rollout is not progressing. + expr: |- + kube_deployment_status_condition{condition="Progressing", status="false",job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != 0 + for: {{ dig "KubeDeploymentRolloutStuck" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDeploymentRolloutStuck" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStatefulSetReplicasMismatch | default false) }} + - alert: KubeStatefulSetReplicasMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} has not matched the expected number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetreplicasmismatch + summary: StatefulSet has not matched the expected number of replicas. + expr: |- + ( + kube_statefulset_status_replicas_ready{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_statefulset_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) and ( + changes(kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}[10m]) + == + 0 + ) + for: {{ dig "KubeStatefulSetReplicasMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStatefulSetReplicasMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStatefulSetGenerationMismatch | default false) }} + - alert: KubeStatefulSetGenerationMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: StatefulSet generation for {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} does not match, this indicates that the StatefulSet has failed but has not been rolled back on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetgenerationmismatch + summary: StatefulSet generation mismatch due to possible roll-back + expr: |- + kube_statefulset_status_observed_generation{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_statefulset_metadata_generation{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + for: {{ dig "KubeStatefulSetGenerationMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStatefulSetGenerationMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeStatefulSetUpdateNotRolledOut | default false) }} + - alert: KubeStatefulSetUpdateNotRolledOut + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: StatefulSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.statefulset {{`}}`}} update has not been rolled out on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubestatefulsetupdatenotrolledout + summary: StatefulSet update has not been rolled out. + expr: |- + ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, statefulset, job, cluster) ( + kube_statefulset_status_current_revision{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + unless + kube_statefulset_status_update_revision{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, statefulset, job, cluster) + ( + kube_statefulset_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) + ) and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, statefulset, job, cluster) ( + changes(kube_statefulset_status_replicas_updated{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}[5m]) + == + 0 + ) + for: {{ dig "KubeStatefulSetUpdateNotRolledOut" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeStatefulSetUpdateNotRolledOut" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDaemonSetRolloutStuck | default false) }} + - alert: KubeDaemonSetRolloutStuck + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} has not finished or progressed for at least 15m on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetrolloutstuck + summary: DaemonSet rollout is stuck. + expr: |- + ( + ( + kube_daemonset_status_current_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) or ( + kube_daemonset_status_number_misscheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + 0 + ) or ( + kube_daemonset_status_updated_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) or ( + kube_daemonset_status_number_available{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) + ) and ( + changes(kube_daemonset_status_updated_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}[5m]) + == + 0 + ) + for: {{ dig "KubeDaemonSetRolloutStuck" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDaemonSetRolloutStuck" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeContainerWaiting | default false) }} + - alert: KubeContainerWaiting + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: 'pod/{{`{{`}} $labels.pod {{`}}`}} in namespace {{`{{`}} $labels.namespace {{`}}`}} on container {{`{{`}} $labels.container{{`}}`}} has been in waiting state for longer than 1 hour. (reason: "{{`{{`}} $labels.reason {{`}}`}}") on cluster {{`{{`}} $labels.cluster {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontainerwaiting + summary: Pod container waiting longer than 1 hour + expr: kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} > 0 + for: {{ dig "KubeContainerWaiting" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeContainerWaiting" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDaemonSetNotScheduled | default false) }} + - alert: KubeDaemonSetNotScheduled + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are not scheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetnotscheduled + summary: DaemonSet pods are not scheduled. + expr: |- + kube_daemonset_status_desired_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + - + kube_daemonset_status_current_number_scheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} > 0 + for: {{ dig "KubeDaemonSetNotScheduled" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDaemonSetNotScheduled" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeDaemonSetMisScheduled | default false) }} + - alert: KubeDaemonSetMisScheduled + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: '{{`{{`}} $value {{`}}`}} Pods of DaemonSet {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.daemonset {{`}}`}} are running where they are not supposed to run on cluster {{`{{`}} $labels.cluster {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubedaemonsetmisscheduled + summary: DaemonSet pods are misscheduled. + expr: kube_daemonset_status_number_misscheduled{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} > 0 + for: {{ dig "KubeDaemonSetMisScheduled" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeDaemonSetMisScheduled" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeJobNotCompleted | default false) }} + - alert: KubeJobNotCompleted + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} is taking more than {{`{{`}} "43200" | humanizeDuration {{`}}`}} to complete on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobnotcompleted + summary: Job did not complete in time + expr: |- + time() - max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, job_name, cluster) (kube_job_status_start_time{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + and + kube_job_status_active{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} > 0) > 43200 + labels: + severity: {{ dig "KubeJobNotCompleted" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeJobFailed | default false) }} + - alert: KubeJobFailed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: Job {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.job_name {{`}}`}} failed to complete. Removing failed job after investigation should clear this alert on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubejobfailed + summary: Job failed to complete. + expr: kube_job_failed{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} > 0 + for: {{ dig "KubeJobFailed" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeJobFailed" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeHpaReplicasMismatch | default false) }} + - alert: KubeHpaReplicasMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has not matched the desired number of replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpareplicasmismatch + summary: HPA has not matched desired number of replicas. + expr: |- + (kube_horizontalpodautoscaler_status_desired_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + > + kube_horizontalpodautoscaler_spec_min_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}) + and + (kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + < + kube_horizontalpodautoscaler_spec_max_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}) + and + changes(kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"}[15m]) == 0 + for: {{ dig "KubeHpaReplicasMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeHpaReplicasMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeHpaMaxedOut | default false) }} + - alert: KubeHpaMaxedOut + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: HPA {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.horizontalpodautoscaler {{`}}`}} has been running at max replicas for longer than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubehpamaxedout + summary: HPA is running at max replicas + expr: |- + ( + kube_horizontalpodautoscaler_status_current_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + == + kube_horizontalpodautoscaler_spec_max_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) + and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}namespace, horizontalpodautoscaler, cluster) ( + kube_horizontalpodautoscaler_spec_max_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + != + kube_horizontalpodautoscaler_spec_min_replicas{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) + for: {{ dig "KubeHpaMaxedOut" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeHpaMaxedOut" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePdbNotEnoughHealthyPods | default false) }} + - alert: KubePdbNotEnoughHealthyPods + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesApps | indent 8 }} +{{- end }} + description: PDB {{`{{`}} $labels.cluster {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.poddisruptionbudget {{`}}`}} expects {{`{{`}} $value {{`}}`}} more healthy pods. The desired number of healthy pods has not been met for at least 15m. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepdbnotenoughhealthypods + summary: PDB does not have enough healthy pods. + expr: |- + ( + kube_poddisruptionbudget_status_desired_healthy{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + - + kube_poddisruptionbudget_status_current_healthy{job="{{ $kubeStateMetricsJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}"} + ) + > 0 + for: {{ dig "KubePdbNotEnoughHealthyPods" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePdbNotEnoughHealthyPods" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesApps }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml new file mode 100644 index 0000000..ad65ccb --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-resources.yaml @@ -0,0 +1,360 @@ +{{- /* +Generated from 'kubernetes-resources' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesResources }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-resources" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-resources + rules: +{{- if not (.Values.defaultRules.disabled.KubeCPUOvercommit | default false) }} + - alert: KubeCPUOvercommit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Pods by {{`{{`}} printf "%.2f" $value {{`}}`}} CPU shares and cannot tolerate node failure. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuovercommit + summary: Cluster has overcommitted CPU resource requests. + expr: |- + # Non-HA clusters. + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (namespace_cpu:kube_pod_container_resource_requests:sum{}) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) > 0 + ) + and + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) (kube_node_role{job="{{ $kubeStateMetricsJob }}", role="control-plane"})) < 3 + ) + or + # HA clusters. + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (namespace_cpu:kube_pod_container_resource_requests:sum{}) + - + ( + # Skip clusters with only one allocatable node. + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) + - + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="cpu"}) + ) > 0 + ) > 0 + ) + for: {{ dig "KubeCPUOvercommit" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeCPUOvercommit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeMemoryOvercommit | default false) }} + - alert: KubeMemoryOvercommit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Pods by {{`{{`}} $value | humanize {{`}}`}} bytes and cannot tolerate node failure. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + # Non-HA clusters. + ( + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (namespace_memory:kube_pod_container_resource_requests:sum{}) + - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="memory"}) > 0 + ) + and + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) (kube_node_role{job="{{ $kubeStateMetricsJob }}", role="control-plane"})) < 3 + ) + or + # HA clusters. + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (namespace_memory:kube_pod_container_resource_requests:sum{}) + - + ( + # Skip clusters with only one allocatable node. + ( + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="memory"}) + - + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (kube_node_status_allocatable{job="{{ $kubeStateMetricsJob }}",resource="memory"}) + ) > 0 + ) > 0 + ) + for: {{ dig "KubeMemoryOvercommit" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeMemoryOvercommit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeCPUQuotaOvercommit | default false) }} + - alert: KubeCPUQuotaOvercommit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted CPU resource requests for Namespaces. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecpuquotaovercommit + summary: Cluster has overcommitted CPU resource requests. + expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) ( + min without(resource) (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard", resource=~"(cpu|requests.cpu)"}) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) ( + kube_node_status_allocatable{resource="cpu", job="{{ $kubeStateMetricsJob }}"} + ) > 1.5 + for: {{ dig "KubeCPUQuotaOvercommit" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeCPUQuotaOvercommit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeMemoryQuotaOvercommit | default false) }} + - alert: KubeMemoryQuotaOvercommit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Cluster {{`{{`}} $labels.cluster {{`}}`}} has overcommitted memory resource requests for Namespaces. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubememoryquotaovercommit + summary: Cluster has overcommitted memory resource requests. + expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) ( + min without(resource) (kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard", resource=~"(memory|requests.memory)"}) + ) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) ( + kube_node_status_allocatable{resource="memory", job="{{ $kubeStateMetricsJob }}"} + ) > 1.5 + for: {{ dig "KubeMemoryQuotaOvercommit" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeMemoryQuotaOvercommit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeQuotaAlmostFull | default false) }} + - alert: KubeQuotaAlmostFull + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaalmostfull + summary: Namespace quota is going to be full. + expr: |- + max without (instance, job, type) ( + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"} + ) + / on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, resource, resourcequota) group_left() + ( + max without (instance, job, type) ( + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} + ) > 0 + ) + > 0.9 < 1 + for: {{ dig "KubeQuotaAlmostFull" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeQuotaAlmostFull" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeQuotaFullyUsed | default false) }} + - alert: KubeQuotaFullyUsed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotafullyused + summary: Namespace quota is fully used. + expr: |- + max without (instance, job, type) ( + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"} + ) + / on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, resource, resourcequota) group_left() + ( + max without (instance, job, type) ( + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} + ) > 0 + ) + == 1 + for: {{ dig "KubeQuotaFullyUsed" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeQuotaFullyUsed" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeQuotaExceeded | default false) }} + - alert: KubeQuotaExceeded + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: Namespace {{`{{`}} $labels.namespace {{`}}`}} is using {{`{{`}} $value | humanizePercentage {{`}}`}} of its {{`{{`}} $labels.resource {{`}}`}} quota on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubequotaexceeded + summary: Namespace quota has exceeded the limits. + expr: |- + max without (instance, job, type) ( + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="used"} + ) + / on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, resource, resourcequota) group_left() + ( + max without (instance, job, type) ( + kube_resourcequota{job="{{ $kubeStateMetricsJob }}", type="hard"} + ) > 0 + ) > 1 + for: {{ dig "KubeQuotaExceeded" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeQuotaExceeded" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.CPUThrottlingHigh | default false) }} + - alert: CPUThrottlingHigh + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesResources | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} throttling of CPU in namespace {{`{{`}} $labels.namespace {{`}}`}} for container {{`{{`}} $labels.container {{`}}`}} in pod {{`{{`}} $labels.pod {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/cputhrottlinghigh + summary: Processes experience elevated CPU throttling. + expr: |- + sum without (id, metrics_path, name, image, endpoint, job, node) ( + topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container, instance) (1, + increase( + container_cpu_cfs_throttled_periods_total{container!="", job="{{ $kubeletJob }}", metrics_path="/metrics/cadvisor", } + [5m]) + ) + ) + / on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container, instance) group_left + sum without (id, metrics_path, name, image, endpoint, job, node) ( + topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container, instance) (1, + increase( + container_cpu_cfs_periods_total{job="{{ $kubeletJob }}", metrics_path="/metrics/cadvisor", } + [5m]) + ) + ) + > ( 25 / 100 ) + for: {{ dig "CPUThrottlingHigh" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "CPUThrottlingHigh" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesResources }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml new file mode 100644 index 0000000..217186d --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-storage.yaml @@ -0,0 +1,219 @@ +{{- /* +Generated from 'kubernetes-storage' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesStorage }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +{{- $targetNamespace := .Values.defaultRules.appNamespacesTarget }} +{{- $namespaceOperator := .Values.defaultRules.appNamespacesOperator | default "=~" }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-storage" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-storage + rules: +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeFillingUp | default false) }} + - alert: KubePersistentVolumeFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is only {{`{{`}} $value | humanizePercentage {{`}}`}} free. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumefillingup + summary: PersistentVolume is filling up. + expr: |- + ( + kubelet_volume_stats_available_bytes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_used_bytes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: {{ dig "KubePersistentVolumeFillingUp" "for" "1m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeFillingUp" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeFillingUp | default false) }} + - alert: KubePersistentVolumeFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is expected to fill up within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} is available. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumefillingup + summary: PersistentVolume is filling up. + expr: |- + ( + kubelet_volume_stats_available_bytes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_capacity_bytes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_used_bytes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_available_bytes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: {{ dig "KubePersistentVolumeFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeFillingUp" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeInodesFillingUp | default false) }} + - alert: KubePersistentVolumeInodesFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: The PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} only has {{`{{`}} $value | humanizePercentage {{`}}`}} free inodes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeinodesfillingup + summary: PersistentVolumeInodes are filling up. + expr: |- + ( + kubelet_volume_stats_inodes_free{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} + ) < 0.03 + and + kubelet_volume_stats_inodes_used{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: {{ dig "KubePersistentVolumeInodesFillingUp" "for" "1m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeInodesFillingUp" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeInodesFillingUp | default false) }} + - alert: KubePersistentVolumeInodesFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: Based on recent sampling, the PersistentVolume claimed by {{`{{`}} $labels.persistentvolumeclaim {{`}}`}} in Namespace {{`{{`}} $labels.namespace {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} is expected to run out of inodes within four days. Currently {{`{{`}} $value | humanizePercentage {{`}}`}} of its inodes are free. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeinodesfillingup + summary: PersistentVolumeInodes are filling up. + expr: |- + ( + kubelet_volume_stats_inodes_free{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} + / + kubelet_volume_stats_inodes{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} + ) < 0.15 + and + kubelet_volume_stats_inodes_used{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"} > 0 + and + predict_linear(kubelet_volume_stats_inodes_free{job="{{ $kubeletJob }}", namespace{{ $namespaceOperator }}"{{ $targetNamespace }}", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1 + unless on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, persistentvolumeclaim) + kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1 + for: {{ dig "KubePersistentVolumeInodesFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeInodesFillingUp" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubePersistentVolumeErrors | default false) }} + - alert: KubePersistentVolumeErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesStorage | indent 8 }} +{{- end }} + description: The persistent volume {{`{{`}} $labels.persistentvolume {{`}}`}} {{`{{`}} with $labels.cluster -{{`}}`}} on Cluster {{`{{`}} . {{`}}`}} {{`{{`}}- end {{`}}`}} has status {{`{{`}} $labels.phase {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubepersistentvolumeerrors + summary: PersistentVolume is having issues with provisioning. + expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="{{ $kubeStateMetricsJob }}"} > 0 + for: {{ dig "KubePersistentVolumeErrors" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubePersistentVolumeErrors" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesStorage }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml new file mode 100644 index 0000000..01c5e58 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-apiserver.yaml @@ -0,0 +1,232 @@ +{{- /* +Generated from 'kubernetes-system-apiserver' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-apiserver" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-apiserver + rules: +{{- if not (.Values.defaultRules.disabled.KubeClientCertificateExpiration | default false) }} + - alert: KubeClientCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 7.0 days on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration + summary: Client certificate is about to expire. + expr: |- + histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="{{ $kubeApiserverJob }}"}[5m]))) < 604800 + and + on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="{{ $kubeApiserverJob }}"} > 0 + for: {{ dig "KubeClientCertificateExpiration" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeClientCertificateExpiration" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeClientCertificateExpiration | default false) }} + - alert: KubeClientCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: A client certificate used to authenticate to kubernetes apiserver is expiring in less than 24.0 hours on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclientcertificateexpiration + summary: Client certificate is about to expire. + expr: |- + histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="{{ $kubeApiserverJob }}"}[5m]))) < 86400 + and + on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="{{ $kubeApiserverJob }}"} > 0 + for: {{ dig "KubeClientCertificateExpiration" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeClientCertificateExpiration" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAggregatedAPIErrors | default false) }} + - alert: KubeAggregatedAPIErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubernetes aggregated API {{`{{`}} $labels.instance {{`}}`}}/{{`{{`}} $labels.name {{`}}`}} has reported {{`{{`}} $labels.reason {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapierrors + summary: Kubernetes aggregated API has reported errors. + expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, name, reason)(increase(aggregator_unavailable_apiservice_total{job="{{ $kubeApiserverJob }}"}[1m])) > 0 + for: {{ dig "KubeAggregatedAPIErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeAggregatedAPIErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAggregatedAPIDown | default false) }} + - alert: KubeAggregatedAPIDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubernetes aggregated API {{`{{`}} $labels.name {{`}}`}}/{{`{{`}} $labels.namespace {{`}}`}} has been only {{`{{`}} $value | humanize {{`}}`}}% available over the last 10m on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeaggregatedapidown + summary: Kubernetes aggregated API is down. + expr: (1 - max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="{{ $kubeApiserverJob }}"}[10m]))) * 100 < 85 + for: {{ dig "KubeAggregatedAPIDown" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeAggregatedAPIDown" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if .Values.kubeApiServer.enabled }} +{{- if not (.Values.defaultRules.disabled.KubeAPIDown | default false) }} + - alert: KubeAPIDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: KubeAPI has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapidown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="{{ $kubeApiserverJob }}"}) + for: {{ dig "KubeAPIDown" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeAPIDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPIInstanceUnreachable | default false) }} + - alert: KubeAPIInstanceUnreachable + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: A KubeAPI instance has been unreachable for more than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapiinstanceunreachable + summary: KubeAPI instance is unreachable. + expr: up{job="{{ $kubeApiserverJob }}"} == 0 + for: {{ dig "KubeAPIInstanceUnreachable" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeAPIInstanceUnreachable" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeAPITerminatedRequests | default false) }} + - alert: KubeAPITerminatedRequests + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeapiterminatedrequests + summary: The kubernetes apiserver has terminated {{`{{`}} $value | humanizePercentage {{`}}`}} of its incoming requests. + expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_terminations_total{job="{{ $kubeApiserverJob }}"}[10m])) / ( sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_total{job="{{ $kubeApiserverJob }}"}[10m])) + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(apiserver_request_terminations_total{job="{{ $kubeApiserverJob }}"}[10m])) ) > 0.20 + for: {{ dig "KubeAPITerminatedRequests" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeAPITerminatedRequests" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml new file mode 100644 index 0000000..e5dab47 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-controller-manager.yaml @@ -0,0 +1,86 @@ +{{- /* +Generated from 'kubernetes-system-controller-manager' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeControllerManager.enabled .Values.defaultRules.rules.kubeControllerManager (not (.Values.defaultRules.disabled.KubeControllerManagerDown | default false)) }} +{{- $kubeControllerManagerJob := include "kube-prometheus-stack-kube-controller-manager.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-controller-manager" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-controller-manager + rules: +{{- if .Values.kubeControllerManager.enabled }} +{{- if not (.Values.defaultRules.disabled.KubeControllerManagerDown | default false) }} + - alert: KubeControllerManagerDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeControllerManager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeControllerManager | indent 8 }} +{{- end }} + description: KubeControllerManager has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontrollermanagerdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="{{ $kubeControllerManagerJob }}"}) + for: {{ dig "KubeControllerManagerDown" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeControllerManagerDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeControllerManager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeControllerManager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeControllerManagerInstanceUnreachable | default false) }} + - alert: KubeControllerManagerInstanceUnreachable + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeControllerManager }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeControllerManager | indent 8 }} +{{- end }} + description: A KubeControllerManager instance has been unreachable for more than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubecontrollermanagerinstanceunreachable + summary: KubeControllerManager instance is unreachable. + expr: up{job="{{ $kubeControllerManagerJob }}"} == 0 + for: {{ dig "KubeControllerManagerInstanceUnreachable" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeControllerManagerInstanceUnreachable" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeControllerManager }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeControllerManager }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml new file mode 100644 index 0000000..4286cee --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kube-proxy.yaml @@ -0,0 +1,84 @@ +{{- /* +Generated from 'kubernetes-system-kube-proxy' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeProxy.enabled .Values.defaultRules.rules.kubeProxy (not (.Values.defaultRules.disabled.KubeProxyDown | default false)) }} +{{- $kubeProxyJob := include "kube-prometheus-stack-kube-proxy.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-kube-proxy" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-kube-proxy + rules: +{{- if not (.Values.defaultRules.disabled.KubeProxyDown | default false) }} + - alert: KubeProxyDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeProxy }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeProxy | indent 8 }} +{{- end }} + description: KubeProxy has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeproxydown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="{{ $kubeProxyJob }}"}) + for: {{ dig "KubeProxyDown" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeProxyDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeProxy }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeProxy }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeProxyInstanceUnreachable | default false) }} + - alert: KubeProxyInstanceUnreachable + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeProxy }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeProxy | indent 8 }} +{{- end }} + description: A KubeProxy instance has been unreachable for more than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeproxyinstanceunreachable + summary: KubeProxy instance is unreachable. + expr: up{job="{{ $kubeProxyJob }}"} == 0 + for: {{ dig "KubeProxyInstanceUnreachable" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeProxyInstanceUnreachable" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeProxy }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeProxy }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml new file mode 100644 index 0000000..3df2a87 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-kubelet.yaml @@ -0,0 +1,506 @@ +{{- /* +Generated from 'kubernetes-system-kubelet' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +{{- $kubeletJob := include "kube-prometheus-stack-kubelet.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-kubelet" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-kubelet + rules: +{{- if not (.Values.defaultRules.disabled.KubeNodeNotReady | default false) }} + - alert: KubeNodeNotReady + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.node {{`}}`}} has been unready for more than 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodenotready + summary: Node is not ready. + expr: |- + kube_node_status_condition{job="{{ $kubeStateMetricsJob }}",condition="Ready",status="true"} == 0 + and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) + kube_node_spec_unschedulable{job="{{ $kubeStateMetricsJob }}"} == 0 + for: {{ dig "KubeNodeNotReady" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeNodeNotReady" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeNodePressure | default false) }} + - alert: KubeNodePressure + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}} has active Condition {{`{{`}} $labels.condition {{`}}`}}. This is caused by resource usage exceeding eviction thresholds.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodepressure + summary: Node has as active Condition. + expr: |- + kube_node_status_condition{job="{{ $kubeStateMetricsJob }}",condition=~"(MemoryPressure|DiskPressure|PIDPressure)",status="true"} == 1 + and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) + kube_node_spec_unschedulable{job="{{ $kubeStateMetricsJob }}"} == 0 + for: {{ dig "KubeNodePressure" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeNodePressure" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeNodeUnreachable | default false) }} + - alert: KubeNodeUnreachable + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.node {{`}}`}} is unreachable and some workloads may be rescheduled on cluster {{`{{`}} $labels.cluster {{`}}`}}.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodeunreachable + summary: Node is unreachable. + expr: (kube_node_spec_taint{job="{{ $kubeStateMetricsJob }}",key="node.kubernetes.io/unreachable",effect="NoSchedule"} unless ignoring(key,value) kube_node_spec_taint{job="{{ $kubeStateMetricsJob }}",key=~"ToBeDeletedByClusterAutoscaler|cloud.google.com/impending-node-termination|aws-node-termination-handler/spot-itn"}) == 1 + for: {{ dig "KubeNodeUnreachable" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeNodeUnreachable" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletTooManyPods | default false) }} + - alert: KubeletTooManyPods + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet '{{`{{`}} $labels.node {{`}}`}}' is running at {{`{{`}} $value | humanizePercentage {{`}}`}} of its Pod capacity on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubelettoomanypods + summary: Kubelet is running at capacity. + expr: |- + ( + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + kubelet_running_pods{job="{{ $kubeletJob }}", metrics_path="/metrics"} > 1 + ) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left(node) + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, node) ( + kubelet_node_name{job="{{ $kubeletJob }}", metrics_path="/metrics"} + ) + ) + / on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) group_left() + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) ( + kube_node_status_capacity{job="{{ $kubeStateMetricsJob }}", resource="pods"} != 1 + ) > 0.95 + for: {{ dig "KubeletTooManyPods" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletTooManyPods" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeNodeReadinessFlapping | default false) }} + - alert: KubeNodeReadinessFlapping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: The readiness status of node {{`{{`}} $labels.node {{`}}`}} has changed {{`{{`}} $value {{`}}`}} times in the last 15 minutes on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodereadinessflapping + summary: Node readiness status is flapping. + expr: |- + sum(changes(kube_node_status_condition{job="{{ $kubeStateMetricsJob }}",status="true",condition="Ready"}[15m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) > 2 + and on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) + kube_node_spec_unschedulable{job="{{ $kubeStateMetricsJob }}"} == 0 + for: {{ dig "KubeNodeReadinessFlapping" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeNodeReadinessFlapping" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeNodeEviction | default false) }} + - alert: KubeNodeEviction + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Node {{`{{`}} $labels.node {{`}}`}} on {{`{{`}} $labels.cluster {{`}}`}} is evicting Pods due to {{`{{`}} $labels.eviction_signal {{`}}`}}. Eviction occurs when eviction thresholds are crossed, typically caused by Pods exceeding RAM/ephemeral-storage limits. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubenodeeviction + summary: Node is evicting pods. + expr: |- + sum(rate(kubelet_evictions{job="{{ $kubeletJob }}", metrics_path="/metrics"}[15m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, eviction_signal, instance) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left(node) + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, node) ( + kubelet_node_name{job="{{ $kubeletJob }}", metrics_path="/metrics"} + ) + > 0 + for: {{ dig "KubeNodeEviction" "for" "0s" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeNodeEviction" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletPlegDurationHigh | default false) }} + - alert: KubeletPlegDurationHigh + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile duration of {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletplegdurationhigh + summary: Kubelet Pod Lifecycle Event Generator is taking too long to relist. + expr: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{quantile="0.99"} >= 10 + for: {{ dig "KubeletPlegDurationHigh" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletPlegDurationHigh" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletPodStartUpLatencyHigh | default false) }} + - alert: KubeletPodStartUpLatencyHigh + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet Pod startup 99th percentile latency is {{`{{`}} $value {{`}}`}} seconds on node {{`{{`}} $labels.node {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletpodstartuplatencyhigh + summary: Kubelet Pod startup latency is too high. + expr: |- + histogram_quantile(0.99, + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le) ( + topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, le, operation_type) (1, + rate(kubelet_pod_worker_duration_seconds_bucket{job="{{ $kubeletJob }}", metrics_path="/metrics"}[5m]) + ) + ) + ) + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) group_left(node) + topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, node) (1, + kubelet_node_name{job="{{ $kubeletJob }}", metrics_path="/metrics"} + ) + > 60 + for: {{ dig "KubeletPodStartUpLatencyHigh" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletPodStartUpLatencyHigh" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletClientCertificateExpiration | default false) }} + - alert: KubeletClientCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificateexpiration + summary: Kubelet client certificate is about to expire. + expr: kubelet_certificate_manager_client_ttl_seconds < {{ .Values.defaultRules.kubeletClientCertificateExpiration.warning }} + labels: + severity: {{ dig "KubeletClientCertificateExpiration" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletClientCertificateExpiration | default false) }} + - alert: KubeletClientCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Client certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificateexpiration + summary: Kubelet client certificate is about to expire. + expr: kubelet_certificate_manager_client_ttl_seconds < {{ .Values.defaultRules.kubeletClientCertificateExpiration.critical }} + labels: + severity: {{ dig "KubeletClientCertificateExpiration" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletServerCertificateExpiration | default false) }} + - alert: KubeletServerCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificateexpiration + summary: Kubelet server certificate is about to expire. + expr: kubelet_certificate_manager_server_ttl_seconds < {{ .Values.defaultRules.kubeletServerCertificateExpiration.warning }} + labels: + severity: {{ dig "KubeletServerCertificateExpiration" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletServerCertificateExpiration | default false) }} + - alert: KubeletServerCertificateExpiration + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Server certificate for Kubelet on node {{`{{`}} $labels.node {{`}}`}} expires in {{`{{`}} $value | humanizeDuration {{`}}`}} on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificateexpiration + summary: Kubelet server certificate is about to expire. + expr: kubelet_certificate_manager_server_ttl_seconds < {{ .Values.defaultRules.kubeletServerCertificateExpiration.critical }} + labels: + severity: {{ dig "KubeletServerCertificateExpiration" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletClientCertificateRenewalErrors | default false) }} + - alert: KubeletClientCertificateRenewalErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its client certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletclientcertificaterenewalerrors + summary: Kubelet has failed to renew its client certificate. + expr: increase(kubelet_certificate_manager_client_expiration_renew_errors[5m]) > 0 + for: {{ dig "KubeletClientCertificateRenewalErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletClientCertificateRenewalErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletServerCertificateRenewalErrors | default false) }} + - alert: KubeletServerCertificateRenewalErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet on node {{`{{`}} $labels.node {{`}}`}} has failed to renew its server certificate ({{`{{`}} $value | humanize {{`}}`}} errors in the last 5 minutes) on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletservercertificaterenewalerrors + summary: Kubelet has failed to renew its server certificate. + expr: increase(kubelet_server_expiration_renew_errors[5m]) > 0 + for: {{ dig "KubeletServerCertificateRenewalErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletServerCertificateRenewalErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeletInstanceUnreachable | default false) }} + - alert: KubeletInstanceUnreachable + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: A Kubelet instance has been unreachable for more than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletinstanceunreachable + summary: Kubelet instance is unreachable. + expr: up{job="{{ $kubeletJob }}", metrics_path="/metrics"} == 0 + for: {{ dig "KubeletInstanceUnreachable" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletInstanceUnreachable" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if .Values.prometheusOperator.kubeletService.enabled }} +{{- if not (.Values.defaultRules.disabled.KubeletDown | default false) }} + - alert: KubeletDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubelet has disappeared from Prometheus target discovery on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeletdown + summary: Target disappeared from Prometheus target discovery. + expr: |- + count by (cluster) (kube_node_info{job="{{ $kubeStateMetricsJob }}"}) + unless on (cluster) + count by (cluster) (up{job="{{ $kubeletJob }}", metrics_path="/metrics"} == 1) + for: {{ dig "KubeletDown" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeletDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml new file mode 100644 index 0000000..51e0e03 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system-scheduler.yaml @@ -0,0 +1,86 @@ +{{- /* +Generated from 'kubernetes-system-scheduler' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.kubeScheduler.enabled .Values.defaultRules.rules.kubeSchedulerAlerting (not (.Values.defaultRules.disabled.KubeSchedulerDown | default false)) }} +{{- $kubeSchedulerJob := include "kube-prometheus-stack-kube-scheduler.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system-scheduler" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system-scheduler + rules: +{{- if .Values.kubeScheduler.enabled }} +{{- if not (.Values.defaultRules.disabled.KubeSchedulerDown | default false) }} + - alert: KubeSchedulerDown + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeSchedulerAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeSchedulerAlerting | indent 8 }} +{{- end }} + description: KubeScheduler has disappeared from Prometheus target discovery. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeschedulerdown + summary: Target disappeared from Prometheus target discovery. + expr: absent(up{job="{{ $kubeSchedulerJob }}"}) + for: {{ dig "KubeSchedulerDown" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeSchedulerDown" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeSchedulerInstanceUnreachable | default false) }} + - alert: KubeSchedulerInstanceUnreachable + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubeSchedulerAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubeSchedulerAlerting | indent 8 }} +{{- end }} + description: A KubeScheduler instance has been unreachable for more than 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeschedulerinstanceunreachable + summary: KubeScheduler instance is unreachable. + expr: up{job="{{ $kubeSchedulerJob }}"} == 0 + for: {{ dig "KubeSchedulerInstanceUnreachable" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeSchedulerInstanceUnreachable" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubeSchedulerAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml new file mode 100644 index 0000000..e047089 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/kubernetes-system.yaml @@ -0,0 +1,88 @@ +{{- /* +Generated from 'kubernetes-system' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.kubernetesSystem }} +{{- $kubeApiserverJob := include "kube-prometheus-stack-kube-apiserver.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "kubernetes-system" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: kubernetes-system + rules: +{{- if not (.Values.defaultRules.disabled.KubeVersionMismatch | default false) }} + - alert: KubeVersionMismatch + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: There are {{`{{`}} $value {{`}}`}} different semantic versions of Kubernetes components running on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeversionmismatch + summary: Different semantic versions of Kubernetes components running. + expr: count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*"))) > 1 + for: {{ dig "KubeVersionMismatch" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeVersionMismatch" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.KubeClientErrors | default false) }} + - alert: KubeClientErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.kubernetesSystem | indent 8 }} +{{- end }} + description: Kubernetes API server client '{{`{{`}} $labels.job {{`}}`}}/{{`{{`}} $labels.instance {{`}}`}}' is experiencing {{`{{`}} $value | humanizePercentage {{`}}`}} errors on cluster {{`{{`}} $labels.cluster {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/kubernetes/kubeclienterrors + summary: Kubernetes API server client is experiencing errors. + expr: |- + (sum(rate(rest_client_requests_total{job="{{ $kubeApiserverJob }}",code=~"5.."}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, job, namespace) + / + sum(rate(rest_client_requests_total{job="{{ $kubeApiserverJob }}"}[5m])) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, job, namespace)) + > 0.01 + for: {{ dig "KubeClientErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "KubeClientErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.kubernetesSystem }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml new file mode 100644 index 0000000..58cd220 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.rules.yaml @@ -0,0 +1,244 @@ +{{- /* +Generated from 'node-exporter.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.nodeExporterRecording }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-exporter.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node-exporter.rules + rules: + - expr: |- + count without (cpu, mode) ( + node_cpu_seconds_total{job="node-exporter",mode="idle"} + ) + record: instance:node_num_cpu:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - avg without (cpu) ( + sum without (mode) (rate(node_cpu_seconds_total{job="node-exporter", mode=~"idle|iowait|steal"}[5m])) + ) + record: instance:node_cpu_utilisation:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + ( + node_load1{job="node-exporter"} + / + instance:node_num_cpu:sum{job="node-exporter"} + ) + record: instance:node_load1_per_cpu:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - ( + ( + node_memory_MemAvailable_bytes{job="node-exporter"} + or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + + node_memory_Cached_bytes{job="node-exporter"} + + + node_memory_MemFree_bytes{job="node-exporter"} + + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) + / + node_memory_MemTotal_bytes{job="node-exporter"} + ) + record: instance:node_memory_utilisation:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) + record: instance:node_vmstat_pgmajfault:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: rate(node_disk_io_time_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_seconds:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) + record: instance_device:node_disk_io_time_weighted_seconds:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_receive_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_bytes_excluding_lo:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_transmit_bytes_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_bytes_excluding_lo:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_receive_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_receive_drop_excluding_lo:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_transmit_drop_total{job="node-exporter", device!="lo"}[5m]) + ) + record: instance:node_network_transmit_drop_excluding_lo:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_receive_bytes_total{job="node-exporter", device!~"lo|veth.+"}[5m]) + ) + record: instance:node_network_receive_bytes_physical:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_transmit_bytes_total{job="node-exporter", device!~"lo|veth.+"}[5m]) + ) + record: instance:node_network_transmit_bytes_physical:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_receive_drop_total{job="node-exporter", device!~"lo|veth.+"}[5m]) + ) + record: instance:node_network_receive_drop_physical:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum without (device) ( + rate(node_network_transmit_drop_total{job="node-exporter", device!~"lo|veth.+"}[5m]) + ) + record: instance:node_network_transmit_drop_physical:rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterRecording }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml new file mode 100644 index 0000000..e1a2cec --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/node-exporter.yaml @@ -0,0 +1,829 @@ +{{- /* +Generated from 'node-exporter' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.nodeExporterAlerting }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-exporter" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node-exporter + rules: +{{- if not (.Values.defaultRules.disabled.NodeFilesystemSpaceFillingUp | default false) }} + - alert: NodeFilesystemSpaceFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 24 hours. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} * 100 < 15 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemSpaceFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemSpaceFillingUp" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemSpaceFillingUp | default false) }} + - alert: NodeFilesystemSpaceFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left and is filling up fast. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemspacefillingup + summary: Filesystem is predicted to run out of space within the next 4 hours. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} * 100 < 10 + and + predict_linear(node_filesystem_avail_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemSpaceFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemSpaceFillingUp" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemAlmostOutOfSpace | default false) }} + - alert: NodeFilesystemAlmostOutOfSpace + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 5% space left. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemAlmostOutOfSpace" "for" "30m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemAlmostOutOfSpace" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemAlmostOutOfSpace | default false) }} + - alert: NodeFilesystemAlmostOutOfSpace + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available space left. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutofspace + summary: Filesystem has less than 3% space left. + expr: |- + ( + node_filesystem_avail_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemAlmostOutOfSpace" "for" "30m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemAlmostOutOfSpace" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemFilesFillingUp | default false) }} + - alert: NodeFilesystemFilesFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 24 hours. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} / node_filesystem_files{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} * 100 < 40 + and + predict_linear(node_filesystem_files_free{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""}[6h], 24*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemFilesFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemFilesFillingUp" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemFilesFillingUp | default false) }} + - alert: NodeFilesystemFilesFillingUp + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left and is filling up fast. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemfilesfillingup + summary: Filesystem is predicted to run out of inodes within the next 4 hours. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} / node_filesystem_files{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} * 100 < 20 + and + predict_linear(node_filesystem_files_free{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""}[6h], 4*60*60) < 0 + and + node_filesystem_readonly{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemFilesFillingUp" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemFilesFillingUp" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemAlmostOutOfFiles | default false) }} + - alert: NodeFilesystemAlmostOutOfFiles + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 5% inodes left. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} / node_filesystem_files{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} * 100 < 5 + and + node_filesystem_readonly{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemAlmostOutOfFiles" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemAlmostOutOfFiles" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFilesystemAlmostOutOfFiles | default false) }} + - alert: NodeFilesystemAlmostOutOfFiles + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Filesystem on {{`{{`}} $labels.device {{`}}`}}, mounted on {{`{{`}} $labels.mountpoint {{`}}`}}, at {{`{{`}} $labels.instance {{`}}`}} has only {{`{{`}} printf "%.2f" $value {{`}}`}}% available inodes left. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefilesystemalmostoutoffiles + summary: Filesystem has less than 3% inodes left. + expr: |- + ( + node_filesystem_files_free{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} / node_filesystem_files{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} * 100 < 3 + and + node_filesystem_readonly{job="node-exporter",{{ $.Values.defaultRules.node.fsSelector }},mountpoint!=""} == 0 + ) + for: {{ dig "NodeFilesystemAlmostOutOfFiles" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFilesystemAlmostOutOfFiles" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeNetworkReceiveErrs | default false) }} + - alert: NodeNetworkReceiveErrs + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} receive errors in the last two minutes.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodenetworkreceiveerrs + summary: Network interface is reporting many receive errors. + expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) > 0.01 + for: {{ dig "NodeNetworkReceiveErrs" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeNetworkReceiveErrs" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeNetworkTransmitErrs | default false) }} + - alert: NodeNetworkTransmitErrs + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.instance {{`}}`}} interface {{`{{`}} $labels.device {{`}}`}} has encountered {{`{{`}} printf "%.0f" $value {{`}}`}} transmit errors in the last two minutes.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodenetworktransmiterrs + summary: Network interface is reporting many transmit errors. + expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) > 0.01 + for: {{ dig "NodeNetworkTransmitErrs" "for" "1h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeNetworkTransmitErrs" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeHighNumberConntrackEntriesUsed | default false) }} + - alert: NodeHighNumberConntrackEntriesUsed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: '{{`{{`}} $labels.instance {{`}}`}} {{`{{`}} $value | humanizePercentage {{`}}`}} of conntrack entries are used.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodehighnumberconntrackentriesused + summary: Number of conntrack are getting close to the limit. + expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) > 0.75 + labels: + severity: {{ dig "NodeHighNumberConntrackEntriesUsed" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeTextFileCollectorScrapeError | default false) }} + - alert: NodeTextFileCollectorScrapeError + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Node Exporter text file collector on {{`{{`}} $labels.instance {{`}}`}} failed to scrape. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodetextfilecollectorscrapeerror + summary: Node Exporter text file collector failed to scrape. + expr: node_textfile_scrape_error{job="node-exporter"} == 1 + labels: + severity: {{ dig "NodeTextFileCollectorScrapeError" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeClockSkewDetected | default false) }} + - alert: NodeClockSkewDetected + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Clock at {{`{{`}} $labels.instance {{`}}`}} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodeclockskewdetected + summary: Clock skew detected. + expr: |- + ( + node_timex_offset_seconds{job="node-exporter"} > 0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0 + ) + or + ( + node_timex_offset_seconds{job="node-exporter"} < -0.05 + and + deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0 + ) + for: {{ dig "NodeClockSkewDetected" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeClockSkewDetected" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeClockNotSynchronising | default false) }} + - alert: NodeClockNotSynchronising + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Clock at {{`{{`}} $labels.instance {{`}}`}} is not synchronising. Ensure NTP is configured on this host. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodeclocknotsynchronising + summary: Clock not synchronising. + expr: |- + min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0 + and + node_timex_maxerror_seconds{job="node-exporter"} >= 16 + for: {{ dig "NodeClockNotSynchronising" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeClockNotSynchronising" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeRAIDDegraded | default false) }} + - alert: NodeRAIDDegraded + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: RAID array '{{`{{`}} $labels.device {{`}}`}}' at {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/noderaiddegraded + summary: RAID Array is degraded. + expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) > 0 + for: {{ dig "NodeRAIDDegraded" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeRAIDDegraded" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeRAIDDiskFailure | default false) }} + - alert: NodeRAIDDiskFailure + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: At least one device in RAID array at {{`{{`}} $labels.instance {{`}}`}} failed. Array '{{`{{`}} $labels.device {{`}}`}}' needs attention and possibly a disk swap. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/noderaiddiskfailure + summary: Failed device in RAID array. + expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} > 0 + labels: + severity: {{ dig "NodeRAIDDiskFailure" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFileDescriptorLimit | default false) }} + - alert: NodeFileDescriptorLimit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70 + ) + for: {{ dig "NodeFileDescriptorLimit" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFileDescriptorLimit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeFileDescriptorLimit | default false) }} + - alert: NodeFileDescriptorLimit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: File descriptors limit at {{`{{`}} $labels.instance {{`}}`}} is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodefiledescriptorlimit + summary: Kernel is predicted to exhaust file descriptors limit soon. + expr: |- + ( + node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90 + ) + for: {{ dig "NodeFileDescriptorLimit" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeFileDescriptorLimit" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeCPUHighUsage | default false) }} + - alert: NodeCPUHighUsage + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'CPU usage at {{`{{`}} $labels.instance {{`}}`}} has been above 90% for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodecpuhighusage + summary: High CPU usage. + expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", mode!~"idle|iowait"}[2m]))) * 100 > 90 + for: {{ dig "NodeCPUHighUsage" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeCPUHighUsage" "severity" "info" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeSystemSaturation | default false) }} + - alert: NodeSystemSaturation + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'System load per core at {{`{{`}} $labels.instance {{`}}`}} has been above 2 for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + This might indicate this instance resources saturation and can cause it becoming unresponsive. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodesystemsaturation + summary: System saturated, load per core is very high. + expr: |- + node_load1{job="node-exporter"} + / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2 + for: {{ dig "NodeSystemSaturation" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeSystemSaturation" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeMemoryMajorPagesFaults | default false) }} + - alert: NodeMemoryMajorPagesFaults + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'Memory major pages are occurring at very high rate at {{`{{`}} $labels.instance {{`}}`}}, 500 major page faults per second for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + Please check that there is enough memory available at this instance. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodememorymajorpagesfaults + summary: Memory major page faults are occurring at very high rate. + expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500 + for: {{ dig "NodeMemoryMajorPagesFaults" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeMemoryMajorPagesFaults" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeMemoryHighUtilization | default false) }} + - alert: NodeMemoryHighUtilization + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'Memory is filling up at {{`{{`}} $labels.instance {{`}}`}}, has been above 90% for the last 15 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}%. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodememoryhighutilization + summary: Host is running out of memory. + expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} * 100) > 90 + for: {{ dig "NodeMemoryHighUtilization" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeMemoryHighUtilization" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeDiskIOSaturation | default false) }} + - alert: NodeDiskIOSaturation + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: 'Disk IO queue (aqu-sq) is high on {{`{{`}} $labels.device {{`}}`}} at {{`{{`}} $labels.instance {{`}}`}}, has been above 10 for the last 30 minutes, is currently at {{`{{`}} printf "%.2f" $value {{`}}`}}. + + This symptom might indicate disk saturation. + + ' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodediskiosaturation + summary: Disk IO queue is high. + expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) > 10 + for: {{ dig "NodeDiskIOSaturation" "for" "30m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeDiskIOSaturation" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeSystemdServiceFailed | default false) }} + - alert: NodeSystemdServiceFailed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Systemd service {{`{{`}} $labels.name {{`}}`}} has entered failed state at {{`{{`}} $labels.instance {{`}}`}} + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodesystemdservicefailed + summary: Systemd service has entered failed state. + expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1 + for: {{ dig "NodeSystemdServiceFailed" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeSystemdServiceFailed" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeSystemdServiceCrashlooping | default false) }} + - alert: NodeSystemdServiceCrashlooping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Systemd service {{`{{`}} $labels.name {{`}}`}} has being restarted too many times at {{`{{`}} $labels.instance {{`}}`}} for the last 15 minutes. Please check if service is crash looping. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodesystemdservicecrashlooping + summary: Systemd service keeps restaring, possibly crash looping. + expr: increase(node_systemd_service_restart_total{job="node-exporter"}[5m]) > 2 + for: {{ dig "NodeSystemdServiceCrashlooping" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeSystemdServiceCrashlooping" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.NodeBondingDegraded | default false) }} + - alert: NodeBondingDegraded + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.nodeExporterAlerting | indent 8 }} +{{- end }} + description: Bonding interface {{`{{`}} $labels.master {{`}}`}} on {{`{{`}} $labels.instance {{`}}`}} is in degraded state due to one or more slave failures. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/node/nodebondingdegraded + summary: Bonding interface is degraded. + expr: (node_bonding_slaves{job="node-exporter"} - node_bonding_active{job="node-exporter"}) != 0 + for: {{ dig "NodeBondingDegraded" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeBondingDegraded" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.nodeExporterAlerting }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/node-network.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/node-network.yaml new file mode 100644 index 0000000..16690ee --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/node-network.yaml @@ -0,0 +1,55 @@ +{{- /* +Generated from 'node-network' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.network }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node-network" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node-network + rules: +{{- if not (.Values.defaultRules.disabled.NodeNetworkInterfaceFlapping | default false) }} + - alert: NodeNetworkInterfaceFlapping + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.network }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.network | indent 8 }} +{{- end }} + description: Network interface "{{`{{`}} $labels.device {{`}}`}}" changing its up status often on node-exporter {{`{{`}} $labels.namespace {{`}}`}}/{{`{{`}} $labels.pod {{`}}`}} + runbook_url: {{ .Values.defaultRules.runbookUrl }}/general/nodenetworkinterfaceflapping + summary: Network interface is often changing its status + expr: changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) > 2 + for: {{ dig "NodeNetworkInterfaceFlapping" "for" "2m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "NodeNetworkInterfaceFlapping" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.network }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.network }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml new file mode 100644 index 0000000..0bf9b86 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/node.rules.yaml @@ -0,0 +1,109 @@ +{{- /* +Generated from 'node.rules' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.node }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "node.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: node.rules + rules: + - expr: |- + topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node, namespace, pod) ( + label_replace(kube_pod_info{job="{{ $kubeStateMetricsJob }}",node!=""}, "pod", "$1", "pod", "(.*)") + )) + record: 'node_namespace_pod:kube_pod_info:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) ( + node_cpu_seconds_total{mode="idle",job="node-exporter"} + * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) group_left(node) + topk by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod) (1, node_namespace_pod:kube_pod_info:) + ) + record: node:node_num_cpu:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum( + node_memory_MemAvailable_bytes{job="node-exporter"} or + ( + node_memory_Buffers_bytes{job="node-exporter"} + + node_memory_Cached_bytes{job="node-exporter"} + + node_memory_MemFree_bytes{job="node-exporter"} + + node_memory_Slab_bytes{job="node-exporter"} + ) + ) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) + record: :node_memory_MemAvailable_bytes:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, node) ( + sum without (mode) ( + rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m]) + ) + ) + record: node:node_cpu_utilization:ratio_rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) ( + node:node_cpu_utilization:ratio_rate5m + ) + record: cluster:node_cpu:ratio_rate5m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.node }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.node }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus-operator.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus-operator.yaml new file mode 100644 index 0000000..cb5c495 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus-operator.yaml @@ -0,0 +1,253 @@ +{{- /* +Generated from 'prometheus-operator' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheusOperator }} +{{- $operatorJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "operator" }} +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus-operator" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: prometheus-operator + rules: +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorListErrors | default false) }} + - alert: PrometheusOperatorListErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Errors while performing List operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorlisterrors + summary: Errors while performing list operations in controller. + expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[10m])) / sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_list_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[10m]))) > 0.4 + for: {{ dig "PrometheusOperatorListErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorListErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorWatchErrors | default false) }} + - alert: PrometheusOperatorWatchErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Errors while performing watch operations in controller {{`{{`}}$labels.controller{{`}}`}} in {{`{{`}}$labels.namespace{{`}}`}} namespace. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorwatcherrors + summary: Errors while performing watch operations in controller. + expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m])) / sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_watch_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.4 + for: {{ dig "PrometheusOperatorWatchErrors" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorWatchErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorSyncFailed | default false) }} + - alert: PrometheusOperatorSyncFailed + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Controller {{`{{`}} $labels.controller {{`}}`}} in {{`{{`}} $labels.namespace {{`}}`}} namespace fails to reconcile {{`{{`}} $value {{`}}`}} objects. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorsyncfailed + summary: Last controller reconciliation failed + expr: min_over_time(prometheus_operator_syncs{status="failed",job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusOperatorSyncFailed" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorSyncFailed" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorReconcileErrors | default false) }} + - alert: PrometheusOperatorReconcileErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of reconciling operations failed for {{`{{`}} $labels.controller {{`}}`}} controller in {{`{{`}} $labels.namespace {{`}}`}} namespace.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorreconcileerrors + summary: Errors while reconciling objects. + expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) / (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.1 + for: {{ dig "PrometheusOperatorReconcileErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorReconcileErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorStatusUpdateErrors | default false) }} + - alert: PrometheusOperatorStatusUpdateErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: '{{`{{`}} $value | humanizePercentage {{`}}`}} of status update operations failed for {{`{{`}} $labels.controller {{`}}`}} controller in {{`{{`}} $labels.namespace {{`}}`}} namespace.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorstatusupdateerrors + summary: Errors while updating objects status. + expr: (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_status_update_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) / (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (rate(prometheus_operator_status_update_operations_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]))) > 0.1 + for: {{ dig "PrometheusOperatorStatusUpdateErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorStatusUpdateErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorNodeLookupErrors | default false) }} + - alert: PrometheusOperatorNodeLookupErrors + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Errors while reconciling Prometheus in {{`{{`}} $labels.namespace {{`}}`}} Namespace. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatornodelookuperrors + summary: Errors while reconciling Prometheus. + expr: rate(prometheus_operator_node_address_lookup_errors_total{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0.1 + for: {{ dig "PrometheusOperatorNodeLookupErrors" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorNodeLookupErrors" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorNotReady | default false) }} + - alert: PrometheusOperatorNotReady + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace isn't ready to reconcile {{`{{`}} $labels.controller {{`}}`}} resources. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatornotready + summary: Prometheus operator not ready + expr: min by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,controller,namespace) (max_over_time(prometheus_operator_ready{job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) == 0) + for: {{ dig "PrometheusOperatorNotReady" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorNotReady" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOperatorRejectedResources | default false) }} + - alert: PrometheusOperatorRejectedResources + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheusOperator | indent 8 }} +{{- end }} + description: Prometheus operator in {{`{{`}} $labels.namespace {{`}}`}} namespace rejected {{`{{`}} printf "%0.0f" $value {{`}}`}} {{`{{`}} $labels.controller {{`}}`}}/{{`{{`}} $labels.resource {{`}}`}} resources. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus-operator/prometheusoperatorrejectedresources + summary: Resources rejected by Prometheus operator + expr: min_over_time(prometheus_operator_managed_resources{state="rejected",job="{{ $operatorJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusOperatorRejectedResources" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOperatorRejectedResources" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheusOperator }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml new file mode 100644 index 0000000..368eb64 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/prometheus.yaml @@ -0,0 +1,735 @@ +{{- /* +Generated from 'prometheus' group from https://github.com/prometheus-operator/kube-prometheus.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.defaultRules.rules.prometheus }} +{{- $prometheusJob := printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" }} +{{- $namespace := printf "%s" (include "kube-prometheus-stack.namespace" .) }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "prometheus" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: prometheus + rules: +{{- if not (.Values.defaultRules.disabled.PrometheusBadConfig | default false) }} + - alert: PrometheusBadConfig + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to reload its configuration. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusbadconfig + summary: Failed Prometheus configuration reload. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_config_last_reload_successful{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) == 0 + for: {{ dig "PrometheusBadConfig" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusBadConfig" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusSDRefreshFailure | default false) }} + - alert: PrometheusSDRefreshFailure + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to refresh SD with mechanism {{`{{`}}$labels.mechanism{{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheussdrefreshfailure + summary: Failed Prometheus SD refresh. + expr: increase(prometheus_sd_refresh_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[10m]) > 0 + for: {{ dig "PrometheusSDRefreshFailure" "for" "20m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusSDRefreshFailure" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusKubernetesListWatchFailures | default false) }} + - alert: PrometheusKubernetesListWatchFailures + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Kubernetes service discovery of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is experiencing {{`{{`}} printf "%.0f" $value {{`}}`}} failures with LIST/WATCH requests to the Kubernetes API in the last 5 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuskuberneteslistwatchfailures + summary: Requests in Kubernetes SD are failing. + expr: increase(prometheus_sd_kubernetes_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusKubernetesListWatchFailures" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusKubernetesListWatchFailures" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusNotificationQueueRunningFull | default false) }} + - alert: PrometheusNotificationQueueRunningFull + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Alert notification queue of Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is running full. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotificationqueuerunningfull + summary: Prometheus alert notification queue predicted to run full in less than 30m. + expr: |- + # Without min_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + predict_linear(prometheus_notifications_queue_length{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m], 60 * 30) + > + min_over_time(prometheus_notifications_queue_capacity{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + ) + for: {{ dig "PrometheusNotificationQueueRunningFull" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusNotificationQueueRunningFull" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusErrorSendingAlertsToSomeAlertmanagers | default false) }} + - alert: PrometheusErrorSendingAlertsToSomeAlertmanagers + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% of alerts sent by Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to Alertmanager {{`{{`}}$labels.alertmanager{{`}}`}} were affected by errors.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuserrorsendingalertstosomealertmanagers + summary: More than 1% of alerts sent by Prometheus to a specific Alertmanager were affected by errors. + expr: |- + ( + rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + / + rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + ) + * 100 + > 1 + for: {{ dig "PrometheusErrorSendingAlertsToSomeAlertmanagers" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusErrorSendingAlertsToSomeAlertmanagers" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusNotConnectedToAlertmanagers | default false) }} + - alert: PrometheusNotConnectedToAlertmanagers + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not connected to any Alertmanagers. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotconnectedtoalertmanagers + summary: Prometheus is not connected to any Alertmanagers. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + max_over_time(prometheus_notifications_alertmanagers_discovered{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) < 1 + for: {{ dig "PrometheusNotConnectedToAlertmanagers" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusNotConnectedToAlertmanagers" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusTSDBReloadsFailing | default false) }} + - alert: PrometheusTSDBReloadsFailing + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} reload failures over the last 3h. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustsdbreloadsfailing + summary: Prometheus has issues reloading blocks from disk. + expr: increase(prometheus_tsdb_reloads_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0 + for: {{ dig "PrometheusTSDBReloadsFailing" "for" "4h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusTSDBReloadsFailing" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusTSDBCompactionsFailing | default false) }} + - alert: PrometheusTSDBCompactionsFailing + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has detected {{`{{`}}$value | humanize{{`}}`}} compaction failures over the last 3h. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustsdbcompactionsfailing + summary: Prometheus has issues compacting blocks. + expr: increase(prometheus_tsdb_compactions_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[3h]) > 0 + for: {{ dig "PrometheusTSDBCompactionsFailing" "for" "4h" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusTSDBCompactionsFailing" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusNotIngestingSamples | default false) }} + - alert: PrometheusNotIngestingSamples + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is not ingesting samples. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusnotingestingsamples + summary: Prometheus is not ingesting samples. + expr: |- + ( + sum without(type) (rate(prometheus_tsdb_head_samples_appended_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])) <= 0 + and + ( + sum without(scrape_job) (prometheus_target_metadata_cache_entries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0 + or + sum without(rule_group) (prometheus_rule_group_rules{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}) > 0 + ) + ) + for: {{ dig "PrometheusNotIngestingSamples" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusNotIngestingSamples" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusDuplicateTimestamps | default false) }} + - alert: PrometheusDuplicateTimestamps + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with different values but duplicated timestamp. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusduplicatetimestamps + summary: Prometheus is dropping samples with duplicate timestamps. + expr: rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusDuplicateTimestamps" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusDuplicateTimestamps" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusOutOfOrderTimestamps | default false) }} + - alert: PrometheusOutOfOrderTimestamps + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} is dropping {{`{{`}} printf "%.4g" $value {{`}}`}} samples/s with timestamps arriving out of order. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusoutofordertimestamps + summary: Prometheus drops samples with out-of-order timestamps. + expr: rate(prometheus_target_scrapes_sample_out_of_order_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusOutOfOrderTimestamps" "for" "10m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusOutOfOrderTimestamps" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusRemoteStorageFailures | default false) }} + - alert: PrometheusRemoteStorageFailures + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} failed to send {{`{{`}} printf "%.1f" $value {{`}}`}}% of the samples to {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}} + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotestoragefailures + summary: Prometheus fails to send samples to remote storage. + expr: |- + ( + (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])) + / + ( + (rate(prometheus_remote_storage_failed_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])) + + + (rate(prometheus_remote_storage_succeeded_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) or rate(prometheus_remote_storage_samples_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m])) + ) + ) + * 100 + > 1 + for: {{ dig "PrometheusRemoteStorageFailures" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusRemoteStorageFailures" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusRemoteWriteBehind | default false) }} + - alert: PrometheusRemoteWriteBehind + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write is {{`{{`}} printf "%.1f" $value {{`}}`}}s behind for {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotewritebehind + summary: Prometheus remote write is behind. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_queue_highest_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + - + max_over_time(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + ) + > 120 + for: {{ dig "PrometheusRemoteWriteBehind" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusRemoteWriteBehind" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusRemoteWriteDesiredShards | default false) }} + - alert: PrometheusRemoteWriteDesiredShards + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} remote write desired shards calculation wants to run {{`{{`}} $value {{`}}`}} shards for queue {{`{{`}} $labels.remote_name{{`}}`}}:{{`{{`}} $labels.url {{`}}`}}, which is more than the max of {{`{{`}} printf `prometheus_remote_storage_shards_max{instance="%s",job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}` $labels.instance | query | first | value {{`}}`}}. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusremotewritedesiredshards + summary: Prometheus remote write desired shards calculation wants to run more than configured max shards. + expr: |- + # Without max_over_time, failed scrapes could create false negatives, see + # https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details. + ( + max_over_time(prometheus_remote_storage_shards_desired{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + > + max_over_time(prometheus_remote_storage_shards_max{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) + ) + for: {{ dig "PrometheusRemoteWriteDesiredShards" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusRemoteWriteDesiredShards" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusRuleFailures | default false) }} + - alert: PrometheusRuleFailures + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed to evaluate {{`{{`}} printf "%.0f" $value {{`}}`}} rules in the last 5m. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusrulefailures + summary: Prometheus is failing rule evaluations. + expr: increase(prometheus_rule_evaluation_failures_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusRuleFailures" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusRuleFailures" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusMissingRuleEvaluations | default false) }} + - alert: PrometheusMissingRuleEvaluations + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has missed {{`{{`}} printf "%.0f" $value {{`}}`}} rule group evaluations in the last 5m. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusmissingruleevaluations + summary: Prometheus is missing rule evaluations due to slow rule group evaluation. + expr: increase(prometheus_rule_group_iterations_missed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusMissingRuleEvaluations" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusMissingRuleEvaluations" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusTargetLimitHit | default false) }} + - alert: PrometheusTargetLimitHit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because the number of targets exceeded the configured target_limit. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustargetlimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the targets limit. + expr: increase(prometheus_target_scrape_pool_exceeded_target_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusTargetLimitHit" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusTargetLimitHit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusLabelLimitHit | default false) }} + - alert: PrometheusLabelLimitHit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has dropped {{`{{`}} printf "%.0f" $value {{`}}`}} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuslabellimithit + summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit. + expr: increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusLabelLimitHit" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusLabelLimitHit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusScrapeBodySizeLimitHit | default false) }} + - alert: PrometheusScrapeBodySizeLimitHit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured body_size_limit. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusscrapebodysizelimithit + summary: Prometheus has dropped some targets that exceeded body size limit. + expr: increase(prometheus_target_scrapes_exceeded_body_size_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusScrapeBodySizeLimitHit" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusScrapeBodySizeLimitHit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusScrapeSampleLimitHit | default false) }} + - alert: PrometheusScrapeSampleLimitHit + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} has failed {{`{{`}} printf "%.0f" $value {{`}}`}} scrapes in the last 5m because some targets exceeded the configured sample_limit. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheusscrapesamplelimithit + summary: Prometheus has failed scrapes that have exceeded the configured sample limit. + expr: increase(prometheus_target_scrapes_exceeded_sample_limit_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0 + for: {{ dig "PrometheusScrapeSampleLimitHit" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusScrapeSampleLimitHit" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusTargetSyncFailure | default false) }} + - alert: PrometheusTargetSyncFailure + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: '{{`{{`}} printf "%.0f" $value {{`}}`}} targets in Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} have failed to sync because invalid configuration was supplied.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheustargetsyncfailure + summary: Prometheus has failed to sync targets. + expr: increase(prometheus_target_sync_failed_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[30m]) > 0 + for: {{ dig "PrometheusTargetSyncFailure" "for" "5m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusTargetSyncFailure" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusHighQueryLoad | default false) }} + - alert: PrometheusHighQueryLoad + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} query API has less than 20% available capacity in its query engine for the last 15 minutes. + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheushighqueryload + summary: Prometheus is reaching its maximum capacity serving concurrent requests. + expr: avg_over_time(prometheus_engine_queries{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) / max_over_time(prometheus_engine_queries_concurrent_max{job="{{ $prometheusJob }}",namespace="{{ $namespace }}"}[5m]) > 0.8 + for: {{ dig "PrometheusHighQueryLoad" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusHighQueryLoad" "severity" "warning" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- if not (.Values.defaultRules.disabled.PrometheusErrorSendingAlertsToAnyAlertmanager | default false) }} + - alert: PrometheusErrorSendingAlertsToAnyAlertmanager + annotations: +{{- if .Values.defaultRules.additionalRuleAnnotations }} +{{ toYaml .Values.defaultRules.additionalRuleAnnotations | indent 8 }} +{{- end }} +{{- if .Values.defaultRules.additionalRuleGroupAnnotations.prometheus }} +{{ toYaml .Values.defaultRules.additionalRuleGroupAnnotations.prometheus | indent 8 }} +{{- end }} + description: '{{`{{`}} printf "%.1f" $value {{`}}`}}% minimum errors while sending alerts from Prometheus {{`{{`}}$labels.namespace{{`}}`}}/{{`{{`}}$labels.pod{{`}}`}} to any Alertmanager.' + runbook_url: {{ .Values.defaultRules.runbookUrl }}/prometheus/prometheuserrorsendingalertstoanyalertmanager + summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager. + expr: |- + min without (alertmanager) ( + rate(prometheus_notifications_errors_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m]) + / + rate(prometheus_notifications_sent_total{job="{{ $prometheusJob }}",namespace="{{ $namespace }}",alertmanager!~``}[5m]) + ) + * 100 + > 3 + for: {{ dig "PrometheusErrorSendingAlertsToAnyAlertmanager" "for" "15m" .Values.customRules }} + {{- with .Values.defaultRules.keepFiringFor }} + keep_firing_for: "{{ . }}" + {{- end }} + labels: + severity: {{ dig "PrometheusErrorSendingAlertsToAnyAlertmanager" "severity" "critical" .Values.customRules }} + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.prometheus }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.node.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.node.rules.yaml new file mode 100644 index 0000000..1572666 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.node.rules.yaml @@ -0,0 +1,302 @@ +{{- /* +Generated from 'windows.node.rules' group from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.windowsMonitoring.enabled .Values.defaultRules.rules.windows }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "windows.node.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: windows.node.rules + rules: + - expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) ( + windows_system_boot_time_timestamp_seconds{job="windows-exporter"} + ) + record: node:windows_node:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + count by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) (sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, core) ( + windows_cpu_time_total{job="windows-exporter"} + )) + record: node:windows_node_num_cpu:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: 1 - avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[1m])) + record: :windows_node_cpu_utilisation:avg1m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[1m]) + ) + record: node:windows_node_cpu_utilisation:avg1m + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + 1 - + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (windows_memory_available_bytes{job="windows-exporter"}) + / + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (windows_os_visible_memory_bytes{job="windows-exporter"}) + record: ':windows_node_memory_utilisation:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (windows_memory_available_bytes{job="windows-exporter"} + windows_memory_cache_bytes{job="windows-exporter"}) + record: :windows_node_memory_MemFreeCached_bytes:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: (windows_memory_cache_bytes{job="windows-exporter"} + windows_memory_modified_page_list_bytes{job="windows-exporter"} + windows_memory_standby_cache_core_bytes{job="windows-exporter"} + windows_memory_standby_cache_normal_priority_bytes{job="windows-exporter"} + windows_memory_standby_cache_reserve_bytes{job="windows-exporter"}) + record: node:windows_node_memory_totalCached_bytes:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (windows_os_visible_memory_bytes{job="windows-exporter"}) + record: :windows_node_memory_MemTotal_bytes:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + (windows_memory_available_bytes{job="windows-exporter"}) + ) + record: node:windows_node_memory_bytes_available:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + windows_os_visible_memory_bytes{job="windows-exporter"} + ) + record: node:windows_node_memory_bytes_total:sum + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + (node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum) + / + scalar(sum(node:windows_node_memory_bytes_total:sum)) + record: node:windows_node_memory_utilisation:ratio + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: 1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum) + record: 'node:windows_node_memory_utilisation:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: irate(windows_memory_swap_page_operations_total{job="windows-exporter"}[5m]) + record: node:windows_node_memory_swap_io_pages:irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[1m]) + + irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[1m]) + ) + record: :windows_node_disk_utilisation:avg_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + avg by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + (irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[1m]) + + irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[1m])) + ) + record: node:windows_node_disk_utilisation:avg_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster,instance,volume)( + (windows_logical_disk_size_bytes{job="windows-exporter"} - + windows_logical_disk_free_bytes{job="windows-exporter"}) + / + windows_logical_disk_size_bytes{job="windows-exporter"} + ) + record: 'node:windows_node_filesystem_usage:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance, volume) (windows_logical_disk_free_bytes{job="windows-exporter"} / windows_logical_disk_size_bytes{job="windows-exporter"}) + record: 'node:windows_node_filesystem_avail:' + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (irate(windows_net_bytes_total{job="windows-exporter"}[1m])) + record: :windows_node_net_utilisation:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + (irate(windows_net_bytes_total{job="windows-exporter"}[1m])) + ) + record: node:windows_node_net_utilisation:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[1m])) + + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster) (irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[1m])) + record: :windows_node_net_saturation:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, instance) ( + (irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[1m]) + + irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[1m])) + ) + record: node:windows_node_net_saturation:sum_irate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.pod.rules.yaml b/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.pod.rules.yaml new file mode 100644 index 0000000..86340b5 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/rules-1.14/windows.pod.rules.yaml @@ -0,0 +1,158 @@ +{{- /* +Generated from 'windows.pod.rules' group from https://github.com/kubernetes-monitoring/kubernetes-mixin.git +Do not change in-place! In order to change this file first read following link: +https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/hack +*/ -}} +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and (semverCompare ">=1.14.0-0" $kubeTargetVersion) (semverCompare "<9.9.9-9" $kubeTargetVersion) .Values.defaultRules.create .Values.windowsMonitoring.enabled .Values.defaultRules.rules.windows }} +{{- $kubeStateMetricsJob := include "kube-prometheus-stack-kube-state-metrics.name" . }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ printf "%s-%s" (include "kube-prometheus-stack.fullname" .) "windows.pod.rules" | trunc 63 | trimSuffix "-" }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.defaultRules.labels }} +{{ toYaml .Values.defaultRules.labels | indent 4 }} +{{- end }} +{{- if .Values.defaultRules.annotations }} + annotations: +{{ toYaml .Values.defaultRules.annotations | indent 4 }} +{{- end }} +spec: + groups: + - name: windows.pod.rules + rules: + - expr: windows_container_available{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_pod_container_available + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_cpu_usage_seconds_total{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_total_runtime + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_memory_usage_commit_bytes{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_memory_usage + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_memory_usage_private_working_set_bytes{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_private_working_set_usage + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_network_receive_bytes_total{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_network_received_bytes_total + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: windows_container_network_transmit_bytes_total{job="windows-exporter", container_id != ""} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container_id, cluster) group_left(container, pod, namespace) max(kube_pod_container_info{job="{{ $kubeStateMetricsJob }}", container_id != ""}) by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container, container_id, pod, namespace, cluster) + record: windows_container_network_transmitted_bytes_total + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + kube_pod_container_resource_requests{resource="memory",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container,pod,namespace,cluster) (windows_pod_container_available) + record: kube_pod_windows_container_resource_memory_request + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: kube_pod_container_resource_limits{resource="memory",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container,pod,namespace,cluster) (windows_pod_container_available) + record: kube_pod_windows_container_resource_memory_limit + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + max by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + kube_pod_container_resource_requests{resource="cpu",job="{{ $kubeStateMetricsJob }}"} + ) * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container,pod,namespace,cluster) (windows_pod_container_available) + record: kube_pod_windows_container_resource_cpu_cores_request + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: kube_pod_container_resource_limits{resource="cpu",job="{{ $kubeStateMetricsJob }}"} * on ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}container,pod,namespace,cluster) (windows_pod_container_available) + record: kube_pod_windows_container_resource_cpu_cores_limit + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + - expr: |- + sum by ({{ range $.Values.defaultRules.additionalAggregationLabels }}{{ . }},{{ end }}cluster, namespace, pod, container) ( + rate(windows_container_total_runtime{}[5m]) + ) + record: namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate + {{- if or .Values.defaultRules.additionalRuleLabels .Values.defaultRules.additionalRuleGroupLabels.windows }} + labels: + {{- with .Values.defaultRules.additionalRuleLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.defaultRules.additionalRuleGroupLabels.windows }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/kube-prometheus-stack/templates/prometheus/secret.yaml b/kube-prometheus-stack/templates/prometheus/secret.yaml new file mode 100644 index 0000000..809379f --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/secret.yaml @@ -0,0 +1,15 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.prometheusSpec.thanos .Values.prometheus.prometheusSpec.thanos.objectStorageConfig}} +{{- if and .Values.prometheus.prometheusSpec.thanos.objectStorageConfig.secret (not .Values.prometheus.prometheusSpec.thanos.objectStorageConfig.existingSecret) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + app.kubernetes.io/component: prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: + object-storage-configs.yaml: {{ tpl (toYaml .Values.prometheus.prometheusSpec.thanos.objectStorageConfig.secret) $ | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/service.yaml b/kube-prometheus-stack/templates/prometheus/service.yaml new file mode 100644 index 0000000..60bb392 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/service.yaml @@ -0,0 +1,89 @@ +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if and .Values.prometheus.enabled .Values.prometheus.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + self-monitor: {{ .Values.prometheus.serviceMonitor.selfMonitor | quote }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.service.labels }} +{{ toYaml .Values.prometheus.service.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheus.service.annotations }} + annotations: +{{ toYaml .Values.prometheus.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.prometheus.service.clusterIP }} + clusterIP: {{ .Values.prometheus.service.clusterIP }} +{{- end }} +{{- if .Values.prometheus.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.prometheus.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.prometheus.service.ipDualStack.ipFamilyPolicy }} +{{- end }} +{{- if .Values.prometheus.service.externalIPs }} + externalIPs: +{{ toYaml .Values.prometheus.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.prometheus.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.prometheus.service.loadBalancerIP }} +{{- end }} +{{- if .Values.prometheus.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.prometheus.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.prometheus.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.prometheus.service.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.prometheus.prometheusSpec.portName }} + {{- if eq .Values.prometheus.service.type "NodePort" }} + nodePort: {{ .Values.prometheus.service.nodePort }} + {{- end }} + port: {{ .Values.prometheus.service.port }} + targetPort: {{ .Values.prometheus.service.targetPort }} + {{- if .Values.prometheus.service.reloaderWebPort }} + - name: reloader-web + {{- if semverCompare "> 1.20.0-0" $kubeTargetVersion }} + appProtocol: http + {{- end }} + {{- if and (eq .Values.prometheus.service.type "NodePort") (not (empty .Values.prometheus.service.reloaderWebNodePort)) }} + nodePort: {{ .Values.prometheus.service.reloaderWebNodePort }} + {{- end }} + port: {{ .Values.prometheus.service.reloaderWebPort }} + targetPort: reloader-web + {{- end }} + {{- if .Values.prometheus.thanosIngress.enabled }} + - name: grpc + {{- if eq .Values.prometheus.service.type "NodePort" }} + nodePort: {{ .Values.prometheus.thanosIngress.nodePort }} + {{- end }} + port: {{ .Values.prometheus.thanosIngress.servicePort }} + targetPort: {{ .Values.prometheus.thanosIngress.servicePort }} + {{- end }} +{{- if .Values.prometheus.service.additionalPorts }} +{{ toYaml .Values.prometheus.service.additionalPorts | indent 2 }} +{{- end }} + publishNotReadyAddresses: {{ .Values.prometheus.service.publishNotReadyAddresses }} + selector: + {{- if .Values.prometheus.agentMode }} + app.kubernetes.io/name: prometheus-agent + {{- else }} + app.kubernetes.io/name: prometheus + {{- end }} + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . }} +{{- if .Values.prometheus.service.sessionAffinity }} + sessionAffinity: {{ .Values.prometheus.service.sessionAffinity }} +{{- end }} +{{- if eq .Values.prometheus.service.sessionAffinity "ClientIP" }} + sessionAffinityConfig: + clientIP: + timeoutSeconds: {{ .Values.prometheus.service.sessionAffinityConfig.clientIP.timeoutSeconds }} +{{- end }} + type: "{{ .Values.prometheus.service.type }}" +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml b/kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml new file mode 100644 index 0000000..7e1fda3 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/serviceThanosSidecar.yaml @@ -0,0 +1,45 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.thanosService.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-thanos-discovery + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-thanos-discovery +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.thanosService.labels }} +{{ toYaml .Values.prometheus.thanosService.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheus.thanosService.annotations }} + annotations: +{{ toYaml .Values.prometheus.thanosService.annotations | indent 4 }} +{{- end }} +spec: + type: {{ .Values.prometheus.thanosService.type }} + {{- if .Values.prometheus.thanosService.clusterIP }} + clusterIP: {{ .Values.prometheus.thanosService.clusterIP }} + {{- end }} +{{- if .Values.prometheus.thanosService.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.prometheus.thanosService.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.prometheus.thanosService.ipDualStack.ipFamilyPolicy }} +{{- end }} +{{- if ne .Values.prometheus.thanosService.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.prometheus.thanosService.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.prometheus.thanosService.portName }} + port: {{ .Values.prometheus.thanosService.port }} + targetPort: {{ .Values.prometheus.thanosService.targetPort }} + {{- if eq .Values.prometheus.thanosService.type "NodePort" }} + nodePort: {{ .Values.prometheus.thanosService.nodePort }} + {{- end }} + - name: {{ .Values.prometheus.thanosService.httpPortName }} + port: {{ .Values.prometheus.thanosService.httpPort }} + targetPort: {{ .Values.prometheus.thanosService.targetHttpPort }} + {{- if eq .Values.prometheus.thanosService.type "NodePort" }} + nodePort: {{ .Values.prometheus.thanosService.httpNodePort }} + {{- end }} + selector: + app.kubernetes.io/name: prometheus + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/serviceThanosSidecarExternal.yaml b/kube-prometheus-stack/templates/prometheus/serviceThanosSidecarExternal.yaml new file mode 100644 index 0000000..453eed7 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/serviceThanosSidecarExternal.yaml @@ -0,0 +1,46 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.thanosServiceExternal.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-thanos-external + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- if .Values.prometheus.thanosServiceExternal.labels }} +{{ toYaml .Values.prometheus.thanosServiceExternal.labels | indent 4 }} +{{- end }} +{{- if .Values.prometheus.thanosServiceExternal.annotations }} + annotations: +{{ toYaml .Values.prometheus.thanosServiceExternal.annotations | indent 4 }} +{{- end }} +spec: + type: {{ .Values.prometheus.thanosServiceExternal.type }} +{{- if .Values.prometheus.thanosServiceExternal.loadBalancerIP }} + loadBalancerIP: {{ .Values.prometheus.thanosServiceExternal.loadBalancerIP }} +{{- end }} +{{- if .Values.prometheus.thanosServiceExternal.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.prometheus.thanosServiceExternal.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.prometheus.thanosServiceExternal.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.prometheus.thanosServiceExternal.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.prometheus.thanosServiceExternal.portName }} + port: {{ .Values.prometheus.thanosServiceExternal.port }} + targetPort: {{ .Values.prometheus.thanosServiceExternal.targetPort }} + {{- if eq .Values.prometheus.thanosServiceExternal.type "NodePort" }} + nodePort: {{ .Values.prometheus.thanosServiceExternal.nodePort }} + {{- end }} + - name: {{ .Values.prometheus.thanosServiceExternal.httpPortName }} + port: {{ .Values.prometheus.thanosServiceExternal.httpPort }} + targetPort: {{ .Values.prometheus.thanosServiceExternal.targetHttpPort }} + {{- if eq .Values.prometheus.thanosServiceExternal.type "NodePort" }} + nodePort: {{ .Values.prometheus.thanosServiceExternal.httpNodePort }} + {{- end }} + selector: + app.kubernetes.io/name: prometheus + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" . }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/serviceaccount.yaml b/kube-prometheus-stack/templates/prometheus/serviceaccount.yaml new file mode 100644 index 0000000..558d84a --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/serviceaccount.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.prometheus.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + app.kubernetes.io/name: {{ template "kube-prometheus-stack.name" . }}-prometheus + app.kubernetes.io/component: prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.prometheus.serviceAccount.annotations }} + annotations: +{{ tpl (toYaml .) $ | indent 4 }} +{{- end }} +automountServiceAccountToken: {{ .Values.prometheus.serviceAccount.automountServiceAccountToken }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ include "kube-prometheus-stack.imagePullSecrets" . | trim | indent 2 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/servicemonitor.yaml b/kube-prometheus-stack/templates/prometheus/servicemonitor.yaml new file mode 100644 index 0000000..31cbc90 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/servicemonitor.yaml @@ -0,0 +1,86 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.serviceMonitor.selfMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.prometheus.serviceMonitor.additionalLabels }} +{{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.serviceMonitor | nindent 2 }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + release: {{ $.Release.Name | quote }} + self-monitor: "true" + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }} + endpoints: + - port: {{ .Values.prometheus.prometheusSpec.portName }} + {{- if .Values.prometheus.serviceMonitor.interval }} + interval: {{ .Values.prometheus.serviceMonitor.interval }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.scheme }} + scheme: {{ .Values.prometheus.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.prometheus.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.prometheus.serviceMonitor.bearerTokenFile }} + {{- end }} + path: "{{ trimSuffix "/" .Values.prometheus.prometheusSpec.routePrefix }}/metrics" + {{- if .Values.prometheus.serviceMonitor.metricRelabelings }} + metricRelabelings: {{- tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | nindent 6) . }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.prometheus.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + - port: reloader-web + {{- if .Values.prometheus.serviceMonitor.interval }} + interval: {{ .Values.prometheus.serviceMonitor.interval }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.scheme }} + scheme: {{ .Values.prometheus.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.prometheus.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + path: "/metrics" + {{- if .Values.prometheus.serviceMonitor.metricRelabelings }} + metricRelabelings: {{- tpl (toYaml .Values.prometheus.serviceMonitor.metricRelabelings | nindent 6) . }} + {{- end }} + {{- if .Values.prometheus.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.prometheus.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + {{- range .Values.prometheus.serviceMonitor.additionalEndpoints }} + - port: {{ .port }} + {{- if or $.Values.prometheus.serviceMonitor.interval .interval }} + interval: {{ default $.Values.prometheus.serviceMonitor.interval .interval }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.proxyUrl .proxyUrl }} + proxyUrl: {{ default $.Values.prometheus.serviceMonitor.proxyUrl .proxyUrl }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.scheme .scheme }} + scheme: {{ default $.Values.prometheus.serviceMonitor.scheme .scheme }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.bearerTokenFile .bearerTokenFile }} + bearerTokenFile: {{ default $.Values.prometheus.serviceMonitor.bearerTokenFile .bearerTokenFile }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.tlsConfig .tlsConfig }} + tlsConfig: {{- default $.Values.prometheus.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }} + {{- end }} + path: {{ .path }} + {{- if or $.Values.prometheus.serviceMonitor.metricRelabelings .metricRelabelings }} + metricRelabelings: {{- tpl (default $.Values.prometheus.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) . }} + {{- end }} + {{- if or $.Values.prometheus.serviceMonitor.relabelings .relabelings }} + relabelings: {{- default $.Values.prometheus.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/servicemonitorThanosSidecar.yaml b/kube-prometheus-stack/templates/prometheus/servicemonitorThanosSidecar.yaml new file mode 100644 index 0000000..5643099 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/servicemonitorThanosSidecar.yaml @@ -0,0 +1,45 @@ +{{- if and .Values.prometheus.thanosService.enabled .Values.prometheus.thanosServiceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-thanos-sidecar + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-thanos-sidecar +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.prometheus.thanosServiceMonitor.additionalLabels }} +{{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.thanosServiceMonitor | nindent 2 }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.name" . }}-thanos-discovery + release: {{ $.Release.Name | quote }} + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }} + endpoints: + - port: {{ .Values.prometheus.thanosService.httpPortName }} + {{- if .Values.prometheus.thanosServiceMonitor.interval }} + interval: {{ .Values.prometheus.thanosServiceMonitor.interval }} + {{- end }} + {{- if .Values.prometheus.thanosServiceMonitor.scheme }} + scheme: {{ .Values.prometheus.thanosServiceMonitor.scheme }} + {{- end }} + {{- if .Values.prometheus.thanosServiceMonitor.tlsConfig }} + tlsConfig: {{ toYaml .Values.prometheus.thanosServiceMonitor.tlsConfig | nindent 6 }} + {{- end }} + {{- if .Values.prometheus.thanosServiceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.prometheus.thanosServiceMonitor.bearerTokenFile }} + {{- end }} + path: "/metrics" +{{- if .Values.prometheus.thanosServiceMonitor.metricRelabelings }} + metricRelabelings: +{{ tpl (toYaml .Values.prometheus.thanosServiceMonitor.metricRelabelings | indent 6) . }} +{{- end }} +{{- if .Values.prometheus.thanosServiceMonitor.relabelings }} + relabelings: +{{ toYaml .Values.prometheus.thanosServiceMonitor.relabelings | indent 6 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/servicemonitors.yaml b/kube-prometheus-stack/templates/prometheus/servicemonitors.yaml new file mode 100644 index 0000000..4a0c7f1 --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/servicemonitors.yaml @@ -0,0 +1,46 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.additionalServiceMonitors }} +apiVersion: v1 +kind: List +items: +{{- range .Values.prometheus.additionalServiceMonitors }} + - apiVersion: monitoring.coreos.com/v1 + kind: ServiceMonitor + metadata: + name: {{ .name }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-prometheus +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if .additionalLabels }} +{{ toYaml .additionalLabels | indent 8 }} + {{- end }} + spec: + {{- include "servicemonitor.scrapeLimits" . | nindent 6 }} + endpoints: +{{ toYaml .endpoints | indent 8 }} + {{- if .jobLabel }} + jobLabel: {{ .jobLabel }} + {{- end }} + {{- if .namespaceSelector }} + namespaceSelector: +{{ toYaml .namespaceSelector | indent 8 }} + {{- end }} + selector: +{{ toYaml .selector | indent 8 }} + {{- if .targetLabels }} + targetLabels: +{{ toYaml .targetLabels | indent 8 }} + {{- end }} + {{- if .podTargetLabels }} + podTargetLabels: +{{ toYaml .podTargetLabels | indent 8 }} + {{- end }} + {{- if .fallbackScrapeProtocol }} + fallbackScrapeProtocol: {{ .fallbackScrapeProtocol }} + {{- end }} + {{- with .attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/serviceperreplica.yaml b/kube-prometheus-stack/templates/prometheus/serviceperreplica.yaml new file mode 100644 index 0000000..3a88b2d --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/serviceperreplica.yaml @@ -0,0 +1,58 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.servicePerReplica.enabled }} +{{- $count := .Values.prometheus.prometheusSpec.replicas | int -}} +{{- $serviceValues := .Values.prometheus.servicePerReplica -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-serviceperreplica + namespace: {{ template "kube-prometheus-stack.namespace" . }} +items: +{{- range $i, $e := until $count }} + - apiVersion: v1 + kind: Service + metadata: + name: {{ include "kube-prometheus-stack.fullname" $ }}-prometheus-{{ $i }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ include "kube-prometheus-stack.name" $ }}-prometheus +{{ include "kube-prometheus-stack.labels" $ | indent 8 }} + {{- if $serviceValues.annotations }} + annotations: +{{ toYaml $serviceValues.annotations | indent 8 }} + {{- end }} + spec: + {{- if $serviceValues.clusterIP }} + clusterIP: {{ $serviceValues.clusterIP }} + {{- end }} + {{- if $serviceValues.ipDualStack.enabled }} + ipFamilies: {{ toYaml $serviceValues.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ $serviceValues.ipDualStack.ipFamilyPolicy }} + {{- end }} + {{- if $serviceValues.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := $serviceValues.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} + {{- end }} + {{- if ne $serviceValues.type "ClusterIP" }} + externalTrafficPolicy: {{ $serviceValues.externalTrafficPolicy }} + {{- end }} + ports: + - name: {{ $.Values.prometheus.prometheusSpec.portName }} + {{- if eq $serviceValues.type "NodePort" }} + nodePort: {{ $serviceValues.nodePort }} + {{- end }} + port: {{ $serviceValues.port }} + targetPort: {{ $serviceValues.targetPort }} + selector: + {{- if $.Values.prometheus.agentMode }} + app.kubernetes.io/name: prometheus-agent + statefulset.kubernetes.io/pod-name: prom-agent-{{ include "kube-prometheus-stack.prometheus.crname" $ }}-{{ $i }} + {{- else }} + app.kubernetes.io/name: prometheus + statefulset.kubernetes.io/pod-name: prometheus-{{ include "kube-prometheus-stack.prometheus.crname" $ }}-{{ $i }} + {{- end }} + operator.prometheus.io/name: {{ template "kube-prometheus-stack.prometheus.crname" $ }} + type: "{{ $serviceValues.type }}" +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/prometheus/verticalpodautoscaler.yaml b/kube-prometheus-stack/templates/prometheus/verticalpodautoscaler.yaml new file mode 100644 index 0000000..ec35eec --- /dev/null +++ b/kube-prometheus-stack/templates/prometheus/verticalpodautoscaler.yaml @@ -0,0 +1,46 @@ +{{- if and .Values.prometheus.enabled .Values.prometheus.verticalPodAutoscaler.enabled }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ template "kube-prometheus-stack.fullname" . }}-prometheus + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.name" . }}-prometheus + {{- include "kube-prometheus-stack.labels" . | nindent 4 }} +spec: + {{- with .Values.prometheus.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: prometheus + {{- with .Values.prometheus.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ .Values.prometheus.verticalPodAutoscaler.controlledValues }} + {{- end }} + {{- if .Values.prometheus.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{- toYaml .Values.prometheus.verticalPodAutoscaler.maxAllowed | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{- toYaml .Values.prometheus.verticalPodAutoscaler.minAllowed | nindent 8 }} + {{- end }} + targetRef: + {{- if .Values.prometheus.agentMode }} + apiVersion: monitoring.coreos.com/v1alpha1 + kind: PrometheusAgent + {{- else }} + apiVersion: monitoring.coreos.com/v1 + kind: Prometheus + {{- end }} + name: {{ template "kube-prometheus-stack.prometheus.crname" . }} + {{- with .Values.prometheus.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/thanos-ruler/extrasecret.yaml b/kube-prometheus-stack/templates/thanos-ruler/extrasecret.yaml new file mode 100644 index 0000000..587fca2 --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/extrasecret.yaml @@ -0,0 +1,20 @@ +{{- if .Values.thanosRuler.extraSecret.data -}} +{{- $secretName := printf "%s-extra" (include "kube-prometheus-stack.thanosRuler.name" . ) -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ default $secretName .Values.thanosRuler.extraSecret.name }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.thanosRuler.extraSecret.annotations }} + annotations: +{{ toYaml .Values.thanosRuler.extraSecret.annotations | indent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + app.kubernetes.io/component: thanos-ruler +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: +{{- range $key, $val := .Values.thanosRuler.extraSecret.data }} + {{ $key }}: {{ $val | b64enc | quote }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/thanos-ruler/ingress.yaml b/kube-prometheus-stack/templates/thanos-ruler/ingress.yaml new file mode 100644 index 0000000..483c2d1 --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/ingress.yaml @@ -0,0 +1,59 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.ingress.enabled }} +{{- $pathType := .Values.thanosRuler.ingress.pathType | default "ImplementationSpecific" }} +{{- $serviceName := include "kube-prometheus-stack.thanosRuler.name" . }} +{{- $servicePort := .Values.thanosRuler.service.port -}} +{{- $routePrefix := list .Values.thanosRuler.thanosRulerSpec.routePrefix }} +{{- $paths := .Values.thanosRuler.ingress.paths | default $routePrefix -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ $serviceName }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} +{{- if .Values.thanosRuler.ingress.annotations }} + annotations: + {{- tpl (toYaml .Values.thanosRuler.ingress.annotations) . | nindent 4 }} +{{- end }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{- if .Values.thanosRuler.ingress.labels }} +{{ toYaml .Values.thanosRuler.ingress.labels | indent 4 }} +{{- end }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: + {{- if .Values.thanosRuler.ingress.ingressClassName }} + ingressClassName: {{ .Values.thanosRuler.ingress.ingressClassName }} + {{- end }} + rules: + {{- if .Values.thanosRuler.ingress.hosts }} + {{- range $host := .Values.thanosRuler.ingress.hosts }} + - host: {{ tpl $host $ | quote }} + http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- end -}} + {{- end -}} + {{- else }} + - http: + paths: + {{- range $p := $paths }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- end -}} + {{- end -}} + {{- if .Values.thanosRuler.ingress.tls }} + tls: +{{ tpl (toYaml .Values.thanosRuler.ingress.tls | indent 4) . }} + {{- end -}} +{{- end -}} diff --git a/kube-prometheus-stack/templates/thanos-ruler/podDisruptionBudget.yaml b/kube-prometheus-stack/templates/thanos-ruler/podDisruptionBudget.yaml new file mode 100644 index 0000000..0b42c2c --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/podDisruptionBudget.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +spec: +{{- toYaml (omit .Values.thanosRuler.podDisruptionBudget "enabled") | nindent 2 }} + selector: + matchLabels: + app.kubernetes.io/name: thanos-ruler + thanos-ruler: {{ template "kube-prometheus-stack.thanosRuler.crname" . }} +{{- end }} diff --git a/kube-prometheus-stack/templates/thanos-ruler/route.yaml b/kube-prometheus-stack/templates/thanos-ruler/route.yaml new file mode 100644 index 0000000..063edc1 --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/route.yaml @@ -0,0 +1,63 @@ +{{- if .Values.thanosRuler.enabled -}} + {{- $serviceName := include "kube-prometheus-stack.thanosRuler.name" . }} + {{- $servicePort := .Values.thanosRuler.service.port -}} + {{- range $name, $route := .Values.thanosRuler.route }} + {{- if $route.enabled }} +--- +apiVersion: {{ $route.apiVersion | default "gateway.networking.k8s.io/v1" }} +kind: {{ $route.kind | default "HTTPRoute" }} +metadata: + {{- with $route.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ $serviceName }}{{ if ne $name "main" }}-{{ $name }}{{ end }} + namespace: {{ template "kube-prometheus-stack.namespace" $ }} + labels: + app: {{ template "kube-prometheus-stack.name" $ }}-prometheus + {{- include "kube-prometheus-stack.labels" $ | nindent 4 }} + {{- with $route.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with $route.parentRefs }} + parentRefs: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with $route.hostnames }} + hostnames: + {{- tpl (toYaml .) $ | nindent 4 }} + {{- end }} + rules: + {{- if $route.additionalRules }} + {{- tpl (toYaml $route.additionalRules) $ | nindent 4 }} + {{- end }} + {{- if $route.httpsRedirect }} + - filters: + - type: RequestRedirect + requestRedirect: + scheme: https + statusCode: 301 + {{- else }} + - backendRefs: + - group: "" + kind: Service + weight: 1 + name: {{ $serviceName }} + port: {{ $servicePort }} + {{- with $route.filters }} + filters: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $route.matches }} + matches: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with $route.sessionPersistence }} + sessionPersistence: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/thanos-ruler/ruler.yaml b/kube-prometheus-stack/templates/thanos-ruler/ruler.yaml new file mode 100644 index 0000000..8c7d9c8 --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/ruler.yaml @@ -0,0 +1,219 @@ +{{- if .Values.thanosRuler.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ThanosRuler +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.crname" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ include "kube-prometheus-stack.thanosRuler.name" . }} +{{- include "kube-prometheus-stack.labels" . | indent 4 -}} +{{- if .Values.thanosRuler.annotations }} + annotations: +{{ toYaml .Values.thanosRuler.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.thanosRuler.thanosRulerSpec.image }} + {{- $registry := .Values.global.imageRegistry | default .Values.thanosRuler.thanosRulerSpec.image.registry -}} + {{- if and .Values.thanosRuler.thanosRulerSpec.image.tag .Values.thanosRuler.thanosRulerSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.thanosRuler.thanosRulerSpec.image.repository }}:{{ .Values.thanosRuler.thanosRulerSpec.image.tag }}@sha256:{{ .Values.thanosRuler.thanosRulerSpec.image.sha }}" + {{- else if .Values.thanosRuler.thanosRulerSpec.image.sha }} + image: "{{ $registry }}/{{ .Values.thanosRuler.thanosRulerSpec.image.repository }}@sha256:{{ .Values.thanosRuler.thanosRulerSpec.image.sha }}" + {{- else if .Values.thanosRuler.thanosRulerSpec.image.tag }} + image: "{{ $registry }}/{{ .Values.thanosRuler.thanosRulerSpec.image.repository }}:{{ .Values.thanosRuler.thanosRulerSpec.image.tag }}" + {{- else }} + image: "{{ $registry }}/{{ .Values.thanosRuler.thanosRulerSpec.image.repository }}" + {{- end }} + {{- if .Values.thanosRuler.thanosRulerSpec.image.sha }} + sha: {{ .Values.thanosRuler.thanosRulerSpec.image.sha }} + {{- end }} +{{- end }} + replicas: {{ .Values.thanosRuler.thanosRulerSpec.replicas }} + listenLocal: {{ .Values.thanosRuler.thanosRulerSpec.listenLocal }} + {{- if .Values.thanosRuler.thanosRulerSpec.serviceName }} + serviceName: {{ tpl .Values.thanosRuler.thanosRulerSpec.serviceName . }} + {{- end }} + serviceAccountName: {{ template "kube-prometheus-stack.thanosRuler.serviceAccountName" . }} +{{- if .Values.thanosRuler.thanosRulerSpec.externalPrefix }} + externalPrefix: "{{ tpl .Values.thanosRuler.thanosRulerSpec.externalPrefix . }}" +{{- else if and .Values.thanosRuler.ingress.enabled .Values.thanosRuler.ingress.hosts }} + externalPrefix: "http://{{ tpl (index .Values.thanosRuler.ingress.hosts 0) . }}{{ .Values.thanosRuler.thanosRulerSpec.routePrefix }}" +{{- else if .Values.thanosRuler.thanosRulerSpec.externalPrefixNilUsesHelmValues }} + externalPrefix: "http://{{ template "kube-prometheus-stack.thanosRuler.name" . }}.{{ template "kube-prometheus-stack.namespace" . }}:{{ .Values.thanosRuler.service.port }}" +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.additionalArgs }} + additionalArgs: +{{ tpl (toYaml .Values.thanosRuler.thanosRulerSpec.additionalArgs) $ | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.nodeSelector }} + nodeSelector: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.nodeSelector | indent 4 }} +{{- end }} + paused: {{ .Values.thanosRuler.thanosRulerSpec.paused }} + logFormat: {{ .Values.thanosRuler.thanosRulerSpec.logFormat | quote }} + logLevel: {{ .Values.thanosRuler.thanosRulerSpec.logLevel | quote }} + retention: {{ .Values.thanosRuler.thanosRulerSpec.retention | quote }} +{{- if .Values.thanosRuler.thanosRulerSpec.evaluationInterval }} + evaluationInterval: {{ .Values.thanosRuler.thanosRulerSpec.evaluationInterval }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.ruleNamespaceSelector }} + ruleNamespaceSelector: +{{ tpl (toYaml .Values.thanosRuler.thanosRulerSpec.ruleNamespaceSelector | indent 4) . }} +{{ else }} + ruleNamespaceSelector: {} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.ruleSelector }} + ruleSelector: +{{ tpl (toYaml .Values.thanosRuler.thanosRulerSpec.ruleSelector | indent 4) .}} +{{- else if .Values.thanosRuler.thanosRulerSpec.ruleSelectorNilUsesHelmValues }} + ruleSelector: + matchLabels: + release: {{ $.Release.Name | quote }} +{{ else }} + ruleSelector: {} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.alertQueryUrl }} + alertQueryUrl: "{{ .Values.thanosRuler.thanosRulerSpec.alertQueryUrl }}" +{{- end}} +{{- if .Values.thanosRuler.thanosRulerSpec.alertmanagersUrl }} + alertmanagersUrl: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.alertmanagersUrl | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret }} + alertmanagersConfig: + key: "{{.Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret.key }}" + name: "{{.Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.existingSecret.name }}" +{{- else if .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig.secret }} + alertmanagersConfig: + key: alertmanager-configs.yaml + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.queryEndpoints }} + queryEndpoints: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.queryEndpoints | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret }} + queryConfig: + key: "{{.Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret.key }}" + name: "{{.Values.thanosRuler.thanosRulerSpec.queryConfig.existingSecret.name }}" +{{- else if .Values.thanosRuler.thanosRulerSpec.queryConfig.secret }} + queryConfig: + key: query-configs.yaml + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.resources }} + resources: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.resources | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.routePrefix }} + routePrefix: "{{ .Values.thanosRuler.thanosRulerSpec.routePrefix }}" +{{- end }} +{{- if kindIs "bool" .Values.thanosRuler.thanosRulerSpec.hostUsers }} + hostUsers: {{ .Values.thanosRuler.thanosRulerSpec.hostUsers }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.securityContext }} + securityContext: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.securityContext | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.storage }} + storage: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.storage | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret }} + objectStorageConfig: + key: "{{.Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret.key }}" + name: "{{.Values.thanosRuler.thanosRulerSpec.objectStorageConfig.existingSecret.name }}" +{{- else if .Values.thanosRuler.thanosRulerSpec.objectStorageConfig.secret }} + objectStorageConfig: + key: object-storage-configs.yaml + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.labels }} + labels: +{{ tpl (toYaml .Values.thanosRuler.thanosRulerSpec.labels) $ | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.podMetadata }} + podMetadata: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.podMetadata | indent 4 }} +{{- end }} +{{- if or .Values.thanosRuler.thanosRulerSpec.podAntiAffinity .Values.thanosRuler.thanosRulerSpec.affinity }} + affinity: +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.affinity }} +{{ toYaml .Values.thanosRuler.thanosRulerSpec.affinity | indent 4 }} +{{- end }} +{{- if eq .Values.thanosRuler.thanosRulerSpec.podAntiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: {{ .Values.thanosRuler.thanosRulerSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [thanos-ruler]} + - {key: thanos-ruler, operator: In, values: [{{ template "kube-prometheus-stack.thanosRuler.crname" . }}]} +{{- else if eq .Values.thanosRuler.thanosRulerSpec.podAntiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: {{ .Values.thanosRuler.thanosRulerSpec.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [thanos-ruler]} + - {key: thanos-ruler, operator: In, values: [{{ template "kube-prometheus-stack.thanosRuler.crname" . }}]} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.tolerations }} + tolerations: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.tolerations | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.topologySpreadConstraints | indent 4 }} +{{- end }} +{{- if .Values.global.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.global.imagePullSecrets | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.containers }} + containers: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.containers | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.initContainers }} + initContainers: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.initContainers | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.priorityClassName }} + priorityClassName: {{.Values.thanosRuler.thanosRulerSpec.priorityClassName }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.volumes }} + volumes: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.volumes | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.volumeMounts }} + volumeMounts: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.volumeMounts | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.alertDropLabels }} + alertDropLabels: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.alertDropLabels | indent 4 }} +{{- end }} + portName: {{ .Values.thanosRuler.thanosRulerSpec.portName }} +{{- if .Values.thanosRuler.thanosRulerSpec.podManagementPolicy }} + podManagementPolicy: {{ .Values.thanosRuler.thanosRulerSpec.podManagementPolicy }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.updateStrategy }} + updateStrategy: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.updateStrategy | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.terminationGracePeriodSeconds }} + terminationGracePeriodSeconds: {{ .Values.thanosRuler.thanosRulerSpec.terminationGracePeriodSeconds }} +{{- end }} +{{- with .Values.thanosRuler.thanosRulerSpec.additionalConfig }} + {{- tpl (toYaml .) $ | nindent 2 }} +{{- end }} +{{- if .Values.thanosRuler.thanosRulerSpec.web }} + web: +{{ toYaml .Values.thanosRuler.thanosRulerSpec.web | indent 4 }} +{{- end }} +{{- with .Values.thanosRuler.thanosRulerSpec.additionalConfigString }} + {{- tpl . $ | nindent 2 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/thanos-ruler/secret.yaml b/kube-prometheus-stack/templates/thanos-ruler/secret.yaml new file mode 100644 index 0000000..acab7fd --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/secret.yaml @@ -0,0 +1,26 @@ +{{- if .Values.thanosRuler.enabled }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ include "kube-prometheus-stack.thanosRuler.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +data: + {{- with .Values.thanosRuler.thanosRulerSpec.alertmanagersConfig }} + {{- if and .secret (not .existingSecret) }} + alertmanager-configs.yaml: {{ toYaml .secret | b64enc | quote }} + {{- end }} + {{- end }} + {{- with .Values.thanosRuler.thanosRulerSpec.objectStorageConfig }} + {{- if and .secret (not .existingSecret) }} + object-storage-configs.yaml: {{ toYaml .secret | b64enc | quote }} + {{- end }} + {{- end }} + {{- with .Values.thanosRuler.thanosRulerSpec.queryConfig }} + {{- if and .secret (not .existingSecret) }} + query-configs.yaml: {{ toYaml .secret | b64enc | quote }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/thanos-ruler/service.yaml b/kube-prometheus-stack/templates/thanos-ruler/service.yaml new file mode 100644 index 0000000..7480f5a --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/service.yaml @@ -0,0 +1,57 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + self-monitor: {{ .Values.thanosRuler.serviceMonitor.selfMonitor | quote }} +{{- include "kube-prometheus-stack.labels" . | indent 4 -}} +{{- if .Values.thanosRuler.service.labels }} +{{ toYaml .Values.thanosRuler.service.labels | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.service.annotations }} + annotations: +{{ toYaml .Values.thanosRuler.service.annotations | indent 4 }} +{{- end }} +spec: +{{- if .Values.thanosRuler.service.clusterIP }} + clusterIP: {{ .Values.thanosRuler.service.clusterIP }} +{{- end }} +{{- if .Values.thanosRuler.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.thanosRuler.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.thanosRuler.service.ipDualStack.ipFamilyPolicy }} +{{- end }} +{{- if .Values.thanosRuler.service.externalIPs }} + externalIPs: +{{ toYaml .Values.thanosRuler.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.thanosRuler.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.thanosRuler.service.loadBalancerIP }} +{{- end }} +{{- if .Values.thanosRuler.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.thanosRuler.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if ne .Values.thanosRuler.service.type "ClusterIP" }} + externalTrafficPolicy: {{ .Values.thanosRuler.service.externalTrafficPolicy }} +{{- end }} + ports: + - name: {{ .Values.thanosRuler.thanosRulerSpec.portName }} + {{- if eq .Values.thanosRuler.service.type "NodePort" }} + nodePort: {{ .Values.thanosRuler.service.nodePort }} + {{- end }} + port: {{ .Values.thanosRuler.service.port }} + targetPort: {{ .Values.thanosRuler.service.targetPort }} + protocol: TCP +{{- if .Values.thanosRuler.service.additionalPorts }} +{{ toYaml .Values.thanosRuler.service.additionalPorts | indent 2 }} +{{- end }} + selector: + app.kubernetes.io/name: thanos-ruler + thanos-ruler: {{ template "kube-prometheus-stack.thanosRuler.crname" . }} + type: "{{ .Values.thanosRuler.service.type }}" +{{- end }} diff --git a/kube-prometheus-stack/templates/thanos-ruler/serviceaccount.yaml b/kube-prometheus-stack/templates/thanos-ruler/serviceaccount.yaml new file mode 100644 index 0000000..b58f1cd --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/serviceaccount.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.serviceAccountName" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + app.kubernetes.io/name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + app.kubernetes.io/component: thanos-ruler +{{- include "kube-prometheus-stack.labels" . | indent 4 -}} +{{- if .Values.thanosRuler.serviceAccount.annotations }} + annotations: +{{ toYaml .Values.thanosRuler.serviceAccount.annotations | indent 4 }} +{{- end }} +{{- if .Values.global.imagePullSecrets }} +imagePullSecrets: +{{ toYaml .Values.global.imagePullSecrets | indent 2 }} +{{- end }} +{{- end }} diff --git a/kube-prometheus-stack/templates/thanos-ruler/servicemonitor.yaml b/kube-prometheus-stack/templates/thanos-ruler/servicemonitor.yaml new file mode 100644 index 0000000..83bd8ba --- /dev/null +++ b/kube-prometheus-stack/templates/thanos-ruler/servicemonitor.yaml @@ -0,0 +1,72 @@ +{{- if and .Values.thanosRuler.enabled .Values.thanosRuler.serviceMonitor.selfMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + namespace: {{ template "kube-prometheus-stack.namespace" . }} + labels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} +{{ include "kube-prometheus-stack.labels" . | indent 4 }} +{{- with .Values.thanosRuler.serviceMonitor.additionalLabels }} +{{- toYaml . | nindent 4 }} +{{- end }} +spec: + {{- include "servicemonitor.scrapeLimits" .Values.thanosRuler.serviceMonitor | nindent 2 }} + selector: + matchLabels: + app: {{ template "kube-prometheus-stack.thanosRuler.name" . }} + release: {{ $.Release.Name | quote }} + self-monitor: {{ .Values.thanosRuler.serviceMonitor.selfMonitor | quote }} + namespaceSelector: + matchNames: + - {{ printf "%s" (include "kube-prometheus-stack.namespace" .) | quote }} + endpoints: + - port: {{ .Values.thanosRuler.thanosRulerSpec.portName }} + {{- if .Values.thanosRuler.serviceMonitor.interval }} + interval: {{ .Values.thanosRuler.serviceMonitor.interval }} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.proxyUrl }} + proxyUrl: {{ .Values.thanosRuler.serviceMonitor.proxyUrl}} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.scheme }} + scheme: {{ .Values.thanosRuler.serviceMonitor.scheme }} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.thanosRuler.serviceMonitor.bearerTokenFile }} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.tlsConfig }} + tlsConfig: {{- toYaml .Values.thanosRuler.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + path: "{{ trimSuffix "/" .Values.thanosRuler.thanosRulerSpec.routePrefix }}/metrics" + {{- if .Values.thanosRuler.serviceMonitor.metricRelabelings }} + metricRelabelings: {{- tpl (toYaml .Values.thanosRuler.serviceMonitor.metricRelabelings | nindent 6) . }} + {{- end }} + {{- if .Values.thanosRuler.serviceMonitor.relabelings }} + relabelings: {{- toYaml .Values.thanosRuler.serviceMonitor.relabelings | nindent 6 }} + {{- end }} + {{- range .Values.thanosRuler.serviceMonitor.additionalEndpoints }} + - port: {{ .port }} + {{- if or $.Values.thanosRuler.serviceMonitor.interval .interval }} + interval: {{ default $.Values.thanosRuler.serviceMonitor.interval .interval }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.proxyUrl .proxyUrl }} + proxyUrl: {{ default $.Values.thanosRuler.serviceMonitor.proxyUrl .proxyUrl }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.scheme .scheme }} + scheme: {{ default $.Values.thanosRuler.serviceMonitor.scheme .scheme }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.bearerTokenFile .bearerTokenFile }} + bearerTokenFile: {{ default $.Values.thanosRuler.serviceMonitor.bearerTokenFile .bearerTokenFile }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.tlsConfig .tlsConfig }} + tlsConfig: {{- default $.Values.thanosRuler.serviceMonitor.tlsConfig .tlsConfig | toYaml | nindent 6 }} + {{- end }} + path: {{ .path }} + {{- if or $.Values.thanosRuler.serviceMonitor.metricRelabelings .metricRelabelings }} + metricRelabelings: {{- tpl (default $.Values.thanosRuler.serviceMonitor.metricRelabelings .metricRelabelings | toYaml | nindent 6) . }} + {{- end }} + {{- if or $.Values.thanosRuler.serviceMonitor.relabelings .relabelings }} + relabelings: {{- default $.Values.thanosRuler.serviceMonitor.relabelings .relabelings | toYaml | nindent 6 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/kube-prometheus-stack/values.yaml b/kube-prometheus-stack/values.yaml new file mode 100644 index 0000000..f1e8692 --- /dev/null +++ b/kube-prometheus-stack/values.yaml @@ -0,0 +1,5645 @@ +# Default values for kube-prometheus-stack. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +## Provide a name in place of kube-prometheus-stack for `app:` labels +## +nameOverride: "" + +## Override the deployment namespace +## +namespaceOverride: "" + +## Provide a k8s version to auto dashboard import script example: kubeTargetVersionOverride: 1.26.6 +## +kubeTargetVersionOverride: "" + +## Allow kubeVersion to be overridden while creating the ingress +## +kubeVersionOverride: "" + +## Provide a name to substitute for the full names of resources +## +fullnameOverride: "" + +## Labels to apply to all resources +## +commonLabels: {} +# scmhash: abc123 +# myLabel: aakkmd + +## Install Prometheus Operator CRDs +## +crds: + enabled: true + ## The CRD upgrade job mitigates the limitation of helm not being able to upgrade CRDs. + ## The job will apply the CRDs to the cluster before the operator is deployed, using helm hooks. + ## It deploys a corresponding clusterrole, clusterrolebinding and serviceaccount to apply the CRDs. + ## This feature is in preview, off by default and may change in the future. + upgradeJob: + enabled: false + forceConflicts: false + image: + busybox: + registry: docker.io + repository: busybox + tag: "latest" + sha: "" + pullPolicy: IfNotPresent + kubectl: + registry: registry.k8s.io + repository: kubectl + tag: "" # defaults to the Kubernetes version + sha: "" + pullPolicy: IfNotPresent + + env: {} + ## Define resources requests and limits for single Pods. + ## ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + ## + resources: {} + + ## Additional volumes + ## + extraVolumes: [] + + ## Additional volume mounts + ## + extraVolumeMounts: [] + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + ## + nodeSelector: {} + + ## Assign custom affinity rules to the upgrade-crd job + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + + ## If specified, the pod's tolerations. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## If specified, the pod's topology spread constraints. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ + ## + topologySpreadConstraints: [] + # - maxSkew: 1 + # topologyKey: topology.kubernetes.io/zone + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: + # app: alertmanager + + # ## Labels to add to the upgrade-crd job + # ## + labels: {} + + ## Annotations to add to the upgrade-crd job + ## + annotations: {} + + ## Labels to add to the upgrade-crd pod + ## + podLabels: {} + + ## Annotations to add to the upgrade-crd pod + ## + podAnnotations: {} + + ## Service account for upgrade crd job to use. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + name: "" + annotations: {} + labels: {} + automountServiceAccountToken: true + + ## Automounting API credentials for upgrade crd job pod. + ## + automountServiceAccountToken: true + + ## Container-specific security context configuration + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + ## SecurityContext holds pod-level security attributes and common container settings. + ## This defaults to non root user with uid 1000 and gid 2000. *v1.PodSecurityContext false + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + podSecurityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + +## Custom rules to override "for" and "severity" in defaultRules +## +customRules: {} + # AlertmanagerFailedReload: + # for: 3m + # AlertmanagerMembersInconsistent: + # for: 5m + # severity: "warning" + +## Create default rules for monitoring the cluster +## +defaultRules: + create: true + rules: + alertmanager: true + etcd: true + configReloaders: true + general: true + k8sContainerCpuUsageSecondsTotal: true + k8sContainerMemoryCache: true + k8sContainerMemoryRss: true + k8sContainerMemorySwap: true + k8sContainerResource: true + k8sContainerMemoryWorkingSetBytes: true + k8sPodOwner: true + kubeApiserverAvailability: true + kubeApiserverBurnrate: true + kubeApiserverHistogram: true + kubeApiserverSlos: true + kubeControllerManager: true + kubelet: true + kubeProxy: true + kubePrometheusGeneral: true + kubePrometheusNodeRecording: true + kubernetesApps: true + kubernetesResources: true + kubernetesStorage: true + kubernetesSystem: true + kubeSchedulerAlerting: true + kubeSchedulerRecording: true + kubeStateMetrics: true + network: true + node: true + nodeExporterAlerting: true + nodeExporterRecording: true + prometheus: true + prometheusOperator: true + windows: true + + # Defines the operator for namespace selection in rules + # Use "=~" to include namespaces matching the pattern (default) + # Use "!~" to exclude namespaces matching the pattern + appNamespacesOperator: "=~" + + ## Reduce app namespace alert scope + appNamespacesTarget: ".*" + + ## Set keep_firing_for for all alerts + keepFiringFor: "" + + ## Labels for default rules + labels: {} + ## Annotations for default rules + annotations: {} + + ## Additional labels for PrometheusRule alerts + additionalRuleLabels: {} + + ## Additional annotations for PrometheusRule alerts + additionalRuleAnnotations: {} + + ## Additional labels for specific PrometheusRule alert groups + additionalRuleGroupLabels: + alertmanager: {} + etcd: {} + configReloaders: {} + general: {} + k8sContainerCpuUsageSecondsTotal: {} + k8sContainerMemoryCache: {} + k8sContainerMemoryRss: {} + k8sContainerMemorySwap: {} + k8sContainerResource: {} + k8sPodOwner: {} + kubeApiserverAvailability: {} + kubeApiserverBurnrate: {} + kubeApiserverHistogram: {} + kubeApiserverSlos: {} + kubeControllerManager: {} + kubelet: {} + kubeProxy: {} + kubePrometheusGeneral: {} + kubePrometheusNodeRecording: {} + kubernetesApps: {} + kubernetesResources: {} + kubernetesStorage: {} + kubernetesSystem: {} + kubeSchedulerAlerting: {} + kubeSchedulerRecording: {} + kubeStateMetrics: {} + network: {} + node: {} + nodeExporterAlerting: {} + nodeExporterRecording: {} + prometheus: {} + prometheusOperator: {} + + ## Additional annotations for specific PrometheusRule alert groups + additionalRuleGroupAnnotations: + alertmanager: {} + etcd: {} + configReloaders: {} + general: {} + k8sContainerCpuUsageSecondsTotal: {} + k8sContainerMemoryCache: {} + k8sContainerMemoryRss: {} + k8sContainerMemorySwap: {} + k8sContainerResource: {} + k8sPodOwner: {} + kubeApiserverAvailability: {} + kubeApiserverBurnrate: {} + kubeApiserverHistogram: {} + kubeApiserverSlos: {} + kubeControllerManager: {} + kubelet: {} + kubeProxy: {} + kubePrometheusGeneral: {} + kubePrometheusNodeRecording: {} + kubernetesApps: {} + kubernetesResources: {} + kubernetesStorage: {} + kubernetesSystem: {} + kubeSchedulerAlerting: {} + kubeSchedulerRecording: {} + kubeStateMetrics: {} + network: {} + node: {} + nodeExporterAlerting: {} + nodeExporterRecording: {} + prometheus: {} + prometheusOperator: {} + + additionalAggregationLabels: [] + + ## Prefix for runbook URLs. Use this to override the first part of the runbookURLs that is common to all rules. + runbookUrl: "https://runbooks.prometheus-operator.dev/runbooks" + + ## Thresholds for kubelet certificate expiration alerts (in seconds) + kubeletServerCertificateExpiration: + warning: 604800 # 7 days + critical: 86400 # 1 day + kubeletClientCertificateExpiration: + warning: 604800 # 7 days + critical: 86400 # 1 day + + node: + fsSelector: 'fstype!=""' + # fsSelector: 'fstype=~"ext[234]|btrfs|xfs|zfs"' + + ## Disabled PrometheusRule alerts + disabled: {} + # KubeAPIDown: true + # NodeRAIDDegraded: true + +## Deprecated way to provide custom recording or alerting rules to be deployed into the cluster. +## +# additionalPrometheusRules: [] +# - name: my-rule-file +# groups: +# - name: my_group +# rules: +# - record: my_record +# expr: 100 * my_record + +## Provide custom recording or alerting rules to be deployed into the cluster. +## +additionalPrometheusRulesMap: {} +# rule-name: +# groups: +# - name: my_group +# rules: +# - record: my_record +# expr: 100 * my_record + +## +global: + rbac: + create: true + + ## Create ClusterRoles that extend the existing view, edit and admin ClusterRoles to interact with prometheus-operator CRDs + ## Ref: https://kubernetes.io/docs/reference/access-authn-authz/rbac/#aggregated-clusterroles + createAggregateClusterRoles: false + + ## Global image registry to use if it needs to be overridden for some specific use cases (e.g. local registries, custom images, ...) + ## + imageRegistry: "" + + ## Reference to one or more secrets to be used when pulling images + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## + imagePullSecrets: [] + # - name: "image-pull-secret" + # or + # - "image-pull-secret" + +windowsMonitoring: + ## Deploys the windows-exporter and Windows-specific dashboards and rules (job name must be 'windows-exporter') + enabled: false + +## Configuration for prometheus-windows-exporter +## ref: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-windows-exporter +## +prometheus-windows-exporter: + ## Enable ServiceMonitor and set Kubernetes label to use as a job label + ## + prometheus: + monitor: + enabled: true + jobLabel: jobLabel + + releaseLabel: true + + ## Set job label to 'windows-exporter' as required by the default Prometheus rules and Grafana dashboards + ## + podLabels: + jobLabel: windows-exporter + + ## Enable memory and container metrics as required by the default Prometheus rules and Grafana dashboards + ## + config: |- + collectors: + enabled: '[defaults],memory,container' + +## Configuration for alertmanager +## ref: https://prometheus.io/docs/alerting/alertmanager/ +## +alertmanager: + + ## Deploy alertmanager + ## + enabled: true + + # Optional: Override the namespace where Alertmanager will be deployed. + namespaceOverride: "" + + ## Annotations for Alertmanager + ## + annotations: {} + + ## Additional labels for Alertmanager + ## + additionalLabels: {} + + ## API that Prometheus will use to communicate with alertmanager. Possible values are v1, v2 + ## + apiVersion: v2 + + ## @param alertmanager.enableFeatures Enable access to Alertmanager disabled features. + ## + enableFeatures: [] + + ## Create dashboard configmap even if alertmanager deployment has been disabled + ## + forceDeployDashboards: false + + ## Network Policy configuration + ## + networkPolicy: + # -- Enable network policy for Alertmanager + enabled: false + + # -- Define policy types. If egress is enabled, both Ingress and Egress will be used + # Valid values are ["Ingress"] or ["Ingress", "Egress"] + ## + policyTypes: + - Ingress + + # -- Gateway (formerly ingress controller) configuration + ## + gateway: + # -- Gateway namespace + ## + namespace: "" + # -- Gateway pod labels + ## + podLabels: {} + # app.kubernetes.io/name: ingress-nginx + + # -- Additional custom ingress rules + ## + additionalIngress: [] + # - from: + # - namespaceSelector: + # matchLabels: + # name: another-namespace + # podSelector: + # matchLabels: + # app: another-app + # - from: + # - podSelector: + # matchLabels: + # app.kubernetes.io/name: loki + # ports: + # - port: 9093 + # protocol: TCP + + # -- Configure egress rules + ## + egress: + # -- Enable egress rules. When enabled, policyTypes will include Egress + ## + enabled: false + # -- Custom egress rules + ## + rules: [] + # - to: + # - namespaceSelector: {} + # podSelector: + # matchLabels: + # name: smtp-relay + # ports: + # - port: 25 + # protocol: TCP + + # -- Enable rules for alertmanager cluster traffic + ## + enableClusterRules: true + + # -- Configure monitoring component rules + ## + monitoringRules: + # -- Enable ingress from Prometheus + ## + prometheus: true + # -- Enable ingress for config reloader metrics + ## + configReloader: true + + ## Service account for Alertmanager to use. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + name: "" + annotations: {} + automountServiceAccountToken: true + + ## Configure pod disruption budgets for Alertmanager + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget + ## + podDisruptionBudget: + enabled: false + minAvailable: 1 + # maxUnavailable: "" + unhealthyPodEvictionPolicy: AlwaysAllow + + ## Enable vertical pod autoscaler support for Alertmanager + ## ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler + ## + verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + updatePolicy: + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "InPlaceOrRecreate". + updateMode: Recreate + + ## Alertmanager configuration directives + ## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file + ## https://prometheus.io/webtools/alerting/routing-tree-editor/ + ## + config: + global: + resolve_timeout: 5m + inhibit_rules: + - source_matchers: + - 'severity = critical' + target_matchers: + - 'severity =~ warning|info' + equal: + - 'namespace' + - 'alertname' + - source_matchers: + - 'severity = warning' + target_matchers: + - 'severity = info' + equal: + - 'namespace' + - 'alertname' + - source_matchers: + - 'alertname = InfoInhibitor' + target_matchers: + - 'severity = info' + equal: + - 'namespace' + - target_matchers: + - 'alertname = InfoInhibitor' + route: + group_by: ['namespace'] + group_wait: 30s + group_interval: 5m + repeat_interval: 12h + receiver: 'null' + routes: + - receiver: 'null' + matchers: + - alertname = "Watchdog" + receivers: + - name: 'null' + templates: + - '/etc/alertmanager/config/*.tmpl' + + ## Alertmanager configuration directives (as string type, preferred over the config hash map) + ## stringConfig will be used only if tplConfig is true + ## ref: https://prometheus.io/docs/alerting/configuration/#configuration-file + ## https://prometheus.io/webtools/alerting/routing-tree-editor/ + ## + stringConfig: "" + + ## Pass the Alertmanager configuration directives through Helm's templating + ## engine. If the Alertmanager configuration contains Alertmanager templates, + ## they'll need to be properly escaped so that they are not interpreted by + ## Helm + ## ref: https://helm.sh/docs/developing_charts/#using-the-tpl-function + ## https://prometheus.io/docs/alerting/configuration/#tmpl_string + ## https://prometheus.io/docs/alerting/notifications/ + ## https://prometheus.io/docs/alerting/notification_examples/ + tplConfig: false + + ## Alertmanager template files to format alerts + ## By default, templateFiles are placed in /etc/alertmanager/config/ and if + ## they have a .tmpl file suffix will be loaded. See config.templates above + ## to change, add other suffixes. If adding other suffixes, be sure to update + ## config.templates above to include those suffixes. + ## ref: https://prometheus.io/docs/alerting/notifications/ + ## https://prometheus.io/docs/alerting/notification_examples/ + ## + templateFiles: {} + # + ## An example template: + # template_1.tmpl: |- + # {{ define "cluster" }}{{ .ExternalURL | reReplaceAll ".*alertmanager\\.(.*)" "$1" }}{{ end }} + # + # {{ define "slack.myorg.text" }} + # {{- $root := . -}} + # {{ range .Alerts }} + # *Alert:* {{ .Annotations.summary }} - `{{ .Labels.severity }}` + # *Cluster:* {{ template "cluster" $root }} + # *Description:* {{ .Annotations.description }} + # *Graph:* <{{ .GeneratorURL }}|:chart_with_upwards_trend:> + # *Runbook:* <{{ .Annotations.runbook }}|:spiral_note_pad:> + # *Details:* + # {{ range .Labels.SortedPairs }} - *{{ .Name }}:* `{{ .Value }}` + # {{ end }} + # {{ end }} + # {{ end }} + + ingress: + enabled: false + + ingressClassName: "" + + annotations: {} + + labels: {} + + ## Override ingress to a different defined port on the service + # servicePort: 8081 + ## Override ingress to a different service then the default, this is useful if you need to + ## point to a specific instance of the alertmanager (eg kube-prometheus-stack-alertmanager-0) + # serviceName: kube-prometheus-stack-alertmanager-0 + + ## Hosts must be provided if Ingress is enabled. + ## + hosts: [] + # - alertmanager.domain.com + + ## Paths to use for ingress rules - one path should match the alertmanagerSpec.routePrefix + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## TLS configuration for Alertmanager Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: alertmanager-general-tls + # hosts: + # - alertmanager.example.com + + # -- BETA: Configure the gateway routes for the chart here. + # More routes can be added by adding a dictionary key like the 'main' route. + # Be aware that this is an early beta of this feature, + # kube-prometheus-stack does not guarantee this works and is subject to change. + # Being BETA this can/will change in the future without notice, do not use unless you want to take that risk + # [[ref]](https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1alpha2) + route: + main: + # -- Enables or disables the route + enabled: false + + # -- Set the route apiVersion, e.g. gateway.networking.k8s.io/v1 or gateway.networking.k8s.io/v1alpha2 + apiVersion: gateway.networking.k8s.io/v1 + # -- Set the route kind + # Valid options are GRPCRoute, HTTPRoute, TCPRoute, TLSRoute, UDPRoute + kind: HTTPRoute + + annotations: {} + labels: {} + + hostnames: [] + # - my-filter.example.com + parentRefs: [] + # - name: acme-gw + + # -- create http route for redirect (https://gateway-api.sigs.k8s.io/guides/http-redirect-rewrite/#http-to-https-redirects) + ## Take care that you only enable this on the http listener of the gateway to avoid an infinite redirect. + ## matches, filters and additionalRules will be ignored if this is set to true. Be are + httpsRedirect: false + + matches: + - path: + type: PathPrefix + value: / + + ## Filters define the filters that are applied to requests that match this rule. + filters: [] + + ## Session persistence configuration for the route rule. + sessionPersistence: {} + # sessionName: route + # type: Cookie + # absoluteTimeout: 12h + # cookieConfig: + # lifetimeType: Permanent + + ## Additional custom rules that can be added to the route + additionalRules: [] + + ## Configuration for Alertmanager secret + ## + secret: + annotations: {} + + ## Configuration for creating an Ingress that will map to each Alertmanager replica service + ## alertmanager.servicePerReplica must be enabled + ## + ingressPerReplica: + enabled: false + + ingressClassName: "" + + annotations: {} + labels: {} + + ## Final form of the hostname for each per replica ingress is + ## {{ ingressPerReplica.hostPrefix }}-{{ $replicaNumber }}.{{ ingressPerReplica.hostDomain }} + ## + ## Prefix for the per replica ingress that will have `-$replicaNumber` + ## appended to the end + hostPrefix: "" + ## Domain that will be used for the per replica ingress + hostDomain: "" + + ## Paths to use for ingress rules + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## Secret name containing the TLS certificate for alertmanager per replica ingress + ## Secret must be manually created in the namespace + tlsSecretName: "" + + ## Separated secret for each per replica Ingress. Can be used together with cert-manager + ## + tlsSecretPerReplica: + enabled: false + ## Final form of the secret for each per replica ingress is + ## {{ tlsSecretPerReplica.prefix }}-{{ $replicaNumber }} + ## + prefix: "alertmanager" + + ## Configuration for Alertmanager service + ## + service: + enabled: true + annotations: {} + labels: {} + clusterIP: "" + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + + ## Port for Alertmanager Service to listen on + ## + port: 9093 + ## Port for Alertmanager cluster communication + ## + # clusterPort: 9094 + ## To be used with a proxy extraContainer port + ## + targetPort: 9093 + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30903 + ## List of IP addresses at which the Prometheus server service is available + ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-ips + ## + + ## Additional ports to open for Alertmanager service + ## + additionalPorts: [] + # - name: oauth-proxy + # port: 8081 + # targetPort: 8081 + # - name: oauth-metrics + # port: 8082 + # targetPort: 8082 + + externalIPs: [] + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## If you want to make sure that connections from a particular client are passed to the same Pod each time + ## Accepts 'ClientIP' or 'None' + ## + sessionAffinity: None + + ## If you want to modify the ClientIP sessionAffinity timeout + ## The value must be >0 && <=86400(for 1 day) if ServiceAffinity == "ClientIP" + ## + sessionAffinityConfig: + clientIP: + timeoutSeconds: 10800 + + ## Service type + ## + type: ClusterIP + + ## Configuration for creating a separate Service for each statefulset Alertmanager replica + ## + servicePerReplica: + enabled: false + annotations: {} + + ## Port for Alertmanager Service per replica to listen on + ## + port: 9093 + + ## To be used with a proxy extraContainer port + targetPort: 9093 + + ## Port to expose on each node + ## Only used if servicePerReplica.type is 'NodePort' + ## + nodePort: 30904 + + ## Loadbalancer source IP ranges + ## Only used if servicePerReplica.type is "LoadBalancer" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + ## Configuration for creating a ServiceMonitor for AlertManager + ## + serviceMonitor: + ## If true, a ServiceMonitor will be created for the AlertManager service. + ## + selfMonitor: true + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## Additional labels + ## + additionalLabels: {} + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## enableHttp2: Whether to enable HTTP2. + ## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#endpoint + enableHttp2: true + + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#tlsconfig + tlsConfig: {} + + bearerTokenFile: + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional Endpoints + ## + additionalEndpoints: [] + # - port: oauth-metrics + # path: /metrics + + ## Settings affecting alertmanagerSpec + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#alertmanagerspec + ## + alertmanagerSpec: + ## Statefulset's persistent volume claim retention policy + ## whenDeleted and whenScaled determine whether + ## statefulset's PVCs are deleted (true) or retained (false) + ## on scaling down and deleting statefulset, respectively. + ## Requires Kubernetes version 1.27.0+. + ## Ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + persistentVolumeClaimRetentionPolicy: {} + # whenDeleted: Retain + # whenScaled: Retain + + ## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata + ## Metadata Labels and Annotations gets propagated to the Alertmanager pods. + ## + podMetadata: {} + + ## + serviceName: + + ## Image of Alertmanager + ## + image: + registry: quay.io + repository: prometheus/alertmanager + tag: v0.32.1 + sha: "" + pullPolicy: IfNotPresent + + ## If true then the user will be responsible to provide a secret with alertmanager configuration + ## So when true the config part will be ignored (including templateFiles) and the one in the secret will be used + ## + useExistingSecret: false + + ## Secrets is a list of Secrets in the same namespace as the Alertmanager object, which shall be mounted into the + ## Alertmanager Pods. The Secrets are mounted into /etc/alertmanager/secrets/. + ## + secrets: [] + + ## If false then the user will opt out of automounting API credentials. + ## + automountServiceAccountToken: true + + ## ConfigMaps is a list of ConfigMaps in the same namespace as the Alertmanager object, which shall be mounted into the Alertmanager Pods. + ## The ConfigMaps are mounted into /etc/alertmanager/configmaps/. + ## + configMaps: [] + + ## ConfigSecret is the name of a Kubernetes Secret in the same namespace as the Alertmanager object, which contains configuration for + ## this Alertmanager instance. Defaults to 'alertmanager-' The secret is mounted into /etc/alertmanager/config. + ## + # configSecret: + + ## WebTLSConfig defines the TLS parameters for HTTPS + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#alertmanagerwebspec + web: {} + + ## AlertmanagerConfigs to be selected to merge and configure Alertmanager with. + ## + alertmanagerConfigSelector: {} + ## Example which selects all alertmanagerConfig resources + ## with label "alertconfig" with values any of "example-config" or "example-config-2" + # alertmanagerConfigSelector: + # matchExpressions: + # - key: alertconfig + # operator: In + # values: + # - example-config + # - example-config-2 + # + ## Example which selects all alertmanagerConfig resources with label "role" set to "example-config" + # alertmanagerConfigSelector: + # matchLabels: + # role: example-config + + ## Namespaces to be selected for AlertmanagerConfig discovery. If nil, only check own namespace. + ## + alertmanagerConfigNamespaceSelector: {} + ## Example which selects all namespaces + ## with label "alertmanagerconfig" with values any of "example-namespace" or "example-namespace-2" + # alertmanagerConfigNamespaceSelector: + # matchExpressions: + # - key: alertmanagerconfig + # operator: In + # values: + # - example-namespace + # - example-namespace-2 + + ## Example which selects all namespaces with label "alertmanagerconfig" set to "enabled" + # alertmanagerConfigNamespaceSelector: + # matchLabels: + # alertmanagerconfig: enabled + + ## AlermanagerConfig to be used as top level configuration + ## + alertmanagerConfiguration: {} + ## Example with select a global alertmanagerconfig + # alertmanagerConfiguration: + # name: global-alertmanager-Configuration + + ## Defines the strategy used by AlertmanagerConfig objects to match alerts. eg: + ## + alertmanagerConfigMatcherStrategy: {} + ## Example with use OnNamespace strategy + # alertmanagerConfigMatcherStrategy: + # type: OnNamespace + + ## Additional command line arguments to pass to Alertmanager (in addition to those generated by the chart) + additionalArgs: [] + + ## Define Log Format + # Use logfmt (default) or json logging + logFormat: logfmt + + ## Log level for Alertmanager to be configured with. + ## + logLevel: info + + ## Size is the expected size of the alertmanager cluster. The controller will eventually make the size of the + ## running cluster equal to the expected size. + replicas: 1 + + ## Time duration Alertmanager shall retain data for. Default is '120h', and must match the regular expression + ## [0-9]+(ms|s|m|h) (milliseconds seconds minutes hours). + ## + retention: 120h + + ## Storage is the definition of how storage will be used by the Alertmanager instances. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/platform/storage.md + ## + storage: {} + # volumeClaimTemplate: + # spec: + # storageClassName: gluster + # accessModes: ["ReadWriteOnce"] + # resources: + # requests: + # storage: 50Gi + # selector: {} + + + ## The external URL the Alertmanager instances will be available under. This is necessary to generate correct URLs. This is necessary if Alertmanager is not served from root of a DNS name. string false + ## + externalUrl: + + ## The route prefix Alertmanager registers HTTP handlers for. This is useful, if using ExternalURL and a proxy is rewriting HTTP routes of a request, and the actual ExternalURL is still true, + ## but the server serves requests under a different route prefix. For example for use with kubectl proxy. + ## + routePrefix: / + + ## scheme: HTTP scheme to use. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## tlsConfig: TLS configuration to use when connect to the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#tlsconfig + tlsConfig: {} + + ## If set to true all actions on the underlying managed objects are not going to be performed, except for delete actions. + ## + paused: false + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + ## + nodeSelector: {} + + ## Define resources requests and limits for single Pods. + ## ref: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + ## + resources: {} + # requests: + # memory: 400Mi + + ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node. + ## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. + ## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node. + ## The value "" will disable pod anti-affinity so that no anti-affinity rules will be configured. + ## + podAntiAffinity: "soft" + + ## If anti-affinity is enabled sets the topologyKey to use for anti-affinity. + ## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone + ## + podAntiAffinityTopologyKey: kubernetes.io/hostname + + ## Assign custom affinity rules to the alertmanager instance + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + + ## If specified, the pod's tolerations. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## If specified, the pod's topology spread constraints. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ + ## + topologySpreadConstraints: [] + # - maxSkew: 1 + # topologyKey: topology.kubernetes.io/zone + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: + # app: alertmanager + + ## SecurityContext holds pod-level security attributes and common container settings. + ## This defaults to non root user with uid 1000 and gid 2000. *v1.PodSecurityContext false + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + securityContext: + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 2000 + seccompProfile: + type: RuntimeDefault + + ## Use the host's user namespace for Alertmanager pods. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/ + hostUsers: ~ + + ## DNS configuration for Alertmanager. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.PodDNSConfig + dnsConfig: {} + + ## DNS policy for Alertmanager. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#dnspolicystring-alias + dnsPolicy: "" + + ## Enable hostNetwork for Alertmanager. + hostNetwork: false + + ## ListenLocal makes the Alertmanager server listen on loopback, so that it does not bind against the Pod IP. + ## Note this is only for the Alertmanager UI, not the gossip communication. + ## + listenLocal: false + + ## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to an Alertmanager pod. + ## + containers: [] + # containers: + # - name: oauth-proxy + # image: quay.io/oauth2-proxy/oauth2-proxy:v7.15.2 + # args: + # - --upstream=http://127.0.0.1:9093 + # - --http-address=0.0.0.0:8081 + # - --metrics-address=0.0.0.0:8082 + # - ... + # ports: + # - containerPort: 8081 + # name: oauth-proxy + # protocol: TCP + # - containerPort: 8082 + # name: oauth-metrics + # protocol: TCP + # resources: {} + + # Additional volumes on the output StatefulSet definition. + volumes: [] + + # Additional VolumeMounts on the output StatefulSet definition. + volumeMounts: [] + + ## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes + ## (permissions, dir tree) on mounted volumes before starting prometheus + initContainers: [] + + ## Priority class assigned to the Pods + ## + priorityClassName: "" + + ## AdditionalPeers allows injecting a set of additional Alertmanagers to peer with to form a highly available cluster. + ## + additionalPeers: [] + + ## PortName to use for Alert Manager. + ## + portName: "http-web" + + ## ClusterAdvertiseAddress is the explicit address to advertise in cluster. Needs to be provided for non RFC1918 [1] (public) addresses. [1] RFC1918: https://tools.ietf.org/html/rfc1918 + ## + clusterAdvertiseAddress: false + + ## clusterGossipInterval determines interval between gossip attempts. + ## Needs to be specified as GoDuration, a time duration that can be parsed by Go's time.ParseDuration() (e.g. 45ms, 30s, 1m, 1h20m15s) + clusterGossipInterval: "" + + ## clusterPeerTimeout determines timeout for cluster peering. + ## Needs to be specified as GoDuration, a time duration that can be parsed by Go's time.ParseDuration() (e.g. 45ms, 30s, 1m, 1h20m15s) + clusterPeerTimeout: "" + + ## clusterPushpullInterval determines interval between pushpull attempts. + ## Needs to be specified as GoDuration, a time duration that can be parsed by Go's time.ParseDuration() (e.g. 45ms, 30s, 1m, 1h20m15s) + clusterPushpullInterval: "" + + ## clusterLabel defines the identifier that uniquely identifies the Alertmanager cluster. + clusterLabel: "" + + ## ForceEnableClusterMode ensures Alertmanager does not deactivate the cluster mode when running with a single replica. + ## Use case is e.g. spanning an Alertmanager cluster across Kubernetes clusters with a single replica in each. + forceEnableClusterMode: false + + ## Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to + ## be considered available. Defaults to 0 (pod will be considered available as soon as it is ready). + minReadySeconds: 0 + + ## Pod management policy. Kubernetes default is OrderedReady but prometheus-operator default is Parallel. + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies + podManagementPolicy: "" + + ## Update strategy for the StatefulSet. + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#update-strategies + updateStrategy: {} + # type: RollingUpdate + # rollingUpdate: + # maxUnavailable: 1 + + ## Duration in seconds the pod needs to terminate gracefully. + ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-termination + terminationGracePeriodSeconds: ~ + + ## Additional configuration which is not covered by the properties above. (passed through tpl) + additionalConfig: {} + + ## Additional configuration which is not covered by the properties above. + ## Useful, if you need advanced templating inside alertmanagerSpec. + ## Otherwise, use alertmanager.alertmanagerSpec.additionalConfig (passed through tpl) + additionalConfigString: "" + + ## ExtraSecret can be used to store various data in an extra secret + ## (use it for example to store hashed basic auth credentials) + extraSecret: + ## if not set, name will be auto generated + # name: "" + annotations: {} + data: {} + # auth: | + # foo:$apr1$OFG3Xybp$ckL0FHDAkoXYIlH9.cysT0 + # someoneelse:$apr1$DMZX2Z4q$6SbQIfyuLQd.xmo/P0m2c. + +## Using default values from https://github.com/grafana-community/helm-charts/blob/main/charts/grafana/values.yaml +## +grafana: + enabled: true + namespaceOverride: "" + + ## ForceDeployDatasources Create datasource configmap even if grafana deployment has been disabled + ## + forceDeployDatasources: false + + ## ForceDeployDashboard Create dashboard configmap even if grafana deployment has been disabled + ## + forceDeployDashboards: false + + ## Deploy default dashboards + ## + defaultDashboardsEnabled: true + + ## Deploy GrafanaDashboard CRDs that reference dashboards from ConfigMaps when grafana-operator is used + ## These settings control how dashboards are integrated with the Grafana Operator + ## Note: End user still need to create is own kind: GrafanaDataSource for Prometheus + ## eg: + ## apiVersion: grafana.integreatly.org/v1beta1 + ## kind: GrafanaDatasource + ## metadata: + ## name: prometheus + ## annotations: {} + ## spec: + ## allowCrossNamespaceImport: true + ## instanceSelector: + ## matchLabels: + ## app: grafana + ## datasource: + ## name: prometheus + ## type: prometheus + ## access: proxy + ## url: http://prometheus-operated.prometheus-stack.svc.cluster.local:9090 + ## isDefault: true + ## jsonData: + ## "tlsSkipVerify": true + ## "timeInterval": "5s" + ## + operator: + ## Enable references to ConfigMaps containing dashboards in GrafanaDashboard CRs + ## Set to true to allow dashboards to be loaded from ConfigMap references + dashboardsConfigMapRefEnabled: false + + ## Annotations for GrafanaDashboard Cr + ## + annotations: {} + ## Labels that should be matched kind: Grafana instance + ## Example: { app: grafana, category: dashboard } + ## + matchLabels: {} + + ## How frequently the operator should resync resources (in duration format) + ## Controls how often dashboards are reconciled by the operator + ## + resyncPeriod: 10m + + ## Which folder contains all dashboards in Grafana + ## This folder will be created on the Root level + ## Only one of 'folder', 'folderUID' or 'folderRef' can be set + ## + folder: General + + ## Which UID of the target folder contains all dashboards in Grafana + ## This allows you to use subfolder hierarchy + ## Only one of 'folder', 'folderUID' or 'folderRef' can be set + ## + folderUID: null + + ## Which GrafanaFolder reference contains all dashboards in Grafana + ## This allows you to use subfolder hierarchy. + ## Only one of 'folder', 'folderUID' or 'folderRef' can be set + ## + folderRef: null + + ## Timezone for the default dashboards + ## Other options are: browser or a specific timezone, i.e. Europe/Luxembourg + ## + defaultDashboardsTimezone: utc + + ## Editable flag for the default dashboards + ## + defaultDashboardsEditable: true + + ## Default interval for Grafana dashboards + ## + defaultDashboardsInterval: 1m + + # Administrator credentials when not using an existing secret (see below) + adminUser: admin + # adminPassword: strongpassword + + # Use an existing secret for the admin user. + admin: + ## Name of the secret. Can be templated. + existingSecret: "" + userKey: admin-user + passwordKey: admin-password + + rbac: + ## If true, Grafana PSPs will be created + ## + pspEnabled: false + + ingress: + ## If true, Grafana Ingress will be created + ## + enabled: false + + ## IngressClassName for Grafana Ingress. + ## Should be provided if Ingress is enable. + ## + # ingressClassName: nginx + + ## Annotations for Grafana Ingress + ## + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + + ## Labels to be added to the Ingress + ## + labels: {} + + ## Hostnames. + ## Must be provided if Ingress is enable. + ## + # hosts: + # - grafana.domain.com + hosts: [] + + ## Path for grafana ingress + path: / + + ## TLS configuration for grafana Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: grafana-general-tls + # hosts: + # - grafana.example.com + + # # To make Grafana persistent (Using Statefulset) + # # + # persistence: + # enabled: true + # type: sts + # storageClassName: "storageClassName" + # accessModes: + # - ReadWriteOnce + # size: 20Gi + # finalizers: + # - kubernetes.io/pvc-protection + + serviceAccount: + create: true + autoMount: true + + sidecar: + dashboards: + enabled: true + label: grafana_dashboard + labelValue: "1" + # Allow discovery in all namespaces for dashboards + searchNamespace: ALL + + # Support for new table panels, when enabled grafana auto migrates the old table panels to newer table panels + enableNewTablePanelSyntax: false + + ## Annotations for Grafana dashboard configmaps + ## + annotations: {} + multicluster: + global: + enabled: false + etcd: + enabled: false + provider: + allowUiUpdates: false + datasources: + enabled: true + defaultDatasourceEnabled: true + isDefaultDatasource: true + + name: Prometheus + uid: prometheus + + ## Extra jsonData properties to add to the datasource + # extraJsonData: + # prometheusType: Prometheus + + ## URL of prometheus datasource + ## + # url: http://prometheus-stack-prometheus:9090/ + + ## Prometheus request timeout in seconds + # timeout: 30 + + ## Query parameters to add, as a URL-encoded string, + ## to query Prometheus + # customQueryParameters: "" + + # If not defined, will use prometheus.prometheusSpec.scrapeInterval or its default + # defaultDatasourceScrapeInterval: 15s + + ## Annotations for Grafana datasource configmaps + ## + annotations: {} + + ## Set method for HTTP to send query to datasource + httpMethod: POST + + ## Create datasource for each Pod of Prometheus StatefulSet; + ## this uses by default the headless service `prometheus-operated` which is + ## created by Prometheus Operator. In case you deployed your own Service for your + ## Prometheus instance, you can specify it with the field `prometheusServiceName` + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/0fee93e12dc7c2ea1218f19ae25ec6b893460590/pkg/prometheus/statefulset.go#L255-L286 + createPrometheusReplicasDatasources: false + prometheusServiceName: prometheus-operated + label: grafana_datasource + labelValue: "1" + + ## Field with internal link pointing to existing data source in Grafana. + ## Can be provisioned via additionalDataSources + exemplarTraceIdDestinations: {} + # datasourceUid: Jaeger + # traceIdLabelName: trace_id + # urlDisplayLabel: View traces + alertmanager: + enabled: true + name: Alertmanager + uid: alertmanager + handleGrafanaManagedAlerts: false + implementation: prometheus + + extraConfigmapMounts: [] + # - name: certs-configmap + # mountPath: /etc/grafana/ssl/ + # configMap: certs-configmap + # readOnly: true + + deleteDatasources: [] + # - name: example-datasource + # orgId: 1 + + ## Configure additional grafana datasources (passed through tpl) + ## ref: https://grafana.com/docs/grafana/latest/administration/provisioning/#datasources + additionalDataSources: [] + # - name: prometheus-sample + # access: proxy + # basicAuth: true + # secureJsonData: + # basicAuthPassword: pass + # basicAuthUser: daco + # editable: false + # jsonData: + # tlsSkipVerify: true + # orgId: 1 + # type: prometheus + # url: https://{{ printf "%s-prometheus.svc" .Release.Name }}:9090 + # version: 1 + + ## Configure additional grafana datasources as a templated string (passed through tpl) + ## Useful when you need Helm flow control or templating inside the datasource definition + additionalDataSourcesString: "" + + # Flag to mark provisioned data sources for deletion if they are no longer configured. + # It takes no effect if data sources are already listed in the deleteDatasources section. + # ref: https://grafana.com/docs/grafana/latest/administration/provisioning/#example-data-source-configuration-file + prune: false + + ## Passed to grafana subchart and used by servicemonitor below + ## + service: + portName: http-web + ipFamilies: [] + ipFamilyPolicy: "" + + serviceMonitor: + # If true, a ServiceMonitor CRD is created for a prometheus operator + # https://github.com/prometheus-operator/prometheus-operator + # + enabled: true + + # Path to use for scraping metrics. Might be different if server.root_url is set + # in grafana.ini + path: "/metrics" + + # namespace: monitoring (defaults to use the namespace this chart is deployed to) + + # labels for the ServiceMonitor + labels: {} + + # Scrape interval. If not set, the Prometheus default scrape interval is used. + # + interval: "" + scheme: http + tlsConfig: {} + scrapeTimeout: 30s + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + +## Flag to disable all the kubernetes component scrapers +## +kubernetesServiceMonitors: + enabled: true + +## Component scraping the kube api server +## +kubeApiServer: + enabled: true + tlsConfig: + serverName: kubernetes + insecureSkipVerify: false + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + jobLabel: component + selector: + matchLabels: + component: apiserver + provider: kubernetes + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: + # Drop excessively noisy apiserver buckets. + - action: drop + regex: (etcd_request|apiserver_request_slo|apiserver_request_sli|apiserver_request)_duration_seconds_bucket;(0\.15|0\.2|0\.3|0\.35|0\.4|0\.45|0\.6|0\.7|0\.8|0\.9|1\.25|1\.5|1\.75|2|3|3\.5|4|4\.5|6|7|8|9|15|20|40|45|50)(\.0)? + sourceLabels: + - __name__ + - le + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: + # - __meta_kubernetes_namespace + # - __meta_kubernetes_service_name + # - __meta_kubernetes_endpoint_port_name + # action: keep + # regex: default;kubernetes;https + # - targetLabel: __address__ + # replacement: kubernetes.default.svc:443 + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + + ## defines the labels which are transferred from the associated Kubernetes Service object onto the ingested metrics. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor + targetLabels: [] + + ## Override the job label used for the apiserver. + ## This allows users who scrape apiserver metrics under a different job name (e.g. k3s-server via PushProx) + ## to align the recording rules and alerts with their actual job label. + jobNameOverride: "" + +## Component scraping the kubelet and kubelet-hosted cAdvisor +## +kubelet: + enabled: true + namespace: kube-system + + # Overrides the job selector in Grafana dashboards and Prometheus rules + # For k3s clusters, change to k3s-server + jobNameOverride: "" + + serviceMonitor: + enabled: true + ## Enable scraping /metrics from kubelet's service + kubelet: true + + ## Attach metadata to discovered targets. Requires Prometheus v2.45 for endpoints created by the operator. + ## + attachMetadata: + node: false + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## If true, Prometheus use (respect) labels provided by exporter. + ## + honorLabels: true + + ## If true, Prometheus ingests metrics with timestamp provided by exporter. If false, Prometheus ingests metrics with timestamp of scrape. + ## + honorTimestamps: true + + ## If true, defines whether Prometheus tracks staleness of the metrics that have an explicit timestamp present in scraped data. Has no effect if `honorTimestamps` is false. + ## We recommend enabling this if you want the best possible accuracy for container_ metrics scraped from cadvisor. + ## For more details see: https://github.com/prometheus-community/helm-charts/pull/5063#issuecomment-2545374849 + trackTimestampsStaleness: true + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## Enable scraping the kubelet over https. For requirements to enable this see + ## https://github.com/prometheus-operator/prometheus-operator/issues/926 + ## + https: true + + ## Skip TLS certificate validation when scraping. + ## This is enabled by default because kubelet serving certificate deployed by kubeadm is by default self-signed + ## ref: https://kubernetes.io/docs/tasks/administer-cluster/kubeadm/kubeadm-certs/#kubelet-serving-certs + ## + insecureSkipVerify: true + + ## Enable scraping /metrics/probes from kubelet's service + ## + probes: true + + ## Enable scraping /metrics/resource from kubelet's service + ## This is disabled by default because container metrics are already exposed by cAdvisor + ## + resource: false + # From kubernetes 1.18, /metrics/resource/v1alpha1 renamed to /metrics/resource + resourcePath: "/metrics/resource/v1alpha1" + ## Configure the scrape interval for resource metrics. This is configured to the default Kubelet cAdvisor + ## minimum housekeeping interval in order to avoid missing samples. Note, this value is ignored + ## if kubelet.serviceMonitor.interval is not empty. + resourceInterval: 10s + + ## Enable scraping /metrics/cadvisor from kubelet's service + ## + cAdvisor: true + ## Configure the scrape interval for cAdvisor. This is configured to the default Kubelet cAdvisor + ## minimum housekeeping interval in order to avoid missing samples. Note, this value is ignored + ## if kubelet.serviceMonitor.interval is not empty. + cAdvisorInterval: 10s + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + cAdvisorMetricRelabelings: + # Drop less useful container CPU metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_cpu_(cfs_throttled_seconds_total|load_average_10s|system_seconds_total|user_seconds_total)' + # Drop less useful container / always zero filesystem metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_fs_(io_current|io_time_seconds_total|io_time_weighted_seconds_total|reads_merged_total|sector_reads_total|sector_writes_total|writes_merged_total)' + # Drop less useful / always zero container memory metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_memory_(mapped_file|swap)' + # Drop less useful container process metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_(file_descriptors|tasks_state|threads_max)' + # Drop container_memory_failures_total{scope="hierarchy"} metrics, + # we only need the container scope. + - sourceLabels: [__name__, scope] + action: drop + regex: 'container_memory_failures_total;hierarchy' + # Drop container_network_... metrics that match various interfaces that + # correspond to CNI and similar interfaces. This avoids capturing network + # metrics for host network containers. + - sourceLabels: [__name__, interface] + action: drop + regex: 'container_network_.*;(cali|cilium|cni|lxc|nodelocaldns|tunl).*' + # Drop container spec metrics that overlap with kube-state-metrics. + - sourceLabels: [__name__] + action: drop + regex: 'container_spec.*' + # Drop cgroup metrics with no pod. + - sourceLabels: [id, pod] + action: drop + regex: '.+;' + # - sourceLabels: [__name__, image] + # separator: ; + # regex: container_([a-z_]+); + # replacement: $1 + # action: drop + # - sourceLabels: [__name__] + # separator: ; + # regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + # replacement: $1 + # action: drop + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + probesMetricRelabelings: [] + # - sourceLabels: [__name__, image] + # separator: ; + # regex: container_([a-z_]+); + # replacement: $1 + # action: drop + # - sourceLabels: [__name__] + # separator: ; + # regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + # replacement: $1 + # action: drop + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + ## metrics_path is required to match upstream rules and charts + cAdvisorRelabelings: + - action: replace + sourceLabels: [__metrics_path__] + targetLabel: metrics_path + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + probesRelabelings: + - action: replace + sourceLabels: [__metrics_path__] + targetLabel: metrics_path + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + resourceRelabelings: + - action: replace + sourceLabels: [__metrics_path__] + targetLabel: metrics_path + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: + # Reduce bucket cardinality of kubelet storage operations. + - action: drop + sourceLabels: [__name__, le] + regex: (csi_operations|storage_operation_duration)_seconds_bucket;(0.25|2.5|15|25|120|600)(\.0)? + # - sourceLabels: [__name__, image] + # separator: ; + # regex: container_([a-z_]+); + # replacement: $1 + # action: drop + # - sourceLabels: [__name__] + # separator: ; + # regex: container_(network_tcp_usage_total|network_udp_usage_total|tasks_state|cpu_load_average_10s) + # replacement: $1 + # action: drop + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + ## metrics_path is required to match upstream rules and charts + relabelings: + - action: replace + sourceLabels: [__metrics_path__] + targetLabel: metrics_path + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + + ## defines the labels which are transferred from the associated Kubernetes Service object onto the ingested metrics. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor + targetLabels: [] + +## Component scraping the kube controller manager +## +kubeControllerManager: + enabled: true + + # Overrides the job selector in Grafana dashboards and Prometheus rules + # For k3s clusters, change to k3s-server + jobNameOverride: "" + + ## If your kube controller manager is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## If using kubeControllerManager.endpoints only the port and targetPort are used + ## + service: + enabled: true + ## If null or unset, the value is determined dynamically based on target Kubernetes version due to change + ## of default port in Kubernetes 1.22. + ## + port: null + targetPort: null + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + # selector: + # component: kube-controller-manager + + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # component: kube-controller-manager + + ## Enable scraping kube-controller-manager over https. + ## Requires proper certs (not self-signed) and delegated authentication/authorization checks. + ## If null or unset, the value is determined dynamically based on target Kubernetes version. + ## + https: null + + # Skip TLS certificate validation when scraping + insecureSkipVerify: null + + # Name of the server to use when validating TLS certificate + serverName: null + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + + ## defines the labels which are transferred from the associated Kubernetes Service object onto the ingested metrics. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor + targetLabels: [] + +## Component scraping coreDns. Use either this or kubeDns +## +coreDns: + enabled: true + service: + enabled: true + port: 9153 + targetPort: 9153 + + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + # selector: + # k8s-app: kube-dns + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # k8s-app: kube-dns + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + + ## defines the labels which are transferred from the associated Kubernetes Service object onto the ingested metrics. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor + targetLabels: [] + + ## File containing bearer token to be used when scraping targets + ## Empty value do not send any bearer token. + ## + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +## Component scraping kubeDns. Use either this or coreDns +## +kubeDns: + enabled: false + service: + dnsmasq: + port: 10054 + targetPort: 10054 + skydns: + port: 10055 + targetPort: 10055 + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + # selector: + # k8s-app: kube-dns + serviceMonitor: + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + jobLabel: jobLabel + selector: {} + # matchLabels: + # k8s-app: kube-dns + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + dnsmasqMetricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + dnsmasqRelabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + + ## defines the labels which are transferred from the associated Kubernetes Service object onto the ingested metrics. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor + targetLabels: [] + + ## File containing bearer token to be used when scraping targets + ## Empty value do not send any bearer token. + ## + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +## Component scraping etcd +## +kubeEtcd: + enabled: true + + ## If your etcd is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## Etcd service. If using kubeEtcd.endpoints only the port and targetPort are used + ## + service: + enabled: true + port: 2381 + targetPort: 2381 + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + # selector: + # component: etcd + + ## Configure secure access to the etcd cluster by loading a secret into prometheus and + ## specifying security configuration below. For example, with a secret named etcd-client-cert + ## + ## serviceMonitor: + ## scheme: https + ## insecureSkipVerify: false + ## serverName: localhost + ## caFile: /etc/prometheus/secrets/etcd-client-cert/etcd-ca + ## certFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client + ## keyFile: /etc/prometheus/secrets/etcd-client-cert/etcd-client-key + ## + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + scheme: http + insecureSkipVerify: false + serverName: "" + caFile: "" + certFile: "" + keyFile: "" + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # component: etcd + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + + ## defines the labels which are transferred from the associated Kubernetes Service object onto the ingested metrics. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor + targetLabels: [] + + ## File containing bearer token to be used when scraping targets + ## Empty value do not send any bearer token. + ## + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +## Component scraping kube scheduler +## +kubeScheduler: + enabled: true + + # Overrides the job selector in Grafana dashboards and Prometheus rules + # For k3s clusters, change to k3s-server + jobNameOverride: "" + + ## If your kube scheduler is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + ## If using kubeScheduler.endpoints only the port and targetPort are used + ## + service: + enabled: true + ## If null or unset, the value is determined dynamically based on target Kubernetes version due to change + ## of default port in Kubernetes 1.23. + ## + port: null + targetPort: null + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + # selector: + # component: kube-scheduler + + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + ## Enable scraping kube-scheduler over https. + ## Requires proper certs (not self-signed) and delegated authentication/authorization checks. + ## If null or unset, the value is determined dynamically based on target Kubernetes version. + ## + https: null + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # component: kube-scheduler + + ## Skip TLS certificate validation when scraping + insecureSkipVerify: null + + ## Name of the server to use when validating TLS certificate + serverName: null + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + + ## defines the labels which are transferred from the associated Kubernetes Service object onto the ingested metrics. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor + targetLabels: [] + +## Component scraping kube proxy +## +kubeProxy: + enabled: true + + # Overrides the job selector in Grafana dashboards and Prometheus rules + # For k3s clusters, change to k3s-server + jobNameOverride: "" + + ## If your kube proxy is not deployed as a pod, specify IPs it can be found on + ## + endpoints: [] + # - 10.141.4.22 + # - 10.141.4.23 + # - 10.141.4.24 + + service: + enabled: true + port: 10249 + targetPort: 10249 + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + # selector: + # k8s-app: kube-proxy + + serviceMonitor: + enabled: true + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## port: Name of the port the metrics will be scraped from + ## + port: http-metrics + + jobLabel: jobLabel + selector: {} + # matchLabels: + # k8s-app: kube-proxy + + ## Enable scraping kube-proxy over https. + ## Requires proper certs (not self-signed) and delegated authentication/authorization checks + ## + https: false + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## Additional labels + ## + additionalLabels: {} + # foo: bar + + ## defines the labels which are transferred from the associated Kubernetes Service object onto the ingested metrics. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#servicemonitor + targetLabels: [] + + ## File containing bearer token to be used when scraping targets + ## Empty value do not send any bearer token. + ## + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + +## Component scraping kube state metrics +## +kubeStateMetrics: + enabled: true + +## Configuration for kube-state-metrics subchart +## +kube-state-metrics: + ## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box + releaseLabel: true + + ## Enable scraping via kubernetes-service-endpoints + ## Disabled by default as we service monitor is enabled below + ## + prometheusScrape: false + + prometheus: + monitor: + ## Enable scraping via service monitor + ## Disable to prevent duplication if you enable prometheusScrape above + enabled: true + + ## kube-state-metrics endpoint + http: + ## Keep labels from scraped data, overriding server-side labels + honorLabels: true + + ## selfMonitor endpoint + metrics: + ## Keep labels from scraped data, overriding server-side labels + honorLabels: true + +## Deploy node exporter as a daemonset to all nodes +## +nodeExporter: + enabled: true + operatingSystems: + linux: + enabled: true + aix: + enabled: true + darwin: + enabled: true + + ## ForceDeployDashboard Create dashboard configmap even if nodeExporter deployment has been disabled + ## + forceDeployDashboards: false + +## Configuration for prometheus-node-exporter subchart +## +prometheus-node-exporter: + namespaceOverride: "" + podLabels: + ## Add the 'node-exporter' label to be used by serviceMonitor and podMonitor to match standard common usage in rules and grafana dashboards + ## + jobLabel: node-exporter + releaseLabel: true + extraArgs: + - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|run/containerd/.+|var/lib/docker/.+|var/lib/kubelet/.+)($|/) + - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs|erofs)$ + service: + portName: http-metrics + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + labels: + jobLabel: node-exporter + + image: + distroless: true + + prometheus: + monitor: + enabled: true + + jobLabel: jobLabel + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## How long until a scrape request times out. If not set, the Prometheus default scape timeout is used. + ## + scrapeTimeout: "" + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## MetricRelabelConfigs to apply to samples after scraping, but before ingestion. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + metricRelabelings: [] + # - sourceLabels: [__name__] + # separator: ; + # regex: ^node_mountstats_nfs_(event|operations|transport)_.+ + # replacement: $1 + # action: drop + + ## RelabelConfigs to apply to samples before scraping + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#relabelconfig + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above. + ## + # attachMetadata: + # node: false + + podMonitor: + enabled: false + + jobLabel: jobLabel + + rbac: + ## If true, create PSPs for node-exporter + ## + pspEnabled: false + +## Manages Prometheus and Alertmanager components +## +prometheusOperator: + enabled: true + + ## Use '{{ template "kube-prometheus-stack.fullname" . }}-operator' by default + fullnameOverride: "" + + ## Number of old replicasets to retain ## + ## The default value is 10, 0 will garbage-collect old replicasets ## + revisionHistoryLimit: 10 + + ## Strategy of the deployment + ## + strategy: {} + + ## Prometheus-Operator v0.39.0 and later support TLS natively. + ## + tls: + enabled: true + # Value must match version names from https://pkg.go.dev/crypto/tls#pkg-constants + tlsMinVersion: VersionTLS13 + # The default webhook port is 10250 in order to work out-of-the-box in GKE private clusters and avoid adding firewall rules. + internalPort: 10250 + + ## Liveness probe for the prometheusOperator deployment + ## + livenessProbe: + enabled: true + failureThreshold: 3 + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + ## Readiness probe for the prometheusOperator deployment + ## + readinessProbe: + enabled: true + failureThreshold: 3 + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + + ## Admission webhook support for PrometheusRules resources added in Prometheus Operator 0.30 can be enabled to prevent incorrectly formatted + ## rules from making their way into prometheus and potentially preventing the container from starting + admissionWebhooks: + ## Valid values: Fail, Ignore, IgnoreOnInstallOnly + ## IgnoreOnInstallOnly - If Release.IsInstall returns "true", set "Ignore" otherwise "Fail" + failurePolicy: "" + ## The default timeoutSeconds is 10 and the maximum value is 30. + timeoutSeconds: 10 + enabled: true + ## A PEM encoded CA bundle which will be used to validate the webhook's server certificate. + ## If unspecified, system trust roots on the apiserver are used. + caBundle: "" + ## If enabled, generate a self-signed certificate, then patch the webhook configurations with the generated data. + ## On chart upgrades (or if the secret exists) the cert will not be re-generated. You can use this to provide your own + ## certs ahead of time if you wish. + ## + annotations: {} + # argocd.argoproj.io/hook: PreSync + # argocd.argoproj.io/hook-delete-policy: HookSucceeded + + namespaceSelector: {} + objectSelector: {} + matchConditions: {} + + mutatingWebhookConfiguration: + annotations: {} + # argocd.argoproj.io/hook: PreSync + + validatingWebhookConfiguration: + annotations: {} + # argocd.argoproj.io/hook: PreSync + + deployment: + enabled: false + + ## Number of replicas + ## + replicas: 1 + + ## Strategy of the deployment + ## + strategy: {} + + # Ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ + podDisruptionBudget: + enabled: false + minAvailable: 1 + # maxUnavailable: "" + unhealthyPodEvictionPolicy: AlwaysAllow + + ## Number of old replicasets to retain ## + ## The default value is 10, 0 will garbage-collect old replicasets ## + revisionHistoryLimit: 10 + + ## Prometheus-Operator v0.39.0 and later support TLS natively. + ## + tls: + enabled: true + # Value must match version names from https://pkg.go.dev/crypto/tls#pkg-constants + tlsMinVersion: VersionTLS13 + # The default webhook port is 10250 in order to work out-of-the-box in GKE private clusters and avoid adding firewall rules. + internalPort: 10250 + + ## Service account for Prometheus Operator Webhook to use. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + annotations: {} + automountServiceAccountToken: false + create: true + name: "" + + ## Configuration for Prometheus operator Webhook service + ## + service: + annotations: {} + labels: {} + clusterIP: "" + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 31080 + + nodePortTls: 31443 + + ## Additional ports to open for Prometheus operator Webhook service + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#multi-port-services + ## + additionalPorts: [] + + ## Loadbalancer IP + ## Only use if service.type is "LoadBalancer" + ## + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## NodePort, ClusterIP, LoadBalancer + ## + type: ClusterIP + + ## List of IP addresses at which the Prometheus server service is available + ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-ips + ## + externalIPs: [] + + # ## Labels to add to the operator webhook deployment + # ## + labels: {} + + ## Annotations to add to the operator webhook deployment + ## + annotations: {} + + ## Labels to add to the operator webhook pod + ## + podLabels: {} + + ## Annotations to add to the operator webhook pod + ## + podAnnotations: {} + + ## Assign a PriorityClassName to pods if set + # priorityClassName: "" + + ## Define Log Format + # Use logfmt (default) or json logging + # logFormat: logfmt + + ## Decrease log verbosity to errors only + # logLevel: error + + ## Prometheus-operator webhook image + ## + image: + registry: quay.io + repository: prometheus-operator/admission-webhook + # if not set appVersion field from Chart.yaml is used + tag: "" + sha: "" + pullPolicy: IfNotPresent + + ## Define Log Format + # Use logfmt (default) or json logging + # logFormat: logfmt + + ## Decrease log verbosity to errors only + # logLevel: error + + + ## Liveness probe + ## + livenessProbe: + enabled: true + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + + ## Readiness probe + ## + readinessProbe: + enabled: true + failureThreshold: 3 + initialDelaySeconds: 5 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + + ## Resource limits & requests + ## + resources: {} + # limits: + # cpu: 200m + # memory: 200Mi + # requests: + # cpu: 100m + # memory: 100Mi + + # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), + # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working + ## + hostNetwork: false + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + ## + nodeSelector: {} + + ## Tolerations for use with node taints + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## Assign custom affinity rules to the prometheus operator + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + dnsConfig: {} + # nameservers: + # - 1.2.3.4 + # searches: + # - ns1.svc.cluster-domain.example + # - my.dns.search.suffix + # options: + # - name: ndots + # value: "2" + # - name: edns0 + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + + ## Container-specific security context configuration + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + ## If false then the user will opt out of automounting API credentials. + ## + automountServiceAccountToken: true + + patch: + enabled: true + image: + registry: ghcr.io + repository: jkroepke/kube-webhook-certgen + tag: 1.8.2 + sha: "" + pullPolicy: IfNotPresent + resources: {} + ## Provide a priority class name to the webhook patching job + ## + priorityClassName: "" + ttlSecondsAfterFinished: 60 + annotations: {} + # argocd.argoproj.io/hook: PreSync + # argocd.argoproj.io/hook-delete-policy: HookSucceeded + podAnnotations: {} + nodeSelector: {} + affinity: {} + tolerations: [] + + ## SecurityContext holds pod-level security attributes and common container settings. + ## This defaults to non root user with uid 2000 and gid 2000. *v1.PodSecurityContext false + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + securityContext: + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 2000 + seccompProfile: + type: RuntimeDefault + ## Service account for Prometheus Operator Webhook Job Patch to use. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + annotations: {} + automountServiceAccountToken: true + + # Security context for create job container + createSecretJob: + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + # Security context for patch job container + patchWebhookJob: + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + # Use certmanager to generate webhook certs + certManager: + enabled: false + # self-signed root certificate + rootCert: + duration: "" # default to be 5y + # -- Set the revisionHistoryLimit on the Certificate. See + # https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec + # Defaults to nil. + revisionHistoryLimit: + admissionCert: + duration: "" # default to be 1y + # -- Set the revisionHistoryLimit on the Certificate. See + # https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec + # Defaults to nil. + revisionHistoryLimit: + # issuerRef: + # name: "issuer" + # kind: "ClusterIssuer" + + ## Namespaces to scope the interaction of the Prometheus Operator and the apiserver (allow list). + ## This is mutually exclusive with denyNamespaces. Setting this to an empty object will disable the configuration + ## + namespaces: {} + # releaseNamespace: true + # additional: + # - kube-system + + ## Namespaces not to scope the interaction of the Prometheus Operator (deny list). + ## + denyNamespaces: [] + + ## Filter namespaces to look for prometheus-operator custom resources + ## + alertmanagerInstanceNamespaces: [] + alertmanagerConfigNamespaces: [] + prometheusInstanceNamespaces: [] + thanosRulerInstanceNamespaces: [] + + ## The clusterDomain value will be added to the cluster.peer option of the alertmanager. + ## Without this specified option cluster.peer will have value alertmanager-monitoring-alertmanager-0.alertmanager-operated:9094 (default value) + ## With this specified option cluster.peer will have value alertmanager-monitoring-alertmanager-0.alertmanager-operated.namespace.svc.cluster-domain:9094 + ## + # clusterDomain: "cluster.local" + + networkPolicy: + ## Enable creation of NetworkPolicy resources. + ## + enabled: false + + ## Flavor of the network policy to use. + # Can be: + # * kubernetes for networking.k8s.io/v1/NetworkPolicy + # * cilium for cilium.io/v2/CiliumNetworkPolicy + flavor: kubernetes + + # cilium: + # egress: + + ## match labels used in selector + # matchLabels: {} + + ## Service account for Prometheus Operator to use. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + name: "" + automountServiceAccountToken: true + annotations: {} + + # -- terminationGracePeriodSeconds for container lifecycle hook + terminationGracePeriodSeconds: 30 + # -- Specify lifecycle hooks for the controller + lifecycle: {} + ## Configuration for Prometheus operator service + ## + service: + annotations: {} + labels: {} + clusterIP: "" + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30080 + + nodePortTls: 30443 + + ## Additional ports to open for Prometheus operator service + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#multi-port-services + ## + additionalPorts: [] + + ## Loadbalancer IP + ## Only use if service.type is "LoadBalancer" + ## + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## NodePort, ClusterIP, LoadBalancer + ## + type: ClusterIP + + ## List of IP addresses at which the Prometheus server service is available + ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-ips + ## + externalIPs: [] + + # ## Labels to add to the operator deployment + # ## + labels: {} + + ## Annotations to add to the operator deployment + ## + annotations: {} + + ## Labels to add to the operator pod + ## + podLabels: {} + + ## Annotations to add to the operator pod + ## + podAnnotations: {} + + ## Assign a podDisruptionBudget to the operator + ## + podDisruptionBudget: + enabled: false + minAvailable: 1 + # maxUnavailable: "" + unhealthyPodEvictionPolicy: AlwaysAllow + + ## Assign a PriorityClassName to pods if set + # priorityClassName: "" + + ## Define Log Format + # Use logfmt (default) or json logging + # logFormat: logfmt + + ## Decrease log verbosity to errors only + # logLevel: error + + kubeletService: + ## If true, the operator will create and maintain a service for scraping kubelets + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/helm/prometheus-operator/README.md + ## + enabled: true + namespace: kube-system + selector: "" + ## Use '{{ template "kube-prometheus-stack.fullname" . }}-kubelet' by default + name: "" + + ## Create Endpoints objects for kubelet targets. + kubeletEndpointsEnabled: true + ## Create EndpointSlice objects for kubelet targets. + kubeletEndpointSliceEnabled: false + + ## Extra arguments to pass to prometheusOperator + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/platform/operator.md + extraArgs: [] + # - --labels="cluster=talos-cluster" + + ## Create a servicemonitor for the operator + ## + serviceMonitor: + ## If true, create a serviceMonitor for prometheus operator + ## + selfMonitor: true + + ## Labels for ServiceMonitor + additionalLabels: {} + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## Scrape timeout. If not set, the Prometheus default scrape timeout is used. + scrapeTimeout: "" + + ## Metric relabel configs to apply to samples before ingestion. + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + # relabel configs to apply to samples before ingestion. + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Resource limits & requests + ## + resources: {} + # limits: + # cpu: 200m + # memory: 200Mi + # requests: + # cpu: 100m + # memory: 100Mi + + ## Operator Environment + ## env: + ## VARIABLE: value + env: + GOGC: "30" + + # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), + # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working + ## + hostNetwork: false + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + ## + nodeSelector: {} + + ## Tolerations for use with node taints + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## Assign custom affinity rules to the prometheus operator + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + dnsConfig: {} + # nameservers: + # - 1.2.3.4 + # searches: + # - ns1.svc.cluster-domain.example + # - my.dns.search.suffix + # options: + # - name: ndots + # value: "2" + # - name: edns0 + securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + seccompProfile: + type: RuntimeDefault + + ## Setup hostUsers for prometheus-operator + ## ref: https://kubernetes.io/docs/concepts/workloads/pods/user-namespaces/ + hostUsers: ~ + + ## Container-specific security context configuration + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ + ## + containerSecurityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - ALL + + # Enable vertical pod autoscaler support for prometheus-operator + verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "InPlaceOrRecreate". + updateMode: Recreate + + ## Prometheus-operator image + ## + image: + registry: quay.io + repository: prometheus-operator/prometheus-operator + # if not set appVersion field from Chart.yaml is used + tag: "" + sha: "" + pullPolicy: IfNotPresent + + ## Prometheus image to use for prometheuses managed by the operator + ## + # prometheusDefaultBaseImage: prometheus/prometheus + + ## Prometheus image registry to use for prometheuses managed by the operator + ## + # prometheusDefaultBaseImageRegistry: quay.io + + ## Alertmanager image to use for alertmanagers managed by the operator + ## + # alertmanagerDefaultBaseImage: prometheus/alertmanager + + ## Alertmanager image registry to use for alertmanagers managed by the operator + ## + # alertmanagerDefaultBaseImageRegistry: quay.io + + ## Prometheus-config-reloader + ## + prometheusConfigReloader: + image: + registry: quay.io + repository: prometheus-operator/prometheus-config-reloader + # if not set appVersion field from Chart.yaml is used + tag: "" + sha: "" + + # add prometheus config reloader liveness and readiness probe. Default: false + enableProbe: false + + # resource config for prometheusConfigReloader + resources: {} + # requests: + # cpu: 200m + # memory: 50Mi + # limits: + # cpu: 200m + # memory: 50Mi + + ## Thanos side-car image when configured + ## + thanosImage: + registry: quay.io + repository: thanos/thanos + tag: v0.41.0 + sha: "" + + ## Set a Label Selector to filter watched prometheus and prometheusAgent + ## + prometheusInstanceSelector: "" + + ## Set a Label Selector to filter watched alertmanager + ## + alertmanagerInstanceSelector: "" + + ## Set a Label Selector to filter watched thanosRuler + thanosRulerInstanceSelector: "" + + ## Set a Field Selector to filter watched secrets + ## + secretFieldSelector: "type!=kubernetes.io/dockercfg,type!=kubernetes.io/service-account-token,type!=helm.sh/release.v1" + + ## If false then the user will opt out of automounting API credentials. + ## + automountServiceAccountToken: true + + ## Additional volumes + ## + extraVolumes: [] + + ## Additional volume mounts + ## + extraVolumeMounts: [] + +## Deploy a Prometheus instance +## +prometheus: + enabled: true + + ## Toggle prometheus into agent mode + ## Note many of features described below (e.g. rules, query, alerting, remote read, thanos) will not work in agent mode. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/platform/prometheus-agent.md + ## + agentMode: false + + ## Annotations for Prometheus + ## + annotations: {} + + ## Additional labels for Prometheus + ## + additionalLabels: {} + + ## Configure network policy for the prometheus + networkPolicy: + enabled: false + + ## Flavor of the network policy to use. + # Can be: + # * kubernetes for networking.k8s.io/v1/NetworkPolicy + # * cilium for cilium.io/v2/CiliumNetworkPolicy + flavor: kubernetes + + namespace: + + # cilium: + # endpointSelector: + # egress: + # ingress: + + # egress: + # - {} + # ingress: + # - {} + # podSelector: + # matchLabels: + # app: prometheus + + ## Service account for Prometheuses to use. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ + ## + serviceAccount: + create: true + name: "" + annotations: {} + automountServiceAccountToken: true + + # Service for thanos service discovery on sidecar + # Enable this can make Thanos Query can use + # `--store=dnssrv+_grpc._tcp.${kube-prometheus-stack.fullname}-thanos-discovery.${namespace}.svc.cluster.local` to discovery + # Thanos sidecar on prometheus nodes + # (Please remember to change ${kube-prometheus-stack.fullname} and ${namespace}. Not just copy and paste!) + thanosService: + enabled: false + annotations: {} + labels: {} + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + ## Service dual stack + ## + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + + ## gRPC port config + portName: grpc + port: 10901 + targetPort: "grpc" + + ## HTTP port config (for metrics) + httpPortName: http + httpPort: 10902 + targetHttpPort: "http" + + ## ClusterIP to assign + # Default is to make this a headless service ("None") + clusterIP: "None" + + ## Port to expose on each node, if service type is NodePort + ## + nodePort: 30901 + httpNodePort: 30902 + + # ServiceMonitor to scrape Sidecar metrics + # Needs thanosService to be enabled as well + thanosServiceMonitor: + enabled: false + interval: "" + + ## Additional labels + ## + additionalLabels: {} + + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#tlsconfig + tlsConfig: {} + + bearerTokenFile: + + ## Metric relabel configs to apply to samples before ingestion. + metricRelabelings: [] + + ## relabel configs to apply to samples before ingestion. + relabelings: [] + + # Service for external access to sidecar + # Enabling this creates a service to expose thanos-sidecar outside the cluster. + thanosServiceExternal: + enabled: false + annotations: {} + labels: {} + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## gRPC port config + portName: grpc + port: 10901 + targetPort: "grpc" + + ## HTTP port config (for metrics) + httpPortName: http + httpPort: 10902 + targetHttpPort: "http" + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: LoadBalancer + + ## Port to expose on each node + ## + nodePort: 30901 + httpNodePort: 30902 + + ## Configuration for Prometheus service + ## + service: + enabled: true + annotations: {} + labels: {} + clusterIP: "" + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + + ## Port for Prometheus Service to listen on + ## + port: 9090 + + ## To be used with a proxy extraContainer port + targetPort: 9090 + + ## Port for Prometheus Reloader to listen on + ## + reloaderWebPort: 8080 + + ## Port to expose for Prometheus Reloader + ## Only used if service.type is 'NodePort' + ## + reloaderWebNodePort: null + + ## List of IP addresses at which the Prometheus server service is available + ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-ips + ## + externalIPs: [] + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30090 + + ## Loadbalancer IP + ## Only use if service.type is "LoadBalancer" + loadBalancerIP: "" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + ## Additional ports to open for Prometheus service + ## + additionalPorts: [] + # additionalPorts: + # - name: oauth-proxy + # port: 8081 + # targetPort: 8081 + # - name: oauth-metrics + # port: 8082 + # targetPort: 8082 + + ## Consider that all endpoints are considered "ready" even if the Pods themselves are not + ## Ref: https://kubernetes.io/docs/reference/kubernetes-api/service-resources/service-v1/#ServiceSpec + publishNotReadyAddresses: false + + ## If you want to make sure that connections from a particular client are passed to the same Pod each time + ## Accepts 'ClientIP' or 'None' + ## + sessionAffinity: None + + ## If you want to modify the ClientIP sessionAffinity timeout + ## The value must be >0 && <=86400(for 1 day) if ServiceAffinity == "ClientIP" + ## + sessionAffinityConfig: + clientIP: + timeoutSeconds: 10800 + + ## Configuration for creating a separate Service for each statefulset Prometheus replica + ## + servicePerReplica: + enabled: false + annotations: {} + + ## Port for Prometheus Service per replica to listen on + ## + port: 9090 + + ## To be used with a proxy extraContainer port + targetPort: 9090 + + ## Port to expose on each node + ## Only used if servicePerReplica.type is 'NodePort' + ## + nodePort: 30091 + + ## Loadbalancer source IP ranges + ## Only used if servicePerReplica.type is "LoadBalancer" + loadBalancerSourceRanges: [] + + ## Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + ## + externalTrafficPolicy: Cluster + + ## Service type + ## + type: ClusterIP + + ## Service dual stack + ## + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + + ## Configure pod disruption budgets for Prometheus + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget + ## + podDisruptionBudget: + enabled: false + minAvailable: 1 + # maxUnavailable: "" + unhealthyPodEvictionPolicy: AlwaysAllow + + ## Enable vertical pod autoscaler support for Prometheus + ## ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler + ## + verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + updatePolicy: + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "InPlaceOrRecreate". + updateMode: Recreate + + # Ingress exposes thanos sidecar outside the cluster + thanosIngress: + enabled: false + + ingressClassName: "" + + annotations: {} + labels: {} + servicePort: 10901 + + ## Port to expose on each node + ## Only used if service.type is 'NodePort' + ## + nodePort: 30901 + + ## Hosts must be provided if Ingress is enabled. + ## + hosts: [] + # - thanos-gateway.domain.com + + ## Paths to use for ingress rules + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## TLS configuration for Thanos Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: thanos-gateway-tls + # hosts: + # - thanos-gateway.domain.com + # + + ## ExtraSecret can be used to store various data in an extra secret + ## (use it for example to store hashed basic auth credentials) + extraSecret: + ## if not set, name will be auto generated + # name: "" + annotations: {} + data: {} + # auth: | + # foo:$apr1$OFG3Xybp$ckL0FHDAkoXYIlH9.cysT0 + # someoneelse:$apr1$DMZX2Z4q$6SbQIfyuLQd.xmo/P0m2c. + + ingress: + enabled: false + + ingressClassName: "" + + annotations: {} + labels: {} + + ## Redirect ingress to an additional defined port on the service + # servicePort: 8081 + + ## Hostnames. + ## Must be provided if Ingress is enabled. + ## + # hosts: + # - prometheus.domain.com + hosts: [] + + ## Paths to use for ingress rules - one path should match the prometheusSpec.routePrefix + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## TLS configuration for Prometheus Ingress + ## Secret must be manually created in the namespace + ## + tls: [] + # - secretName: prometheus-general-tls + # hosts: + # - prometheus.example.com + + # -- BETA: Configure the gateway routes for the chart here. + # More routes can be added by adding a dictionary key like the 'main' route. + # Be aware that this is an early beta of this feature, + # kube-prometheus-stack does not guarantee this works and is subject to change. + # Being BETA this can/will change in the future without notice, do not use unless you want to take that risk + # [[ref]](https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io%2fv1alpha2) + route: + main: + # -- Enables or disables the route + enabled: false + + # -- Set the route apiVersion, e.g. gateway.networking.k8s.io/v1 or gateway.networking.k8s.io/v1alpha2 + apiVersion: gateway.networking.k8s.io/v1 + # -- Set the route kind + # Valid options are GRPCRoute, HTTPRoute, TCPRoute, TLSRoute, UDPRoute + kind: HTTPRoute + + annotations: {} + labels: {} + + hostnames: [] + # - my-filter.example.com + parentRefs: [] + # - name: acme-gw + + # -- create http route for redirect (https://gateway-api.sigs.k8s.io/guides/http-redirect-rewrite/#http-to-https-redirects) + ## Take care that you only enable this on the http listener of the gateway to avoid an infinite redirect. + ## matches, filters and additionalRules will be ignored if this is set to true. Be are + httpsRedirect: false + + matches: + - path: + type: PathPrefix + value: / + + ## Filters define the filters that are applied to requests that match this rule. + filters: [] + + ## Session persistence configuration for the route rule. + sessionPersistence: {} + # sessionName: route + # type: Cookie + # absoluteTimeout: 12h + # cookieConfig: + # lifetimeType: Permanent + + ## Additional custom rules that can be added to the route + additionalRules: [] + + ## Configuration for creating an Ingress that will map to each Prometheus replica service + ## prometheus.servicePerReplica must be enabled + ## + ingressPerReplica: + enabled: false + + ingressClassName: "" + + annotations: {} + labels: {} + + ## Final form of the hostname for each per replica ingress is + ## {{ ingressPerReplica.hostPrefix }}-{{ $replicaNumber }}.{{ ingressPerReplica.hostDomain }} + ## + ## Prefix for the per replica ingress that will have `-$replicaNumber` + ## appended to the end + hostPrefix: "" + ## Domain that will be used for the per replica ingress + hostDomain: "" + + ## Paths to use for ingress rules + ## + paths: [] + # - / + + ## For Kubernetes >= 1.18 you should specify the pathType (determines how Ingress paths should be matched) + ## See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#better-path-matching-with-path-types + # pathType: ImplementationSpecific + + ## Secret name containing the TLS certificate for Prometheus per replica ingress + ## Secret must be manually created in the namespace + tlsSecretName: "" + + ## Separated secret for each per replica Ingress. Can be used together with cert-manager + ## + tlsSecretPerReplica: + enabled: false + ## Final form of the secret for each per replica ingress is + ## {{ tlsSecretPerReplica.prefix }}-{{ $replicaNumber }} + ## + prefix: "prometheus" + + serviceMonitor: + ## If true, create a serviceMonitor for prometheus + ## + selfMonitor: true + + ## Scrape interval. If not set, the Prometheus default scrape interval is used. + ## + interval: "" + + ## Additional labels + ## + additionalLabels: {} + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: "" + + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#tlsconfig + tlsConfig: {} + + bearerTokenFile: + + ## Metric relabel configs to apply to samples before ingestion. + ## + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + # relabel configs to apply to samples before ingestion. + ## + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + + ## Additional Endpoints + ## + additionalEndpoints: [] + # - port: oauth-metrics + # path: /metrics + + ## Settings affecting prometheusSpec + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#prometheusspec + ## + prometheusSpec: + ## Statefulset's persistent volume claim retention policy + ## whenDeleted and whenScaled determine whether + ## statefulset's PVCs are deleted (true) or retained (false) + ## on scaling down and deleting statefulset, respectively. + ## Requires Kubernetes version 1.27.0+. + ## Ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + persistentVolumeClaimRetentionPolicy: {} + # whenDeleted: Retain + # whenScaled: Retain + + ## If true, pass --storage.tsdb.max-block-duration=2h to prometheus. This is already done if using Thanos + ## + disableCompaction: false + + ## AutomountServiceAccountToken indicates whether a service account token should be automatically mounted in the pod, + ## If the field isn't set, the operator mounts the service account token by default. + ## Warning: be aware that by default, Prometheus requires the service account token for Kubernetes service discovery, + ## It is possible to use strategic merge patch to project the service account token into the 'prometheus' container. + automountServiceAccountToken: true + + ## APIServerConfig + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#apiserverconfig + ## + apiserverConfig: {} + + ## Allows setting additional arguments for the Prometheus container + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.Prometheus + additionalArgs: [] + + ## Convert all classic histograms to native histograms with custom buckets. + ## This corresponds to the 'convert_classic_histograms_to_nhcb' field in Prometheus configuration. + ## + convertClassicHistogramsToNHCB: false + + ## Enable scraping of classic histograms that are also exposed as native histograms. + ## This corresponds to the 'always_scrape_classic_histograms' field in Prometheus configuration. + ## + scrapeClassicHistograms: false + + ## Enable scraping of native histograms. + ## This corresponds to the 'scrape_native_histograms' field in Prometheus configuration. + ## + scrapeNativeHistograms: false + + ## File to which scrape failures are logged. + ## Reloading the configuration will reopen the file. + ## Defaults to empty (disabled) + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.Prometheus + ## + scrapeFailureLogFile: "" + + ## Interval between consecutive scrapes. + ## Defaults to 30s. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/release-0.44/pkg/prometheus/promcfg.go#L180-L183 + ## + scrapeInterval: "" + + ## Number of seconds to wait for target to respond before erroring + ## + scrapeTimeout: "" + + ## List of scrape classes to expose to scraping objects such as + ## PodMonitors, ServiceMonitors, Probes and ScrapeConfigs. + ## + scrapeClasses: [] + # - name: istio-mtls + # default: false + # tlsConfig: + # caFile: /etc/prometheus/secrets/istio.default/root-cert.pem + # certFile: /etc/prometheus/secrets/istio.default/cert-chain.pem + + ## PodTargetLabels are appended to the `spec.podTargetLabels` field of all PodMonitor and ServiceMonitor objects. + ## + podTargetLabels: [] + # - customlabel + + ## Interval between consecutive evaluations. + ## + evaluationInterval: "" + + ## ListenLocal makes the Prometheus server listen on loopback, so that it does not bind against the Pod IP. + ## + listenLocal: false + + ## enableOTLPReceiver enables the OTLP receiver for Prometheus. + enableOTLPReceiver: false + + ## EnableAdminAPI enables Prometheus the administrative HTTP API which includes functionality such as deleting time series. + ## This is disabled by default. + ## ref: https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-admin-apis + ## + enableAdminAPI: false + + ## Sets version of Prometheus overriding the Prometheus version as derived + ## from the image tag. Useful in cases where the tag does not follow semver v2. + version: "" + + ## WebTLSConfig defines the TLS parameters for HTTPS + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#webtlsconfig + web: {} + + ## Exemplars related settings that are runtime reloadable. + ## It requires to enable the exemplar storage feature to be effective. + exemplars: {} + ## Maximum number of exemplars stored in memory for all series. + ## If not set, Prometheus uses its default value. + ## A value of zero or less than zero disables the storage. + # maxSize: 100000 + + # EnableFeatures API enables access to Prometheus disabled features. + # ref: https://prometheus.io/docs/prometheus/latest/feature_flags/ + enableFeatures: [] + # - exemplar-storage + + ## https://prometheus.io/docs/guides/opentelemetry + ## + otlp: {} + # promoteResourceAttributes: [] + # keepIdentifyingResourceAttributes: false + # translationStrategy: NoUTF8EscapingWithSuffixes + # convertHistogramsToNHCB: false + + ## + serviceName: + + ## Image of Prometheus. + ## + image: + registry: quay.io + repository: prometheus/prometheus + tag: v3.11.3-distroless + sha: "" + pullPolicy: IfNotPresent + + ## Tolerations for use with node taints + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal" + # value: "value" + # effect: "NoSchedule" + + ## If specified, the pod's topology spread constraints. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ + ## + topologySpreadConstraints: [] + # - maxSkew: 1 + # topologyKey: topology.kubernetes.io/zone + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: + # app: prometheus + + ## Disable alerting + ## + disableAlerting: false + + ## Alertmanagers to which alerts will be sent + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#alertmanagerendpoints + ## + ## Default configuration will connect to the alertmanager deployed as part of this release + ## + alertingEndpoints: [] + # - name: "" + # namespace: "" + # port: http + # scheme: http + # pathPrefix: "" + # tlsConfig: {} + # bearerTokenFile: "" + # apiVersion: v2 + + ## External labels to add to any time series or alerts when communicating with external systems + ## + externalLabels: {} + + ## enable --web.enable-remote-write-receiver flag on prometheus-server + ## + enableRemoteWriteReceiver: false + + ## Name of the external label used to denote replica name + ## + replicaExternalLabelName: "" + + ## If true, the Operator won't add the external label used to denote replica name + ## + replicaExternalLabelNameClear: false + + ## Name of the external label used to denote Prometheus instance name + ## + prometheusExternalLabelName: "" + + ## If true, the Operator won't add the external label used to denote Prometheus instance name + ## + prometheusExternalLabelNameClear: false + + ## External URL at which Prometheus will be reachable. + ## + externalUrl: "" + + ## Define which Nodes the Pods are scheduled on. + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + ## + nodeSelector: {} + + ## Secrets is a list of Secrets in the same namespace as the Prometheus object, which shall be mounted into the Prometheus Pods. + ## The Secrets are mounted into /etc/prometheus/secrets/. Secrets changes after initial creation of a Prometheus object are not + ## reflected in the running Pods. To change the secrets mounted into the Prometheus Pods, the object must be deleted and recreated + ## with the new list of secrets. + ## + secrets: [] + + ## ConfigMaps is a list of ConfigMaps in the same namespace as the Prometheus object, which shall be mounted into the Prometheus Pods. + ## The ConfigMaps are mounted into /etc/prometheus/configmaps/. + ## + configMaps: [] + + ## QuerySpec defines the query command line flags when starting Prometheus. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#queryspec + ## + query: {} + + ## If nil, select own namespace. Namespaces to be selected for PrometheusRules discovery. + ruleNamespaceSelector: {} + ## Example which selects PrometheusRules in namespaces with label "prometheus" set to "somelabel" + # ruleNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## If true, a nil or {} value for prometheus.prometheusSpec.ruleSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the PrometheusRule resources created + ## + ruleSelectorNilUsesHelmValues: true + + ## PrometheusRules to be selected for target discovery. + ## If {}, select all PrometheusRules + ## + ruleSelector: {} + ## Example which select all PrometheusRules resources + ## with label "prometheus" with values any of "example-rules" or "example-rules-2" + # ruleSelector: + # matchExpressions: + # - key: prometheus + # operator: In + # values: + # - example-rules + # - example-rules-2 + # + ## Example which select all PrometheusRules resources with label "role" set to "example-rules" + # ruleSelector: + # matchLabels: + # role: example-rules + + ## If true, a nil or {} value for prometheus.prometheusSpec.serviceMonitorSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the servicemonitors created + ## + serviceMonitorSelectorNilUsesHelmValues: true + + ## ServiceMonitors to be selected for target discovery. + ## If {}, select all ServiceMonitors + ## + serviceMonitorSelector: {} + ## Example which selects ServiceMonitors with label "prometheus" set to "somelabel" + # serviceMonitorSelector: + # matchLabels: + # prometheus: somelabel + + ## Namespaces to be selected for ServiceMonitor discovery. + ## + serviceMonitorNamespaceSelector: {} + ## Example which selects ServiceMonitors in namespaces with label "prometheus" set to "somelabel" + # serviceMonitorNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## If true, a nil or {} value for prometheus.prometheusSpec.podMonitorSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the podmonitors created + ## + podMonitorSelectorNilUsesHelmValues: true + + ## PodMonitors to be selected for target discovery. + ## If {}, select all PodMonitors + ## + podMonitorSelector: {} + ## Example which selects PodMonitors with label "prometheus" set to "somelabel" + # podMonitorSelector: + # matchLabels: + # prometheus: somelabel + + ## If nil, select own namespace. Namespaces to be selected for PodMonitor discovery. + podMonitorNamespaceSelector: {} + ## Example which selects PodMonitor in namespaces with label "prometheus" set to "somelabel" + # podMonitorNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## If true, a nil or {} value for prometheus.prometheusSpec.probeSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the probes created + ## + probeSelectorNilUsesHelmValues: true + + ## Probes to be selected for target discovery. + ## If {}, select all Probes + ## + probeSelector: {} + ## Example which selects Probes with label "prometheus" set to "somelabel" + # probeSelector: + # matchLabels: + # prometheus: somelabel + + ## If nil, select own namespace. Namespaces to be selected for Probe discovery. + probeNamespaceSelector: {} + ## Example which selects Probe in namespaces with label "prometheus" set to "somelabel" + # probeNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## If true, a nil or {} value for prometheus.prometheusSpec.scrapeConfigSelector will cause the + ## prometheus resource to be created with selectors based on values in the helm deployment, + ## which will also match the scrapeConfigs created + ## + ## If null and scrapeConfigSelector is also null, exclude field from the prometheusSpec + ## (keeping downward compatibility with older versions of CRD) + ## + scrapeConfigSelectorNilUsesHelmValues: true + + ## scrapeConfigs to be selected for target discovery. + ## If {}, select all scrapeConfigs + ## + scrapeConfigSelector: {} + ## Example which selects scrapeConfigs with label "prometheus" set to "somelabel" + # scrapeConfigSelector: + # matchLabels: + # prometheus: somelabel + + ## If nil, select own namespace. Namespaces to be selected for scrapeConfig discovery. + ## If null, exclude the field from the prometheusSpec (keeping downward compatibility with older versions of CRD) + scrapeConfigNamespaceSelector: {} + ## Example which selects scrapeConfig in namespaces with label "prometheus" set to "somelabel" + # scrapeConfigNamespaceSelector: + # matchLabels: + # prometheus: somelabel + + ## How long to retain metrics + ## + retention: 10d + + ## Maximum size of metrics + ## Unit format should be in the form of "50GiB" + retentionSize: "" + + ## Allow out-of-order/out-of-bounds samples ingested into Prometheus for a specified duration + ## See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb + tsdb: + outOfOrderTimeWindow: 0s + + ## Enable compression of the write-ahead log using Snappy. + ## + walCompression: true + + ## If true, the Operator won't process any Prometheus configuration changes + ## + paused: false + + ## Number of replicas of each shard to deploy for a Prometheus deployment. + ## Number of replicas multiplied by shards is the total number of Pods created. + ## + replicas: 1 + + ## EXPERIMENTAL: Number of shards to distribute targets onto. + ## Number of replicas multiplied by shards is the total number of Pods created. + ## Note that scaling down shards will not reshard data onto remaining instances, it must be manually moved. + ## Increasing shards will not reshard data either but it will continue to be available from the same instances. + ## To query globally use Thanos sidecar and Thanos querier or remote write data to a central location. + ## Sharding is done on the content of the `__address__` target meta-label. + ## + shards: 1 + + ## Log level for Prometheus be configured in + ## + logLevel: info + + ## Log format for Prometheus be configured in + ## + logFormat: logfmt + + ## Prefix used to register routes, overriding externalUrl route. + ## Useful for proxies that rewrite URLs. + ## + routePrefix: / + + ## Standard object's metadata. More info: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#metadata + ## Metadata Labels and Annotations gets propagated to the prometheus pods. + ## + podMetadata: {} + # labels: + # app: prometheus + # k8s-app: prometheus + + ## Pod anti-affinity can prevent the scheduler from placing Prometheus replicas on the same node. + ## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. + ## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node. + ## The value "" will disable pod anti-affinity so that no anti-affinity rules will be configured. + podAntiAffinity: "soft" + + ## If anti-affinity is enabled sets the topologyKey to use for anti-affinity. + ## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone + ## + podAntiAffinityTopologyKey: kubernetes.io/hostname + + ## Assign custom affinity rules to the prometheus instance + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ + ## + affinity: {} + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: kubernetes.io/e2e-az-name + # operator: In + # values: + # - e2e-az1 + # - e2e-az2 + + ## The remote_read spec configuration for Prometheus. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#remotereadspec + remoteRead: [] + # - url: http://remote1/read + ## additionalRemoteRead is appended to remoteRead + additionalRemoteRead: [] + + ## The remote_write spec configuration for Prometheus. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#remotewritespec + remoteWrite: [] + # - url: http://remote1/push + ## additionalRemoteWrite is appended to remoteWrite + additionalRemoteWrite: [] + + ## Enable/Disable Grafana dashboards provisioning for prometheus remote write feature + remoteWriteDashboards: false + + ## Resource limits & requests + ## + resources: {} + # requests: + # memory: 400Mi + + ## Prometheus StorageSpec for persistent data + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/platform/storage.md + ## + storageSpec: {} + ## Using PersistentVolumeClaim + ## + # volumeClaimTemplate: + # spec: + # storageClassName: gluster + # accessModes: ["ReadWriteOnce"] + # resources: + # requests: + # storage: 50Gi + # selector: {} + + ## Using tmpfs volume + ## + # emptyDir: + # medium: Memory + + # Additional volumes on the output StatefulSet definition. + volumes: [] + + # Additional VolumeMounts on the output StatefulSet definition. + volumeMounts: [] + + ## AdditionalScrapeConfigs allows specifying additional Prometheus scrape configurations. Scrape configurations + ## are appended to the configurations generated by the Prometheus Operator. Job configurations must have the form + ## as specified in the official Prometheus documentation: + ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config. As scrape configs are + ## appended, the user is responsible to make sure it is valid. Note that using this feature may expose the possibility + ## to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible + ## scrape configs are going to break Prometheus after the upgrade. + ## AdditionalScrapeConfigs can be defined as a list or as a templated string. + ## + ## The scrape configuration example below will find master nodes, provided they have the name .*mst.*, relabel the + ## port to 2379 and allow etcd scraping provided it is running on all Kubernetes master nodes + ## + additionalScrapeConfigs: [] + # - job_name: kube-etcd + # kubernetes_sd_configs: + # - role: node + # scheme: https + # tls_config: + # ca_file: /etc/prometheus/secrets/etcd-client-cert/etcd-ca + # cert_file: /etc/prometheus/secrets/etcd-client-cert/etcd-client + # key_file: /etc/prometheus/secrets/etcd-client-cert/etcd-client-key + # relabel_configs: + # - action: labelmap + # regex: __meta_kubernetes_node_label_(.+) + # - source_labels: [__address__] + # action: replace + # target_label: __address__ + # regex: ([^:;]+):(\d+) + # replacement: ${1}:2379 + # - source_labels: [__meta_kubernetes_node_name] + # action: keep + # regex: .*mst.* + # - source_labels: [__meta_kubernetes_node_name] + # action: replace + # target_label: node + # regex: (.*) + # replacement: ${1} + # metric_relabel_configs: + # - regex: (kubernetes_io_hostname|failure_domain_beta_kubernetes_io_region|beta_kubernetes_io_os|beta_kubernetes_io_arch|beta_kubernetes_io_instance_type|failure_domain_beta_kubernetes_io_zone) + # action: labeldrop + # + ## If scrape config contains a repetitive section, you may want to use a template. + ## In the following example, you can see how to define `gce_sd_configs` for multiple zones + # additionalScrapeConfigs: | + # - job_name: "node-exporter" + # gce_sd_configs: + # {{range $zone := .Values.gcp_zones}} + # - project: "project1" + # zone: "{{$zone}}" + # port: 9100 + # {{end}} + # relabel_configs: + # ... + + + ## If additional scrape configurations are already deployed in a single secret file you can use this section. + ## Expected values are the secret name and key + ## Cannot be used with additionalScrapeConfigs + additionalScrapeConfigsSecret: {} + # enabled: false + # name: + # key: + + ## additionalPrometheusSecretsAnnotations allows to add annotations to the kubernetes secret. This can be useful + ## when deploying via spinnaker to disable versioning on the secret, strategy.spinnaker.io/versioned: 'false' + additionalPrometheusSecretsAnnotations: {} + + ## AdditionalAlertManagerConfigs allows for manual configuration of alertmanager jobs in the form as specified + ## in the official Prometheus documentation https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config. + ## AlertManager configurations specified are appended to the configurations generated by the Prometheus Operator. + ## As AlertManager configs are appended, the user is responsible to make sure it is valid. Note that using this + ## feature may expose the possibility to break upgrades of Prometheus. It is advised to review Prometheus release + ## notes to ensure that no incompatible AlertManager configs are going to break Prometheus after the upgrade. + ## + additionalAlertManagerConfigs: [] + # - consul_sd_configs: + # - server: consul.dev.test:8500 + # scheme: http + # datacenter: dev + # tag_separator: ',' + # services: + # - metrics-prometheus-alertmanager + + ## If additional alertmanager configurations are already deployed in a single secret, or you want to manage + ## them separately from the helm deployment, you can use this section. + ## Expected values are the secret name and key + ## Cannot be used with additionalAlertManagerConfigs + additionalAlertManagerConfigsSecret: {} + # name: + # key: + # optional: false + + ## AdditionalAlertRelabelConfigs allows specifying Prometheus alert relabel configurations. Alert relabel configurations specified are appended + ## to the configurations generated by the Prometheus Operator. Alert relabel configurations specified must have the form as specified in the + ## official Prometheus documentation: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alert_relabel_configs. + ## As alert relabel configs are appended, the user is responsible to make sure it is valid. Note that using this feature may expose the + ## possibility to break upgrades of Prometheus. It is advised to review Prometheus release notes to ensure that no incompatible alert relabel + ## configs are going to break Prometheus after the upgrade. + ## + additionalAlertRelabelConfigs: [] + # - separator: ; + # regex: prometheus_replica + # replacement: $1 + # action: labeldrop + + ## If additional alert relabel configurations are already deployed in a single secret, or you want to manage + ## them separately from the helm deployment, you can use this section. + ## Expected values are the secret name and key + ## Cannot be used with additionalAlertRelabelConfigs + additionalAlertRelabelConfigsSecret: {} + # name: + # key: + + ## SecurityContext holds pod-level security attributes and common container settings. + ## This defaults to non root user with uid 1000 and gid 2000. + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md + ## + securityContext: + runAsGroup: 2000 + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 2000 + seccompProfile: + type: RuntimeDefault + + ## DNS configuration for Prometheus. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#monitoring.coreos.com/v1.PodDNSConfig + dnsConfig: {} + + ## DNS policy for Prometheus. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#dnspolicystring-alias + dnsPolicy: "" + + ## Priority class assigned to the Pods + ## + priorityClassName: "" + + ## Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment. + ## This section is experimental, it may change significantly without deprecation notice in any release. + ## This is experimental and may change significantly without backward compatibility in any release. + ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#thanosspec + ## + thanos: {} + # image: quay.io/thanos/thanos + # secretProviderClass: + # provider: gcp + # parameters: + # secrets: | + # - resourceName: "projects/$PROJECT_ID/secrets/testsecret/versions/latest" + # fileName: "objstore.yaml" + ## ObjectStorageConfig configures object storage in Thanos. + # objectStorageConfig: + # # use existing secret, if configured, objectStorageConfig.secret will not be used + # existingSecret: {} + # # name: "" + # # key: "" + # # will render objectStorageConfig secret data and configure it to be used by Thanos custom resource, + # # ignored when prometheusspec.thanos.objectStorageConfig.existingSecret is set + # # https://thanos.io/tip/thanos/storage.md/#s3 + # secret: {} + # # type: S3 + # # config: + # # bucket: "" + # # endpoint: "" + # # region: "" + # # access_key: "" + # # secret_key: "" + + ## Containers allows injecting additional containers. This is meant to allow adding an authentication proxy to a Prometheus pod. + ## if using proxy extraContainer update targetPort with proxy container port + containers: [] + # containers: + # - name: oauth-proxy + # image: quay.io/oauth2-proxy/oauth2-proxy:v7.15.2 + # args: + # - --upstream=http://127.0.0.1:9090 + # - --http-address=0.0.0.0:8081 + # - --metrics-address=0.0.0.0:8082 + # - ... + # ports: + # - containerPort: 8081 + # name: oauth-proxy + # protocol: TCP + # - containerPort: 8082 + # name: oauth-metrics + # protocol: TCP + # resources: {} + + ## InitContainers allows injecting additional initContainers. This is meant to allow doing some changes + ## (permissions, dir tree) on mounted volumes before starting prometheus + initContainers: [] + + ## PortName to use for Prometheus. + ## + portName: "http-web" + + ## ArbitraryFSAccessThroughSMs configures whether configuration based on a service monitor can access arbitrary files + ## on the file system of the Prometheus container e.g. bearer token files. + arbitraryFSAccessThroughSMs: false + + ## OverrideHonorLabels if set to true overrides all user configured honor_labels. If HonorLabels is set in ServiceMonitor + ## or PodMonitor to true, this overrides honor_labels to false. + overrideHonorLabels: false + + ## OverrideHonorTimestamps allows to globally enforce honoring timestamps in all scrape configs. + overrideHonorTimestamps: false + + ## When ignoreNamespaceSelectors is set to true, namespaceSelector from all PodMonitor, ServiceMonitor and Probe objects will be ignored, + ## they will only discover targets within the namespace of the PodMonitor, ServiceMonitor and Probe object, + ## and servicemonitors will be installed in the default service namespace. + ## Defaults to false. + ignoreNamespaceSelectors: false + + ## EnforcedNamespaceLabel enforces adding a namespace label of origin for each alert and metric that is user created. + ## The label value will always be the namespace of the object that is being created. + ## Disabled by default + enforcedNamespaceLabel: "" + + ## PrometheusRulesExcludedFromEnforce - list of prometheus rules to be excluded from enforcing of adding namespace labels. + ## Works only if enforcedNamespaceLabel set to true. Make sure both ruleNamespace and ruleName are set for each pair + ## Deprecated, use `excludedFromEnforcement` instead + prometheusRulesExcludedFromEnforce: [] + + ## ExcludedFromEnforcement - list of object references to PodMonitor, ServiceMonitor, Probe and PrometheusRule objects + ## to be excluded from enforcing a namespace label of origin. + ## Works only if enforcedNamespaceLabel set to true. + ## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#objectreference + excludedFromEnforcement: [] + + ## QueryLogFile specifies the file to which PromQL queries are logged. Note that this location must be writable, + ## and can be persisted using an attached volume. Alternatively, the location can be set to a stdout location such + ## as /dev/stdout to log querie information to the default Prometheus log stream. This is only available in versions + ## of Prometheus >= 2.16.0. For more details, see the Prometheus docs (https://prometheus.io/docs/guides/query-log/) + queryLogFile: false + + # Use to set global sample_limit for Prometheus. This act as default SampleLimit for ServiceMonitor or/and PodMonitor. + # Set to 'false' to disable global sample_limit. or set to a number to override the default value. + sampleLimit: false + + # EnforcedKeepDroppedTargetsLimit defines on the number of targets dropped by relabeling that will be kept in memory. + # The value overrides any spec.keepDroppedTargets set by ServiceMonitor, PodMonitor, Probe objects unless spec.keepDroppedTargets + # is greater than zero and less than spec.enforcedKeepDroppedTargets. 0 means no limit. + enforcedKeepDroppedTargets: 0 + + ## EnforcedSampleLimit defines global limit on number of scraped samples that will be accepted. This overrides any SampleLimit + ## set per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to enforce the SampleLimit to keep overall + ## number of samples/series under the desired limit. Note that if SampleLimit is lower that value will be taken instead. + enforcedSampleLimit: false + + ## EnforcedTargetLimit defines a global limit on the number of scraped targets. This overrides any TargetLimit set + ## per ServiceMonitor or/and PodMonitor. It is meant to be used by admins to enforce the TargetLimit to keep the overall + ## number of targets under the desired limit. Note that if TargetLimit is lower, that value will be taken instead, except + ## if either value is zero, in which case the non-zero value will be used. If both values are zero, no limit is enforced. + enforcedTargetLimit: false + + + ## Per-scrape limit on number of labels that will be accepted for a sample. If more than this number of labels are present + ## post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus versions + ## 2.27.0 and newer. + enforcedLabelLimit: false + + ## Per-scrape limit on length of labels name that will be accepted for a sample. If a label name is longer than this number + ## post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus versions + ## 2.27.0 and newer. + enforcedLabelNameLengthLimit: false + + ## Per-scrape limit on length of labels value that will be accepted for a sample. If a label value is longer than this + ## number post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus + ## versions 2.27.0 and newer. + enforcedLabelValueLengthLimit: false + + ## AllowOverlappingBlocks enables vertical compaction and vertical query merge in Prometheus. This is still experimental + ## in Prometheus so it may change in any upcoming release. + allowOverlappingBlocks: false + + ## Specifies the validation scheme for metric and label names. + ## Supported values are: Legacy, UTF8 + nameValidationScheme: "" + + ## Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to + ## be considered available. Defaults to 0 (pod will be considered available as soon as it is ready). + minReadySeconds: 0 + + ## Duration in seconds the pod needs to terminate gracefully. + ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-termination + terminationGracePeriodSeconds: ~ + + # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), + # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working + # Use the host's network namespace if true. Make sure to understand the security implications if you want to enable it. + # When hostNetwork is enabled, this will set dnsPolicy to ClusterFirstWithHostNet automatically. + hostNetwork: false + + ## Use the host's user namespace for Prometheus pods. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/ + hostUsers: ~ + + # HostAlias holds the mapping between IP and hostnames that will be injected + # as an entry in the pod's hosts file. + hostAliases: [] + # - ip: 10.10.0.100 + # hostnames: + # - a1.app.local + # - b1.app.local + + ## TracingConfig configures tracing in Prometheus. + ## See https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#prometheustracingconfig + tracingConfig: {} + + ## Defines the service discovery role used to discover targets from ServiceMonitor objects and Alertmanager endpoints. + ## If set, the value should be either "Endpoints" or "EndpointSlice". If unset, the operator assumes the "Endpoints" role. + serviceDiscoveryRole: "" + + ## Pod management policy. Kubernetes default is OrderedReady but prometheus-operator default is Parallel. + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies + podManagementPolicy: "" + + ## Update strategy for the StatefulSet. + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#update-strategies + updateStrategy: {} + # type: RollingUpdate + # rollingUpdate: + # maxUnavailable: 1 + + ## Additional configuration which is not covered by the properties above. (passed through tpl) + additionalConfig: {} + + ## Additional configuration which is not covered by the properties above. + ## Useful, if you need advanced templating inside alertmanagerSpec. + ## Otherwise, use prometheus.prometheusSpec.additionalConfig (passed through tpl) + additionalConfigString: "" + + ## Defines the maximum time that the `prometheus` container's startup probe + ## will wait before being considered failed. The startup probe will return + ## success after the WAL replay is complete. If set, the value should be + ## greater than 60 (seconds). Otherwise it will be equal to 900 seconds (15 + ## minutes). + maximumStartupDurationSeconds: 0 + + ## Set default scrapeProtocols for Prometheus instances + ## https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api-reference/api.md#scrapeprotocolstring-alias + scrapeProtocols: [] + + additionalRulesForClusterRole: [] + # - apiGroups: [ "" ] + # resources: + # - nodes/proxy + # verbs: [ "get", "list", "watch" ] + + additionalServiceMonitors: [] + ## Name of the ServiceMonitor to create + ## + # - name: "" + + ## Additional labels to set used for the ServiceMonitorSelector. Together with standard labels from + ## the chart + ## + # additionalLabels: {} + + ## Service label for use in assembling a job name of the form