fix(rules/bootstrap): Format yaml

2025-12-29 13:23:04 +01:00
parent f81a4b2732
commit 957526a6bc
22 changed files with 2142 additions and 2044 deletions
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 # the shebang is ignored, but nice for editors
 watch_file nix/sources.json
 watch_file nix/checks.nix
 # Load .env file if it exists
 dotenv_if_exists
@@ -8,7 +8,7 @@ stages:
 release:
  stage: release
  rules:
-  - if: '$CI_COMMIT_BRANCH =~ /^main/'
+    - if: "$CI_COMMIT_BRANCH =~ /^main/"
      when: always
    - when: never
  script:
@@ -43,4 +43,3 @@ rebuild:
               "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
        fi
      done
@@ -4,15 +4,15 @@ metadata:
  name: argocd-cluster-admin
 rules:
  - apiGroups:
-  - '*'
+      - "*"
    resources:
-  - '*'
+      - "*"
    verbs:
-  - '*'
+      - "*"
  - nonResourceURLs:
-  - '*'
+      - "*"
    verbs:
-  - '*'
+      - "*"
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
@@ -6,5 +6,3 @@ metadata:
  name: cluster-admin-token
  namespace: kube-system
 type: kubernetes.io/service-account-token
@@ -10,5 +10,3 @@ metadata:
  name: cluster-ekman
  namespace: argocd
 type: Opaque
@@ -66,7 +66,7 @@ spec:
        itemType: string
        collectionType: string
        string: ""
-      # All the fields above besides "string" apply to both the array and map type parameter announcements.
+        # All the fields above besides 'string' apply to both the array and map type parameter announcements.
        # - name: array-param
        #   # This field communicates the parameter's default value to the UI. Setting this field is optional.
        #   array: [default, items]
@@ -84,4 +84,3 @@ spec:
    # If set to `true` then the plugin receives repository files with original file mode. Dangerous since the repository
    # might have executable files. Set to true only if you trust the CMP plugin authors.
    preserveFileMode: false
@@ -422,4 +422,3 @@ spec:
                path: ca.crt
            optional: true
            secretName: argocd-repo-server-tls
@@ -13,4 +13,3 @@ stringData:
  name: staging-vcluster
  server: https://staging-vcluster.staging-vcluster
 type: Opaque
@@ -32,12 +32,12 @@ projects:
    additionalAnnotations: {}
    description: sys components project
    sourceRepos:
-    - '*'
+      - "*"
    destinations:
-    - namespace: '*'
+      - namespace: "*"
        server: https://kubernetes.default.svc
    clusterResourceWhitelist:
-    - group: '*'
+      - group: "*"
-      kind: '*'
+        kind: "*"
    orphanedResources:
      warn: false
@@ -5,6 +5,8 @@ let
  globalExcludes = [
    "nix/default.nix"
    "attic"
    "vcluster"
    ".*vendor"
    ".*chart/.*"
    ".*schema.json"
@@ -32,6 +34,7 @@ pre-commit.run {
      enable = true;
      excludes = [
        "vcluster/"
        "attic/"
      ];
      args = [
        "-x"
@@ -41,15 +44,17 @@ pre-commit.run {
    };
    yamllint = {
-      enable = false;
+      enable = true;
      excludes = [
        "attic/"
        "charts/templates/"
-        "charts/charts/"
+        "charts/"
        "values/"
        "vcluster/"
      ];
      settings = {
        strict = true;
-        configData = ''{ extends: default, rules: { document-start: disable, line-length: {max: 165} } }'';
+        configData = ''{ extends: default, rules: { document-start: disable, line-length: {max: 300} } }'';
      };
    };
@@ -3,7 +3,8 @@ groups:
    rules:
      - alert: etcdMembersDown
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value
+          description:
            'etcd cluster "{{ $labels.job }}": members are down ({{ $value
            }}).'
          summary: etcd cluster members are down.
        expr: |-
@@ -20,17 +21,20 @@ groups:
          severity: critical
      - alert: etcdInsufficientMembers
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
+          description:
            'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
            }}).'
          summary: etcd cluster has insufficient number of members.
-    expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
+        expr:
          sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
          without (instance) + 1) / 2)
        for: 3m
        labels:
          severity: critical
      - alert: etcdNoLeader
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
+          description:
            'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
            has no leader.'
          summary: etcd cluster has no leader.
        expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
@@ -39,12 +43,14 @@ groups:
          severity: critical
      - alert: etcdHighNumberOfLeaderChanges
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
+          description:
            'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
            within the last 15 minutes. Frequent elections may be a sign of insufficient
            resources, high network latency, or disruptions by other components and should
            be investigated.'
          summary: etcd cluster has high number of leader changes.
-    expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
+        expr:
          increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
          or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m])
          >= 4
        for: 5m
@@ -52,7 +58,8 @@ groups:
          severity: warning
      - alert: etcdHighNumberOfFailedGRPCRequests
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
+          description:
            'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
            {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
          summary: etcd cluster has high number of failed grpc requests.
        expr: |-
@@ -65,7 +72,8 @@ groups:
          severity: warning
      - alert: etcdHighNumberOfFailedGRPCRequests
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
+          description:
            'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
            {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
          summary: etcd cluster has high number of failed grpc requests.
        expr: |-
@@ -78,7 +86,8 @@ groups:
          severity: critical
      - alert: etcdGRPCRequestsSlow
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
+          description:
            'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
            is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method
            }} method.'
          summary: etcd grpc requests are slow
@@ -90,7 +99,8 @@ groups:
          severity: critical
      - alert: etcdMemberCommunicationSlow
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": member communication with {{
+          description:
            'etcd cluster "{{ $labels.job }}": member communication with {{
            $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance
            }}.'
          summary: etcd cluster member communication is slow.
@@ -102,7 +112,8 @@ groups:
          severity: warning
      - alert: etcdHighNumberOfFailedProposals
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
+          description:
            'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
            within the last 30 minutes on etcd instance {{ $labels.instance }}.'
          summary: etcd cluster has high number of proposal failures.
        expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
@@ -111,7 +122,8 @@ groups:
          severity: warning
      - alert: etcdHighFsyncDurations
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
+          description:
            'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
            are {{ $value }}s on etcd instance {{ $labels.instance }}.'
          summary: etcd cluster 99th percentile fsync durations are too high.
        expr: |-
@@ -122,7 +134,8 @@ groups:
          severity: warning
      - alert: etcdHighFsyncDurations
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
+          description:
            'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
            are {{ $value }}s on etcd instance {{ $labels.instance }}.'
          summary: etcd cluster 99th percentile fsync durations are too high.
        expr: |-
@@ -133,7 +146,8 @@ groups:
          severity: critical
      - alert: etcdHighCommitDurations
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
+          description:
            'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
            {{ $value }}s on etcd instance {{ $labels.instance }}.'
          summary: etcd cluster 99th percentile commit durations are too high.
        expr: |-
@@ -144,11 +158,13 @@ groups:
          severity: warning
      - alert: etcdDatabaseQuotaLowSpace
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined
+          description:
            'etcd cluster "{{ $labels.job }}": database size exceeds the defined
            quota on etcd instance {{ $labels.instance }}, please defrag or increase the
            quota as the writes to etcd will be disabled when it is full.'
          summary: etcd cluster database is running full.
-    expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
+        expr:
          (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
          last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 >
          95
        for: 10m
@@ -156,26 +172,31 @@ groups:
          severity: critical
      - alert: etcdExcessiveDatabaseGrowth
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk
+          description:
            'etcd cluster "{{ $labels.job }}": Predicting running out of disk
            space in the next four hours, based on write observations within the past
            four hours on etcd instance {{ $labels.instance }}, please check as it might
            be disruptive.'
          summary: etcd cluster database growing very fast.
-    expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
+        expr:
          predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
          > etcd_server_quota_backend_bytes{job=~".*etcd.*"}
        for: 10m
        labels:
          severity: warning
      - alert: etcdDatabaseHighFragmentationRatio
        annotations:
-      description: 'etcd cluster "{{ $labels.job }}": database size in use on instance
+          description:
            'etcd cluster "{{ $labels.job }}": database size in use on instance
            {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual
            allocated disk space, please run defragmentation (e.g. etcdctl defrag) to
            retrieve the unused fragmented disk space.'
          runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
-      summary: etcd database size in use is less than 50% of the actual allocated
+          summary:
            etcd database size in use is less than 50% of the actual allocated
            storage.
-    expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
+        expr:
          (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
          / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5
          and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
        for: 10m
@@ -3,11 +3,13 @@ groups:
    rules:
      - alert: TargetDown
        annotations:
-      description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
+          description:
            '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
            }} targets in {{ $labels.namespace }} namespace are down.'
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
          summary: One or more targets are unreachable.
-    expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
+        expr:
          100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
          BY (cluster, job, namespace, service)) > 10
        for: 10m
        labels:
@@ -21,7 +23,8 @@ groups:
            mechanisms that send a notification when this alert is not firing. For example the
            "DeadMansSnitch" integration in PagerDuty.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
-      summary: An alert that should always be firing to certify that Alertmanager
+          summary:
            An alert that should always be firing to certify that Alertmanager
            is working properly.
        expr: vector(1)
        labels:
@@ -37,7 +40,8 @@ groups:
            This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
          summary: Info-level alert inhibition.
-    expr: ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
+        expr:
          ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
          "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
        labels:
          severity: none
@@ -3,18 +3,21 @@ groups:
    rules:
      - alert: KubePodCrashLooping
        annotations:
-      description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
+          description:
            'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
            }}) is in waiting state (reason: "CrashLoopBackOff").'
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
          summary: Pod is crash looping.
-    expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
+        expr:
          max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
          job="kube-state-metrics", namespace=~".*"}[5m]) >= 1
        for: 15m
        labels:
          severity: warning
      - alert: KubePodNotReady
        annotations:
-      description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
+          description:
            Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
            state for longer than 15 minutes.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
          summary: Pod has been in a non-ready state for more than 15 minutes.
@@ -31,7 +34,8 @@ groups:
          severity: warning
      - alert: KubeDeploymentGenerationMismatch
        annotations:
-      description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
+          description:
            Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
            }} does not match, this indicates that the Deployment has failed but has not
            been rolled back.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
@@ -45,7 +49,8 @@ groups:
          severity: warning
      - alert: KubeDeploymentReplicasMismatch
        annotations:
-      description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
+          description:
            Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
            not matched the expected number of replicas for longer than 15 minutes.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
          summary: Deployment has not matched the expected number of replicas.
@@ -64,7 +69,8 @@ groups:
          severity: warning
      - alert: KubeDeploymentRolloutStuck
        annotations:
-      description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
+          description:
            Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
            }} is not progressing for longer than 15 minutes.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
          summary: Deployment rollout is not progressing.
@@ -76,7 +82,8 @@ groups:
          severity: warning
      - alert: KubeStatefulSetReplicasMismatch
        annotations:
-      description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
+          description:
            StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
            not matched the expected number of replicas for longer than 15 minutes.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
          summary: StatefulSet has not matched the expected number of replicas.
@@ -95,7 +102,8 @@ groups:
          severity: warning
      - alert: KubeStatefulSetGenerationMismatch
        annotations:
-      description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
+          description:
            StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
            }} does not match, this indicates that the StatefulSet has failed but has
            not been rolled back.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
@@ -109,7 +117,8 @@ groups:
          severity: warning
      - alert: KubeStatefulSetUpdateNotRolledOut
        annotations:
-      description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
+          description:
            StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
            has not been rolled out.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
          summary: StatefulSet update has not been rolled out.
@@ -136,7 +145,8 @@ groups:
          severity: warning
      - alert: KubeDaemonSetRolloutStuck
        annotations:
-      description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
+          description:
            DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
            finished or progressed for at least 15 minutes.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
          summary: DaemonSet rollout is stuck.
@@ -169,19 +179,22 @@ groups:
          severity: warning
      - alert: KubeContainerWaiting
        annotations:
-      description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
+          description:
            pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
            {{ $labels.container}} has been in waiting state for longer than 1 hour.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
          summary: Pod container waiting longer than 1 hour
-    expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
+        expr:
          sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
          namespace=~".*"}) > 0
        for: 1h
        labels:
          severity: warning
      - alert: KubeDaemonSetNotScheduled
        annotations:
-      description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
+          description:
-        }} are not scheduled.'
+            "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
            }} are not scheduled."
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
          summary: DaemonSet pods are not scheduled.
        expr: |-
@@ -193,18 +206,21 @@ groups:
          severity: warning
      - alert: KubeDaemonSetMisScheduled
        annotations:
-      description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
+          description:
-        }} are running where they are not supposed to run.'
+            "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
            }} are running where they are not supposed to run."
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
          summary: DaemonSet pods are misscheduled.
-    expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
+        expr:
          kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
          > 0
        for: 15m
        labels:
          severity: warning
      - alert: KubeJobNotCompleted
        annotations:
-      description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
+          description:
            Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
            than {{ "43200" | humanizeDuration }} to complete.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
          summary: Job did not complete in time
@@ -216,7 +232,8 @@ groups:
          severity: warning
      - alert: KubeJobFailed
        annotations:
-      description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
+          description:
            Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
            Removing failed job after investigation should clear this alert.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
          summary: Job failed to complete.
@@ -226,7 +243,8 @@ groups:
          severity: warning
      - alert: KubeHpaReplicasMismatch
        annotations:
-      description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }}
+          description:
            HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }}
            has not matched the desired number of replicas for longer than 15 minutes.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
          summary: HPA has not matched desired number of replicas.
@@ -249,7 +267,8 @@ groups:
          severity: warning
      - alert: KubeHpaMaxedOut
        annotations:
-      description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }}
+          description:
            HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }}
            has been running at max replicas for longer than 15 minutes.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
          summary: HPA is running at max replicas
@@ -3,7 +3,8 @@ groups:
    rules:
      - alert: KubeCPUOvercommit
        annotations:
-      description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
+          description:
            Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
            for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
          summary: Cluster has overcommitted CPU resource requests.
@@ -16,7 +17,8 @@ groups:
          severity: warning
      - alert: KubeMemoryOvercommit
        annotations:
-      description: Cluster {{ $labels.cluster }} has overcommitted memory resource
+          description:
            Cluster {{ $labels.cluster }} has overcommitted memory resource
            requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node
            failure.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
@@ -30,7 +32,8 @@ groups:
          severity: warning
      - alert: KubeCPUQuotaOvercommit
        annotations:
-      description: Cluster {{ $labels.cluster }}  has overcommitted CPU resource requests
+          description:
            Cluster {{ $labels.cluster }}  has overcommitted CPU resource requests
            for Namespaces.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
          summary: Cluster has overcommitted CPU resource requests.
@@ -44,7 +47,8 @@ groups:
          severity: warning
      - alert: KubeMemoryQuotaOvercommit
        annotations:
-      description: Cluster {{ $labels.cluster }}  has overcommitted memory resource
+          description:
            Cluster {{ $labels.cluster }}  has overcommitted memory resource
            requests for Namespaces.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
          summary: Cluster has overcommitted memory resource requests.
@@ -58,7 +62,8 @@ groups:
          severity: warning
      - alert: KubeQuotaAlmostFull
        annotations:
-      description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+          description:
            Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
            }} of its {{ $labels.resource }} quota.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
          summary: Namespace quota is going to be full.
@@ -72,7 +77,8 @@ groups:
          severity: info
      - alert: KubeQuotaFullyUsed
        annotations:
-      description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+          description:
            Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
            }} of its {{ $labels.resource }} quota.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
          summary: Namespace quota is fully used.
@@ -86,7 +92,8 @@ groups:
          severity: info
      - alert: KubeQuotaExceeded
        annotations:
-      description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+          description:
            Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
            }} of its {{ $labels.resource }} quota.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
          summary: Namespace quota has exceeded the limits.
@@ -100,9 +107,10 @@ groups:
          severity: warning
      - alert: CPUThrottlingHigh
        annotations:
-      description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
+          description:
            "{{ $value | humanizePercentage }} throttling of CPU in namespace
            {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
-        }}.'
+            }}."
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
          summary: Processes experience elevated CPU throttling.
        expr: |-
@@ -1,10 +1,10 @@
 groups:
  - name: kubernetes-storage
    rules:
      - alert: KubePersistentVolumeFillingUp
        annotations:
-      description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+          description:
            The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
            }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
            {{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
@@ -26,7 +26,8 @@ groups:
          severity: critical
      - alert: KubePersistentVolumeFillingUp
        annotations:
-      description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+          description:
            Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
            }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
            {{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value
            | humanizePercentage }} is available.
@@ -51,7 +52,8 @@ groups:
          severity: warning
      - alert: KubePersistentVolumeInodesFillingUp
        annotations:
-      description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+          description:
            The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
            }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
            {{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
@@ -73,7 +75,8 @@ groups:
          severity: critical
      - alert: KubePersistentVolumeInodesFillingUp
        annotations:
-      description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+          description:
            Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
            }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
            {{ . }} {{- end }} is expected to run out of inodes within four days. Currently
            {{ $value | humanizePercentage }} of its inodes are free.
@@ -98,11 +101,13 @@ groups:
          severity: warning
      - alert: KubePersistentVolumeErrors
        annotations:
-      description: The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
+          description:
            The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
            -}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
          summary: PersistentVolume is having issues with provisioning.
-    expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
+        expr:
          kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
          > 0
        for: 5m
        labels:
@@ -3,7 +3,8 @@ groups:
    rules:
      - alert: NodeFilesystemSpaceFillingUp
        annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
+          description:
            Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
            }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
            space left and is filling up.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
@@ -21,7 +22,8 @@ groups:
          severity: warning
      - alert: NodeFilesystemSpaceFillingUp
        annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
+          description:
            Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
            }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
            space left and is filling up fast.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
@@ -39,7 +41,8 @@ groups:
          severity: critical
      - alert: NodeFilesystemAlmostOutOfSpace
        annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
+          description:
            Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
            }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
            space left.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
@@ -55,7 +58,8 @@ groups:
          severity: warning
      - alert: NodeFilesystemAlmostOutOfSpace
        annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
+          description:
            Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
            }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
            space left.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
@@ -71,7 +75,8 @@ groups:
          severity: critical
      - alert: NodeFilesystemFilesFillingUp
        annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
+          description:
            Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
            }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
            inodes left and is filling up.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
@@ -89,7 +94,8 @@ groups:
          severity: warning
      - alert: NodeFilesystemFilesFillingUp
        annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
+          description:
            Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
            }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
            inodes left and is filling up fast.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
@@ -107,7 +113,8 @@ groups:
          severity: critical
      - alert: NodeFilesystemAlmostOutOfFiles
        annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
+          description:
            Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
            }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
            inodes left.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
@@ -123,7 +130,8 @@ groups:
          severity: warning
      - alert: NodeFilesystemAlmostOutOfFiles
        annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
+          description:
            Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
            }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
            inodes left.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
@@ -139,38 +147,44 @@ groups:
          severity: critical
      - alert: NodeNetworkReceiveErrs
        annotations:
-      description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
+          description:
            '{{ $labels.instance }} interface {{ $labels.device }} has encountered
            {{ printf "%.0f" $value }} receive errors in the last two minutes.'
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
          summary: Network interface is reporting many receive errors.
-    expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
+        expr:
          rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
          > 0.01
        for: 1h
        labels:
          severity: warning
      - alert: NodeNetworkTransmitErrs
        annotations:
-      description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
+          description:
            '{{ $labels.instance }} interface {{ $labels.device }} has encountered
            {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
          summary: Network interface is reporting many transmit errors.
-    expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
+        expr:
          rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
          > 0.01
        for: 1h
        labels:
          severity: warning
      - alert: NodeHighNumberConntrackEntriesUsed
        annotations:
-      description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
+          description: "{{ $value | humanizePercentage }} of conntrack entries are used."
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
          summary: Number of conntrack are getting close to the limit.
-    expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
+        expr:
          (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
          > 0.75
        labels:
          severity: warning
      - alert: NodeTextFileCollectorScrapeError
        annotations:
-      description: Node Exporter text file collector on {{ $labels.instance }} failed
+          description:
            Node Exporter text file collector on {{ $labels.instance }} failed
            to scrape.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
          summary: Node Exporter text file collector failed to scrape.
@@ -179,7 +193,8 @@ groups:
          severity: warning
      - alert: NodeClockSkewDetected
        annotations:
-      description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
+          description:
            Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
            Ensure NTP is configured correctly on this host.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
          summary: Clock skew detected.
@@ -200,7 +215,8 @@ groups:
          severity: warning
      - alert: NodeClockNotSynchronising
        annotations:
-      description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
+          description:
            Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
            is configured on this host.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
          summary: Clock not synchronising.
@@ -213,12 +229,14 @@ groups:
          severity: warning
      - alert: NodeRAIDDegraded
        annotations:
-      description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
+          description:
            RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
            in degraded state due to one or more disks failures. Number of spare drives
            is insufficient to fix issue automatically.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
          summary: RAID Array is degraded.
-    expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
+        expr:
          node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
          - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"})
          > 0
        for: 15m
@@ -226,17 +244,20 @@ groups:
          severity: critical
      - alert: NodeRAIDDiskFailure
        annotations:
-      description: At least one device in RAID array at {{ $labels.instance }} failed.
+          description:
            At least one device in RAID array at {{ $labels.instance }} failed.
            Array '{{ $labels.device }}' needs attention and possibly a disk swap.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
          summary: Failed device in RAID array.
-    expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
+        expr:
          node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
          > 0
        labels:
          severity: warning
      - alert: NodeFileDescriptorLimit
        annotations:
-      description: File descriptors limit at {{ $labels.instance }} is currently at
+          description:
            File descriptors limit at {{ $labels.instance }} is currently at
            {{ printf "%.2f" $value }}%.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
          summary: Kernel is predicted to exhaust file descriptors limit soon.
@@ -249,7 +270,8 @@ groups:
          severity: warning
      - alert: NodeFileDescriptorLimit
        annotations:
-      description: File descriptors limit at {{ $labels.instance }} is currently at
+          description:
            File descriptors limit at {{ $labels.instance }} is currently at
            {{ printf "%.2f" $value }}%.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
          summary: Kernel is predicted to exhaust file descriptors limit soon.
@@ -266,7 +288,8 @@ groups:
            CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
          summary: High CPU usage.
-    expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
+        expr:
          sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
          mode!="idle"}[2m]))) * 100 > 90
        for: 15m
        labels:
@@ -301,7 +324,8 @@ groups:
            Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization
          summary: Host is running out of memory.
-    expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
+        expr:
          100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
          * 100) > 90
        for: 15m
        labels:
@@ -313,14 +337,16 @@ groups:
            This symptom might indicate disk saturation.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation
          summary: Disk IO queue is high.
-    expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
+        expr:
          rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
          > 10
        for: 30m
        labels:
          severity: warning
      - alert: NodeSystemdServiceFailed
        annotations:
-      description: Systemd service {{ $labels.name }} has entered failed state at
+          description:
            Systemd service {{ $labels.name }} has entered failed state at
            {{ $labels.instance }}
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed
          summary: Systemd service has entered failed state.
@@ -330,7 +356,8 @@ groups:
          severity: warning
      - alert: NodeBondingDegraded
        annotations:
-      description: Bonding interface {{ $labels.master }} on {{ $labels.instance }}
+          description:
            Bonding interface {{ $labels.master }} on {{ $labels.instance }}
            is in degraded state due to one or more slave failures.
          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
          summary: Bonding interface is degraded
@@ -8,14 +8,16 @@ groups:
              VALUE = {{ $value }}
              LABELS = {{ $labels }}
          summary: Host high CPU load (instance {{ $labels.instance }})
-    expr: (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
+        expr:
          (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
          > 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
        for: 10m
        labels:
          severity: critical
      - alert: MemoryUtilizationHighWarning
        annotations:
-      dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
+          dashboard:
            https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
            $labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
          description: Node {{ $labels.instance }} has less than 10% available memory.
          summary: Node Memory utilization warning
@@ -25,7 +27,8 @@ groups:
          severity: critical
      - alert: MemoryUtilizationHighCritical
        annotations:
-      dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
+          dashboard:
            https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
            $labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
          description: Node {{ $labels.instance }} has less than 5% available memory.
          summary: Node Memory utilization critical
@@ -37,7 +40,8 @@ groups:
        annotations:
          description: Node {{ $labels.node }} has CPU utilization over 90%.
          summary: Node has been in not-ready state for longer than 3 minutes
-    expr: (sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
+        expr:
          (sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
          <= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"}))
          > 0
        for: 5m
@@ -50,7 +54,8 @@ groups:
              VALUE = {{ $value }}
              LABELS = {{ $labels }}
          summary: Kubernetes Node memory pressure (instance {{ $labels.instance }})
-    expr: kube_node_status_condition{condition="MemoryPressure",status="true"} ==
+        expr:
          kube_node_status_condition{condition="MemoryPressure",status="true"} ==
          1
        for: 2m
        labels:
@@ -62,7 +67,8 @@ groups:
              VALUE = {{ $value }}
              LABELS = {{ $labels }}
          summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
-    expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
+        expr:
          (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
          offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
          == 1
        for: 0m
@@ -3,18 +3,22 @@ groups:
    rules:
      - alert: VeleroBackupPartialFailures
        annotations:
-      message: Velero backup  {{ $labels.schedule }}  has  {{$value | humanizePercentage}}  partialy
+          message:
            Velero backup  {{ $labels.schedule }}  has  {{$value | humanizePercentage}}  partialy
            failed backups.
-    expr: velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
+        expr:
          velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
          > 0.25
        for: 15m
        labels:
          severity: critical
      - alert: VeleroBackupFailures
        annotations:
-      message: Velero backup  {{$labels.schedule}}  has  {{$value | humanizePercentage}}  failed
+          message:
            Velero backup  {{$labels.schedule}}  has  {{$value | humanizePercentage}}  failed
            backups.
-    expr: velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
+        expr:
          velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
          > 0.25
        for: 15m
        labels:
@@ -3,7 +3,8 @@ groups:
    rules:
      - alert: X509ExporterReadErrors
        annotations:
-      description: Over the last 15 minutes, this x509-certificate-exporter instance
+          description:
            Over the last 15 minutes, this x509-certificate-exporter instance
            has experienced errors reading certificate files or querying the Kubernetes
            API. This could be caused by a misconfiguration if triggered when the exporter
            starts.
@@ -14,7 +15,8 @@ groups:
          severity: warning
      - alert: CertificateError
        annotations:
-      description: Certificate could not be decoded {{if $labels.secret_name }} in
+          description:
            Certificate could not be decoded {{if $labels.secret_name }} in
            Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at
            location "{{ $labels.filepath }}"{{end}}
          summary: Certificate cannot be decoded
@@ -24,22 +26,26 @@ groups:
          severity: warning
      - alert: CertificateRenewal
        annotations:
-      description: Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
+          description:
            Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
            $labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
            $labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
          summary: Certificate should be renewed
-    expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
+        expr:
          ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
          issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28
        for: 15m
        labels:
          severity: warning
      - alert: CertificateExpiration
        annotations:
-      description: Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
+          description:
            Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
            $labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
            $labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
          summary: Certificate is about to expire
-    expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
+        expr:
          ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
          issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14
        for: 15m
        labels: