fix(rules/bootstrap): Format yaml

This commit is contained in:
2025-12-29 13:23:04 +01:00
parent f81a4b2732
commit 957526a6bc
22 changed files with 2142 additions and 2044 deletions
+1
View File
@@ -1,6 +1,7 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# the shebang is ignored, but nice for editors # the shebang is ignored, but nice for editors
watch_file nix/sources.json watch_file nix/sources.json
watch_file nix/checks.nix
# Load .env file if it exists # Load .env file if it exists
dotenv_if_exists dotenv_if_exists
+1 -2
View File
@@ -8,7 +8,7 @@ stages:
release: release:
stage: release stage: release
rules: rules:
- if: '$CI_COMMIT_BRANCH =~ /^main/' - if: "$CI_COMMIT_BRANCH =~ /^main/"
when: always when: always
- when: never - when: never
script: script:
@@ -43,4 +43,3 @@ rebuild:
"${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts" "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
fi fi
done done
+5 -5
View File
@@ -4,15 +4,15 @@ metadata:
name: argocd-cluster-admin name: argocd-cluster-admin
rules: rules:
- apiGroups: - apiGroups:
- '*' - "*"
resources: resources:
- '*' - "*"
verbs: verbs:
- '*' - "*"
- nonResourceURLs: - nonResourceURLs:
- '*' - "*"
verbs: verbs:
- '*' - "*"
--- ---
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding kind: ClusterRoleBinding
-2
View File
@@ -6,5 +6,3 @@ metadata:
name: cluster-admin-token name: cluster-admin-token
namespace: kube-system namespace: kube-system
type: kubernetes.io/service-account-token type: kubernetes.io/service-account-token
-2
View File
@@ -10,5 +10,3 @@ metadata:
name: cluster-ekman name: cluster-ekman
namespace: argocd namespace: argocd
type: Opaque type: Opaque
+1 -2
View File
@@ -66,7 +66,7 @@ spec:
itemType: string itemType: string
collectionType: string collectionType: string
string: "" string: ""
# All the fields above besides "string" apply to both the array and map type parameter announcements. # All the fields above besides 'string' apply to both the array and map type parameter announcements.
# - name: array-param # - name: array-param
# # This field communicates the parameter's default value to the UI. Setting this field is optional. # # This field communicates the parameter's default value to the UI. Setting this field is optional.
# array: [default, items] # array: [default, items]
@@ -84,4 +84,3 @@ spec:
# If set to `true` then the plugin receives repository files with original file mode. Dangerous since the repository # If set to `true` then the plugin receives repository files with original file mode. Dangerous since the repository
# might have executable files. Set to true only if you trust the CMP plugin authors. # might have executable files. Set to true only if you trust the CMP plugin authors.
preserveFileMode: false preserveFileMode: false
@@ -422,4 +422,3 @@ spec:
path: ca.crt path: ca.crt
optional: true optional: true
secretName: argocd-repo-server-tls secretName: argocd-repo-server-tls
-1
View File
@@ -13,4 +13,3 @@ stringData:
name: staging-vcluster name: staging-vcluster
server: https://staging-vcluster.staging-vcluster server: https://staging-vcluster.staging-vcluster
type: Opaque type: Opaque
+4 -4
View File
@@ -32,12 +32,12 @@ projects:
additionalAnnotations: {} additionalAnnotations: {}
description: sys components project description: sys components project
sourceRepos: sourceRepos:
- '*' - "*"
destinations: destinations:
- namespace: '*' - namespace: "*"
server: https://kubernetes.default.svc server: https://kubernetes.default.svc
clusterResourceWhitelist: clusterResourceWhitelist:
- group: '*' - group: "*"
kind: '*' kind: "*"
orphanedResources: orphanedResources:
warn: false warn: false
+8 -3
View File
@@ -5,6 +5,8 @@ let
globalExcludes = [ globalExcludes = [
"nix/default.nix" "nix/default.nix"
"attic"
"vcluster"
".*vendor" ".*vendor"
".*chart/.*" ".*chart/.*"
".*schema.json" ".*schema.json"
@@ -32,6 +34,7 @@ pre-commit.run {
enable = true; enable = true;
excludes = [ excludes = [
"vcluster/" "vcluster/"
"attic/"
]; ];
args = [ args = [
"-x" "-x"
@@ -41,15 +44,17 @@ pre-commit.run {
}; };
yamllint = { yamllint = {
enable = false; enable = true;
excludes = [ excludes = [
"attic/" "attic/"
"charts/templates/" "charts/templates/"
"charts/charts/" "charts/"
"values/"
"vcluster/"
]; ];
settings = { settings = {
strict = true; strict = true;
configData = ''{ extends: default, rules: { document-start: disable, line-length: {max: 165} } }''; configData = ''{ extends: default, rules: { document-start: disable, line-length: {max: 300} } }'';
}; };
}; };
+42 -21
View File
@@ -3,7 +3,8 @@ groups:
rules: rules:
- alert: etcdMembersDown - alert: etcdMembersDown
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value description:
'etcd cluster "{{ $labels.job }}": members are down ({{ $value
}}).' }}).'
summary: etcd cluster members are down. summary: etcd cluster members are down.
expr: |- expr: |-
@@ -20,17 +21,20 @@ groups:
severity: critical severity: critical
- alert: etcdInsufficientMembers - alert: etcdInsufficientMembers
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value description:
'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
}}).' }}).'
summary: etcd cluster has insufficient number of members. summary: etcd cluster has insufficient number of members.
expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"}) expr:
sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
without (instance) + 1) / 2) without (instance) + 1) / 2)
for: 3m for: 3m
labels: labels:
severity: critical severity: critical
- alert: etcdNoLeader - alert: etcdNoLeader
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} description:
'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
has no leader.' has no leader.'
summary: etcd cluster has no leader. summary: etcd cluster has no leader.
expr: etcd_server_has_leader{job=~".*etcd.*"} == 0 expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
@@ -39,12 +43,14 @@ groups:
severity: critical severity: critical
- alert: etcdHighNumberOfLeaderChanges - alert: etcdHighNumberOfLeaderChanges
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes description:
'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
within the last 15 minutes. Frequent elections may be a sign of insufficient within the last 15 minutes. Frequent elections may be a sign of insufficient
resources, high network latency, or disruptions by other components and should resources, high network latency, or disruptions by other components and should
be investigated.' be investigated.'
summary: etcd cluster has high number of leader changes. summary: etcd cluster has high number of leader changes.
expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"}) expr:
increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m])
>= 4 >= 4
for: 5m for: 5m
@@ -52,7 +58,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests - alert: etcdHighNumberOfFailedGRPCRequests
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for description:
'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of failed grpc requests. summary: etcd cluster has high number of failed grpc requests.
expr: |- expr: |-
@@ -65,7 +72,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests - alert: etcdHighNumberOfFailedGRPCRequests
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for description:
'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.' {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of failed grpc requests. summary: etcd cluster has high number of failed grpc requests.
expr: |- expr: |-
@@ -78,7 +86,8 @@ groups:
severity: critical severity: critical
- alert: etcdGRPCRequestsSlow - alert: etcdGRPCRequestsSlow
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests description:
'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method
}} method.' }} method.'
summary: etcd grpc requests are slow summary: etcd grpc requests are slow
@@ -90,7 +99,8 @@ groups:
severity: critical severity: critical
- alert: etcdMemberCommunicationSlow - alert: etcdMemberCommunicationSlow
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": member communication with {{ description:
'etcd cluster "{{ $labels.job }}": member communication with {{
$labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance
}}.' }}.'
summary: etcd cluster member communication is slow. summary: etcd cluster member communication is slow.
@@ -102,7 +112,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighNumberOfFailedProposals - alert: etcdHighNumberOfFailedProposals
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures description:
'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
within the last 30 minutes on etcd instance {{ $labels.instance }}.' within the last 30 minutes on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of proposal failures. summary: etcd cluster has high number of proposal failures.
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5 expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
@@ -111,7 +122,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighFsyncDurations - alert: etcdHighFsyncDurations
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations description:
'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
are {{ $value }}s on etcd instance {{ $labels.instance }}.' are {{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile fsync durations are too high. summary: etcd cluster 99th percentile fsync durations are too high.
expr: |- expr: |-
@@ -122,7 +134,8 @@ groups:
severity: warning severity: warning
- alert: etcdHighFsyncDurations - alert: etcdHighFsyncDurations
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations description:
'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
are {{ $value }}s on etcd instance {{ $labels.instance }}.' are {{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile fsync durations are too high. summary: etcd cluster 99th percentile fsync durations are too high.
expr: |- expr: |-
@@ -133,7 +146,8 @@ groups:
severity: critical severity: critical
- alert: etcdHighCommitDurations - alert: etcdHighCommitDurations
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations description:
'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
{{ $value }}s on etcd instance {{ $labels.instance }}.' {{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile commit durations are too high. summary: etcd cluster 99th percentile commit durations are too high.
expr: |- expr: |-
@@ -144,11 +158,13 @@ groups:
severity: warning severity: warning
- alert: etcdDatabaseQuotaLowSpace - alert: etcdDatabaseQuotaLowSpace
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined description:
'etcd cluster "{{ $labels.job }}": database size exceeds the defined
quota on etcd instance {{ $labels.instance }}, please defrag or increase the quota on etcd instance {{ $labels.instance }}, please defrag or increase the
quota as the writes to etcd will be disabled when it is full.' quota as the writes to etcd will be disabled when it is full.'
summary: etcd cluster database is running full. summary: etcd cluster database is running full.
expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) / expr:
(last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 >
95 95
for: 10m for: 10m
@@ -156,26 +172,31 @@ groups:
severity: critical severity: critical
- alert: etcdExcessiveDatabaseGrowth - alert: etcdExcessiveDatabaseGrowth
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk description:
'etcd cluster "{{ $labels.job }}": Predicting running out of disk
space in the next four hours, based on write observations within the past space in the next four hours, based on write observations within the past
four hours on etcd instance {{ $labels.instance }}, please check as it might four hours on etcd instance {{ $labels.instance }}, please check as it might
be disruptive.' be disruptive.'
summary: etcd cluster database growing very fast. summary: etcd cluster database growing very fast.
expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60) expr:
predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
> etcd_server_quota_backend_bytes{job=~".*etcd.*"} > etcd_server_quota_backend_bytes{job=~".*etcd.*"}
for: 10m for: 10m
labels: labels:
severity: warning severity: warning
- alert: etcdDatabaseHighFragmentationRatio - alert: etcdDatabaseHighFragmentationRatio
annotations: annotations:
description: 'etcd cluster "{{ $labels.job }}": database size in use on instance description:
'etcd cluster "{{ $labels.job }}": database size in use on instance
{{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual
allocated disk space, please run defragmentation (e.g. etcdctl defrag) to allocated disk space, please run defragmentation (e.g. etcdctl defrag) to
retrieve the unused fragmented disk space.' retrieve the unused fragmented disk space.'
runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
summary: etcd database size in use is less than 50% of the actual allocated summary:
etcd database size in use is less than 50% of the actual allocated
storage. storage.
expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m]) expr:
(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
/ last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5 / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5
and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600 and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
for: 10m for: 10m
+8 -4
View File
@@ -3,11 +3,13 @@ groups:
rules: rules:
- alert: TargetDown - alert: TargetDown
annotations: annotations:
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service description:
'{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
}} targets in {{ $labels.namespace }} namespace are down.' }} targets in {{ $labels.namespace }} namespace are down.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
summary: One or more targets are unreachable. summary: One or more targets are unreachable.
expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up) expr:
100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
BY (cluster, job, namespace, service)) > 10 BY (cluster, job, namespace, service)) > 10
for: 10m for: 10m
labels: labels:
@@ -21,7 +23,8 @@ groups:
mechanisms that send a notification when this alert is not firing. For example the mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty. "DeadMansSnitch" integration in PagerDuty.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
summary: An alert that should always be firing to certify that Alertmanager summary:
An alert that should always be firing to certify that Alertmanager
is working properly. is working properly.
expr: vector(1) expr: vector(1)
labels: labels:
@@ -37,7 +40,8 @@ groups:
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info". This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
summary: Info-level alert inhibition. summary: Info-level alert inhibition.
expr: ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname != expr:
ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
"InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1 "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
labels: labels:
severity: none severity: none
+40 -21
View File
@@ -3,18 +3,21 @@ groups:
rules: rules:
- alert: KubePodCrashLooping - alert: KubePodCrashLooping
annotations: annotations:
description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container description:
'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
}}) is in waiting state (reason: "CrashLoopBackOff").' }}) is in waiting state (reason: "CrashLoopBackOff").'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
summary: Pod is crash looping. summary: Pod is crash looping.
expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff", expr:
max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
job="kube-state-metrics", namespace=~".*"}[5m]) >= 1 job="kube-state-metrics", namespace=~".*"}[5m]) >= 1
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- alert: KubePodNotReady - alert: KubePodNotReady
annotations: annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready description:
Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes. state for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes. summary: Pod has been in a non-ready state for more than 15 minutes.
@@ -31,7 +34,8 @@ groups:
severity: warning severity: warning
- alert: KubeDeploymentGenerationMismatch - alert: KubeDeploymentGenerationMismatch
annotations: annotations:
description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment description:
Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
}} does not match, this indicates that the Deployment has failed but has not }} does not match, this indicates that the Deployment has failed but has not
been rolled back. been rolled back.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
@@ -45,7 +49,8 @@ groups:
severity: warning severity: warning
- alert: KubeDeploymentReplicasMismatch - alert: KubeDeploymentReplicasMismatch
annotations: annotations:
description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has description:
Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
not matched the expected number of replicas for longer than 15 minutes. not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
summary: Deployment has not matched the expected number of replicas. summary: Deployment has not matched the expected number of replicas.
@@ -64,7 +69,8 @@ groups:
severity: warning severity: warning
- alert: KubeDeploymentRolloutStuck - alert: KubeDeploymentRolloutStuck
annotations: annotations:
description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment description:
Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
}} is not progressing for longer than 15 minutes. }} is not progressing for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
summary: Deployment rollout is not progressing. summary: Deployment rollout is not progressing.
@@ -76,7 +82,8 @@ groups:
severity: warning severity: warning
- alert: KubeStatefulSetReplicasMismatch - alert: KubeStatefulSetReplicasMismatch
annotations: annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has description:
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
not matched the expected number of replicas for longer than 15 minutes. not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
summary: StatefulSet has not matched the expected number of replicas. summary: StatefulSet has not matched the expected number of replicas.
@@ -95,7 +102,8 @@ groups:
severity: warning severity: warning
- alert: KubeStatefulSetGenerationMismatch - alert: KubeStatefulSetGenerationMismatch
annotations: annotations:
description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset description:
StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
}} does not match, this indicates that the StatefulSet has failed but has }} does not match, this indicates that the StatefulSet has failed but has
not been rolled back. not been rolled back.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
@@ -109,7 +117,8 @@ groups:
severity: warning severity: warning
- alert: KubeStatefulSetUpdateNotRolledOut - alert: KubeStatefulSetUpdateNotRolledOut
annotations: annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update description:
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
has not been rolled out. has not been rolled out.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
summary: StatefulSet update has not been rolled out. summary: StatefulSet update has not been rolled out.
@@ -136,7 +145,8 @@ groups:
severity: warning severity: warning
- alert: KubeDaemonSetRolloutStuck - alert: KubeDaemonSetRolloutStuck
annotations: annotations:
description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not description:
DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
finished or progressed for at least 15 minutes. finished or progressed for at least 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck. summary: DaemonSet rollout is stuck.
@@ -169,19 +179,22 @@ groups:
severity: warning severity: warning
- alert: KubeContainerWaiting - alert: KubeContainerWaiting
annotations: annotations:
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container description:
pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
{{ $labels.container}} has been in waiting state for longer than 1 hour. {{ $labels.container}} has been in waiting state for longer than 1 hour.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour summary: Pod container waiting longer than 1 hour
expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", expr:
sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
namespace=~".*"}) > 0 namespace=~".*"}) > 0
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
- alert: KubeDaemonSetNotScheduled - alert: KubeDaemonSetNotScheduled
annotations: annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset description:
}} are not scheduled.' "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled."
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
summary: DaemonSet pods are not scheduled. summary: DaemonSet pods are not scheduled.
expr: |- expr: |-
@@ -193,18 +206,21 @@ groups:
severity: warning severity: warning
- alert: KubeDaemonSetMisScheduled - alert: KubeDaemonSetMisScheduled
annotations: annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset description:
}} are running where they are not supposed to run.' "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run."
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
summary: DaemonSet pods are misscheduled. summary: DaemonSet pods are misscheduled.
expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"} expr:
kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
> 0 > 0
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- alert: KubeJobNotCompleted - alert: KubeJobNotCompleted
annotations: annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more description:
Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
than {{ "43200" | humanizeDuration }} to complete. than {{ "43200" | humanizeDuration }} to complete.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
summary: Job did not complete in time summary: Job did not complete in time
@@ -216,7 +232,8 @@ groups:
severity: warning severity: warning
- alert: KubeJobFailed - alert: KubeJobFailed
annotations: annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete. description:
Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
Removing failed job after investigation should clear this alert. Removing failed job after investigation should clear this alert.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
summary: Job failed to complete. summary: Job failed to complete.
@@ -226,7 +243,8 @@ groups:
severity: warning severity: warning
- alert: KubeHpaReplicasMismatch - alert: KubeHpaReplicasMismatch
annotations: annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} description:
HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
has not matched the desired number of replicas for longer than 15 minutes. has not matched the desired number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
summary: HPA has not matched desired number of replicas. summary: HPA has not matched desired number of replicas.
@@ -249,7 +267,8 @@ groups:
severity: warning severity: warning
- alert: KubeHpaMaxedOut - alert: KubeHpaMaxedOut
annotations: annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }} description:
HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
has been running at max replicas for longer than 15 minutes. has been running at max replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
summary: HPA is running at max replicas summary: HPA is running at max replicas
+17 -9
View File
@@ -3,7 +3,8 @@ groups:
rules: rules:
- alert: KubeCPUOvercommit - alert: KubeCPUOvercommit
annotations: annotations:
description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests description:
Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
for Pods by {{ $value }} CPU shares and cannot tolerate node failure. for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
summary: Cluster has overcommitted CPU resource requests. summary: Cluster has overcommitted CPU resource requests.
@@ -16,7 +17,8 @@ groups:
severity: warning severity: warning
- alert: KubeMemoryOvercommit - alert: KubeMemoryOvercommit
annotations: annotations:
description: Cluster {{ $labels.cluster }} has overcommitted memory resource description:
Cluster {{ $labels.cluster }} has overcommitted memory resource
requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node
failure. failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
@@ -30,7 +32,8 @@ groups:
severity: warning severity: warning
- alert: KubeCPUQuotaOvercommit - alert: KubeCPUQuotaOvercommit
annotations: annotations:
description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests description:
Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
for Namespaces. for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests. summary: Cluster has overcommitted CPU resource requests.
@@ -44,7 +47,8 @@ groups:
severity: warning severity: warning
- alert: KubeMemoryQuotaOvercommit - alert: KubeMemoryQuotaOvercommit
annotations: annotations:
description: Cluster {{ $labels.cluster }} has overcommitted memory resource description:
Cluster {{ $labels.cluster }} has overcommitted memory resource
requests for Namespaces. requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests. summary: Cluster has overcommitted memory resource requests.
@@ -58,7 +62,8 @@ groups:
severity: warning severity: warning
- alert: KubeQuotaAlmostFull - alert: KubeQuotaAlmostFull
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota. }} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
summary: Namespace quota is going to be full. summary: Namespace quota is going to be full.
@@ -72,7 +77,8 @@ groups:
severity: info severity: info
- alert: KubeQuotaFullyUsed - alert: KubeQuotaFullyUsed
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota. }} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
summary: Namespace quota is fully used. summary: Namespace quota is fully used.
@@ -86,7 +92,8 @@ groups:
severity: info severity: info
- alert: KubeQuotaExceeded - alert: KubeQuotaExceeded
annotations: annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota. }} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
summary: Namespace quota has exceeded the limits. summary: Namespace quota has exceeded the limits.
@@ -100,9 +107,10 @@ groups:
severity: warning severity: warning
- alert: CPUThrottlingHigh - alert: CPUThrottlingHigh
annotations: annotations:
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace description:
"{{ $value | humanizePercentage }} throttling of CPU in namespace
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
}}.' }}."
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
summary: Processes experience elevated CPU throttling. summary: Processes experience elevated CPU throttling.
expr: |- expr: |-
+12 -7
View File
@@ -1,10 +1,10 @@
groups: groups:
- name: kubernetes-storage - name: kubernetes-storage
rules: rules:
- alert: KubePersistentVolumeFillingUp - alert: KubePersistentVolumeFillingUp
annotations: annotations:
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim description:
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is only {{ $value | humanizePercentage }} free. {{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
@@ -26,7 +26,8 @@ groups:
severity: critical severity: critical
- alert: KubePersistentVolumeFillingUp - alert: KubePersistentVolumeFillingUp
annotations: annotations:
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim description:
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value {{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value
| humanizePercentage }} is available. | humanizePercentage }} is available.
@@ -51,7 +52,8 @@ groups:
severity: warning severity: warning
- alert: KubePersistentVolumeInodesFillingUp - alert: KubePersistentVolumeInodesFillingUp
annotations: annotations:
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim description:
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes. {{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
@@ -73,7 +75,8 @@ groups:
severity: critical severity: critical
- alert: KubePersistentVolumeInodesFillingUp - alert: KubePersistentVolumeInodesFillingUp
annotations: annotations:
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim description:
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is expected to run out of inodes within four days. Currently {{ . }} {{- end }} is expected to run out of inodes within four days. Currently
{{ $value | humanizePercentage }} of its inodes are free. {{ $value | humanizePercentage }} of its inodes are free.
@@ -98,11 +101,13 @@ groups:
severity: warning severity: warning
- alert: KubePersistentVolumeErrors - alert: KubePersistentVolumeErrors
annotations: annotations:
description: The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster description:
The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
-}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}. -}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
summary: PersistentVolume is having issues with provisioning. summary: PersistentVolume is having issues with provisioning.
expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"} expr:
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
> 0 > 0
for: 5m for: 5m
labels: labels:
+55 -28
View File
@@ -3,7 +3,8 @@ groups:
rules: rules:
- alert: NodeFilesystemSpaceFillingUp - alert: NodeFilesystemSpaceFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left and is filling up. space left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
@@ -21,7 +22,8 @@ groups:
severity: warning severity: warning
- alert: NodeFilesystemSpaceFillingUp - alert: NodeFilesystemSpaceFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left and is filling up fast. space left and is filling up fast.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
@@ -39,7 +41,8 @@ groups:
severity: critical severity: critical
- alert: NodeFilesystemAlmostOutOfSpace - alert: NodeFilesystemAlmostOutOfSpace
annotations: annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left. space left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
@@ -55,7 +58,8 @@ groups:
severity: warning severity: warning
- alert: NodeFilesystemAlmostOutOfSpace - alert: NodeFilesystemAlmostOutOfSpace
annotations: annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left. space left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
@@ -71,7 +75,8 @@ groups:
severity: critical severity: critical
- alert: NodeFilesystemFilesFillingUp - alert: NodeFilesystemFilesFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left and is filling up. inodes left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
@@ -89,7 +94,8 @@ groups:
severity: warning severity: warning
- alert: NodeFilesystemFilesFillingUp - alert: NodeFilesystemFilesFillingUp
annotations: annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left and is filling up fast. inodes left and is filling up fast.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
@@ -107,7 +113,8 @@ groups:
severity: critical severity: critical
- alert: NodeFilesystemAlmostOutOfFiles - alert: NodeFilesystemAlmostOutOfFiles
annotations: annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left. inodes left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
@@ -123,7 +130,8 @@ groups:
severity: warning severity: warning
- alert: NodeFilesystemAlmostOutOfFiles - alert: NodeFilesystemAlmostOutOfFiles
annotations: annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left. inodes left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
@@ -139,38 +147,44 @@ groups:
severity: critical severity: critical
- alert: NodeNetworkReceiveErrs - alert: NodeNetworkReceiveErrs
annotations: annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered description:
'{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.' {{ printf "%.0f" $value }} receive errors in the last two minutes.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors. summary: Network interface is reporting many receive errors.
expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m]) expr:
rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
> 0.01 > 0.01
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
- alert: NodeNetworkTransmitErrs - alert: NodeNetworkTransmitErrs
annotations: annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered description:
'{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.' {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors. summary: Network interface is reporting many transmit errors.
expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m]) expr:
rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
> 0.01 > 0.01
for: 1h for: 1h
labels: labels:
severity: warning severity: warning
- alert: NodeHighNumberConntrackEntriesUsed - alert: NodeHighNumberConntrackEntriesUsed
annotations: annotations:
description: '{{ $value | humanizePercentage }} of conntrack entries are used.' description: "{{ $value | humanizePercentage }} of conntrack entries are used."
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit. summary: Number of conntrack are getting close to the limit.
expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit) expr:
(node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
> 0.75 > 0.75
labels: labels:
severity: warning severity: warning
- alert: NodeTextFileCollectorScrapeError - alert: NodeTextFileCollectorScrapeError
annotations: annotations:
description: Node Exporter text file collector on {{ $labels.instance }} failed description:
Node Exporter text file collector on {{ $labels.instance }} failed
to scrape. to scrape.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape. summary: Node Exporter text file collector failed to scrape.
@@ -179,7 +193,8 @@ groups:
severity: warning severity: warning
- alert: NodeClockSkewDetected - alert: NodeClockSkewDetected
annotations: annotations:
description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s. description:
Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
Ensure NTP is configured correctly on this host. Ensure NTP is configured correctly on this host.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
summary: Clock skew detected. summary: Clock skew detected.
@@ -200,7 +215,8 @@ groups:
severity: warning severity: warning
- alert: NodeClockNotSynchronising - alert: NodeClockNotSynchronising
annotations: annotations:
description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP description:
Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
is configured on this host. is configured on this host.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
summary: Clock not synchronising. summary: Clock not synchronising.
@@ -213,12 +229,14 @@ groups:
severity: warning severity: warning
- alert: NodeRAIDDegraded - alert: NodeRAIDDegraded
annotations: annotations:
description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is description:
RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
in degraded state due to one or more disks failures. Number of spare drives in degraded state due to one or more disks failures. Number of spare drives
is insufficient to fix issue automatically. is insufficient to fix issue automatically.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
summary: RAID Array is degraded. summary: RAID Array is degraded.
expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} expr:
node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
- ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}) - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"})
> 0 > 0
for: 15m for: 15m
@@ -226,17 +244,20 @@ groups:
severity: critical severity: critical
- alert: NodeRAIDDiskFailure - alert: NodeRAIDDiskFailure
annotations: annotations:
description: At least one device in RAID array at {{ $labels.instance }} failed. description:
At least one device in RAID array at {{ $labels.instance }} failed.
Array '{{ $labels.device }}' needs attention and possibly a disk swap. Array '{{ $labels.device }}' needs attention and possibly a disk swap.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
summary: Failed device in RAID array. summary: Failed device in RAID array.
expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"} expr:
node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
> 0 > 0
labels: labels:
severity: warning severity: warning
- alert: NodeFileDescriptorLimit - alert: NodeFileDescriptorLimit
annotations: annotations:
description: File descriptors limit at {{ $labels.instance }} is currently at description:
File descriptors limit at {{ $labels.instance }} is currently at
{{ printf "%.2f" $value }}%. {{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
summary: Kernel is predicted to exhaust file descriptors limit soon. summary: Kernel is predicted to exhaust file descriptors limit soon.
@@ -249,7 +270,8 @@ groups:
severity: warning severity: warning
- alert: NodeFileDescriptorLimit - alert: NodeFileDescriptorLimit
annotations: annotations:
description: File descriptors limit at {{ $labels.instance }} is currently at description:
File descriptors limit at {{ $labels.instance }} is currently at
{{ printf "%.2f" $value }}%. {{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
summary: Kernel is predicted to exhaust file descriptors limit soon. summary: Kernel is predicted to exhaust file descriptors limit soon.
@@ -266,7 +288,8 @@ groups:
CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
summary: High CPU usage. summary: High CPU usage.
expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter", expr:
sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
mode!="idle"}[2m]))) * 100 > 90 mode!="idle"}[2m]))) * 100 > 90
for: 15m for: 15m
labels: labels:
@@ -301,7 +324,8 @@ groups:
Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%. Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization
summary: Host is running out of memory. summary: Host is running out of memory.
expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"} expr:
100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
* 100) > 90 * 100) > 90
for: 15m for: 15m
labels: labels:
@@ -313,14 +337,16 @@ groups:
This symptom might indicate disk saturation. This symptom might indicate disk saturation.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation
summary: Disk IO queue is high. summary: Disk IO queue is high.
expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m]) expr:
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
> 10 > 10
for: 30m for: 30m
labels: labels:
severity: warning severity: warning
- alert: NodeSystemdServiceFailed - alert: NodeSystemdServiceFailed
annotations: annotations:
description: Systemd service {{ $labels.name }} has entered failed state at description:
Systemd service {{ $labels.name }} has entered failed state at
{{ $labels.instance }} {{ $labels.instance }}
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed
summary: Systemd service has entered failed state. summary: Systemd service has entered failed state.
@@ -330,7 +356,8 @@ groups:
severity: warning severity: warning
- alert: NodeBondingDegraded - alert: NodeBondingDegraded
annotations: annotations:
description: Bonding interface {{ $labels.master }} on {{ $labels.instance }} description:
Bonding interface {{ $labels.master }} on {{ $labels.instance }}
is in degraded state due to one or more slave failures. is in degraded state due to one or more slave failures.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
summary: Bonding interface is degraded summary: Bonding interface is degraded
+12 -6
View File
@@ -8,14 +8,16 @@ groups:
VALUE = {{ $value }} VALUE = {{ $value }}
LABELS = {{ $labels }} LABELS = {{ $labels }}
summary: Host high CPU load (instance {{ $labels.instance }}) summary: Host high CPU load (instance {{ $labels.instance }})
expr: (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m]))) expr:
(sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
> 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"} > 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
for: 10m for: 10m
labels: labels:
severity: critical severity: critical
- alert: MemoryUtilizationHighWarning - alert: MemoryUtilizationHighWarning
annotations: annotations:
dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{ dashboard:
https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D $labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
description: Node {{ $labels.instance }} has less than 10% available memory. description: Node {{ $labels.instance }} has less than 10% available memory.
summary: Node Memory utilization warning summary: Node Memory utilization warning
@@ -25,7 +27,8 @@ groups:
severity: critical severity: critical
- alert: MemoryUtilizationHighCritical - alert: MemoryUtilizationHighCritical
annotations: annotations:
dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{ dashboard:
https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D $labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
description: Node {{ $labels.instance }} has less than 5% available memory. description: Node {{ $labels.instance }} has less than 5% available memory.
summary: Node Memory utilization critical summary: Node Memory utilization critical
@@ -37,7 +40,8 @@ groups:
annotations: annotations:
description: Node {{ $labels.node }} has CPU utilization over 90%. description: Node {{ $labels.node }} has CPU utilization over 90%.
summary: Node has been in not-ready state for longer than 3 minutes summary: Node has been in not-ready state for longer than 3 minutes
expr: (sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m]) expr:
(sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
<= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"})) <= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"}))
> 0 > 0
for: 5m for: 5m
@@ -50,7 +54,8 @@ groups:
VALUE = {{ $value }} VALUE = {{ $value }}
LABELS = {{ $labels }} LABELS = {{ $labels }}
summary: Kubernetes Node memory pressure (instance {{ $labels.instance }}) summary: Kubernetes Node memory pressure (instance {{ $labels.instance }})
expr: kube_node_status_condition{condition="MemoryPressure",status="true"} == expr:
kube_node_status_condition{condition="MemoryPressure",status="true"} ==
1 1
for: 2m for: 2m
labels: labels:
@@ -62,7 +67,8 @@ groups:
VALUE = {{ $value }} VALUE = {{ $value }}
LABELS = {{ $labels }} LABELS = {{ $labels }}
summary: Kubernetes Container oom killer (instance {{ $labels.instance }}) summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total expr:
(kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m]) offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
== 1 == 1
for: 0m for: 0m
+8 -4
View File
@@ -3,18 +3,22 @@ groups:
rules: rules:
- alert: VeleroBackupPartialFailures - alert: VeleroBackupPartialFailures
annotations: annotations:
message: Velero backup {{ $labels.schedule }} has {{$value | humanizePercentage}} partially message:
Velero backup {{ $labels.schedule }} has {{$value | humanizePercentage}} partially
failed backups. failed backups.
expr: velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""} expr:
velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
> 0.25 > 0.25
for: 15m for: 15m
labels: labels:
severity: critical severity: critical
- alert: VeleroBackupFailures - alert: VeleroBackupFailures
annotations: annotations:
message: Velero backup {{$labels.schedule}} has {{$value | humanizePercentage}} failed message:
Velero backup {{$labels.schedule}} has {{$value | humanizePercentage}} failed
backups. backups.
expr: velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""} expr:
velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
> 0.25 > 0.25
for: 15m for: 15m
labels: labels:
+12 -6
View File
@@ -3,7 +3,8 @@ groups:
rules: rules:
- alert: X509ExporterReadErrors - alert: X509ExporterReadErrors
annotations: annotations:
description: Over the last 15 minutes, this x509-certificate-exporter instance description:
Over the last 15 minutes, this x509-certificate-exporter instance
has experienced errors reading certificate files or querying the Kubernetes has experienced errors reading certificate files or querying the Kubernetes
API. This could be caused by a misconfiguration if triggered when the exporter API. This could be caused by a misconfiguration if triggered when the exporter
starts. starts.
@@ -14,7 +15,8 @@ groups:
severity: warning severity: warning
- alert: CertificateError - alert: CertificateError
annotations: annotations:
description: Certificate could not be decoded {{if $labels.secret_name }} in description:
Certificate could not be decoded {{if $labels.secret_name }} in
Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at
location "{{ $labels.filepath }}"{{end}} location "{{ $labels.filepath }}"{{end}}
summary: Certificate cannot be decoded summary: Certificate cannot be decoded
@@ -24,22 +26,26 @@ groups:
severity: warning severity: warning
- alert: CertificateRenewal - alert: CertificateRenewal
annotations: annotations:
description: Certificate for "{{ $labels.subject_CN }}" should be renewed {{if description:
Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}} $labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
summary: Certificate should be renewed summary: Certificate should be renewed
expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="", expr:
((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28 issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- alert: CertificateExpiration - alert: CertificateExpiration
annotations: annotations:
description: Certificate for "{{ $labels.subject_CN }}" is about to expire {{if description:
Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}} $labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
summary: Certificate is about to expire summary: Certificate is about to expire
expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="", expr:
((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14 issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14
for: 15m for: 15m
labels: labels: