Compare commits
194 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
75a5fb5c83
|
|||
|
0eb60de429
|
|||
|
9d034eea25
|
|||
|
6104114404
|
|||
| 1e7126fedb | |||
| 0d12907f4c | |||
|
297e5efd88
|
|||
|
d09eabd2bd
|
|||
|
351bb41f80
|
|||
|
fd773bff9f
|
|||
| 196d3ed0eb | |||
| f617f29a50 | |||
|
0bc45748cf
|
|||
|
fdbdb138e1
|
|||
|
b2ed367b2a
|
|||
| 22cb7bddb6 | |||
| fe1c3db4b2 | |||
| 830c44644d | |||
| 5825a4bbc2 | |||
| a7b3310a10 | |||
| ecfa74dddd | |||
| 8a931d7c03 | |||
| c7b099cff2 | |||
| 24276410c1 | |||
|
5493008cb6
|
|||
| a788539d33 | |||
| 3e06946d04 | |||
| 29a51653f3 | |||
|
23b43c9b41
|
|||
| 53ac321316 | |||
| c5d42f2266 | |||
| a8bbe28137 | |||
| ed9dd67040 | |||
| ef13e1f980 | |||
| 5d3f57e518 | |||
| 97ed914338 | |||
|
aa0ee6ad37
|
|||
|
7afc34dbf8
|
|||
|
c77e11f0d2
|
|||
|
78892df3fc
|
|||
|
c3b1cab416
|
|||
|
7227f07b71
|
|||
|
683c7f36c3
|
|||
|
98812a6a3b
|
|||
|
8f990cff54
|
|||
|
a2678efd78
|
|||
|
cdbacbd34c
|
|||
|
20ca29d5ec
|
|||
|
9c42fd665d
|
|||
|
7468b902ce
|
|||
| 62578486ce | |||
| 6b17805a42 | |||
| e35b81b356 | |||
|
10758b334b
|
|||
|
8f61e63f29
|
|||
|
621598dee3
|
|||
|
0689bd47f2
|
|||
|
006efc31c2
|
|||
|
9d45101ed9
|
|||
|
d630bdebef
|
|||
|
8182141bc1
|
|||
| dc67fa2271 | |||
| 37ea2ad85c | |||
| 6a5da41480 | |||
| cd25aa8a1a | |||
| 05a3a69976 | |||
| 0697a4da10 | |||
| 8d5443e126 | |||
| b32e0643fb | |||
| af7f4c8116 | |||
| 37bb29b36a | |||
| af04b27c10 | |||
| a42010546f | |||
|
7034d20e39
|
|||
|
dbdfcb4f21
|
|||
|
22148fb162
|
|||
| 3086214bac | |||
|
e6c99a8567
|
|||
|
fa9d45fbb7
|
|||
|
72eb20fb5b
|
|||
|
eb141a7efe
|
|||
|
773550df56
|
|||
|
a93173066d
|
|||
| b39ed6cc54 | |||
|
685328685b
|
|||
| 40beab6e4f | |||
|
46c890c6c3
|
|||
| aaa7cf4a6e | |||
| 55d385ea6a | |||
|
80ebe7c278
|
|||
|
cf5b0273c2
|
|||
|
c8ec4161aa
|
|||
| 59580b5d29 | |||
| ddc8c7b253 | |||
| 36f0f11ef6 | |||
| ea1a0a2eb5 | |||
| ffb572e762 | |||
| f46ca7d2be | |||
| 2cd14292d2 | |||
|
9470c73e92
|
|||
|
922e2fd0ea
|
|||
| 8c2f6d53c9 | |||
| 7041b91c45 | |||
| a1c3f766b5 | |||
| d5e6d86f4b | |||
| 608fae0bf1 | |||
| d3fd3b7c5b | |||
| 556756d0a0 | |||
| d242c23ae3 | |||
| 3255430a3b | |||
| 7594dfe93d | |||
|
616a1915f2
|
|||
| de6963de12 | |||
| 5d8a4056e3 | |||
| 9c9c87bf2f | |||
| 89a54a995d | |||
| 707c37b9f1 | |||
| 14ae0e358b | |||
| ef82ce7bc5 | |||
| d4d9d9a3b6 | |||
| f55fd396fc | |||
| 91e98e3949 | |||
| 40eb429c17 | |||
| 432a73a4ba | |||
| de4ab27a2d | |||
|
def3f19dff
|
|||
| 3336c9782c | |||
| b943caef06 | |||
| 83a3cece0b | |||
| 2155c4c654 | |||
| 13e44a495f | |||
| 923f2b81b9 | |||
| fad034ca44 | |||
| 31d1918b86 | |||
| 34181f92b1 | |||
| 1d8b1bebcd | |||
| 91fba971e2 | |||
| 4bb68c68a8 | |||
| 4fe9cfee86 | |||
| c580b22ff5 | |||
| 988ba5a4c2 | |||
|
e9e72da86a
|
|||
| a1c1022465 | |||
|
4de318d814
|
|||
|
7402bad7a4
|
|||
|
113a582649
|
|||
| 73b8b11088 | |||
| f6854b72c8 | |||
|
bb1078b0f2
|
|||
|
983fa68f6a
|
|||
|
9876d5bec5
|
|||
|
b6af70c8ca
|
|||
|
957526a6bc
|
|||
|
f81a4b2732
|
|||
|
d7e4fb43cb
|
|||
|
e94ed8155e
|
|||
| c8a0a98167 | |||
| 9cddd9b404 | |||
| 3df44cd4b2 | |||
| 53ac794bd6 | |||
| f1a382c76c | |||
| 7a7459db10 | |||
| ed3515c752 | |||
| 19457af158 | |||
| e455612874 | |||
| df757cf361 | |||
| eb8f6e83ca | |||
| 1668c8db54 | |||
| d739c3d1b1 | |||
| 10393587b2 | |||
| 64e5b26352 | |||
| 49ad715025 | |||
| ee6f7e1d56 | |||
| 468eaeed88 | |||
| 257a55fab7 | |||
| 6fb44f6ba4 | |||
| b456dbc0ff | |||
| c415754e46 | |||
| 2688f381ef | |||
| 10c6708bd4 | |||
| a07e19b22c | |||
| 2e9dc96ded | |||
| 0348b1d46f | |||
| 22383f1d88 | |||
| e2641b18b6 | |||
| 86240fc085 | |||
| 799b6c2858 | |||
| daa5b60c43 | |||
| ca0a228660 | |||
| 621945dbf2 | |||
| 847c70b547 | |||
| 40a04b72ae | |||
| 457a260d0e | |||
| 49b2992a41 |
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
# the shebang is ignored, but nice for editors
|
||||
watch_file nix/sources.json
|
||||
watch_file nix/checks.nix
|
||||
|
||||
# Load .env file if it exists
|
||||
dotenv_if_exists
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
*.tgz
|
||||
_*/
|
||||
.direnv/
|
||||
.env
|
||||
.pre-commit-config.yaml
|
||||
_*.yaml
|
||||
backup/
|
||||
|
||||
+50
-42
@@ -1,46 +1,54 @@
|
||||
image:
|
||||
name: alpine/helm:latest
|
||||
entrypoint: [ "/bin/bash", "-c" ]
|
||||
# yaml-language-server: $schema=https://gitlab.com/gitlab-org/gitlab/-/raw/master/app/assets/javascripts/editor/schema/ci.json
|
||||
default:
|
||||
tags:
|
||||
- nix
|
||||
|
||||
stages:
|
||||
- release
|
||||
include:
|
||||
- project: oceanbox/gitlab-ci
|
||||
ref: v4.5
|
||||
file: template/Base.gitlab-ci.yml
|
||||
# stages:
|
||||
# - release
|
||||
|
||||
release:
|
||||
stage: release
|
||||
rules:
|
||||
- if: '$CI_COMMIT_BRANCH =~ /^main/'
|
||||
when: always
|
||||
- when: never
|
||||
script:
|
||||
- |
|
||||
cd $CI_PROJECT_DIR
|
||||
for i in $(git show --pretty="" --name-only | grep '^charts/.*/Chart.yaml' | cut -d/ -f2); do
|
||||
pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
|
||||
if [ ! -z $pack ]; then
|
||||
chart=$(basename $pack)
|
||||
curl --request POST \
|
||||
--user gitlab-ci-token:$CI_JOB_TOKEN \
|
||||
--form "chart=@${chart}" \
|
||||
"${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
|
||||
fi
|
||||
done
|
||||
# image:
|
||||
# name: alpine/helm:latest
|
||||
# entrypoint: ["/bin/bash", "-c"]
|
||||
|
||||
rebuild:
|
||||
stage: release
|
||||
rules:
|
||||
- when: manual
|
||||
allow_failure: true
|
||||
script:
|
||||
- |
|
||||
cd $CI_PROJECT_DIR
|
||||
for i in $(find ./charts -maxdepth 2 -name Chart.yaml | cut -d/ -f3); do
|
||||
pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
|
||||
if [ ! -z $pack ]; then
|
||||
chart=$(basename $pack)
|
||||
curl --request POST \
|
||||
--user gitlab-ci-token:$CI_JOB_TOKEN \
|
||||
--form "chart=@${chart}" \
|
||||
"${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
|
||||
fi
|
||||
done
|
||||
# release:
|
||||
# stage: release
|
||||
# rules:
|
||||
# - if: "$CI_COMMIT_BRANCH =~ /^main/"
|
||||
# when: always
|
||||
# - when: never
|
||||
# script:
|
||||
# - |
|
||||
# cd $CI_PROJECT_DIR
|
||||
# for i in $(git show --pretty="" --name-only | grep '^charts/.*/Chart.yaml' | cut -d/ -f2); do
|
||||
# pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
|
||||
# if [ ! -z $pack ]; then
|
||||
# chart=$(basename $pack)
|
||||
# curl --request POST \
|
||||
# --user gitlab-ci-token:$CI_JOB_TOKEN \
|
||||
# --form "chart=@${chart}" \
|
||||
# "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
|
||||
# fi
|
||||
# done
|
||||
|
||||
# rebuild:
|
||||
# stage: release
|
||||
# rules:
|
||||
# - when: manual
|
||||
# allow_failure: true
|
||||
# script:
|
||||
# - |
|
||||
# cd $CI_PROJECT_DIR
|
||||
# for i in $(find ./charts -maxdepth 2 -name Chart.yaml | cut -d/ -f3); do
|
||||
# pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
|
||||
# if [ ! -z $pack ]; then
|
||||
# chart=$(basename $pack)
|
||||
# curl --request POST \
|
||||
# --user gitlab-ci-token:$CI_JOB_TOKEN \
|
||||
# --form "chart=@${chart}" \
|
||||
# "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
|
||||
# fi
|
||||
# done
|
||||
|
||||
+29
-22
@@ -6,39 +6,46 @@ let
|
||||
values = lib.apps.appValues {
|
||||
inherit env;
|
||||
base = ../values/atlantis;
|
||||
extraValues = {};
|
||||
extraValues = { };
|
||||
};
|
||||
|
||||
kustomize = r:
|
||||
kustomize =
|
||||
r:
|
||||
if r.kind == "Deployment" then
|
||||
lib.attrsets.recursiveUpdate r {
|
||||
spec.template.spec.containers =
|
||||
builtins.map (x:
|
||||
x // {
|
||||
spec.template.spec.containers = builtins.map (
|
||||
x:
|
||||
x
|
||||
// {
|
||||
livenessProbe.httpGet.path = "/healthz";
|
||||
readinessProble.httpGet.path = "/healthz";
|
||||
env = x.env ++ [ { name = "INERNAL_PORT"; value = 8000; } ];
|
||||
}) r.spec.template.spec.containers;
|
||||
env = x.env ++ [
|
||||
{
|
||||
name = "INERNAL_PORT";
|
||||
value = 8000;
|
||||
}
|
||||
];
|
||||
}
|
||||
) r.spec.template.spec.containers;
|
||||
}
|
||||
else if r.kind == "Service" then
|
||||
{}
|
||||
else r;
|
||||
else if r.kind == "Service" then
|
||||
{ }
|
||||
else
|
||||
r;
|
||||
in
|
||||
{
|
||||
options.apps.atlantis = lib.apps.appOptions {
|
||||
revision = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "main";
|
||||
description = "Revision";
|
||||
};
|
||||
revision = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = "main";
|
||||
description = "Revision";
|
||||
};
|
||||
|
||||
hostname = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = if env == "prod"
|
||||
then "maps.oceanbox.io"
|
||||
else "atlantis.beta.oceanbox.io";
|
||||
description = "Revision";
|
||||
};
|
||||
hostname = lib.mkOption {
|
||||
type = lib.types.str;
|
||||
default = if env == "prod" then "maps.oceanbox.io" else "atlantis.beta.oceanbox.io";
|
||||
description = "Revision";
|
||||
};
|
||||
};
|
||||
|
||||
config = lib.apps.appConfig cfg "${env}-atlantis" {
|
||||
|
||||
+23
-25
@@ -6,34 +6,32 @@ let
|
||||
values = lib.apps.appValues {
|
||||
inherit env;
|
||||
base = ../values/openfga;
|
||||
extraValues = {};
|
||||
extraValues = { };
|
||||
};
|
||||
|
||||
kustomize = r:
|
||||
if r.kind == "Job" then
|
||||
lib.attrsets.recursiveUpdate r { spec.backoffLimit = 2; }
|
||||
else r;
|
||||
kustomize =
|
||||
r: if r.kind == "Job" then lib.attrsets.recursiveUpdate r { spec.backoffLimit = 2; } else r;
|
||||
|
||||
in
|
||||
{
|
||||
options.apps.openfga = lib.apps.appOptions {};
|
||||
{
|
||||
options.apps.openfga = lib.apps.appOptions { };
|
||||
|
||||
config = lib.apps.appConfig cfg "${env}-openfga" {
|
||||
helm.releases."${env}-openfga" = {
|
||||
inherit values;
|
||||
chart = lib.helm.downloadHelmChart {
|
||||
repo = "https://openfga.github.io/helm-charts";
|
||||
chart = "openfga";
|
||||
version = "0.2.12";
|
||||
chartHash = "sha256-7yLcw9/oNPvCePrtTJwKAG88t0Ym5Dl/S83Gz+gQdDU=";
|
||||
};
|
||||
transformer = rs: builtins.map (x: kustomize x) rs;
|
||||
};
|
||||
|
||||
annotations = {};
|
||||
resources = {
|
||||
services.poop.spec = {
|
||||
};
|
||||
};
|
||||
config = lib.apps.appConfig cfg "${env}-openfga" {
|
||||
helm.releases."${env}-openfga" = {
|
||||
inherit values;
|
||||
chart = lib.helm.downloadHelmChart {
|
||||
repo = "https://openfga.github.io/helm-charts";
|
||||
chart = "openfga";
|
||||
version = "0.2.12";
|
||||
chartHash = "sha256-7yLcw9/oNPvCePrtTJwKAG88t0Ym5Dl/S83Gz+gQdDU=";
|
||||
};
|
||||
}
|
||||
transformer = rs: builtins.map (x: kustomize x) rs;
|
||||
};
|
||||
|
||||
annotations = { };
|
||||
resources = {
|
||||
services.poop.spec = {
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -46,19 +46,19 @@ spec:
|
||||
{{ end }}
|
||||
cleanupController:
|
||||
resources:
|
||||
limits:
|
||||
limits:
|
||||
memory: {{ .Values.kyverno.resources.cleanupController.memory }}
|
||||
requests:
|
||||
memory: {{ .Values.kyverno.resources.cleanupController.memory }}
|
||||
reportsController:
|
||||
resources:
|
||||
limits:
|
||||
limits:
|
||||
memory: {{ .Values.kyverno.resources.reportsController.memory }}
|
||||
requests:
|
||||
memory: {{ .Values.kyverno.resources.reportsController.memory }}
|
||||
backgroundController:
|
||||
resources:
|
||||
limits:
|
||||
limits:
|
||||
memory: {{ .Values.kyverno.resources.backgroundController.memory }}
|
||||
requests:
|
||||
memory: {{ .Values.kyverno.resources.backgroundController.memory }}
|
||||
|
||||
@@ -27,17 +27,17 @@ spec:
|
||||
scheme: {{ .Values.linkerd.secretScheme }}
|
||||
{{- if .Values.linkerd.identityIssuerPEM }}
|
||||
tls:
|
||||
crtPEM: {{- .Values.linkerd.identityIssuerPEM | toYaml | indent 14 }}
|
||||
crtPEM: {{- .Values.linkerd.identityIssuerPEM | toYaml | indent 14 }}
|
||||
{{- end }}
|
||||
policyValidator:
|
||||
externalSecret: true
|
||||
caBundle: {{- .Values.linkerd.webhookPEM | toYaml | indent 9 }}
|
||||
caBundle: {{- .Values.linkerd.webhookPEM | toYaml | indent 9 }}
|
||||
proxyInjector:
|
||||
externalSecret: true
|
||||
caBundle: {{- .Values.linkerd.webhookPEM | toYaml | indent 9 }}
|
||||
caBundle: {{- .Values.linkerd.webhookPEM | toYaml | indent 9 }}
|
||||
profileValidator:
|
||||
externalSecret: true
|
||||
caBundle: {{- .Values.linkerd.webhookPEM | toYaml | indent 9 }}
|
||||
caBundle: {{- .Values.linkerd.webhookPEM | toYaml | indent 9 }}
|
||||
|
||||
project: sys
|
||||
syncPolicy:
|
||||
|
||||
@@ -16,7 +16,7 @@ spec:
|
||||
helm:
|
||||
values: |
|
||||
containerPort: 10250
|
||||
resources:
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 200Mi
|
||||
|
||||
@@ -53,7 +53,7 @@ spec:
|
||||
endpoint: "tempo.tempo.svc:4317"
|
||||
tls:
|
||||
insecure: true
|
||||
##
|
||||
##
|
||||
otlphttp/metrics:
|
||||
endpoint: http://prom-prometheus.prometheus:9090/api/v1/otlp
|
||||
tls:
|
||||
|
||||
@@ -12,8 +12,8 @@ metadata:
|
||||
policies.kyverno.io/minversion: 1.7.0
|
||||
kyverno.io/kubernetes-version: "1.23"
|
||||
policies.kyverno.io/description: >-
|
||||
Customers should not have full admin permissions on their own namespaces.
|
||||
This policy will generate a RoleBinding, binding their group_id to
|
||||
Customers should not have full admin permissions on their own namespaces.
|
||||
This policy will generate a RoleBinding, binding their group_id to
|
||||
the Cluster-Admin clusterrole. This will still only apply to the namespace as
|
||||
the resource is a rolebinding, not clusterrolebinding.
|
||||
This policy should not trigger on any namespaces with label component=sys
|
||||
|
||||
@@ -24,7 +24,7 @@ spec:
|
||||
grafana_folder: Prometheus-stack
|
||||
targets:
|
||||
- apiVersion: v1
|
||||
kind: ConfigMap
|
||||
kind: ConfigMap
|
||||
name: "{{`{{ request.object.metadata.name }}`}}"
|
||||
name: generate-dashboard-folder-annotation
|
||||
skipBackgroundRequests: true
|
||||
|
||||
@@ -13,7 +13,7 @@ metadata:
|
||||
is time consuming and error prone. This policy will copy a
|
||||
Secret called `regcred` which exists in the `default` Namespace to
|
||||
new Namespaces when they are created. It will also push updates to
|
||||
the copied Secrets should the source Secret be changed.
|
||||
the copied Secrets should the source Secret be changed.
|
||||
spec:
|
||||
rules:
|
||||
- name: sync-image-pull-secret
|
||||
|
||||
@@ -9,12 +9,12 @@ metadata:
|
||||
policies.kyverno.io/severity: medium
|
||||
policies.kyverno.io/subject: Ingress
|
||||
policies.kyverno.io/description: >-
|
||||
Ingresses with the label "internal=true" should be whitelisted.
|
||||
If no whitelist exists, add the default values, otherwise append
|
||||
Ingresses with the label "internal=true" should be whitelisted.
|
||||
If no whitelist exists, add the default values, otherwise append
|
||||
whitelist to the already existing ones
|
||||
spec:
|
||||
mutateExistingOnPolicyUpdate: false
|
||||
#precondition: has whitelist annotation or
|
||||
#precondition: has whitelist annotation or
|
||||
rules:
|
||||
- name: ensure-nginx-whitelist-exists
|
||||
match:
|
||||
|
||||
@@ -32,7 +32,7 @@ data:
|
||||
}
|
||||
],
|
||||
"__elements":{
|
||||
|
||||
|
||||
},
|
||||
"__requires":[
|
||||
{
|
||||
@@ -70,7 +70,7 @@ data:
|
||||
"limit":100,
|
||||
"matchAny":false,
|
||||
"tags":[
|
||||
|
||||
|
||||
],
|
||||
"type":"dashboard"
|
||||
},
|
||||
@@ -83,7 +83,7 @@ data:
|
||||
"graphTooltip":0,
|
||||
"id":null,
|
||||
"links":[
|
||||
|
||||
|
||||
],
|
||||
"liveNow":false,
|
||||
"panels":[
|
||||
@@ -130,7 +130,7 @@ data:
|
||||
}
|
||||
},
|
||||
"mappings":[
|
||||
|
||||
|
||||
],
|
||||
"thresholds":{
|
||||
"mode":"absolute",
|
||||
@@ -195,7 +195,7 @@ data:
|
||||
"options":{
|
||||
"legend":{
|
||||
"calcs":[
|
||||
|
||||
|
||||
],
|
||||
"displayMode":"list",
|
||||
"placement":"bottom",
|
||||
@@ -255,7 +255,7 @@ data:
|
||||
"multi":false,
|
||||
"name":"DS_PROMETHEUS",
|
||||
"options":[
|
||||
|
||||
|
||||
],
|
||||
"query":"prometheus",
|
||||
"refresh":1,
|
||||
@@ -266,7 +266,7 @@ data:
|
||||
},
|
||||
{
|
||||
"current":{
|
||||
|
||||
|
||||
},
|
||||
"datasource":{
|
||||
"type":"prometheus",
|
||||
@@ -279,7 +279,7 @@ data:
|
||||
"multi":false,
|
||||
"name":"namespace",
|
||||
"options":[
|
||||
|
||||
|
||||
],
|
||||
"query":{
|
||||
"query":"label_values(rabbitmq_identity_info, namespace)",
|
||||
@@ -296,7 +296,7 @@ data:
|
||||
},
|
||||
{
|
||||
"current":{
|
||||
|
||||
|
||||
},
|
||||
"datasource":{
|
||||
"type":"prometheus",
|
||||
@@ -309,7 +309,7 @@ data:
|
||||
"multi":false,
|
||||
"name":"rabbitmq_cluster",
|
||||
"options":[
|
||||
|
||||
|
||||
],
|
||||
"query":{
|
||||
"query":"label_values(rabbitmq_identity_info{namespace=\"$namespace\"}, rabbitmq_cluster)",
|
||||
@@ -326,7 +326,7 @@ data:
|
||||
},
|
||||
{
|
||||
"current":{
|
||||
|
||||
|
||||
},
|
||||
"datasource":{
|
||||
"type":"prometheus",
|
||||
@@ -339,7 +339,7 @@ data:
|
||||
"multi":false,
|
||||
"name":"queue",
|
||||
"options":[
|
||||
|
||||
|
||||
],
|
||||
"query":{
|
||||
"query":"query_result(rabbitmq_detailed_queue_messages{namespace=\"$namespace\"} * on (instance, job) group_left(rabbitmq_cluster) rabbitmq_identity_info{namespace=\"$namespace\", rabbitmq_cluster=\"$rabbitmq_cluster\"})",
|
||||
@@ -361,7 +361,7 @@ data:
|
||||
"to":"now"
|
||||
},
|
||||
"timepicker":{
|
||||
|
||||
|
||||
},
|
||||
"timezone":"",
|
||||
"title":"RabbitMQ-Queue",
|
||||
|
||||
@@ -37,7 +37,7 @@ rules:
|
||||
resources:
|
||||
- events
|
||||
verbs: ["*"]
|
||||
|
||||
|
||||
- nonResourceURLs: ["*"]
|
||||
verbs: ["*"]
|
||||
- apiGroups:
|
||||
|
||||
@@ -139,8 +139,8 @@ spec:
|
||||
resources: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
command:
|
||||
- "/bin/sh"
|
||||
command:
|
||||
- "/bin/sh"
|
||||
- -c
|
||||
- /tmp/renew-certs/renew-certs.sh
|
||||
volumeMounts:
|
||||
@@ -216,7 +216,7 @@ metadata:
|
||||
name: default-deny-egress
|
||||
namespace: cert-manager
|
||||
spec:
|
||||
podSelector:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
block-egress: "true"
|
||||
policyTypes:
|
||||
|
||||
@@ -42,8 +42,8 @@ spec:
|
||||
resources: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
command:
|
||||
- "/bin/sh"
|
||||
command:
|
||||
- "/bin/sh"
|
||||
- -c
|
||||
- /tmp/renew-certs/renew-certs.sh
|
||||
volumeMounts:
|
||||
@@ -119,7 +119,7 @@ metadata:
|
||||
name: default-deny-egress
|
||||
namespace: gitlab
|
||||
spec:
|
||||
podSelector:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
block-egress: "true"
|
||||
policyTypes:
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Oceanbox IdP
|
||||
# Oceanbox IdP
|
||||
|
||||
```
|
||||
npm install && npm start
|
||||
|
||||
@@ -2,16 +2,16 @@
|
||||
|
||||
server="root@fs1-0"
|
||||
path="/vol/brick0/nfs0/k1/pv-oceanbox-dex"
|
||||
dest="$server:$path"
|
||||
dest="${server}:${path}"
|
||||
|
||||
index=$(basename dist/assets/index-*.js)
|
||||
|
||||
ssh $server -- rm $path/static/js/*.js
|
||||
scp dist/assets/*.js $dest/static/js/
|
||||
ssh "${server}" -- rm "${path}"/static/js/*.js
|
||||
scp dist/assets/*.js "${dest}"/static/js/
|
||||
|
||||
sed -r "s/@index@/$index/" ./dex/templates/login.html > login.html.$$
|
||||
scp ./dex/templates/* $dest/templates/
|
||||
scp ./dex/static/*.* $dest/static/
|
||||
scp login.html.$$ $dest/templates/login.html
|
||||
sed -r "s/@index@/${index}/" ./dex/templates/login.html > login.html.$$
|
||||
scp ./dex/templates/* "${dest}"/templates/
|
||||
scp ./dex/static/*.* "${dest}"/static/
|
||||
scp login.html.$$ "${dest}"/templates/login.html
|
||||
rm login.html.$$
|
||||
ssh admin@k1-0.itpartner.intern -- kubectl rollout restart -n oceanbox deployment/dex
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -66,7 +66,7 @@ let MyApp() =
|
||||
if isNullOrUndefined localStorage["user_id"] then
|
||||
""
|
||||
else
|
||||
localStorage["user_id"]
|
||||
localStorage["user_id"]
|
||||
// Browser.Dom.document.cookie
|
||||
// |> fun s -> s.Split ';'
|
||||
// |> Array.filter (fun s -> s.StartsWith "user_id=")
|
||||
@@ -75,7 +75,7 @@ let MyApp() =
|
||||
// |> Option.defaultValue ""
|
||||
|
||||
let toggleAmnesia _ = setAmnesia (not amnesia)
|
||||
|
||||
|
||||
html $"""
|
||||
<div class="centering">
|
||||
<div @keydown={Ev(onEnter)}>
|
||||
|
||||
+18
-17
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# shellcheck disable=SC2034 # Unused variables left for readability
|
||||
|
||||
helmfile () {
|
||||
|
||||
@@ -10,30 +11,30 @@ bases:
|
||||
- ../envs/environments.yaml.gotmpl
|
||||
|
||||
commonLabels:
|
||||
tier: $tier
|
||||
tier: ${tier}
|
||||
|
||||
releases:
|
||||
- name: $name
|
||||
namespace: {{ .Environment.Name }}-$name
|
||||
chart: ../charts/$name
|
||||
condition: $name.enabled
|
||||
- name: ${name}
|
||||
namespace: {{ .Environment.Name }}-${name}
|
||||
chart: ../charts/${name}
|
||||
condition: ${name}.enabled
|
||||
values:
|
||||
- ../values/$name/values/values.yaml.gotmpl
|
||||
- ../values/$name/values/values-{{ .Environment.Name }}.yaml
|
||||
- ../values/${name}/values/values.yaml.gotmpl
|
||||
- ../values/${name}/values/values-{{ .Environment.Name }}.yaml
|
||||
postRenderer: ../bin/kustomizer
|
||||
postRendererArgs:
|
||||
- ../values/$name/kustomize/{{ .Environment.Name }}
|
||||
- ../values/${name}/kustomize/{{ .Environment.Name }}
|
||||
missingFileHandler: Info
|
||||
- name: manifests
|
||||
namespace: {{ .Environment.Name }}-$name
|
||||
namespace: {{ .Environment.Name }}-${name}
|
||||
chart: manifests
|
||||
condition: $name.enabled
|
||||
condition: ${name}.enabled
|
||||
missingFileHandler: Info
|
||||
values:
|
||||
- ../values/env.yaml
|
||||
- ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
|
||||
- ../values/$name/env.yaml.gotmpl
|
||||
- ../values/$name/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
|
||||
- ../values/${name}/env.yaml.gotmpl
|
||||
- ../values/${name}/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
|
||||
hooks:
|
||||
- events: [ prepare, cleanup ]
|
||||
showlogs: true
|
||||
@@ -42,7 +43,7 @@ releases:
|
||||
- '{{\`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}\`}}'
|
||||
- '{{\`{{ .Release.Chart }}\`}}'
|
||||
- '{{\`{{ .Environment.Name }}\`}}'
|
||||
- ../values/$name/manifests
|
||||
- ../values/${name}/manifests
|
||||
- manifests
|
||||
EOF
|
||||
}
|
||||
@@ -59,10 +60,10 @@ done
|
||||
|
||||
name=$1
|
||||
tier=$2
|
||||
if [ -n "$ns" ]; then
|
||||
namespace="namespace: {{ .Environment.Name }}-$name"
|
||||
if [[ -n "${ns}" ]]; then
|
||||
namespace="namespace: {{ .Environment.Name }}-${name}"
|
||||
else
|
||||
namespace="namespace: $name"
|
||||
namespace="namespace: ${name}"
|
||||
fi
|
||||
|
||||
helmfile $1 $2
|
||||
helmfile "$1" "$2"
|
||||
|
||||
+13
-14
@@ -4,39 +4,38 @@ set -o pipefail
|
||||
|
||||
cmd=$1
|
||||
chart=$2
|
||||
env=$3
|
||||
manifests=${4:-manifests}
|
||||
outdir=${5:-_manifests}
|
||||
|
||||
build() {
|
||||
mkdir -p $outdir/templates
|
||||
echo "Creating $outdir/templates"
|
||||
mkdir -p "${outdir}"/templates
|
||||
echo "Creating ${outdir}/templates"
|
||||
|
||||
echo "generating $outdir/Chart.yaml" 1>&2
|
||||
echo "generating ${outdir}/Chart.yaml" 1>&2
|
||||
|
||||
cat <<EOF > $outdir/Chart.yaml
|
||||
cat <<EOF > "${outdir}"/Chart.yaml
|
||||
apiVersion: v1
|
||||
appVersion: "1.0"
|
||||
# description: A Helm chart for Kubernetes
|
||||
name: $chart
|
||||
name: ${chart}
|
||||
version: 0.1.0
|
||||
EOF
|
||||
|
||||
if [ -d $manifests ]; then
|
||||
cp -r $manifests/* $outdir/templates
|
||||
elif [ -f $manifests ]; then
|
||||
cp $manifests $outdir/templates
|
||||
if [[ -d "${manifests}" ]]; then
|
||||
cp -r "${manifests}"/* "${outdir}"/templates
|
||||
elif [[ -f "${manifests}" ]]; then
|
||||
cp "${manifests}" "${outdir}"/templates
|
||||
fi
|
||||
}
|
||||
|
||||
clean() {
|
||||
echo "cleaning $outdir" 1>&2
|
||||
rm -rf $outdir
|
||||
echo "cleaning ${outdir}" 1>&2
|
||||
rm -rf "${outdir}"
|
||||
}
|
||||
|
||||
case "$cmd" in
|
||||
case "${cmd}" in
|
||||
"build" ) build ;;
|
||||
"clean" ) clean ;;
|
||||
* ) echo "unsupported command: $cmd" 1>&2; exit 1 ;;
|
||||
* ) echo "unsupported command: ${cmd}" 1>&2; exit 1 ;;
|
||||
esac
|
||||
|
||||
|
||||
+5
-5
@@ -1,13 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
[ $# != 1 ] && exit 1
|
||||
[[ $# != 1 ]] && exit 1
|
||||
|
||||
dir=$1
|
||||
base=$dir/../base
|
||||
base=${dir}/../base
|
||||
|
||||
if [ -f $base/kustomization.yaml -a -f $dir/kustomization.yaml ]; then
|
||||
cat > $base/_manifest.yaml
|
||||
kubectl kustomize $dir
|
||||
if [[ -f "${base}"/kustomization.yaml ]] && [[ -f "${dir}"/kustomization.yaml ]]; then
|
||||
cat > "${base}"/_manifest.yaml
|
||||
kubectl kustomize "${dir}"
|
||||
else
|
||||
cat
|
||||
fi
|
||||
|
||||
@@ -3,16 +3,16 @@ kind: ClusterRole
|
||||
metadata:
|
||||
name: argocd-cluster-admin
|
||||
rules:
|
||||
- apiGroups:
|
||||
- '*'
|
||||
resources:
|
||||
- '*'
|
||||
verbs:
|
||||
- '*'
|
||||
- nonResourceURLs:
|
||||
- '*'
|
||||
verbs:
|
||||
- '*'
|
||||
- apiGroups:
|
||||
- "*"
|
||||
resources:
|
||||
- "*"
|
||||
verbs:
|
||||
- "*"
|
||||
- nonResourceURLs:
|
||||
- "*"
|
||||
verbs:
|
||||
- "*"
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
@@ -23,9 +23,9 @@ roleRef:
|
||||
kind: ClusterRole
|
||||
name: argocd-cluster-admin
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: argocd-cluster-admin
|
||||
namespace: kube-system
|
||||
- kind: ServiceAccount
|
||||
name: argocd-cluster-admin
|
||||
namespace: kube-system
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
|
||||
@@ -6,5 +6,3 @@ metadata:
|
||||
name: cluster-admin-token
|
||||
namespace: kube-system
|
||||
type: kubernetes.io/service-account-token
|
||||
|
||||
|
||||
|
||||
@@ -10,5 +10,3 @@ metadata:
|
||||
name: cluster-ekman
|
||||
namespace: argocd
|
||||
type: Opaque
|
||||
|
||||
|
||||
|
||||
@@ -3,5 +3,5 @@
|
||||
img=registry.gitlab.com/oceanbox/manifests/helm-kustomize-cmp
|
||||
tag=${1:-latest}
|
||||
|
||||
docker build -t $img:$tag .
|
||||
docker push $img:$tag
|
||||
docker build -t "${img}":"${tag}" .
|
||||
docker push "${img}":"${tag}"
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
#!/bin/sh
|
||||
# shellcheck disable=SC2154
|
||||
|
||||
export HOME=/plugin
|
||||
|
||||
env > /tmp/$ARGOCD_APP_NAME.env
|
||||
env > /tmp/"${ARGOCD_APP_NAME}".env
|
||||
|
||||
echo "$ARGOCD_APP_PARAMETERS" | jq '.[] | select(.name == "helm-parameters") | .map' | yq -P -oy > parameters.yaml
|
||||
cp parameters.yaml /tmp/$ARGOCD_APP_NAME-parameters.yaml
|
||||
echo "${ARGOCD_APP_PARAMETERS}" | jq '.[] | select(.name == "helm-parameters") | .map' | yq -P -oy > parameters.yaml
|
||||
cp parameters.yaml /tmp/"${ARGOCD_APP_NAME}"-parameters.yaml
|
||||
|
||||
if [ -n "$PARAM_CHART" -a "$PARAM_CHART" != "." ]; then
|
||||
CHART=$PARAM_CHART
|
||||
if [ -n "${PARAM_CHART}" ] && [ "${PARAM_CHART}" != "." ]; then
|
||||
CHART=${PARAM_CHART}
|
||||
elif [ -d chart ]; then
|
||||
CHART=chart
|
||||
elif [ -f chart ]; then
|
||||
@@ -18,19 +19,19 @@ else
|
||||
fi
|
||||
|
||||
[ -f chart/values.yaml ] && VALUES="-f chart/values.yaml"
|
||||
[ -f values-chart.yaml ] && VALUES="$VALUES -f values-chart.yaml"
|
||||
[ -f values.yaml ] && VALUES="$VALUES -f values.yaml"
|
||||
[ -f values-$PARAM_ENV.yaml ] && VALUES="$VALUES -f values-$PARAM_ENV.yaml"
|
||||
VALUES="$VALUES -f parameters.yaml"
|
||||
[ -f values-chart.yaml ] && VALUES="${VALUES} -f values-chart.yaml"
|
||||
[ -f values.yaml ] && VALUES="${VALUES} -f values.yaml"
|
||||
[ -f values-"${PARAM_ENV}".yaml ] && VALUES="${VALUES} -f values-${PARAM_ENV}.yaml"
|
||||
VALUES="${VALUES} -f parameters.yaml"
|
||||
|
||||
helm dependency update $CHART >/tmp/$ARGOCD_APP_NAME-helm-dependency-build.out
|
||||
helm dependency update "${CHART}" >/tmp/"${ARGOCD_APP_NAME}"-helm-dependency-build.out
|
||||
|
||||
mkdir -p base
|
||||
echo "helm template -n $ARGOCD_APP_NAMESPACE $PARAM_FLAGS $VALUES $ARGOCD_APP_NAME $CHART" > /tmp/$ARGOCD_APP_NAME-helm.sh
|
||||
helm template -n $ARGOCD_APP_NAMESPACE $PARAM_FLAGS $VALUES $ARGOCD_APP_NAME $CHART > ./base/_manifest.yaml
|
||||
echo "helm template -n ${ARGOCD_APP_NAMESPACE} ${PARAM_FLAGS} ${VALUES} ${ARGOCD_APP_NAME} ${CHART}" > /tmp/"${ARGOCD_APP_NAME}"-helm.sh
|
||||
helm template -n "${ARGOCD_APP_NAMESPACE}" "${PARAM_FLAGS}" "${VALUES}" "${ARGOCD_APP_NAME}" "${CHART}" > ./base/_manifest.yaml
|
||||
|
||||
cp ./base/_manifest.yaml /tmp/$ARGOCD_APP_NAME-manifest.yaml
|
||||
cp ./base/_manifest.yaml /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
|
||||
|
||||
[ -d "$PARAM_ENV" ] && kubectl kustomize $PARAM_ENV > /tmp/$ARGOCD_APP_NAME-manifest.yaml
|
||||
[ -d "${PARAM_ENV}" ] && kubectl kustomize "${PARAM_ENV}" > /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
|
||||
|
||||
cat /tmp/$ARGOCD_APP_NAME-manifest.yaml
|
||||
cat /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
|
||||
|
||||
@@ -18,7 +18,7 @@ EOF
|
||||
exit 0
|
||||
fi
|
||||
|
||||
yq e -o=p $VALUES | jq --slurp --raw-input '
|
||||
yq e -o=p "${VALUES}" | jq --slurp --raw-input '
|
||||
[{
|
||||
name: "helm-parameters",
|
||||
title: "Helm Parameters",
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
#!/bin/sh
|
||||
# shellcheck disable=SC2154
|
||||
|
||||
export HOME=/plugin
|
||||
|
||||
helm repo add --username argocd-helm --password "$OCEANBOX_HELM_ACCESS_TOKEN" oceanbox \
|
||||
helm repo add --username argocd-helm --password "${OCEANBOX_HELM_ACCESS_TOKEN}" oceanbox \
|
||||
https://gitlab.com/api/v4/projects/54396343/packages/helm/stable
|
||||
|
||||
helm repo add bitnami https://charts.bitnami.com/bitnami
|
||||
|
||||
@@ -4,9 +4,9 @@ export HOME=/plugin
|
||||
|
||||
helm repo update oceanbox
|
||||
|
||||
if [ -n "$PARAM_CHART" -a "$PARAM_CHART" != "." ]; then
|
||||
helm show values $PARAM_CHART > values-chart.yaml
|
||||
if [ -n "${PARAM_CHART}" ] && [ "${PARAM_CHART}" != "." ]; then
|
||||
helm show values "${PARAM_CHART}" > values-chart.yaml
|
||||
elif [ -f chart ]; then
|
||||
CHART=$(cat chart)
|
||||
helm show values $CHART > values-chart.yaml
|
||||
helm show values "${CHART}" > values-chart.yaml
|
||||
fi
|
||||
|
||||
@@ -9,7 +9,7 @@ spec:
|
||||
init:
|
||||
# Init always happens immediately before generate, but its output is not treated as manifests.
|
||||
# This is a good place to, for example, download chart dependencies.
|
||||
command: [ /bin/sh ]
|
||||
command: [/bin/sh]
|
||||
args:
|
||||
- /plugin/init.sh
|
||||
# The generate command runs in the Application source directory each time manifests are generated. Standard output
|
||||
@@ -17,7 +17,7 @@ spec:
|
||||
# To write log messages from the command, write them to stderr, it will always be displayed.
|
||||
# Error output will be sent to the UI, so avoid printing sensitive information (such as secrets).
|
||||
generate:
|
||||
command: [ /bin/sh ]
|
||||
command: [/bin/sh]
|
||||
args:
|
||||
- /plugin/generate.sh
|
||||
|
||||
@@ -27,15 +27,15 @@ spec:
|
||||
# Only one of fileName, find.glob, or find.command should be specified. If multiple are specified then only the
|
||||
# first (in that order) is evaluated.
|
||||
# discover:
|
||||
# fileName is a glob pattern (https://pkg.go.dev/path/filepath#Glob) that is applied to the Application's source
|
||||
# directory. If there is a match, this plugin may be used for the Application.
|
||||
# fileName: "./subdir/s*.yaml"
|
||||
# find:
|
||||
# This does the same thing as fileName, but it supports double-start (nested directory) glob patterns.
|
||||
# glob: "**/Chart.yaml"
|
||||
# The find command runs in the repository's root directory. To match, it must exit with status code 0 _and_
|
||||
# produce non-empty output to standard out.
|
||||
# command: [sh, -c, find . -name env.yaml]
|
||||
# fileName is a glob pattern (https://pkg.go.dev/path/filepath#Glob) that is applied to the Application's source
|
||||
# directory. If there is a match, this plugin may be used for the Application.
|
||||
# fileName: "./subdir/s*.yaml"
|
||||
# find:
|
||||
# This does the same thing as fileName, but it supports double-start (nested directory) glob patterns.
|
||||
# glob: "**/Chart.yaml"
|
||||
# The find command runs in the repository's root directory. To match, it must exit with status code 0 _and_
|
||||
# produce non-empty output to standard out.
|
||||
# command: [sh, -c, find . -name env.yaml]
|
||||
# The parameters config describes what parameters the UI should display for an Application. It is up to the user to
|
||||
# actually set parameters in the Application manifest (in spec.source.plugin.parameters). The announcements _only_
|
||||
# inform the "Parameters" tab in the App Details page of the UI.
|
||||
@@ -66,22 +66,21 @@ spec:
|
||||
itemType: string
|
||||
collectionType: string
|
||||
string: ""
|
||||
# All the fields above besides "string" apply to both the array and map type parameter announcements.
|
||||
# - name: array-param
|
||||
# # This field communicates the parameter's default value to the UI. Setting this field is optional.
|
||||
# array: [default, items]
|
||||
# collectionType: array
|
||||
# - name: map-param
|
||||
# # This field communicates the parameter's default value to the UI. Setting this field is optional.
|
||||
# map:
|
||||
# some: value
|
||||
# collectionType: map
|
||||
# All the fields above besides 'string' apply to both the array and map type parameter announcements.
|
||||
# - name: array-param
|
||||
# # This field communicates the parameter's default value to the UI. Setting this field is optional.
|
||||
# array: [default, items]
|
||||
# collectionType: array
|
||||
# - name: map-param
|
||||
# # This field communicates the parameter's default value to the UI. Setting this field is optional.
|
||||
# map:
|
||||
# some: value
|
||||
# collectionType: map
|
||||
# dynamic:
|
||||
# The command is run in an Application's source directory. Standard output must be JSON matching the schema of the
|
||||
# static parameter announcements list.
|
||||
# command: [ /bin/sh, /plugin/get-values.sh ]
|
||||
# The command is run in an Application's source directory. Standard output must be JSON matching the schema of the
|
||||
# static parameter announcements list.
|
||||
# command: [ /bin/sh, /plugin/get-values.sh ]
|
||||
|
||||
# If set to `true` then the plugin receives repository files with original file mode. Dangerous since the repository
|
||||
# might have executable files. Set to true only if you trust the CMP plugin authors.
|
||||
preserveFileMode: false
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM ghcr.io/helmfile/helmfile:v1.0.0
|
||||
FROM ghcr.io/helmfile/helmfile:v1.1.9
|
||||
|
||||
RUN mkdir -p /home/argocd/cmp-server/config/
|
||||
COPY plugin.yaml /home/argocd/cmp-server/config/
|
||||
|
||||
@@ -45,432 +45,432 @@ spec:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-repo-server
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-repo-server
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
automountServiceAccountToken: true
|
||||
containers:
|
||||
- args:
|
||||
- /usr/local/bin/argocd-repo-server
|
||||
- --port=8081
|
||||
- --metrics-port=8084
|
||||
env:
|
||||
- name: ARGOCD_REPO_SERVER_NAME
|
||||
value: argocd-repo-server
|
||||
- name: ARGOCD_RECONCILIATION_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: timeout.reconciliation
|
||||
name: argocd-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LOGFORMAT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.log.format
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LOGLEVEL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.log.level
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.parallelism.limit
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.listen.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.metrics.listen.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_DISABLE_TLS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.disable.tls
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_MIN_VERSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.minversion
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_MAX_VERSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.maxversion
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_CIPHERS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.ciphers
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_CACHE_EXPIRATION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.repo.cache.expiration
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_SERVER
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.server
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_COMPRESSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.compression
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDISDB
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.db
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-username
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: auth
|
||||
name: argocd-redis
|
||||
- name: REDIS_SENTINEL_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-sentinel-username
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: REDIS_SENTINEL_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-sentinel-password
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: ARGOCD_DEFAULT_CACHE_EXPIRATION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.default.cache.expiration
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_INSECURE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.insecure
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_HEADERS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.headers
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.max.combined.directory.manifests.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.plugin.tar.exclusions
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.allow.oob.symlinks
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.streamed.manifest.max.tar.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.streamed.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.helm.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.disable.helm.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_MODULES_ENABLED
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.enable.git.submodule
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.git.lsremote.parallelism.limit
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_REQUEST_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.git.request.timeout
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.revision.cache.lock.timeout
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_INCLUDE_HIDDEN_DIRECTORIES
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.include.hidden.directories
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: HELM_CACHE_HOME
|
||||
value: /helm-working-dir
|
||||
- name: HELM_CONFIG_HOME
|
||||
value: /helm-working-dir
|
||||
- name: HELM_DATA_HOME
|
||||
value: /helm-working-dir
|
||||
image: quay.io/argoproj/argocd:v2.12.3
|
||||
imagePullPolicy: IfNotPresent
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz?full=true
|
||||
port: metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
name: repo-server
|
||||
ports:
|
||||
- containerPort: 8081
|
||||
- args:
|
||||
- /usr/local/bin/argocd-repo-server
|
||||
- --port=8081
|
||||
- --metrics-port=8084
|
||||
env:
|
||||
- name: ARGOCD_REPO_SERVER_NAME
|
||||
value: argocd-repo-server
|
||||
- name: ARGOCD_RECONCILIATION_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: timeout.reconciliation
|
||||
name: argocd-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LOGFORMAT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.log.format
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LOGLEVEL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.log.level
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.parallelism.limit
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.listen.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.metrics.listen.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_DISABLE_TLS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.disable.tls
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_MIN_VERSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.minversion
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_MAX_VERSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.maxversion
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_CIPHERS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.ciphers
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_CACHE_EXPIRATION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.repo.cache.expiration
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_SERVER
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.server
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_COMPRESSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.compression
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDISDB
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.db
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-username
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: auth
|
||||
name: argocd-redis
|
||||
- name: REDIS_SENTINEL_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-sentinel-username
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: REDIS_SENTINEL_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-sentinel-password
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: ARGOCD_DEFAULT_CACHE_EXPIRATION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.default.cache.expiration
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_INSECURE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.insecure
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_HEADERS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.headers
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.max.combined.directory.manifests.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.plugin.tar.exclusions
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.allow.oob.symlinks
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.streamed.manifest.max.tar.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.streamed.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.helm.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.disable.helm.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_MODULES_ENABLED
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.enable.git.submodule
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.git.lsremote.parallelism.limit
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_REQUEST_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.git.request.timeout
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.revision.cache.lock.timeout
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_INCLUDE_HIDDEN_DIRECTORIES
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.include.hidden.directories
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: HELM_CACHE_HOME
|
||||
value: /helm-working-dir
|
||||
- name: HELM_CONFIG_HOME
|
||||
value: /helm-working-dir
|
||||
- name: HELM_DATA_HOME
|
||||
value: /helm-working-dir
|
||||
image: quay.io/argoproj/argocd:v2.12.3
|
||||
imagePullPolicy: IfNotPresent
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz?full=true
|
||||
port: metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
name: repo-server
|
||||
protocol: TCP
|
||||
- containerPort: 8084
|
||||
name: metrics
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /app/config/ssh
|
||||
name: ssh-known-hosts
|
||||
- mountPath: /app/config/tls
|
||||
name: tls-certs
|
||||
- mountPath: /app/config/gpg/source
|
||||
name: gpg-keys
|
||||
- mountPath: /app/config/gpg/keys
|
||||
name: gpg-keyring
|
||||
- mountPath: /app/config/reposerver/tls
|
||||
name: argocd-repo-server-tls
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
|
||||
imagePullPolicy: Always
|
||||
name: kustomize-helm-with-rewrite
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: cmp-tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/helm-kustomize-cmp:latest
|
||||
imagePullPolicy: Always
|
||||
name: helm-kustomize-cmp
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: cmp-tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/helmfile-cmp:latest
|
||||
imagePullPolicy: Always
|
||||
name: helmfile-cmp
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: cmp-tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
ports:
|
||||
- containerPort: 8081
|
||||
name: repo-server
|
||||
protocol: TCP
|
||||
- containerPort: 8084
|
||||
name: metrics
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /app/config/ssh
|
||||
name: ssh-known-hosts
|
||||
- mountPath: /app/config/tls
|
||||
name: tls-certs
|
||||
- mountPath: /app/config/gpg/source
|
||||
name: gpg-keys
|
||||
- mountPath: /app/config/gpg/keys
|
||||
name: gpg-keyring
|
||||
- mountPath: /app/config/reposerver/tls
|
||||
name: argocd-repo-server-tls
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
|
||||
imagePullPolicy: Always
|
||||
name: kustomize-helm-with-rewrite
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: cmp-tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/helm-kustomize-cmp:latest
|
||||
imagePullPolicy: Always
|
||||
name: helm-kustomize-cmp
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: cmp-tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/helmfile-cmp:latest
|
||||
imagePullPolicy: Always
|
||||
name: helmfile-cmp
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: cmp-tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
dnsPolicy: ClusterFirst
|
||||
imagePullSecrets:
|
||||
- name: gitlab-pull-secret
|
||||
- name: gitlab-pull-secret
|
||||
initContainers:
|
||||
- command:
|
||||
- /bin/cp
|
||||
- -n
|
||||
- /usr/local/bin/argocd
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: quay.io/argoproj/argocd:v2.12.3
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: copyutil
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- command:
|
||||
- /bin/sh
|
||||
- /plugin/init-helm-repos.sh
|
||||
env:
|
||||
- name: OCEANBOX_HELM_ACCESS_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: token
|
||||
name: oceanbox-helm
|
||||
optional: false
|
||||
image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
|
||||
imagePullPolicy: Always
|
||||
name: init-helm-repos
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- command:
|
||||
- /bin/cp
|
||||
- -n
|
||||
- /usr/local/bin/argocd
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: quay.io/argoproj/argocd:v2.12.3
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: copyutil
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- command:
|
||||
- /bin/sh
|
||||
- /plugin/init-helm-repos.sh
|
||||
env:
|
||||
- name: OCEANBOX_HELM_ACCESS_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: token
|
||||
name: oceanbox-helm
|
||||
optional: false
|
||||
image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
|
||||
imagePullPolicy: Always
|
||||
name: init-helm-repos
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
serviceAccount: argocd-repo-server
|
||||
serviceAccountName: argocd-repo-server
|
||||
terminationGracePeriodSeconds: 30
|
||||
volumes:
|
||||
- name: cmp-tmp
|
||||
- name: helm-working-dir
|
||||
- name: plugins
|
||||
- name: var-files
|
||||
- name: tmp
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-ssh-known-hosts-cm
|
||||
name: ssh-known-hosts
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-tls-certs-cm
|
||||
name: tls-certs
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-gpg-keys-cm
|
||||
name: gpg-keys
|
||||
- name: gpg-keyring
|
||||
- name: argocd-repo-server-tls
|
||||
secret:
|
||||
defaultMode: 420
|
||||
items:
|
||||
- key: tls.crt
|
||||
path: tls.crt
|
||||
- key: tls.key
|
||||
path: tls.key
|
||||
- key: ca.crt
|
||||
path: ca.crt
|
||||
optional: true
|
||||
secretName: argocd-repo-server-tls
|
||||
- name: cmp-tmp
|
||||
- name: helm-working-dir
|
||||
- name: plugins
|
||||
- name: var-files
|
||||
- name: tmp
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-ssh-known-hosts-cm
|
||||
name: ssh-known-hosts
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-tls-certs-cm
|
||||
name: tls-certs
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-gpg-keys-cm
|
||||
name: gpg-keys
|
||||
- name: gpg-keyring
|
||||
- name: argocd-repo-server-tls
|
||||
secret:
|
||||
defaultMode: 420
|
||||
items:
|
||||
- key: tls.crt
|
||||
path: tls.crt
|
||||
- key: tls.key
|
||||
path: tls.key
|
||||
- key: ca.crt
|
||||
path: ca.crt
|
||||
optional: true
|
||||
secretName: argocd-repo-server-tls
|
||||
|
||||
@@ -4,24 +4,24 @@ spec:
|
||||
template:
|
||||
spec:
|
||||
imagePullSecrets:
|
||||
- name: gitlab-pull-secret
|
||||
- name: gitlab-pull-secret
|
||||
containers:
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/helmfile-cmp:latest
|
||||
imagePullPolicy: Always
|
||||
name: helmfile-cmp
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/helmfile-cmp:latest
|
||||
imagePullPolicy: Always
|
||||
name: helmfile-cmp
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
|
||||
@@ -3,5 +3,5 @@
|
||||
img=registry.gitlab.com/oceanbox/manifests/helmfile-cmp
|
||||
tag=${1:-latest}
|
||||
|
||||
docker build -t $img:$tag .
|
||||
docker push $img:$tag
|
||||
docker build -t "${img}":"${tag}" .
|
||||
docker push "${img}":"${tag}"
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#!/bin/sh
|
||||
# shellcheck disable=SC2154
|
||||
|
||||
# NOTE: Ensure errors are part of exitcode
|
||||
# set -o pipefail
|
||||
@@ -10,7 +11,7 @@ export HELM_CONFIG_HOME=/tmp/helm/config
|
||||
export HELMFILE_CACHE_HOME=/tmp/helmfile/cache
|
||||
export HELMFILE_TEMPDIR=/tmp/helmfile/tmp
|
||||
|
||||
test -n ARGOCD_ENV_HELMFILE_ENVIRONMENT && export HELMFILE_ENVIRONMENT=$ARGOCD_ENV_HELMFILE_ENVIRONMENT
|
||||
test -n ARGOCD_ENV_HELMFILE_FILE_PATH && export HELMFILE_FILE_PATH=$ARGOCD_ENV_HELMFILE_FILE_PATH
|
||||
test -n ARGOCD_ENV_HELMFILE_ENVIRONMENT && export HELMFILE_ENVIRONMENT="${ARGOCD_ENV_HELMFILE_ENVIRONMENT}"
|
||||
test -n ARGOCD_ENV_HELMFILE_FILE_PATH && export HELMFILE_FILE_PATH="${ARGOCD_ENV_HELMFILE_FILE_PATH}"
|
||||
|
||||
helmfile -n "$ARGOCD_APP_NAMESPACE" $ARGS template --include-crds -q
|
||||
helmfile -n "${ARGOCD_APP_NAMESPACE}" "${ARGS}" template -q --include-crds
|
||||
|
||||
@@ -4,7 +4,7 @@ metadata:
|
||||
name: helmfile-cmp
|
||||
spec:
|
||||
generate:
|
||||
command: [ "/bin/sh" ]
|
||||
command: ["/bin/sh"]
|
||||
args:
|
||||
- /plugin/generate.sh
|
||||
lockRepo: false
|
||||
|
||||
@@ -44,341 +44,341 @@ spec:
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-repo-server
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
- podAffinityTerm:
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: argocd-repo-server
|
||||
topologyKey: kubernetes.io/hostname
|
||||
weight: 100
|
||||
containers:
|
||||
- args:
|
||||
- /usr/local/bin/argocd-repo-server
|
||||
- --port=8081
|
||||
- --metrics-port=8084
|
||||
env:
|
||||
- name: ARGOCD_REPO_SERVER_NAME
|
||||
value: argocd-repo-server
|
||||
- name: ARGOCD_RECONCILIATION_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: timeout.reconciliation
|
||||
name: argocd-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LOGFORMAT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.log.format
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LOGLEVEL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.log.level
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.parallelism.limit
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.listen.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.metrics.listen.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_DISABLE_TLS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.disable.tls
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_MIN_VERSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.minversion
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_MAX_VERSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.maxversion
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_CIPHERS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.ciphers
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_CACHE_EXPIRATION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.repo.cache.expiration
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_SERVER
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.server
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_COMPRESSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.compression
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDISDB
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.db
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-username
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-password
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: ARGOCD_DEFAULT_CACHE_EXPIRATION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.default.cache.expiration
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_INSECURE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.insecure
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_HEADERS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.headers
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.max.combined.directory.manifests.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.plugin.tar.exclusions
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.allow.oob.symlinks
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.streamed.manifest.max.tar.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.streamed.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.helm.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.disable.helm.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_MODULES_ENABLED
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.enable.git.submodule
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.git.lsremote.parallelism.limit
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_REQUEST_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.git.request.timeout
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: HELM_CACHE_HOME
|
||||
value: /helm-working-dir
|
||||
- name: HELM_CONFIG_HOME
|
||||
value: /helm-working-dir
|
||||
- name: HELM_DATA_HOME
|
||||
value: /helm-working-dir
|
||||
image: quay.io/argoproj/argocd:v2.10.4
|
||||
imagePullPolicy: IfNotPresent
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz?full=true
|
||||
port: metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
name: repo-server
|
||||
ports:
|
||||
- containerPort: 8081
|
||||
- args:
|
||||
- /usr/local/bin/argocd-repo-server
|
||||
- --port=8081
|
||||
- --metrics-port=8084
|
||||
env:
|
||||
- name: ARGOCD_REPO_SERVER_NAME
|
||||
value: argocd-repo-server
|
||||
- name: ARGOCD_RECONCILIATION_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: timeout.reconciliation
|
||||
name: argocd-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LOGFORMAT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.log.format
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LOGLEVEL
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.log.level
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.parallelism.limit
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.listen.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.metrics.listen.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_DISABLE_TLS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.disable.tls
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_MIN_VERSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.minversion
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_MAX_VERSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.maxversion
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_TLS_CIPHERS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.tls.ciphers
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_CACHE_EXPIRATION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.repo.cache.expiration
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_SERVER
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.server
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_COMPRESSION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.compression
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDISDB
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: redis.db
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: REDIS_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-username
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: redis-password
|
||||
name: argocd-redis
|
||||
optional: true
|
||||
- name: ARGOCD_DEFAULT_CACHE_EXPIRATION
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.default.cache.expiration
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_ADDRESS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.address
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_INSECURE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.insecure
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_OTLP_HEADERS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: otlp.headers
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.max.combined.directory.manifests.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.plugin.tar.exclusions
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.allow.oob.symlinks
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.streamed.manifest.max.tar.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.streamed.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.helm.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.disable.helm.manifest.max.extracted.size
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_MODULES_ENABLED
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.enable.git.submodule
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.git.lsremote.parallelism.limit
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: ARGOCD_GIT_REQUEST_TIMEOUT
|
||||
valueFrom:
|
||||
configMapKeyRef:
|
||||
key: reposerver.git.request.timeout
|
||||
name: argocd-cmd-params-cm
|
||||
optional: true
|
||||
- name: HELM_CACHE_HOME
|
||||
value: /helm-working-dir
|
||||
- name: HELM_CONFIG_HOME
|
||||
value: /helm-working-dir
|
||||
- name: HELM_DATA_HOME
|
||||
value: /helm-working-dir
|
||||
image: quay.io/argoproj/argocd:v2.10.4
|
||||
imagePullPolicy: IfNotPresent
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz?full=true
|
||||
port: metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
name: repo-server
|
||||
protocol: TCP
|
||||
- containerPort: 8084
|
||||
name: metrics
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
resources: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /app/config/ssh
|
||||
name: ssh-known-hosts
|
||||
- mountPath: /app/config/tls
|
||||
name: tls-certs
|
||||
- mountPath: /app/config/gpg/source
|
||||
name: gpg-keys
|
||||
- mountPath: /app/config/gpg/keys
|
||||
name: gpg-keyring
|
||||
- mountPath: /app/config/reposerver/tls
|
||||
name: argocd-repo-server-tls
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
|
||||
imagePullPolicy: Always
|
||||
name: kustomize-helm-with-rewrite
|
||||
resources: {}
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: cmp-tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
ports:
|
||||
- containerPort: 8081
|
||||
name: repo-server
|
||||
protocol: TCP
|
||||
- containerPort: 8084
|
||||
name: metrics
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: metrics
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 1
|
||||
resources: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /app/config/ssh
|
||||
name: ssh-known-hosts
|
||||
- mountPath: /app/config/tls
|
||||
name: tls-certs
|
||||
- mountPath: /app/config/gpg/source
|
||||
name: gpg-keys
|
||||
- mountPath: /app/config/gpg/keys
|
||||
name: gpg-keyring
|
||||
- mountPath: /app/config/reposerver/tls
|
||||
name: argocd-repo-server-tls
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: tmp
|
||||
- command:
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
|
||||
imagePullPolicy: Always
|
||||
name: kustomize-helm-with-rewrite
|
||||
resources: {}
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- mountPath: /home/argocd/cmp-server/plugins
|
||||
name: plugins
|
||||
- mountPath: /tmp
|
||||
name: cmp-tmp
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
dnsPolicy: ClusterFirst
|
||||
imagePullSecrets:
|
||||
- name: gitlab-pull-secret
|
||||
- name: gitlab-pull-secret
|
||||
initContainers:
|
||||
- command:
|
||||
- /bin/cp
|
||||
- -n
|
||||
- /usr/local/bin/argocd
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: quay.io/argoproj/argocd:v2.10.4
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: copyutil
|
||||
resources: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- command:
|
||||
- /bin/sh
|
||||
- /plugin/init-helm-repos.sh
|
||||
image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
|
||||
imagePullPolicy: Always
|
||||
name: init-helm-repos
|
||||
resources: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsUser: 999
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
env:
|
||||
- name: OCEANBOX_HELM_ACCESS_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: token
|
||||
name: oceanbox-helm
|
||||
optional: false
|
||||
volumeMounts:
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
- command:
|
||||
- /bin/cp
|
||||
- -n
|
||||
- /usr/local/bin/argocd
|
||||
- /var/run/argocd/argocd-cmp-server
|
||||
image: quay.io/argoproj/argocd:v2.10.4
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: copyutil
|
||||
resources: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /var/run/argocd
|
||||
name: var-files
|
||||
- command:
|
||||
- /bin/sh
|
||||
- /plugin/init-helm-repos.sh
|
||||
image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
|
||||
imagePullPolicy: Always
|
||||
name: init-helm-repos
|
||||
resources: {}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsUser: 999
|
||||
runAsNonRoot: true
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
env:
|
||||
- name: OCEANBOX_HELM_ACCESS_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
key: token
|
||||
name: oceanbox-helm
|
||||
optional: false
|
||||
volumeMounts:
|
||||
- mountPath: /helm-working-dir
|
||||
name: helm-working-dir
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
securityContext: {}
|
||||
@@ -386,40 +386,39 @@ spec:
|
||||
serviceAccountName: argocd-repo-server
|
||||
terminationGracePeriodSeconds: 30
|
||||
volumes:
|
||||
- emptyDir: {}
|
||||
name: cmp-tmp
|
||||
- emptyDir: {}
|
||||
name: helm-working-dir
|
||||
- emptyDir: {}
|
||||
name: plugins
|
||||
- emptyDir: {}
|
||||
name: var-files
|
||||
- emptyDir: {}
|
||||
name: tmp
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-ssh-known-hosts-cm
|
||||
name: ssh-known-hosts
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-tls-certs-cm
|
||||
name: tls-certs
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-gpg-keys-cm
|
||||
name: gpg-keys
|
||||
- emptyDir: {}
|
||||
name: gpg-keyring
|
||||
- name: argocd-repo-server-tls
|
||||
secret:
|
||||
defaultMode: 420
|
||||
items:
|
||||
- key: tls.crt
|
||||
path: tls.crt
|
||||
- key: tls.key
|
||||
path: tls.key
|
||||
- key: ca.crt
|
||||
path: ca.crt
|
||||
optional: true
|
||||
secretName: argocd-repo-server-tls
|
||||
|
||||
- emptyDir: {}
|
||||
name: cmp-tmp
|
||||
- emptyDir: {}
|
||||
name: helm-working-dir
|
||||
- emptyDir: {}
|
||||
name: plugins
|
||||
- emptyDir: {}
|
||||
name: var-files
|
||||
- emptyDir: {}
|
||||
name: tmp
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-ssh-known-hosts-cm
|
||||
name: ssh-known-hosts
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-tls-certs-cm
|
||||
name: tls-certs
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: argocd-gpg-keys-cm
|
||||
name: gpg-keys
|
||||
- emptyDir: {}
|
||||
name: gpg-keyring
|
||||
- name: argocd-repo-server-tls
|
||||
secret:
|
||||
defaultMode: 420
|
||||
items:
|
||||
- key: tls.crt
|
||||
path: tls.crt
|
||||
- key: tls.key
|
||||
path: tls.key
|
||||
- key: ca.crt
|
||||
path: ca.crt
|
||||
optional: true
|
||||
secretName: argocd-repo-server-tls
|
||||
|
||||
@@ -13,7 +13,7 @@ kubectl --context ekman apply -f cluster-admin-token.yaml
|
||||
# kubectl --context oceanbox apply -f _cluster-ekman.yaml
|
||||
|
||||
token=$(kubectl --context ekman get secret -n kube-system argocd-manager-token -o yaml | grep ' token:' | cut -d' ' -f4 | base64 -d)
|
||||
sed "s/@token@/$token/" cluster-ekman.yaml > _cluster-ekman.yaml
|
||||
sed "s/@token@/${token}/" cluster-ekman.yaml > _cluster-ekman.yaml
|
||||
echo "configure argocd ekman-cluster..."
|
||||
cat _cluster-ekman.yaml
|
||||
kubectl --context oceanbox apply -f _cluster-ekman.yaml
|
||||
|
||||
@@ -13,4 +13,3 @@ stringData:
|
||||
name: staging-vcluster
|
||||
server: https://staging-vcluster.staging-vcluster
|
||||
type: Opaque
|
||||
|
||||
|
||||
+11
-11
@@ -19,12 +19,12 @@ applications:
|
||||
plugin:
|
||||
name: helmfile-cmp
|
||||
env:
|
||||
- name: CLUSTER_NAME
|
||||
value: replaceme
|
||||
- name: HELMFILE_ENVIRONMENT
|
||||
value: default
|
||||
- name: HELMFILE_FILE_PATH
|
||||
value: system.yaml.gotmpl
|
||||
- name: CLUSTER_NAME
|
||||
value: replaceme
|
||||
- name: HELMFILE_ENVIRONMENT
|
||||
value: default
|
||||
- name: HELMFILE_FILE_PATH
|
||||
value: system.yaml.gotmpl
|
||||
projects:
|
||||
sys:
|
||||
namespace: argocd
|
||||
@@ -32,12 +32,12 @@ projects:
|
||||
additionalAnnotations: {}
|
||||
description: sys components project
|
||||
sourceRepos:
|
||||
- '*'
|
||||
- "*"
|
||||
destinations:
|
||||
- namespace: '*'
|
||||
server: https://kubernetes.default.svc
|
||||
- namespace: "*"
|
||||
server: https://kubernetes.default.svc
|
||||
clusterResourceWhitelist:
|
||||
- group: '*'
|
||||
kind: '*'
|
||||
- group: "*"
|
||||
kind: "*"
|
||||
orphanedResources:
|
||||
warn: false
|
||||
|
||||
@@ -59,12 +59,18 @@ spec:
|
||||
resources:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumeMounts }}
|
||||
volumeMounts:
|
||||
- name: statestore
|
||||
mountPath: /app/components/statestore.yaml
|
||||
subPath: statestore.yaml
|
||||
{{- with .Values.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumes }}
|
||||
volumes:
|
||||
- name: statestore
|
||||
configMap:
|
||||
name: {{ include "diagrid-dashboard.fullname" . }}-statestore
|
||||
{{- with .Values.volumes }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: diadash-statestore
|
||||
name: {{ include "diagrid-dashboard.fullname" . }}-statestore
|
||||
data:
|
||||
statestore.yaml: |
|
||||
apiVersion: dapr.io/v1alpha1
|
||||
@@ -17,10 +17,7 @@ data:
|
||||
- name: redisUsername
|
||||
value: default
|
||||
- name: redisPassword
|
||||
value: mrtz-password
|
||||
# secretKeyRef:
|
||||
# key: redis-password
|
||||
# name: {{ .Values.statestore.redis }}
|
||||
value: secret
|
||||
- name: actorStateStore
|
||||
value: "true"
|
||||
- name: redisDB
|
||||
|
||||
@@ -130,14 +130,14 @@ resources: {}
|
||||
# memory: 128Mi
|
||||
|
||||
# This sets up the liveness and readiness probes. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
# livenessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: http
|
||||
# readinessProbe:
|
||||
# httpGet:
|
||||
# path: /
|
||||
# port: http
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
|
||||
# This section sets up autoscaling. More information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
|
||||
autoscaling:
|
||||
@@ -148,16 +148,10 @@ autoscaling:
|
||||
# targetMemoryUtilizationPercentage: 80
|
||||
|
||||
# Additional volumes on the output Deployment definition.
|
||||
volumes:
|
||||
- name: statestore
|
||||
configMap:
|
||||
name: diadash-statestore
|
||||
# Must be a list: the Deployment template renders these entries as additional items under `volumes:`.
volumes: []
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts:
|
||||
- name: statestore
|
||||
mountPath: /app/components/statestore.yaml
|
||||
subPath: statestore.yaml
|
||||
# Must be a list: the Deployment template renders these entries as additional items under `volumeMounts:`.
volumeMounts: []
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
|
||||
@@ -13,9 +13,9 @@ type: application
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: v1.2.4
|
||||
version: v1.6.0
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "v1.2.4"
|
||||
appVersion: "v1.6.0"
|
||||
|
||||
@@ -12,7 +12,7 @@ image:
|
||||
# This sets the pull policy for images.
|
||||
pullPolicy: IfNotPresent
|
||||
# Overrides the image tag whose default is the chart appVersion.
|
||||
tag: v1.2.4
|
||||
tag: v1.6.0
|
||||
# These are the secrets for pulling an image from a private repository. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
|
||||
imagePullSecrets:
|
||||
- name: gitlab-pull-secret
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# Declare variables to be passed into your templates.
|
||||
replicaCount: 1
|
||||
image:
|
||||
repository: registry.gitlab.com/oceanbox/makai/makai
|
||||
repository: registry.gitlab.com/oceanbox/makai
|
||||
tag: v0.1.0
|
||||
pullPolicy: IfNotPresent
|
||||
init:
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
replicaCount: 1
|
||||
image:
|
||||
repository: registry
|
||||
tag: 2
|
||||
# Quoted so the image tag stays a string instead of being typed as a YAML integer.
tag: "3"
|
||||
pullPolicy: IfNotPresent
|
||||
init:
|
||||
enabled: false
|
||||
|
||||
@@ -27,7 +27,7 @@ releases:
|
||||
- name: argocd-apps
|
||||
namespace: argocd
|
||||
chart: argo/argocd-apps
|
||||
version: 0.0.9
|
||||
version: 2.0.3
|
||||
condition: argo.apps.enabled
|
||||
values:
|
||||
- ../values/argo/values/apps.yaml.gotmpl
|
||||
|
||||
@@ -3,7 +3,8 @@ bases:
|
||||
|
||||
repositories:
|
||||
- name: cert-manager
|
||||
url: 'https://charts.jetstack.io'
|
||||
oci: true
|
||||
url: 'quay.io/jetstack/charts'
|
||||
|
||||
commonLabels:
|
||||
tier: system
|
||||
@@ -12,7 +13,7 @@ releases:
|
||||
- name: cert-manager
|
||||
namespace: cert-manager
|
||||
chart: cert-manager/cert-manager
|
||||
version: 1.12.13
|
||||
version: v1.19.2
|
||||
condition: cert_manager.enabled
|
||||
values:
|
||||
- ../values/cert-manager/values/cert-manager.yaml.gotmpl
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
bases:
|
||||
- ../envs/environments.yaml.gotmpl
|
||||
|
||||
repositories:
|
||||
- name: forgejo
|
||||
oci: true
|
||||
url: code.forgejo.org/forgejo-helm
|
||||
|
||||
commonLabels:
|
||||
tier: system
|
||||
|
||||
releases:
|
||||
- name: forgejo
|
||||
namespace: forgejo
|
||||
chart: forgejo/forgejo
|
||||
version: 16.0.0
|
||||
condition: forgejo.enabled
|
||||
values:
|
||||
- ../values/forgejo/values/values.yaml
|
||||
- ../values/forgejo/values/values-{{ .Environment.Name }}.yaml
|
||||
postRenderer: ../bin/kustomizer
|
||||
postRendererArgs:
|
||||
- ../values/forgejo/kustomize/{{ .Environment.Name }}
|
||||
missingFileHandler: Info
|
||||
- name: manifests
|
||||
namespace: forgejo
|
||||
chart: manifests
|
||||
condition: forgejo.enabled
|
||||
missingFileHandler: Info
|
||||
values:
|
||||
- ../values/env.yaml
|
||||
- ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
|
||||
- ../values/forgejo/env.yaml.gotmpl
|
||||
- ../values/forgejo/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
|
||||
hooks:
|
||||
- events: [ prepare, cleanup ]
|
||||
showlogs: true
|
||||
command: ../bin/helmify
|
||||
args:
|
||||
- '{{`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}`}}'
|
||||
- '{{`{{ .Release.Chart }}`}}'
|
||||
- '{{`{{ .Environment.Name }}`}}'
|
||||
- ../values/forgejo/manifests
|
||||
- manifests
|
||||
@@ -12,7 +12,7 @@ releases:
|
||||
- name: ingress-nginx
|
||||
namespace: ingress-nginx
|
||||
chart: ingress-nginx/ingress-nginx
|
||||
version: 4.8.3
|
||||
version: 4.14.1
|
||||
condition: nginx.enabled
|
||||
values:
|
||||
- ../values/ingress-nginx/values/ingress-nginx.yaml.gotmpl
|
||||
|
||||
@@ -15,7 +15,7 @@ releases:
|
||||
- name: kyverno
|
||||
namespace: kyverno
|
||||
chart: kyverno/kyverno
|
||||
version: 3.5.1
|
||||
version: 3.6.1
|
||||
condition: kyverno.enabled
|
||||
values:
|
||||
- ../values/kyverno/values/kyverno.yaml.gotmpl
|
||||
|
||||
@@ -12,7 +12,7 @@ releases:
|
||||
- name: mariadb-operator
|
||||
namespace: mariadb-operator
|
||||
chart: mariadb-operator/mariadb-operator
|
||||
version: 25.8.4
|
||||
version: 25.10.3
|
||||
condition: mariadb_operator.enabled
|
||||
values:
|
||||
- ../values/mariadb-operator/values/mariadb-operator.yaml.gotmpl
|
||||
|
||||
@@ -16,7 +16,7 @@ releases:
|
||||
namespace: {{ .Environment.Name }}-openfga
|
||||
{{- end }}
|
||||
chart: openfga/openfga
|
||||
version: 0.2.45
|
||||
version: 0.2.50
|
||||
condition: openfga.enabled
|
||||
values:
|
||||
- ../values/openfga/values/values.yaml
|
||||
|
||||
@@ -12,7 +12,7 @@ releases:
|
||||
- name: opentelemetry-collector
|
||||
namespace: otel
|
||||
chart: open-telemetry/opentelemetry-collector
|
||||
version: 0.134.1
|
||||
version: 0.142.1
|
||||
condition: otel.enabled
|
||||
values:
|
||||
- ../values/opentelemetry-collector/values/values.yaml
|
||||
|
||||
@@ -15,7 +15,7 @@ releases:
|
||||
- name: postgres-operator
|
||||
namespace: cnpg
|
||||
chart: cloudnative-pg/cloudnative-pg
|
||||
version: 0.26.1
|
||||
version: 0.27.0
|
||||
condition: postgres_operator.enabled
|
||||
values:
|
||||
- ../values/postgres-operator/values/postgres-operator.yaml.gotmpl
|
||||
|
||||
@@ -13,7 +13,7 @@ releases:
|
||||
- name: {{ .Environment.Name }}-rabbitmq
|
||||
namespace: rabbitmq
|
||||
chart: bitnami/rabbitmq
|
||||
version: 12.9.0
|
||||
version: 13.0.3
|
||||
condition: rabbitmq.enabled
|
||||
values:
|
||||
- ../values/rabbitmq/values/values.yaml
|
||||
|
||||
@@ -13,7 +13,7 @@ releases:
|
||||
- name: slurm-operator
|
||||
namespace: slinky
|
||||
chart: slurm-operator/slurm-operator
|
||||
version: 0.4.0
|
||||
version: 0.4.1
|
||||
condition: slurm_operator.enabled
|
||||
values:
|
||||
- ../values/slurm-operator/values/slurm-operator.yaml.gotmpl
|
||||
|
||||
@@ -13,7 +13,7 @@ releases:
|
||||
- name: spegel
|
||||
namespace: spegel
|
||||
chart: spegel/spegel
|
||||
version: 0.5.1
|
||||
version: 0.6.0
|
||||
condition: spegel.enabled
|
||||
values:
|
||||
- ../values/spegel/values/spegel.yaml.gotmpl
|
||||
|
||||
@@ -15,7 +15,7 @@ releases:
|
||||
- name: velero
|
||||
namespace: velero
|
||||
chart: velero/velero
|
||||
version: 11.1.1
|
||||
version: 11.3.2
|
||||
condition: velero.enabled
|
||||
values:
|
||||
- ../values/velero/values/velero.yaml.gotmpl
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
let
|
||||
sources = import ./default.nix;
|
||||
pkgs = import sources.nixpkgs { };
|
||||
pre-commit = import sources.git-hooks;
|
||||
|
||||
globalExcludes = [
|
||||
"nix/default.nix"
|
||||
"attic"
|
||||
"vcluster"
|
||||
".*vendor"
|
||||
".*chart/.*"
|
||||
".*schema.json"
|
||||
];
|
||||
|
||||
in
|
||||
pre-commit.run {
|
||||
src = pkgs.nix-gitignore.gitignoreSource [ ] ../.;
|
||||
# Do not run at pre-commit time
|
||||
default_stages = [
|
||||
"pre-push"
|
||||
];
|
||||
# TODO(mrtz): Remove when default
|
||||
package = pkgs.prek;
|
||||
# Linters From https://github.com/cachix/pre-commit-hooks.nix
|
||||
hooks = {
|
||||
nixfmt-rfc-style = {
|
||||
enable = true;
|
||||
excludes = globalExcludes;
|
||||
};
|
||||
|
||||
trim-trailing-whitespace.enable = true;
|
||||
|
||||
shellcheck = {
|
||||
enable = true;
|
||||
excludes = [
|
||||
"vcluster/"
|
||||
"attic/"
|
||||
];
|
||||
args = [
|
||||
"-x"
|
||||
"-o"
|
||||
"all"
|
||||
];
|
||||
};
|
||||
|
||||
yamllint = {
|
||||
enable = true;
|
||||
excludes = [
|
||||
"attic/"
|
||||
"charts/templates/"
|
||||
"charts/"
|
||||
"values/"
|
||||
"vcluster/"
|
||||
];
|
||||
settings = {
|
||||
strict = true;
|
||||
configData = ''{ extends: default, rules: { document-start: disable, line-length: {max: 300} } }'';
|
||||
};
|
||||
};
|
||||
|
||||
check-json.enable = true;
|
||||
|
||||
renovate-config-validator = {
|
||||
enable = true;
|
||||
files = "renovate.json$";
|
||||
entry = "renovate-config-validator";
|
||||
};
|
||||
|
||||
};
|
||||
}
|
||||
+127
-24
@@ -9,8 +9,15 @@
|
||||
*/
|
||||
# Generated by npins. Do not modify; will be overwritten regularly
|
||||
let
|
||||
data = builtins.fromJSON (builtins.readFile ./sources.json);
|
||||
version = data.version;
|
||||
# Backwards-compatibly make something that previously didn't take any arguments take some
|
||||
# The function must return an attrset, and will unfortunately be eagerly evaluated
|
||||
# Same thing, but it catches eval errors on the default argument so that one may still call it with other arguments
|
||||
mkFunctor =
|
||||
fn:
|
||||
let
|
||||
e = builtins.tryEval (fn { });
|
||||
in
|
||||
(if e.success then e.value else { error = fn { }; }) // { __functor = _self: fn; };
|
||||
|
||||
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295
|
||||
range =
|
||||
@@ -21,7 +28,6 @@ let
|
||||
|
||||
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269
|
||||
stringAsChars = f: s: concatStrings (map f (stringToCharacters s));
|
||||
concatMapStrings = f: list: concatStrings (map f list);
|
||||
concatStrings = builtins.concatStringsSep "";
|
||||
|
||||
# If the environment variable NPINS_OVERRIDE_${name} is set, then use
|
||||
@@ -48,41 +54,87 @@ let
|
||||
|
||||
mkSource =
|
||||
name: spec:
|
||||
{
|
||||
pkgs ? null,
|
||||
}:
|
||||
assert spec ? type;
|
||||
let
|
||||
# Unify across builtin and pkgs fetchers.
|
||||
# `fetchGit` requires a wrapper because of slight API differences.
|
||||
fetchers =
|
||||
if pkgs == null then
|
||||
{
|
||||
inherit (builtins) fetchTarball fetchurl;
|
||||
# For some fucking reason, fetchGit has a different signature than the other builtin fetchers …
|
||||
fetchGit = args: (builtins.fetchGit args).outPath;
|
||||
}
|
||||
else
|
||||
{
|
||||
fetchTarball =
|
||||
{
|
||||
url,
|
||||
sha256,
|
||||
}:
|
||||
pkgs.fetchzip {
|
||||
inherit url sha256;
|
||||
extension = "tar";
|
||||
};
|
||||
inherit (pkgs) fetchurl;
|
||||
fetchGit =
|
||||
{
|
||||
url,
|
||||
submodules,
|
||||
rev,
|
||||
name,
|
||||
narHash,
|
||||
}:
|
||||
pkgs.fetchgit {
|
||||
inherit url rev name;
|
||||
fetchSubmodules = submodules;
|
||||
hash = narHash;
|
||||
};
|
||||
};
|
||||
|
||||
# Dispatch to the correct code path based on the type
|
||||
path =
|
||||
if spec.type == "Git" then
|
||||
mkGitSource spec
|
||||
mkGitSource fetchers spec
|
||||
else if spec.type == "GitRelease" then
|
||||
mkGitSource spec
|
||||
mkGitSource fetchers spec
|
||||
else if spec.type == "PyPi" then
|
||||
mkPyPiSource spec
|
||||
mkPyPiSource fetchers spec
|
||||
else if spec.type == "Channel" then
|
||||
mkChannelSource spec
|
||||
mkChannelSource fetchers spec
|
||||
else if spec.type == "Tarball" then
|
||||
mkTarballSource spec
|
||||
mkTarballSource fetchers spec
|
||||
else if spec.type == "Container" then
|
||||
mkContainerSource pkgs spec
|
||||
else
|
||||
builtins.throw "Unknown source type ${spec.type}";
|
||||
in
|
||||
spec // { outPath = mayOverride name path; };
|
||||
|
||||
mkGitSource =
|
||||
{
|
||||
fetchTarball,
|
||||
fetchGit,
|
||||
...
|
||||
}:
|
||||
{
|
||||
repository,
|
||||
revision,
|
||||
url ? null,
|
||||
submodules,
|
||||
hash,
|
||||
branch ? null,
|
||||
...
|
||||
}:
|
||||
assert repository ? type;
|
||||
# At the moment, either it is a plain git repository (which has an url), or it is a GitHub/GitLab repository
|
||||
# In the latter case, there we will always be an url to the tarball
|
||||
if url != null && !submodules then
|
||||
builtins.fetchTarball {
|
||||
fetchTarball {
|
||||
inherit url;
|
||||
sha256 = hash; # FIXME: check nix version & use SRI hashes
|
||||
sha256 = hash;
|
||||
}
|
||||
else
|
||||
let
|
||||
@@ -93,6 +145,8 @@ let
|
||||
"https://github.com/${repository.owner}/${repository.repo}.git"
|
||||
else if repository.type == "GitLab" then
|
||||
"${repository.server}/${repository.repo_path}.git"
|
||||
else if repository.type == "Forgejo" then
|
||||
"${repository.server}/${repository.owner}/${repository.repo}.git"
|
||||
else
|
||||
throw "Unrecognized repository type ${repository.type}";
|
||||
urlToName =
|
||||
@@ -107,40 +161,89 @@ let
|
||||
"${if matched == null then "source" else builtins.head matched}${appendShort}";
|
||||
name = urlToName url revision;
|
||||
in
|
||||
builtins.fetchGit {
|
||||
fetchGit {
|
||||
rev = revision;
|
||||
inherit name;
|
||||
# hash = hash;
|
||||
inherit url submodules;
|
||||
narHash = hash;
|
||||
|
||||
inherit name submodules url;
|
||||
};
|
||||
|
||||
mkPyPiSource =
|
||||
{ url, hash, ... }:
|
||||
builtins.fetchurl {
|
||||
{ fetchurl, ... }:
|
||||
{
|
||||
url,
|
||||
hash,
|
||||
...
|
||||
}:
|
||||
fetchurl {
|
||||
inherit url;
|
||||
sha256 = hash;
|
||||
};
|
||||
|
||||
mkChannelSource =
|
||||
{ url, hash, ... }:
|
||||
builtins.fetchTarball {
|
||||
{ fetchTarball, ... }:
|
||||
{
|
||||
url,
|
||||
hash,
|
||||
...
|
||||
}:
|
||||
fetchTarball {
|
||||
inherit url;
|
||||
sha256 = hash;
|
||||
};
|
||||
|
||||
mkTarballSource =
|
||||
{ fetchTarball, ... }:
|
||||
{
|
||||
url,
|
||||
locked_url ? url,
|
||||
hash,
|
||||
...
|
||||
}:
|
||||
builtins.fetchTarball {
|
||||
fetchTarball {
|
||||
url = locked_url;
|
||||
sha256 = hash;
|
||||
};
|
||||
|
||||
mkContainerSource =
|
||||
pkgs:
|
||||
{
|
||||
image_name,
|
||||
image_tag,
|
||||
image_digest,
|
||||
...
|
||||
}:
|
||||
if pkgs == null then
|
||||
builtins.throw "container sources require passing in a Nixpkgs value: https://github.com/andir/npins/blob/master/README.md#using-the-nixpkgs-fetchers"
|
||||
else
|
||||
pkgs.dockerTools.pullImage {
|
||||
imageName = image_name;
|
||||
imageDigest = image_digest;
|
||||
finalImageTag = image_tag;
|
||||
};
|
||||
in
|
||||
if version == 5 then
|
||||
builtins.mapAttrs mkSource data.pins
|
||||
else
|
||||
throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`"
|
||||
mkFunctor (
|
||||
{
|
||||
input ? ./sources.json,
|
||||
}:
|
||||
let
|
||||
data =
|
||||
if builtins.isPath input then
|
||||
# while `readFile` will throw an error anyways if the path doesn't exist,
|
||||
# we still need to check beforehand because *our* error can be caught but not the one from the builtin
|
||||
# *piegames sighs*
|
||||
if builtins.pathExists input then
|
||||
builtins.fromJSON (builtins.readFile input)
|
||||
else
|
||||
throw "Input path ${toString input} does not exist"
|
||||
else if builtins.isAttrs input then
|
||||
input
|
||||
else
|
||||
throw "Unsupported input type ${builtins.typeOf input}, must be a path or an attrset";
|
||||
version = data.version;
|
||||
in
|
||||
if version == 7 then
|
||||
builtins.mapAttrs (name: spec: mkFunctor (mkSource name spec)) data.pins
|
||||
else
|
||||
throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`"
|
||||
)
|
||||
|
||||
+16
-3
@@ -1,11 +1,24 @@
|
||||
{
|
||||
"pins": {
|
||||
"git-hooks": {
|
||||
"type": "Git",
|
||||
"repository": {
|
||||
"type": "GitHub",
|
||||
"owner": "cachix",
|
||||
"repo": "git-hooks.nix"
|
||||
},
|
||||
"branch": "master",
|
||||
"submodules": false,
|
||||
"revision": "f0927703b7b1c8d97511c4116eb9b4ec6645a0fa",
|
||||
"url": "https://github.com/cachix/git-hooks.nix/archive/f0927703b7b1c8d97511c4116eb9b4ec6645a0fa.tar.gz",
|
||||
"hash": "sha256-6MkqajPICgugsuZ92OMoQcgSHnD6sJHwk8AxvMcIgTE="
|
||||
},
|
||||
"nixpkgs": {
|
||||
"type": "Channel",
|
||||
"name": "nixpkgs-unstable",
|
||||
"url": "https://releases.nixos.org/nixpkgs/nixpkgs-26.05pre903996.59b6c96beacc/nixexprs.tar.xz",
|
||||
"hash": "0b0yr9d1xyfwgpaj68bimsbjjbj7yis4whjvkrfdycfnasdf0gf0"
|
||||
"url": "https://releases.nixos.org/nixpkgs/nixpkgs-26.05pre927565.13868c071cc7/nixexprs.tar.xz",
|
||||
"hash": "sha256-wufp5c0nWh/87f9eK7xy1eZXms5zd4yl6S4SR+LfA08="
|
||||
}
|
||||
},
|
||||
"version": 5
|
||||
"version": 7
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
# Simple script for uploading a base64 encoded image into our database. For
|
||||
# grafana business image panels.
|
||||
|
||||
if [ $# -ne 2 ]
|
||||
if [[ $# -ne 2 ]]
|
||||
then
|
||||
echo "Usage: $0 <image-name> <file>.png"
|
||||
exit 1
|
||||
@@ -12,9 +12,9 @@ fi
|
||||
filename=$1
|
||||
file=$2
|
||||
|
||||
if [ ! -e $file ]
|
||||
if [[ ! -e "${file}" ]]
|
||||
then
|
||||
echo "file $file does not exist"
|
||||
echo "file ${file} does not exist"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -22,9 +22,9 @@ function create_image() {
|
||||
local filename=$1
|
||||
local data=$2
|
||||
cat << EOF
|
||||
INSERT INTO images VALUES('$filename', '$data');
|
||||
INSERT INTO images VALUES('${filename}', '${data}');
|
||||
EOF
|
||||
}
|
||||
|
||||
data=$(cat $file | base64 -w0)
|
||||
create_image $filename $data
|
||||
data=$(base64 -w0 < "${file}")
|
||||
create_image "${filename}" "${data}"
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
// -*- mode: jsonc -*-
|
||||
{
|
||||
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
||||
"extends": [
|
||||
|
||||
+203
-182
@@ -1,183 +1,204 @@
|
||||
groups:
|
||||
- name: etcd
|
||||
rules:
|
||||
- alert: etcdMembersDown
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value
|
||||
}}).'
|
||||
summary: etcd cluster members are down.
|
||||
expr: |-
|
||||
max without (endpoint) (
|
||||
sum without (instance) (up{job=~".*etcd.*"} == bool 0)
|
||||
or
|
||||
count without (To) (
|
||||
sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01
|
||||
)
|
||||
)
|
||||
> 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdInsufficientMembers
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
|
||||
}}).'
|
||||
summary: etcd cluster has insufficient number of members.
|
||||
expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
|
||||
without (instance) + 1) / 2)
|
||||
for: 3m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdNoLeader
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
|
||||
has no leader.'
|
||||
summary: etcd cluster has no leader.
|
||||
expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdHighNumberOfLeaderChanges
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
|
||||
within the last 15 minutes. Frequent elections may be a sign of insufficient
|
||||
resources, high network latency, or disruptions by other components and should
|
||||
be investigated.'
|
||||
summary: etcd cluster has high number of leader changes.
|
||||
expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
|
||||
or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m])
|
||||
>= 4
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighNumberOfFailedGRPCRequests
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
|
||||
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster has high number of failed grpc requests.
|
||||
expr: |-
|
||||
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
|
||||
/
|
||||
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code)
|
||||
> 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighNumberOfFailedGRPCRequests
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
|
||||
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster has high number of failed grpc requests.
|
||||
expr: |-
|
||||
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
|
||||
/
|
||||
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code)
|
||||
> 5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdGRPCRequestsSlow
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
|
||||
is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method
|
||||
}} method.'
|
||||
summary: etcd grpc requests are slow
|
||||
expr: |-
|
||||
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type))
|
||||
> 0.15
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdMemberCommunicationSlow
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": member communication with {{
|
||||
$labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance
|
||||
}}.'
|
||||
summary: etcd cluster member communication is slow.
|
||||
expr: |-
|
||||
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
|
||||
> 0.15
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighNumberOfFailedProposals
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
|
||||
within the last 30 minutes on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster has high number of proposal failures.
|
||||
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighFsyncDurations
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
|
||||
are {{ $value }}s on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster 99th percentile fsync durations are too high.
|
||||
expr: |-
|
||||
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
|
||||
> 0.5
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighFsyncDurations
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
|
||||
are {{ $value }}s on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster 99th percentile fsync durations are too high.
|
||||
expr: |-
|
||||
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
|
||||
> 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdHighCommitDurations
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
|
||||
{{ $value }}s on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster 99th percentile commit durations are too high.
|
||||
expr: |-
|
||||
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
|
||||
> 0.25
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdDatabaseQuotaLowSpace
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined
|
||||
quota on etcd instance {{ $labels.instance }}, please defrag or increase the
|
||||
quota as the writes to etcd will be disabled when it is full.'
|
||||
summary: etcd cluster database is running full.
|
||||
expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
|
||||
last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 >
|
||||
95
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdExcessiveDatabaseGrowth
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk
|
||||
space in the next four hours, based on write observations within the past
|
||||
four hours on etcd instance {{ $labels.instance }}, please check as it might
|
||||
be disruptive.'
|
||||
summary: etcd cluster database growing very fast.
|
||||
expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
|
||||
> etcd_server_quota_backend_bytes{job=~".*etcd.*"}
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdDatabaseHighFragmentationRatio
|
||||
annotations:
|
||||
description: 'etcd cluster "{{ $labels.job }}": database size in use on instance
|
||||
{{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual
|
||||
allocated disk space, please run defragmentation (e.g. etcdctl defrag) to
|
||||
retrieve the unused fragmented disk space.'
|
||||
runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
|
||||
summary: etcd database size in use is less than 50% of the actual allocated
|
||||
storage.
|
||||
expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
|
||||
/ last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5
|
||||
and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: etcd
|
||||
rules:
|
||||
- alert: etcdMembersDown
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": members are down ({{ $value
|
||||
}}).'
|
||||
summary: etcd cluster members are down.
|
||||
expr: |-
|
||||
max without (endpoint) (
|
||||
sum without (instance) (up{job=~".*etcd.*"} == bool 0)
|
||||
or
|
||||
count without (To) (
|
||||
sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01
|
||||
)
|
||||
)
|
||||
> 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdInsufficientMembers
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
|
||||
}}).'
|
||||
summary: etcd cluster has insufficient number of members.
|
||||
expr:
|
||||
sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
|
||||
without (instance) + 1) / 2)
|
||||
for: 3m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdNoLeader
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
|
||||
has no leader.'
|
||||
summary: etcd cluster has no leader.
|
||||
expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdHighNumberOfLeaderChanges
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
|
||||
within the last 15 minutes. Frequent elections may be a sign of insufficient
|
||||
resources, high network latency, or disruptions by other components and should
|
||||
be investigated.'
|
||||
summary: etcd cluster has high number of leader changes.
|
||||
expr:
|
||||
increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
|
||||
or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m])
|
||||
>= 4
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighNumberOfFailedGRPCRequests
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
|
||||
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster has high number of failed grpc requests.
|
||||
expr: |-
|
||||
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
|
||||
/
|
||||
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code)
|
||||
> 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighNumberOfFailedGRPCRequests
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
|
||||
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster has high number of failed grpc requests.
|
||||
expr: |-
|
||||
100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
|
||||
/
|
||||
sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code)
|
||||
> 5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdGRPCRequestsSlow
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
|
||||
is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method
|
||||
}} method.'
|
||||
summary: etcd grpc requests are slow
|
||||
expr: |-
|
||||
histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type))
|
||||
> 0.15
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdMemberCommunicationSlow
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": member communication with {{
|
||||
$labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance
|
||||
}}.'
|
||||
summary: etcd cluster member communication is slow.
|
||||
expr: |-
|
||||
histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
|
||||
> 0.15
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighNumberOfFailedProposals
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
|
||||
within the last 30 minutes on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster has high number of proposal failures.
|
||||
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighFsyncDurations
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
|
||||
are {{ $value }}s on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster 99th percentile fsync durations are too high.
|
||||
expr: |-
|
||||
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
|
||||
> 0.5
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdHighFsyncDurations
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
|
||||
are {{ $value }}s on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster 99th percentile fsync durations are too high.
|
||||
expr: |-
|
||||
histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
|
||||
> 1
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdHighCommitDurations
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
|
||||
{{ $value }}s on etcd instance {{ $labels.instance }}.'
|
||||
summary: etcd cluster 99th percentile commit durations are too high.
|
||||
expr: |-
|
||||
histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
|
||||
> 0.25
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdDatabaseQuotaLowSpace
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": database size exceeds the defined
|
||||
quota on etcd instance {{ $labels.instance }}, please defrag or increase the
|
||||
quota as the writes to etcd will be disabled when it is full.'
|
||||
summary: etcd cluster database is running full.
|
||||
expr:
|
||||
(last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
|
||||
last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 >
|
||||
95
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: etcdExcessiveDatabaseGrowth
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": Predicting running out of disk
|
||||
space in the next four hours, based on write observations within the past
|
||||
four hours on etcd instance {{ $labels.instance }}, please check as it might
|
||||
be disruptive.'
|
||||
summary: etcd cluster database growing very fast.
|
||||
expr:
|
||||
predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
|
||||
> etcd_server_quota_backend_bytes{job=~".*etcd.*"}
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: etcdDatabaseHighFragmentationRatio
|
||||
annotations:
|
||||
description:
|
||||
'etcd cluster "{{ $labels.job }}": database size in use on instance
|
||||
{{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual
|
||||
allocated disk space, please run defragmentation (e.g. etcdctl defrag) to
|
||||
retrieve the unused fragmented disk space.'
|
||||
runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
|
||||
summary:
|
||||
etcd database size in use is less than 50% of the actual allocated
|
||||
storage.
|
||||
expr:
|
||||
(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
|
||||
/ last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5
|
||||
and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
+46
-42
@@ -1,43 +1,47 @@
|
||||
groups:
|
||||
- name: general.rules
|
||||
rules:
|
||||
- alert: TargetDown
|
||||
annotations:
|
||||
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
|
||||
}} targets in {{ $labels.namespace }} namespace are down.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
|
||||
summary: One or more targets are unreachable.
|
||||
expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
|
||||
BY (cluster, job, namespace, service)) > 10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: Watchdog
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert meant to ensure that the entire alerting pipeline is functional.
|
||||
This alert is always firing, therefore it should always be firing in Alertmanager
|
||||
and always fire against a receiver. There are integrations with various notification
|
||||
mechanisms that send a notification when this alert is not firing. For example the
|
||||
"DeadMansSnitch" integration in PagerDuty.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
|
||||
summary: An alert that should always be firing to certify that Alertmanager
|
||||
is working properly.
|
||||
expr: vector(1)
|
||||
labels:
|
||||
severity: none
|
||||
- alert: InfoInhibitor
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert that is used to inhibit info alerts.
|
||||
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
|
||||
other alerts.
|
||||
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
|
||||
severity of 'warning' or 'critical' starts firing on the same namespace.
|
||||
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
|
||||
summary: Info-level alert inhibition.
|
||||
expr: ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
|
||||
"InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
|
||||
labels:
|
||||
severity: none
|
||||
- name: general.rules
|
||||
rules:
|
||||
- alert: TargetDown
|
||||
annotations:
|
||||
description:
|
||||
'{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
|
||||
}} targets in {{ $labels.namespace }} namespace are down.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
|
||||
summary: One or more targets are unreachable.
|
||||
expr:
|
||||
100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
|
||||
BY (cluster, job, namespace, service)) > 10
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: Watchdog
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert meant to ensure that the entire alerting pipeline is functional.
|
||||
This alert is always firing, therefore it should always be firing in Alertmanager
|
||||
and always fire against a receiver. There are integrations with various notification
|
||||
mechanisms that send a notification when this alert is not firing. For example the
|
||||
"DeadMansSnitch" integration in PagerDuty.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
|
||||
summary:
|
||||
An alert that should always be firing to certify that Alertmanager
|
||||
is working properly.
|
||||
expr: vector(1)
|
||||
labels:
|
||||
severity: none
|
||||
- alert: InfoInhibitor
|
||||
annotations:
|
||||
description: |
|
||||
This is an alert that is used to inhibit info alerts.
|
||||
By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
|
||||
other alerts.
|
||||
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
|
||||
severity of 'warning' or 'critical' starts firing on the same namespace.
|
||||
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
|
||||
summary: Info-level alert inhibition.
|
||||
expr:
|
||||
ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
|
||||
"InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
|
||||
labels:
|
||||
severity: none
|
||||
|
||||
+277
-258
@@ -1,262 +1,281 @@
|
||||
groups:
|
||||
- name: kubernetes-apps
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
|
||||
}}) is in waiting state (reason: "CrashLoopBackOff").'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
|
||||
summary: Pod is crash looping.
|
||||
expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
|
||||
job="kube-state-metrics", namespace=~".*"}[5m]) >= 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
||||
state for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
|
||||
summary: Pod has been in a non-ready state for more than 15 minutes.
|
||||
expr: |-
|
||||
sum by (namespace, pod, cluster) (
|
||||
max by (namespace, pod, cluster) (
|
||||
kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"}
|
||||
) * on (namespace, pod, cluster) group_left(owner_kind) topk by (namespace, pod, cluster) (
|
||||
1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"})
|
||||
)
|
||||
) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentGenerationMismatch
|
||||
annotations:
|
||||
description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
||||
}} does not match, this indicates that the Deployment has failed but has not
|
||||
been rolled back.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
|
||||
summary: Deployment generation mismatch due to possible roll-back
|
||||
expr: |-
|
||||
kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~".*"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
annotations:
|
||||
description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
|
||||
not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
|
||||
summary: Deployment has not matched the expected number of replicas.
|
||||
expr: |-
|
||||
(
|
||||
kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
>
|
||||
kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~".*"}
|
||||
) and (
|
||||
changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentRolloutStuck
|
||||
annotations:
|
||||
description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
|
||||
}} is not progressing for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
|
||||
summary: Deployment rollout is not progressing.
|
||||
expr: |-
|
||||
kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~".*"}
|
||||
!= 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetReplicasMismatch
|
||||
annotations:
|
||||
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
||||
not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
|
||||
summary: StatefulSet has not matched the expected number of replicas.
|
||||
expr: |-
|
||||
(
|
||||
kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
) and (
|
||||
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetGenerationMismatch
|
||||
annotations:
|
||||
description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
||||
}} does not match, this indicates that the StatefulSet has failed but has
|
||||
not been rolled back.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
|
||||
summary: StatefulSet generation mismatch due to possible roll-back
|
||||
expr: |-
|
||||
kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~".*"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||
annotations:
|
||||
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
||||
has not been rolled out.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
|
||||
summary: StatefulSet update has not been rolled out.
|
||||
expr: |-
|
||||
(
|
||||
max by (namespace, statefulset) (
|
||||
kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"}
|
||||
unless
|
||||
kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"}
|
||||
)
|
||||
*
|
||||
(
|
||||
kube_statefulset_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
- name: kubernetes-apps
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
description:
|
||||
'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
|
||||
}}) is in waiting state (reason: "CrashLoopBackOff").'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
|
||||
summary: Pod is crash looping.
|
||||
expr:
|
||||
max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
|
||||
job="kube-state-metrics", namespace=~".*"}[5m]) >= 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubePodNotReady
|
||||
annotations:
|
||||
description:
|
||||
Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
|
||||
state for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
|
||||
summary: Pod has been in a non-ready state for more than 15 minutes.
|
||||
expr: |-
|
||||
sum by (namespace, pod, cluster) (
|
||||
max by (namespace, pod, cluster) (
|
||||
kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"}
|
||||
) * on (namespace, pod, cluster) group_left(owner_kind) topk by (namespace, pod, cluster) (
|
||||
1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"})
|
||||
)
|
||||
) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentGenerationMismatch
|
||||
annotations:
|
||||
description:
|
||||
Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
|
||||
}} does not match, this indicates that the Deployment has failed but has not
|
||||
been rolled back.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
|
||||
summary: Deployment generation mismatch due to possible roll-back
|
||||
expr: |-
|
||||
kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}
|
||||
)
|
||||
) and (
|
||||
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[5m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
|
||||
finished or progressed for at least 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
|
||||
summary: DaemonSet rollout is stuck.
|
||||
expr: |-
|
||||
(
|
||||
(
|
||||
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~".*"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
annotations:
|
||||
description:
|
||||
Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
|
||||
not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
|
||||
summary: Deployment has not matched the expected number of replicas.
|
||||
expr: |-
|
||||
(
|
||||
kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
>
|
||||
kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~".*"}
|
||||
) and (
|
||||
changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDeploymentRolloutStuck
|
||||
annotations:
|
||||
description:
|
||||
Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
|
||||
}} is not progressing for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
|
||||
summary: Deployment rollout is not progressing.
|
||||
expr: |-
|
||||
kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~".*"}
|
||||
!= 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetReplicasMismatch
|
||||
annotations:
|
||||
description:
|
||||
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
|
||||
not matched the expected number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
|
||||
summary: StatefulSet has not matched the expected number of replicas.
|
||||
expr: |-
|
||||
(
|
||||
kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
) and (
|
||||
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetGenerationMismatch
|
||||
annotations:
|
||||
description:
|
||||
StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
|
||||
}} does not match, this indicates that the StatefulSet has failed but has
|
||||
not been rolled back.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
|
||||
summary: StatefulSet generation mismatch due to possible roll-back
|
||||
expr: |-
|
||||
kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~".*"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeStatefulSetUpdateNotRolledOut
|
||||
annotations:
|
||||
description:
|
||||
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
|
||||
has not been rolled out.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
|
||||
summary: StatefulSet update has not been rolled out.
|
||||
expr: |-
|
||||
(
|
||||
max by (namespace, statefulset) (
|
||||
kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"}
|
||||
unless
|
||||
kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"}
|
||||
)
|
||||
*
|
||||
(
|
||||
kube_statefulset_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}
|
||||
)
|
||||
) and (
|
||||
changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[5m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetRolloutStuck
|
||||
annotations:
|
||||
description:
|
||||
DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
|
||||
finished or progressed for at least 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
|
||||
summary: DaemonSet rollout is stuck.
|
||||
expr: |-
|
||||
(
|
||||
(
|
||||
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
) or (
|
||||
kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
0
|
||||
) or (
|
||||
kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
) or (
|
||||
kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
)
|
||||
) and (
|
||||
changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}[5m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
description:
|
||||
pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
|
||||
{{ $labels.container}} has been in waiting state for longer than 1 hour.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
|
||||
summary: Pod container waiting longer than 1 hour
|
||||
expr:
|
||||
sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
|
||||
namespace=~".*"}) > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
description:
|
||||
"{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are not scheduled."
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
|
||||
summary: DaemonSet pods are not scheduled.
|
||||
expr: |-
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
) or (
|
||||
-
|
||||
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetMisScheduled
|
||||
annotations:
|
||||
description:
|
||||
"{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are running where they are not supposed to run."
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
|
||||
summary: DaemonSet pods are misscheduled.
|
||||
expr:
|
||||
kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
0
|
||||
) or (
|
||||
kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
) or (
|
||||
kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
)
|
||||
) and (
|
||||
changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}[5m])
|
||||
==
|
||||
0
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeContainerWaiting
|
||||
annotations:
|
||||
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
|
||||
{{ $labels.container}} has been in waiting state for longer than 1 hour.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
|
||||
summary: Pod container waiting longer than 1 hour
|
||||
expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
|
||||
namespace=~".*"}) > 0
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetNotScheduled
|
||||
annotations:
|
||||
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are not scheduled.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
|
||||
summary: DaemonSet pods are not scheduled.
|
||||
expr: |-
|
||||
kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
-
|
||||
kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeDaemonSetMisScheduled
|
||||
annotations:
|
||||
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
|
||||
}} are running where they are not supposed to run.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
|
||||
summary: DaemonSet pods are misscheduled.
|
||||
expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
|
||||
> 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeJobNotCompleted
|
||||
annotations:
|
||||
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
|
||||
than {{ "43200" | humanizeDuration }} to complete.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
|
||||
summary: Job did not complete in time
|
||||
expr: |-
|
||||
time() - max by (namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"}
|
||||
and
|
||||
kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeJobFailed
|
||||
annotations:
|
||||
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
|
||||
Removing failed job after investigation should clear this alert.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
|
||||
summary: Job failed to complete.
|
||||
expr: kube_job_failed{job="kube-state-metrics", namespace=~".*"} > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeHpaReplicasMismatch
|
||||
annotations:
|
||||
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
|
||||
has not matched the desired number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
|
||||
summary: HPA has not matched desired number of replicas.
|
||||
expr: |-
|
||||
(kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"})
|
||||
and
|
||||
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
>
|
||||
kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~".*"})
|
||||
and
|
||||
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
<
|
||||
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"})
|
||||
and
|
||||
changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}[15m]) == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeHpaMaxedOut
|
||||
annotations:
|
||||
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
|
||||
has been running at max replicas for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
|
||||
summary: HPA is running at max replicas
|
||||
expr: |-
|
||||
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
==
|
||||
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
> 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeJobNotCompleted
|
||||
annotations:
|
||||
description:
|
||||
Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
|
||||
than {{ "43200" | humanizeDuration }} to complete.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
|
||||
summary: Job did not complete in time
|
||||
expr: |-
|
||||
time() - max by (namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"}
|
||||
and
|
||||
kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeJobFailed
|
||||
annotations:
|
||||
description:
|
||||
Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
|
||||
Removing failed job after investigation should clear this alert.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
|
||||
summary: Job failed to complete.
|
||||
expr: kube_job_failed{job="kube-state-metrics", namespace=~".*"} > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeHpaReplicasMismatch
|
||||
annotations:
|
||||
description:
|
||||
HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
|
||||
has not matched the desired number of replicas for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
|
||||
summary: HPA has not matched desired number of replicas.
|
||||
expr: |-
|
||||
(kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
!=
|
||||
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"})
|
||||
and
|
||||
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
>
|
||||
kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~".*"})
|
||||
and
|
||||
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
<
|
||||
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"})
|
||||
and
|
||||
changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}[15m]) == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeHpaMaxedOut
|
||||
annotations:
|
||||
description:
|
||||
HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
|
||||
has been running at max replicas for longer than 15 minutes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
|
||||
summary: HPA is running at max replicas
|
||||
expr: |-
|
||||
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
==
|
||||
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
+122
-114
@@ -1,115 +1,123 @@
|
||||
groups:
|
||||
- name: kubernetes-resources
|
||||
rules:
|
||||
- alert: KubeCPUOvercommit
|
||||
annotations:
|
||||
description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
|
||||
for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
|
||||
summary: Cluster has overcommitted CPU resource requests.
|
||||
expr: |-
|
||||
sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0
|
||||
and
|
||||
(sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeMemoryOvercommit
|
||||
annotations:
|
||||
description: Cluster {{ $labels.cluster }} has overcommitted memory resource
|
||||
requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node
|
||||
failure.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
|
||||
summary: Cluster has overcommitted memory resource requests.
|
||||
expr: |-
|
||||
sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0
|
||||
and
|
||||
(sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeCPUQuotaOvercommit
|
||||
annotations:
|
||||
description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
|
||||
for Namespaces.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
|
||||
summary: Cluster has overcommitted CPU resource requests.
|
||||
expr: |-
|
||||
sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by (cluster)
|
||||
/
|
||||
sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by (cluster)
|
||||
> 1.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeMemoryQuotaOvercommit
|
||||
annotations:
|
||||
description: Cluster {{ $labels.cluster }} has overcommitted memory resource
|
||||
requests for Namespaces.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
|
||||
summary: Cluster has overcommitted memory resource requests.
|
||||
expr: |-
|
||||
sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by (cluster)
|
||||
/
|
||||
sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)
|
||||
> 1.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeQuotaAlmostFull
|
||||
annotations:
|
||||
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
|
||||
summary: Namespace quota is going to be full.
|
||||
expr: |-
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||
> 0.9 < 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: KubeQuotaFullyUsed
|
||||
annotations:
|
||||
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
|
||||
summary: Namespace quota is fully used.
|
||||
expr: |-
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||
== 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: KubeQuotaExceeded
|
||||
annotations:
|
||||
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
|
||||
summary: Namespace quota has exceeded the limits.
|
||||
expr: |-
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||
> 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CPUThrottlingHigh
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
|
||||
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
|
||||
}}.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
|
||||
summary: Processes experience elevated CPU throttling.
|
||||
expr: |-
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
|
||||
/
|
||||
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
|
||||
> ( 25 / 100 )
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- name: kubernetes-resources
|
||||
rules:
|
||||
- alert: KubeCPUOvercommit
|
||||
annotations:
|
||||
description:
|
||||
Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
|
||||
for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
|
||||
summary: Cluster has overcommitted CPU resource requests.
|
||||
expr: |-
|
||||
sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0
|
||||
and
|
||||
(sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeMemoryOvercommit
|
||||
annotations:
|
||||
description:
|
||||
Cluster {{ $labels.cluster }} has overcommitted memory resource
|
||||
requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node
|
||||
failure.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
|
||||
summary: Cluster has overcommitted memory resource requests.
|
||||
expr: |-
|
||||
sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0
|
||||
and
|
||||
(sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeCPUQuotaOvercommit
|
||||
annotations:
|
||||
description:
|
||||
Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
|
||||
for Namespaces.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
|
||||
summary: Cluster has overcommitted CPU resource requests.
|
||||
expr: |-
|
||||
sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by (cluster)
|
||||
/
|
||||
sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by (cluster)
|
||||
> 1.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeMemoryQuotaOvercommit
|
||||
annotations:
|
||||
description:
|
||||
Cluster {{ $labels.cluster }} has overcommitted memory resource
|
||||
requests for Namespaces.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
|
||||
summary: Cluster has overcommitted memory resource requests.
|
||||
expr: |-
|
||||
sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by (cluster)
|
||||
/
|
||||
sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)
|
||||
> 1.5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubeQuotaAlmostFull
|
||||
annotations:
|
||||
description:
|
||||
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
|
||||
summary: Namespace quota is going to be full.
|
||||
expr: |-
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||
> 0.9 < 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: KubeQuotaFullyUsed
|
||||
annotations:
|
||||
description:
|
||||
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
|
||||
summary: Namespace quota is fully used.
|
||||
expr: |-
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||
== 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: KubeQuotaExceeded
|
||||
annotations:
|
||||
description:
|
||||
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
|
||||
}} of its {{ $labels.resource }} quota.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
|
||||
summary: Namespace quota has exceeded the limits.
|
||||
expr: |-
|
||||
kube_resourcequota{job="kube-state-metrics", type="used"}
|
||||
/ ignoring(instance, job, type)
|
||||
(kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
|
||||
> 1
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CPUThrottlingHigh
|
||||
annotations:
|
||||
description:
|
||||
"{{ $value | humanizePercentage }} throttling of CPU in namespace
|
||||
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
|
||||
}}."
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
|
||||
summary: Processes experience elevated CPU throttling.
|
||||
expr: |-
|
||||
sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
|
||||
/
|
||||
sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
|
||||
> ( 25 / 100 )
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
|
||||
+113
-108
@@ -1,109 +1,114 @@
|
||||
|
||||
groups:
|
||||
- name: kubernetes-storage
|
||||
rules:
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
|
||||
{{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
|
||||
summary: PersistentVolume is filling up.
|
||||
expr: |-
|
||||
(
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
) < 0.03
|
||||
and
|
||||
kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
|
||||
{{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value
|
||||
| humanizePercentage }} is available.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
|
||||
summary: PersistentVolume is filling up.
|
||||
expr: |-
|
||||
(
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
) < 0.15
|
||||
and
|
||||
kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
|
||||
and
|
||||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubePersistentVolumeInodesFillingUp
|
||||
annotations:
|
||||
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
|
||||
{{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
|
||||
summary: PersistentVolumeInodes are filling up.
|
||||
expr: |-
|
||||
(
|
||||
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
) < 0.03
|
||||
and
|
||||
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubePersistentVolumeInodesFillingUp
|
||||
annotations:
|
||||
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
|
||||
{{ . }} {{- end }} is expected to run out of inodes within four days. Currently
|
||||
{{ $value | humanizePercentage }} of its inodes are free.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
|
||||
summary: PersistentVolumeInodes are filling up.
|
||||
expr: |-
|
||||
(
|
||||
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
) < 0.15
|
||||
and
|
||||
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
|
||||
and
|
||||
predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubePersistentVolumeErrors
|
||||
annotations:
|
||||
description: The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
|
||||
-}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
|
||||
summary: PersistentVolume is having issues with provisioning.
|
||||
expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
|
||||
> 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: kubernetes-storage
|
||||
rules:
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
description:
|
||||
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
|
||||
{{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
|
||||
summary: PersistentVolume is filling up.
|
||||
expr: |-
|
||||
(
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
) < 0.03
|
||||
and
|
||||
kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubePersistentVolumeFillingUp
|
||||
annotations:
|
||||
description:
|
||||
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
|
||||
{{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value
|
||||
| humanizePercentage }} is available.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
|
||||
summary: PersistentVolume is filling up.
|
||||
expr: |-
|
||||
(
|
||||
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
) < 0.15
|
||||
and
|
||||
kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
|
||||
and
|
||||
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubePersistentVolumeInodesFillingUp
|
||||
annotations:
|
||||
description:
|
||||
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
|
||||
{{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
|
||||
summary: PersistentVolumeInodes are filling up.
|
||||
expr: |-
|
||||
(
|
||||
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
) < 0.03
|
||||
and
|
||||
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubePersistentVolumeInodesFillingUp
|
||||
annotations:
|
||||
description:
|
||||
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
|
||||
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
|
||||
{{ . }} {{- end }} is expected to run out of inodes within four days. Currently
|
||||
{{ $value | humanizePercentage }} of its inodes are free.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
|
||||
summary: PersistentVolumeInodes are filling up.
|
||||
expr: |-
|
||||
(
|
||||
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
/
|
||||
kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
|
||||
) < 0.15
|
||||
and
|
||||
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
|
||||
and
|
||||
predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
|
||||
unless on (cluster, namespace, persistentvolumeclaim)
|
||||
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: KubePersistentVolumeErrors
|
||||
annotations:
|
||||
description:
|
||||
The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
|
||||
-}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
|
||||
summary: PersistentVolume is having issues with provisioning.
|
||||
expr:
|
||||
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
|
||||
> 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
+366
-339
@@ -1,340 +1,367 @@
|
||||
groups:
|
||||
- name: node-exporter
|
||||
rules:
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
space left and is filling up.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
space left and is filling up fast.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
space left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 5% space left.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
space left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 3% space left.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
inodes left and is filling up.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
inodes left and is filling up fast.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
inodes left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 5% inodes left.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
inodes left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 3% inodes left.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeNetworkReceiveErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
|
||||
summary: Network interface is reporting many receive errors.
|
||||
expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
|
||||
> 0.01
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeNetworkTransmitErrs
|
||||
annotations:
|
||||
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
|
||||
summary: Network interface is reporting many transmit errors.
|
||||
expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
|
||||
> 0.01
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeHighNumberConntrackEntriesUsed
|
||||
annotations:
|
||||
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
|
||||
summary: Number of conntrack are getting close to the limit.
|
||||
expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
|
||||
> 0.75
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeTextFileCollectorScrapeError
|
||||
annotations:
|
||||
description: Node Exporter text file collector on {{ $labels.instance }} failed
|
||||
to scrape.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
|
||||
summary: Node Exporter text file collector failed to scrape.
|
||||
expr: node_textfile_scrape_error{job="node-exporter"} == 1
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockSkewDetected
|
||||
annotations:
|
||||
description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
|
||||
Ensure NTP is configured correctly on this host.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
|
||||
summary: Clock skew detected.
|
||||
expr: |-
|
||||
(
|
||||
node_timex_offset_seconds{job="node-exporter"} > 0.05
|
||||
and
|
||||
deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
|
||||
)
|
||||
or
|
||||
(
|
||||
node_timex_offset_seconds{job="node-exporter"} < -0.05
|
||||
and
|
||||
deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
|
||||
)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockNotSynchronising
|
||||
annotations:
|
||||
description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
|
||||
is configured on this host.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
|
||||
summary: Clock not synchronising.
|
||||
expr: |-
|
||||
min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
|
||||
and
|
||||
node_timex_maxerror_seconds{job="node-exporter"} >= 16
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeRAIDDegraded
|
||||
annotations:
|
||||
description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
|
||||
in degraded state due to one or more disks failures. Number of spare drives
|
||||
is insufficient to fix issue automatically.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
|
||||
summary: RAID Array is degraded.
|
||||
expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
|
||||
- ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"})
|
||||
> 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeRAIDDiskFailure
|
||||
annotations:
|
||||
description: At least one device in RAID array at {{ $labels.instance }} failed.
|
||||
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
|
||||
summary: Failed device in RAID array.
|
||||
expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
|
||||
> 0
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFileDescriptorLimit
|
||||
annotations:
|
||||
description: File descriptors limit at {{ $labels.instance }} is currently at
|
||||
{{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
|
||||
summary: Kernel is predicted to exhaust file descriptors limit soon.
|
||||
expr: |-
|
||||
(
|
||||
node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFileDescriptorLimit
|
||||
annotations:
|
||||
description: File descriptors limit at {{ $labels.instance }} is currently at
|
||||
{{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
|
||||
summary: Kernel is predicted to exhaust file descriptors limit soon.
|
||||
expr: |-
|
||||
(
|
||||
node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeCPUHighUsage
|
||||
annotations:
|
||||
description: |
|
||||
CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
|
||||
summary: High CPU usage.
|
||||
expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
|
||||
mode!="idle"}[2m]))) * 100 > 90
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: NodeSystemSaturation
|
||||
annotations:
|
||||
description: |
|
||||
System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
|
||||
This might indicate this instance resources saturation and can cause it becoming unresponsive.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation
|
||||
summary: System saturated, load per core is very high.
|
||||
expr: |-
|
||||
node_load1{job="node-exporter"}
|
||||
/ count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeMemoryMajorPagesFaults
|
||||
annotations:
|
||||
description: |
|
||||
Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
|
||||
Please check that there is enough memory available at this instance.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults
|
||||
summary: Memory major page faults are occurring at very high rate.
|
||||
expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeMemoryHighUtilization
|
||||
annotations:
|
||||
description: |
|
||||
Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization
|
||||
summary: Host is running out of memory.
|
||||
expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
|
||||
* 100) > 90
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeDiskIOSaturation
|
||||
annotations:
|
||||
description: |
|
||||
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
|
||||
This symptom might indicate disk saturation.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation
|
||||
summary: Disk IO queue is high.
|
||||
expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
|
||||
> 10
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeSystemdServiceFailed
|
||||
annotations:
|
||||
description: Systemd service {{ $labels.name }} has entered failed state at
|
||||
{{ $labels.instance }}
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed
|
||||
summary: Systemd service has entered failed state.
|
||||
expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeBondingDegraded
|
||||
annotations:
|
||||
description: Bonding interface {{ $labels.master }} on {{ $labels.instance }}
|
||||
is in degraded state due to one or more slave failures.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
|
||||
summary: Bonding interface is degraded
|
||||
expr: (node_bonding_slaves - node_bonding_active) != 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: node-exporter
|
||||
rules:
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description:
|
||||
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
space left and is filling up.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 24 hours.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemSpaceFillingUp
|
||||
annotations:
|
||||
description:
|
||||
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
space left and is filling up fast.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
|
||||
summary: Filesystem is predicted to run out of space within the next 4 hours.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10
|
||||
and
|
||||
predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description:
|
||||
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
space left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 5% space left.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfSpace
|
||||
annotations:
|
||||
description:
|
||||
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
space left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
|
||||
summary: Filesystem has less than 3% space left.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description:
|
||||
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
inodes left and is filling up.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 24 hours.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemFilesFillingUp
|
||||
annotations:
|
||||
description:
|
||||
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
inodes left and is filling up fast.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
|
||||
summary: Filesystem is predicted to run out of inodes within the next 4 hours.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20
|
||||
and
|
||||
predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description:
|
||||
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
inodes left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 5% inodes left.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFilesystemAlmostOutOfFiles
|
||||
annotations:
|
||||
description:
|
||||
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
|
||||
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
|
||||
inodes left.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
|
||||
summary: Filesystem has less than 3% inodes left.
|
||||
expr: |-
|
||||
(
|
||||
node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
|
||||
and
|
||||
node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
|
||||
)
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeNetworkReceiveErrs
|
||||
annotations:
|
||||
description:
|
||||
'{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
|
||||
summary: Network interface is reporting many receive errors.
|
||||
expr:
|
||||
rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
|
||||
> 0.01
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeNetworkTransmitErrs
|
||||
annotations:
|
||||
description:
|
||||
'{{ $labels.instance }} interface {{ $labels.device }} has encountered
|
||||
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
|
||||
summary: Network interface is reporting many transmit errors.
|
||||
expr:
|
||||
rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
|
||||
> 0.01
|
||||
for: 1h
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeHighNumberConntrackEntriesUsed
|
||||
annotations:
|
||||
description: "{{ $value | humanizePercentage }} of conntrack entries are used."
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
|
||||
summary: Number of conntrack are getting close to the limit.
|
||||
expr:
|
||||
(node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
|
||||
> 0.75
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeTextFileCollectorScrapeError
|
||||
annotations:
|
||||
description:
|
||||
Node Exporter text file collector on {{ $labels.instance }} failed
|
||||
to scrape.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
|
||||
summary: Node Exporter text file collector failed to scrape.
|
||||
expr: node_textfile_scrape_error{job="node-exporter"} == 1
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockSkewDetected
|
||||
annotations:
|
||||
description:
|
||||
Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
|
||||
Ensure NTP is configured correctly on this host.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
|
||||
summary: Clock skew detected.
|
||||
expr: |-
|
||||
(
|
||||
node_timex_offset_seconds{job="node-exporter"} > 0.05
|
||||
and
|
||||
deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
|
||||
)
|
||||
or
|
||||
(
|
||||
node_timex_offset_seconds{job="node-exporter"} < -0.05
|
||||
and
|
||||
deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
|
||||
)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeClockNotSynchronising
|
||||
annotations:
|
||||
description:
|
||||
Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
|
||||
is configured on this host.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
|
||||
summary: Clock not synchronising.
|
||||
expr: |-
|
||||
min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
|
||||
and
|
||||
node_timex_maxerror_seconds{job="node-exporter"} >= 16
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeRAIDDegraded
|
||||
annotations:
|
||||
description:
|
||||
RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
|
||||
in degraded state due to one or more disks failures. Number of spare drives
|
||||
is insufficient to fix issue automatically.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
|
||||
summary: RAID Array is degraded.
|
||||
expr:
|
||||
node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
|
||||
- ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"})
|
||||
> 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeRAIDDiskFailure
|
||||
annotations:
|
||||
description:
|
||||
At least one device in RAID array at {{ $labels.instance }} failed.
|
||||
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
|
||||
summary: Failed device in RAID array.
|
||||
expr:
|
||||
node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
|
||||
> 0
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFileDescriptorLimit
|
||||
annotations:
|
||||
description:
|
||||
File descriptors limit at {{ $labels.instance }} is currently at
|
||||
{{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
|
||||
summary: Kernel is predicted to exhaust file descriptors limit soon.
|
||||
expr: |-
|
||||
(
|
||||
node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeFileDescriptorLimit
|
||||
annotations:
|
||||
description:
|
||||
File descriptors limit at {{ $labels.instance }} is currently at
|
||||
{{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
|
||||
summary: Kernel is predicted to exhaust file descriptors limit soon.
|
||||
expr: |-
|
||||
(
|
||||
node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
|
||||
)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeCPUHighUsage
|
||||
annotations:
|
||||
description: |
|
||||
CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
|
||||
summary: High CPU usage.
|
||||
expr:
|
||||
sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
|
||||
mode!="idle"}[2m]))) * 100 > 90
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
- alert: NodeSystemSaturation
|
||||
annotations:
|
||||
description: |
|
||||
System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
|
||||
This might indicate this instance resources saturation and can cause it becoming unresponsive.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation
|
||||
summary: System saturated, load per core is very high.
|
||||
expr: |-
|
||||
node_load1{job="node-exporter"}
|
||||
/ count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeMemoryMajorPagesFaults
|
||||
annotations:
|
||||
description: |
|
||||
Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
|
||||
Please check that there is enough memory available at this instance.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults
|
||||
summary: Memory major page faults are occurring at very high rate.
|
||||
expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeMemoryHighUtilization
|
||||
annotations:
|
||||
description: |
|
||||
Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization
|
||||
summary: Host is running out of memory.
|
||||
expr:
|
||||
100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
|
||||
* 100) > 90
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeDiskIOSaturation
|
||||
annotations:
|
||||
description: |
|
||||
Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
|
||||
This symptom might indicate disk saturation.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation
|
||||
summary: Disk IO queue is high.
|
||||
expr:
|
||||
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
|
||||
> 10
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeSystemdServiceFailed
|
||||
annotations:
|
||||
description:
|
||||
Systemd service {{ $labels.name }} has entered failed state at
|
||||
{{ $labels.instance }}
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed
|
||||
summary: Systemd service has entered failed state.
|
||||
expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: NodeBondingDegraded
|
||||
annotations:
|
||||
description:
|
||||
Bonding interface {{ $labels.master }} on {{ $labels.instance }}
|
||||
is in degraded state due to one or more slave failures.
|
||||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
|
||||
summary: Bonding interface is degraded
|
||||
expr: (node_bonding_slaves - node_bonding_active) != 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
@@ -1,70 +1,76 @@
|
||||
groups:
|
||||
- name: node-resource-utilization.rules
|
||||
rules:
|
||||
- alert: HostHighCpuLoad
|
||||
annotations:
|
||||
description: |-
|
||||
CPU load is > 90%
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Host high CPU load (instance {{ $labels.instance }})
|
||||
expr: (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
|
||||
> 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: MemoryUtilizationHighWarning
|
||||
annotations:
|
||||
dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
|
||||
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
|
||||
description: Node {{ $labels.instance }} has less than 10% available memory.
|
||||
summary: Node Memory utilization warning
|
||||
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: MemoryUtilizationHighCritical
|
||||
annotations:
|
||||
dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
|
||||
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
|
||||
description: Node {{ $labels.instance }} has less than 5% available memory.
|
||||
summary: Node Memory utilization critical
|
||||
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeNotReady
|
||||
annotations:
|
||||
description: Node {{ $labels.node }} has CPU utilization over 90%.
|
||||
summary: Node has been in not-ready state for longer than 3 minutes
|
||||
expr: (sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
|
||||
<= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"}))
|
||||
> 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubernetesNodeMemoryPressure
|
||||
annotations:
|
||||
description: |-
|
||||
Node {{ $labels.node }} has MemoryPressure condition
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Kubernetes Node memory pressure (instance {{ $labels.instance }})
|
||||
expr: kube_node_status_condition{condition="MemoryPressure",status="true"} ==
|
||||
1
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubernetesContainerOomKiller
|
||||
annotations:
|
||||
description: |-
|
||||
Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
|
||||
expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
|
||||
offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
|
||||
== 1
|
||||
for: 0m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: node-resource-utilization.rules
|
||||
rules:
|
||||
- alert: HostHighCpuLoad
|
||||
annotations:
|
||||
description: |-
|
||||
CPU load is > 90%
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Host high CPU load (instance {{ $labels.instance }})
|
||||
expr:
|
||||
(sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
|
||||
> 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: MemoryUtilizationHighWarning
|
||||
annotations:
|
||||
dashboard:
|
||||
https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
|
||||
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
|
||||
description: Node {{ $labels.instance }} has less than 10% available memory.
|
||||
summary: Node Memory utilization warning
|
||||
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: MemoryUtilizationHighCritical
|
||||
annotations:
|
||||
dashboard:
|
||||
https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
|
||||
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
|
||||
description: Node {{ $labels.instance }} has less than 5% available memory.
|
||||
summary: Node Memory utilization critical
|
||||
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5
|
||||
for: 1m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: NodeNotReady
|
||||
annotations:
|
||||
description: Node {{ $labels.node }} has CPU utilization over 90%.
|
||||
summary: Node has been in not-ready state for longer than 3 minutes
|
||||
expr:
|
||||
(sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
|
||||
<= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"}))
|
||||
> 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubernetesNodeMemoryPressure
|
||||
annotations:
|
||||
description: |-
|
||||
Node {{ $labels.node }} has MemoryPressure condition
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Kubernetes Node memory pressure (instance {{ $labels.instance }})
|
||||
expr:
|
||||
kube_node_status_condition{condition="MemoryPressure",status="true"} ==
|
||||
1
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubernetesContainerOomKiller
|
||||
annotations:
|
||||
description: |-
|
||||
Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
|
||||
VALUE = {{ $value }}
|
||||
LABELS = {{ $labels }}
|
||||
summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
|
||||
expr:
|
||||
(kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
|
||||
offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
|
||||
== 1
|
||||
for: 0m
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
+24
-20
@@ -1,21 +1,25 @@
|
||||
groups:
|
||||
- name: velero
|
||||
rules:
|
||||
- alert: VeleroBackupPartialFailures
|
||||
annotations:
|
||||
message: Velero backup {{ $labels.schedule }} has {{$value | humanizePercentage}} partialy
|
||||
failed backups.
|
||||
expr: velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
|
||||
> 0.25
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: VeleroBackupFailures
|
||||
annotations:
|
||||
message: Velero backup {{$labels.schedule}} has {{$value | humanizePercentage}} failed
|
||||
backups.
|
||||
expr: velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
|
||||
> 0.25
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: velero
|
||||
rules:
|
||||
- alert: VeleroBackupPartialFailures
|
||||
annotations:
|
||||
message:
|
||||
Velero backup {{ $labels.schedule }} has {{$value | humanizePercentage}} partialy
|
||||
failed backups.
|
||||
expr:
|
||||
velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
|
||||
> 0.25
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: VeleroBackupFailures
|
||||
annotations:
|
||||
message:
|
||||
Velero backup {{$labels.schedule}} has {{$value | humanizePercentage}} failed
|
||||
backups.
|
||||
expr:
|
||||
velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
|
||||
> 0.25
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
+51
-45
@@ -1,46 +1,52 @@
|
||||
groups:
|
||||
- name: x509-certificate-exporter.rules
|
||||
rules:
|
||||
- alert: X509ExporterReadErrors
|
||||
annotations:
|
||||
description: Over the last 15 minutes, this x509-certificate-exporter instance
|
||||
has experienced errors reading certificate files or querying the Kubernetes
|
||||
API. This could be caused by a misconfiguration if triggered when the exporter
|
||||
starts.
|
||||
summary: Increasing read errors for x509-certificate-exporter
|
||||
expr: delta(x509_read_errors[15m]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CertificateError
|
||||
annotations:
|
||||
description: Certificate could not be decoded {{if $labels.secret_name }} in
|
||||
Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at
|
||||
location "{{ $labels.filepath }}"{{end}}
|
||||
summary: Certificate cannot be decoded
|
||||
expr: x509_cert_error > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CertificateRenewal
|
||||
annotations:
|
||||
description: Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
|
||||
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
|
||||
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
|
||||
summary: Certificate should be renewed
|
||||
expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
|
||||
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CertificateExpiration
|
||||
annotations:
|
||||
description: Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
|
||||
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
|
||||
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
|
||||
summary: Certificate is about to expire
|
||||
expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
|
||||
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- name: x509-certificate-exporter.rules
|
||||
rules:
|
||||
- alert: X509ExporterReadErrors
|
||||
annotations:
|
||||
description:
|
||||
Over the last 15 minutes, this x509-certificate-exporter instance
|
||||
has experienced errors reading certificate files or querying the Kubernetes
|
||||
API. This could be caused by a misconfiguration if triggered when the exporter
|
||||
starts.
|
||||
summary: Increasing read errors for x509-certificate-exporter
|
||||
expr: delta(x509_read_errors[15m]) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CertificateError
|
||||
annotations:
|
||||
description:
|
||||
Certificate could not be decoded {{if $labels.secret_name }} in
|
||||
Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at
|
||||
location "{{ $labels.filepath }}"{{end}}
|
||||
summary: Certificate cannot be decoded
|
||||
expr: x509_cert_error > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CertificateRenewal
|
||||
annotations:
|
||||
description:
|
||||
Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
|
||||
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
|
||||
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
|
||||
summary: Certificate should be renewed
|
||||
expr:
|
||||
((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
|
||||
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: CertificateExpiration
|
||||
annotations:
|
||||
description:
|
||||
Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
|
||||
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
|
||||
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
|
||||
summary: Certificate is about to expire
|
||||
expr:
|
||||
((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
|
||||
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
@@ -6,35 +6,62 @@ let
|
||||
config = { };
|
||||
overlays = [ ];
|
||||
};
|
||||
checks = import ./nix/checks.nix;
|
||||
in
|
||||
pkgs.mkShellNoCC {
|
||||
name = "clstr";
|
||||
|
||||
packages = with pkgs; [
|
||||
just
|
||||
npins
|
||||
packages =
|
||||
with pkgs;
|
||||
[
|
||||
# dev tools
|
||||
just
|
||||
npins
|
||||
|
||||
# helm
|
||||
helmfile
|
||||
kubernetes-helm
|
||||
# helm
|
||||
helmfile
|
||||
kubernetes-helm
|
||||
|
||||
# kubectl tools
|
||||
kubectl-cnpg
|
||||
kubectl-neat
|
||||
kubelogin
|
||||
kubelogin-oidc
|
||||
kubectl-rook-ceph
|
||||
# kubectl tools
|
||||
kubectl-cnpg
|
||||
kubectl-neat
|
||||
kubelogin
|
||||
kubelogin-oidc
|
||||
kubectl-rook-ceph
|
||||
kubectl-graph
|
||||
kubectl-klock
|
||||
graphviz
|
||||
|
||||
# other tools
|
||||
step-cli
|
||||
linkerd
|
||||
velero
|
||||
cmctl
|
||||
# other tools activate when needed
|
||||
# step-cli
|
||||
# linkerd
|
||||
# cmctl
|
||||
# rclone
|
||||
# velero
|
||||
# renovate
|
||||
|
||||
# dapr
|
||||
dapr-cli
|
||||
# dapr
|
||||
dapr-cli
|
||||
]
|
||||
++ checks.enabledPackages;
|
||||
|
||||
# Environment variables
|
||||
ARGOCD_ENV_CLUSTER_NAME = "hel1";
|
||||
HELM_GIT_ACCESS_TOKEN = "glpat-xxx";
|
||||
|
||||
shellHook = builtins.concatStringsSep "\n" [
|
||||
checks.shellHook
|
||||
];
|
||||
|
||||
ARGOCD_ENV_CLUSTER_NAME = "rossby";
|
||||
HELM_GIT_ACCESS_TOKEN = "glpat-xxx";
|
||||
# Alternative shells
|
||||
passthru = pkgs.lib.mapAttrs (name: value: pkgs.mkShellNoCC (value // { inherit name; })) {
|
||||
ci-shell = {
|
||||
packages = [
|
||||
pkgs.npins
|
||||
];
|
||||
shellHook = ''
|
||||
export NPINS_DIRECTORY="nix"
|
||||
'';
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -88,6 +88,8 @@ spec:
|
||||
server: https://kubernetes.default.svc
|
||||
- namespace: uptime
|
||||
server: https://kubernetes.default.svc
|
||||
- namespace: forgejo
|
||||
server: https://kubernetes.default.svc
|
||||
sourceRepos:
|
||||
- https://argoproj.github.io/argo-helm
|
||||
- https://kubernetes-sigs.github.io/metrics-server/
|
||||
@@ -123,6 +125,7 @@ spec:
|
||||
- ghcr.io/slinkyproject/charts/slurm-operator-crds
|
||||
- ghcr.io/spegel-org/helm-charts
|
||||
- ghcr.io/dragonflydb/dragonfly-operator/helm/dragonfly-operator
|
||||
- code.forgejo.org/forgejo-helm
|
||||
- https://operator.mariadb.com/mariadb-enterprise-operator
|
||||
- https://operator.mariadb.com
|
||||
- https://ot-container-kit.github.io/helm-charts
|
||||
|
||||
@@ -73,7 +73,7 @@
|
||||
"connString": "Username=postgres;Password=secret;Host=localhost;Port=5432;Database=app;Pooling=true;",
|
||||
"sorcerer" : "https://sorcerer.data.oceanbox.io",
|
||||
"allowedOrigins": [
|
||||
"https://maps.oceanbox.io",
|
||||
"https://maps.oceanbox.io"
|
||||
],
|
||||
"appName": "atlantis",
|
||||
"appEnv": "prod",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
replicaCount: 1
|
||||
image:
|
||||
tag: faa0a853-debug
|
||||
tag: 503ccbb2-debug
|
||||
podAnnotations:
|
||||
dapr.io/app-id: "staging-atlantis"
|
||||
env:
|
||||
@@ -26,12 +26,12 @@ env:
|
||||
- name: DB_USER
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: staging-atlantis-db-superuser
|
||||
name: staging-atlantis-db-app
|
||||
key: username
|
||||
- name: DB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: staging-atlantis-db-superuser
|
||||
name: staging-atlantis-db-app
|
||||
key: password
|
||||
- name: DAPR_API_TOKEN
|
||||
valueFrom:
|
||||
@@ -116,9 +116,6 @@ cluster:
|
||||
db: prod-atlantis-db
|
||||
namespace: prod-atlantis
|
||||
resources:
|
||||
limits:
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
@@ -133,7 +130,6 @@ redis:
|
||||
resources:
|
||||
cpu: 150m
|
||||
memory: 256Mi
|
||||
|
||||
diagrid-dashboard:
|
||||
enabled: false
|
||||
statestore:
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
codex:
|
||||
enabled: false
|
||||
{{- if eq .Environment.Name "prod" }}
|
||||
autosync: false
|
||||
{{- else }}
|
||||
autosync: true
|
||||
{{- end }}
|
||||
env: {{ .Environment.Name }}
|
||||
|
||||
@@ -0,0 +1,67 @@
|
||||
{
|
||||
"Logging": {
|
||||
"LogLevel": {
|
||||
"Default": "Information",
|
||||
"Microsoft": "Warning",
|
||||
"Microsoft.Hosting": "Error"
|
||||
}
|
||||
},
|
||||
"Debug": {
|
||||
"LogLevel": {
|
||||
"Default": "Debug"
|
||||
}
|
||||
},
|
||||
"Console": {
|
||||
"IncludeScopes": true,
|
||||
"LogLevel": {
|
||||
"Default": "Debug"
|
||||
}
|
||||
},
|
||||
"OIDC": {
|
||||
"issuer": "https://auth.oceanbox.io/realms/oceanbox",
|
||||
"authorization_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/auth",
|
||||
"token_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/token",
|
||||
"jwks_uri": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/certs",
|
||||
"userinfo_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/userinfo",
|
||||
"end_session_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/logout",
|
||||
"device_authorization_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/auth/device",
|
||||
"clientId": "atlantis",
|
||||
"clientSecret": "",
|
||||
"scopes": [
|
||||
"openid",
|
||||
"email",
|
||||
"offline_access",
|
||||
"profile"
|
||||
],
|
||||
"audiences": [
|
||||
"atlantis"
|
||||
]
|
||||
},
|
||||
"SSO": {
|
||||
"cookieDomain": ".oceanbox.io",
|
||||
"cookieName": ".obx.prod",
|
||||
"ttl": 12.0,
|
||||
"signedOutRedirectUri": "https://maps.oceanbox.io/",
|
||||
"realm": "atlantis",
|
||||
"environment": "prod",
|
||||
"keyStore": {
|
||||
"kind": "azure",
|
||||
"uri": "https://atlantis.blob.core.windows.net",
|
||||
"key": "dataprotection-keys"
|
||||
},
|
||||
"keyVault": {
|
||||
"kind": "azure",
|
||||
"uri": "https://atlantisvault.vault.azure.net",
|
||||
"key": "dataencryption-keys"
|
||||
}
|
||||
},
|
||||
"plainAuthUsers": [
|
||||
{
|
||||
"username": "admin",
|
||||
"password": "en-to-tre-fire",
|
||||
"groups": [ "/oceanbox" ],
|
||||
"roles": [ "admin" ]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
- op: add
|
||||
path: /spec/template/spec/containers/0/envFrom
|
||||
value:
|
||||
- secretRef:
|
||||
name: azure-keyvault
|
||||
- op: add
|
||||
path: /spec/template/spec/containers/0/env
|
||||
value:
|
||||
- name: APP_NAMESPACE
|
||||
value: prod-atlantis
|
||||
- name: DOTNET_ENVIRONMENT
|
||||
value: Production
|
||||
- name: ASPNETCORE_ENVIRONMENT
|
||||
value: Production
|
||||
- name: DB_HOST
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-atlantis-db-app
|
||||
key: host
|
||||
- name: DB_PORT
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-atlantis-db-app
|
||||
key: port
|
||||
- name: DB_DATABASE
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-atlantis-db-app
|
||||
key: dbname
|
||||
- name: DB_USER
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-atlantis-db-app
|
||||
key: user
|
||||
- name: DB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-atlantis-db-app
|
||||
key: password
|
||||
- name: FGA_URL
|
||||
value: http://prod-openfga.openfga.svc.cluster.local:8080
|
||||
- name: FGA_DB_HOST
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-openfga-db-app
|
||||
key: host
|
||||
- name: FGA_DB_PORT
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-openfga-db-app
|
||||
key: port
|
||||
- name: FGA_DB_DATABASE
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-openfga-db-app
|
||||
key: dbname
|
||||
- name: FGA_DB_USER
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-openfga-db-app
|
||||
key: user
|
||||
- name: FGA_DB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prod-openfga-db-app
|
||||
key: password
|
||||
@@ -0,0 +1,15 @@
|
||||
generatorOptions:
|
||||
disableNameSuffixHash: true
|
||||
configMapGenerator:
|
||||
- name: prod-codex-appsettings
|
||||
files:
|
||||
- appsettings.json
|
||||
patches:
|
||||
- target:
|
||||
group: apps
|
||||
version: v1
|
||||
kind: Deployment
|
||||
path: deployment_patch.yaml
|
||||
resources:
|
||||
- ../base
|
||||
|
||||
@@ -62,4 +62,3 @@
|
||||
secretKeyRef:
|
||||
name: staging-openfga-db-app
|
||||
key: password
|
||||
name: azure-keyvault
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
{{- if .Values.clusterConfig.cilium.enabled }}
|
||||
apiVersion: cilium.io/v2
|
||||
kind: CiliumNetworkPolicy
|
||||
metadata:
|
||||
name: codex-allow-external-services
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
egress:
|
||||
- toFQDNs:
|
||||
- matchName: cacerts.digicert.com
|
||||
endpointSelector:
|
||||
matchLabels: {}
|
||||
{{- end }}
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
replicaCount: 1
|
||||
ingress:
|
||||
enabled: true
|
||||
className: "nginx"
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-production
|
||||
nginx.ingress.kubernetes.io/backend-protocol: HTTP
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
oceanbox.io/expose: internal
|
||||
hosts:
|
||||
- host: codex.adm.oceanbox.io
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls:
|
||||
- hosts:
|
||||
- codex.adm.oceanbox.io
|
||||
secretName: prod-codex-tls
|
||||
volumes:
|
||||
- name: appsettings
|
||||
configMap:
|
||||
name: prod-codex-appsettings
|
||||
volumeMounts:
|
||||
- name: appsettings
|
||||
mountPath: "/app/appsettings.json"
|
||||
readOnly: true
|
||||
subPath: appsettings.json
|
||||
@@ -1,6 +1,4 @@
|
||||
replicaCount: 1
|
||||
image:
|
||||
tag: 70878e14-debug
|
||||
ingress:
|
||||
enabled: true
|
||||
className: "nginx"
|
||||
@@ -32,3 +30,5 @@ volumeMounts:
|
||||
mountPath: "/app/appsettings.Development.json"
|
||||
readOnly: true
|
||||
subPath: appsettings.json
|
||||
image:
|
||||
tag: 2e1165d9-debug
|
||||
|
||||
@@ -34,6 +34,8 @@ spec:
|
||||
name: http
|
||||
protocol: TCP
|
||||
env:
|
||||
- name: BASE_URL
|
||||
value: https://fornix.hel1.oceanbox.io
|
||||
- name: DRUPAL_DATABASE_HOST
|
||||
value: drupal-db-rw
|
||||
- name: DRUPAL_DATABASE_PREFIX
|
||||
@@ -63,12 +65,15 @@ spec:
|
||||
- mountPath: /opt/drupal/web/sites
|
||||
name: drupal
|
||||
subPath: sites
|
||||
- mountPath: /opt/drupal/composer.json
|
||||
name: drupal
|
||||
subPath: modules/composer.json
|
||||
- mountPath: /opt/drupal/patches
|
||||
name: drupal
|
||||
subPath: modules/patches
|
||||
- mountPath: /opt/drupal/composer.json
|
||||
name: drupal
|
||||
subPath: modules/composer.json
|
||||
- mountPath: /opt/drupal/composer.lock
|
||||
name: drupal
|
||||
subPath: modules/composer.lock
|
||||
volumes:
|
||||
- name: drupal
|
||||
persistentVolumeClaim:
|
||||
|
||||
@@ -2,13 +2,14 @@ clusterConfig:
|
||||
manifests: https://gitlab.com/oceanbox/manifests.git
|
||||
env: "prod"
|
||||
distro: "talos"
|
||||
domain: "hel1.oceanbox.io"
|
||||
domain: "adm.hel1.obx"
|
||||
initca: ""
|
||||
apiserver: ""
|
||||
apiserverip: ""
|
||||
etcd_nodes: ["10.0.1.2, 10.0.1.4, 10.0.1.5"]
|
||||
k8s_nodes: [""]
|
||||
cluster: "hel1"
|
||||
ingress_clusterissuer: "ca-issuer"
|
||||
ingress_nodes: ["controlplane-1, controlplane-2, controlplane-3"]
|
||||
ingress_replica_count: 3
|
||||
ingress_loadbalancer: true
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
forgejo:
|
||||
enabled: true
|
||||
backup: true
|
||||
@@ -0,0 +1,5 @@
|
||||
forgejo:
|
||||
enabled: false
|
||||
backup: false
|
||||
autosync: {{ if eq .Environment.Name "prod" }} false {{ else }} true {{ end }}
|
||||
env: {{ .Environment.Name }}
|
||||
@@ -0,0 +1,33 @@
|
||||
apiVersion: barmancloud.cnpg.io/v1
|
||||
kind: ObjectStore
|
||||
metadata:
|
||||
name: hel-store
|
||||
namespace: forgejo
|
||||
spec:
|
||||
retentionPolicy: "7d"
|
||||
configuration:
|
||||
destinationPath: s3://obx-cnpg/hel1/forgejo-db
|
||||
endpointURL: https://hel1.your-objectstorage.com
|
||||
s3Credentials:
|
||||
accessKeyId:
|
||||
name: cnpg-s3
|
||||
key: access_key
|
||||
secretAccessKey:
|
||||
name: cnpg-s3
|
||||
key: access_secret
|
||||
wal:
|
||||
compression: snappy
|
||||
---
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: ScheduledBackup
|
||||
metadata:
|
||||
name: forgejo-db
|
||||
namespace: forgejo
|
||||
spec:
|
||||
schedule: "0 0 1 * * *"
|
||||
backupOwnerReference: self
|
||||
cluster:
|
||||
name: forgejo-db
|
||||
method: plugin
|
||||
pluginConfiguration:
|
||||
name: barman-cloud.cloudnative-pg.io
|
||||
@@ -0,0 +1,11 @@
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: Cluster
|
||||
metadata:
|
||||
name: forgejo-db
|
||||
namespace: forgejo
|
||||
spec:
|
||||
instances: 1
|
||||
imageName: ghcr.io/cloudnative-pg/postgresql:18-minimal-trixie
|
||||
storage:
|
||||
resizeInUseVolumes: true
|
||||
size: 10Gi
|
||||
@@ -0,0 +1,25 @@
|
||||
apiVersion: dragonflydb.io/v1alpha1
|
||||
kind: Dragonfly
|
||||
metadata:
|
||||
name: dragonfly-forgejo
|
||||
namespace: forgejo
|
||||
spec:
|
||||
replicas: 1
|
||||
resources:
|
||||
requests:
|
||||
cpu: 150m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
memory: 256Mi
|
||||
args:
|
||||
- --dbfilename=dump # Static filename prevents disk exhaustion
|
||||
- --maxmemory=$(MAX_MEMORY)Mi # Graceful memory management (90% of limit)
|
||||
- --proactor_threads=1 # Auto-detect CPU cores (optimal threading)
|
||||
- --cluster_mode=emulated
|
||||
- --logtostderr
|
||||
env:
|
||||
- name: MAX_MEMORY
|
||||
valueFrom:
|
||||
resourceFieldRef:
|
||||
resource: limits.memory
|
||||
divisor: 1Mi
|
||||
@@ -0,0 +1,42 @@
|
||||
{{- if .Values.clusterConfig.argo.enabled }}
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: forgejo
|
||||
namespace: argocd
|
||||
annotations:
|
||||
argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true
|
||||
finalizers:
|
||||
- resources-finalizer.argocd.argoproj.io
|
||||
spec:
|
||||
destination:
|
||||
namespace: forgejo
|
||||
server: https://kubernetes.default.svc
|
||||
project: sys
|
||||
sources:
|
||||
- repoURL: {{ .Values.clusterConfig.manifests }}
|
||||
targetRevision: HEAD
|
||||
path: helmfile.d
|
||||
plugin:
|
||||
name: helmfile-cmp
|
||||
env:
|
||||
- name: CLUSTER_NAME
|
||||
value: {{ .Values.clusterConfig.cluster }}
|
||||
- name: HELMFILE_ENVIRONMENT
|
||||
value: {{ .Values.forgejo.env }}
|
||||
- name: HELMFILE_FILE_PATH
|
||||
value: forgejo.yaml.gotmpl
|
||||
syncPolicy:
|
||||
managedNamespaceMetadata:
|
||||
labels:
|
||||
component: sys
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
- ApplyOutOfSyncOnly=true
|
||||
# - ServerSideApply=true
|
||||
{{- if .Values.forgejo.autosync }}
|
||||
automated:
|
||||
prune: true
|
||||
# selfHeal: false
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,11 @@
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PodMonitor
|
||||
metadata:
|
||||
name: forgejo-db-monitor
|
||||
namespace: forgejo
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
cnpg.io/cluster: forgejo-db
|
||||
podMetricsEndpoints:
|
||||
- port: metrics
|
||||
@@ -0,0 +1,134 @@
|
||||
replicaCount: 1
|
||||
image:
|
||||
registry: code.forgejo.org
|
||||
repository: forgejo/forgejo
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 512Mi
|
||||
|
||||
gitea:
|
||||
metrics:
|
||||
enabled: false
|
||||
serviceMonitor:
|
||||
enabled: false
|
||||
config:
|
||||
APP_NAME: 'Forgejo: With a cup of tea.'
|
||||
cache:
|
||||
ENABLED: true
|
||||
ADAPTER: redis
|
||||
HOST: redis://dragonfly-forgejo.forgejo.svc:6379/0
|
||||
session:
|
||||
PROVIDER: redis
|
||||
PROVIDER_CONFIG: redis://dragonfly-forgejo.forgejo.svc:6379/1
|
||||
queue:
|
||||
TYPE: redis
|
||||
CONN_STR: redis://dragonfly-forgejo.forgejo.svc:6379/2
|
||||
storage:
|
||||
STORAGE_TYPE: minio
|
||||
MINIO_ENDPOINT: hel1.your-objectstorage.com
|
||||
MINIO_USE_SSL: true
|
||||
MINIO_LOCATION: hel1
|
||||
MINIO_BUCKET: obx-forgejo
|
||||
security:
|
||||
INSTALL_LOCK: true
|
||||
service:
|
||||
DISABLE_REGISTRATION: false
|
||||
server:
|
||||
APP_DATA_PATH: "/data/gitea"
|
||||
DOMAIN: git.svc.hel1.obx
|
||||
ROOT_URL: https://git.svc.hel1.obx
|
||||
SSH_DOMAIN: git.svc.hel1.obx
|
||||
SSH_PORT: 22
|
||||
SSH_SERVER_USE_PROXY_PROTOCOL: true
|
||||
LANDING_PAGE: "explore"
|
||||
oauth2_client:
|
||||
ENABLE_AUTO_REGISTRATION: true
|
||||
UPDATE_AVATAR: true
|
||||
ACCOUNT_LINKING: auto
|
||||
database:
|
||||
DB_TYPE: postgres
|
||||
MAX_OPEN_CONNS: 90
|
||||
openid:
|
||||
ENABLE_OPENID_SIGNIN: false
|
||||
ENABLE_OPENID_SIGNUP: false
|
||||
oauth:
|
||||
- name: 'Oceanbox'
|
||||
provider: 'openidConnect'
|
||||
existingSecret: forgejo-oauth-oceanbox
|
||||
autoDiscoverUrl: 'https://login.microsoftonline.com/3f737008-e9a0-4485-9d27-40329d288089/.well-known/openid-configuration'
|
||||
scopes: 'openid profile email groups'
|
||||
groupClaimName: 'groups'
|
||||
adminGroup: '/oceanbox/devel'
|
||||
restrictedGroup: ''
|
||||
additionalConfigFromEnvs:
|
||||
- name: FORGEJO__STORAGE__MINIO_ACCESS_KEY_ID
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: forgejo-s3
|
||||
key: access_key
|
||||
- name: FORGEJO__STORAGE__MINIO_SECRET_ACCESS_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: forgejo-s3
|
||||
key: secret_key
|
||||
- name: FORGEJO__DATABASE__PASSWD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: forgejo-db-app
|
||||
key: password
|
||||
- name: FORGEJO__DATABASE__NAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: forgejo-db-app
|
||||
key: dbname
|
||||
- name: FORGEJO__DATABASE__USER
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: forgejo-db-app
|
||||
key: user
|
||||
- name: FORGEJO__DATABASE__HOST
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: forgejo-db-app
|
||||
key: host
|
||||
- name: FORGEJO__DATABASE__DB_TYPE
|
||||
value: postgres
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: ca-issuer
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/proxy-body-size: "0"
|
||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
|
||||
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
|
||||
nginx.ingress.kubernetes.io/whitelist-source-range: 10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,172.19.255.0/24,100.64.0.0/12
|
||||
hosts:
|
||||
- host: git.svc.hel1.obx
|
||||
paths:
|
||||
- backend:
|
||||
service:
|
||||
name: forgejo-http
|
||||
port:
|
||||
number: 3000
|
||||
path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls:
|
||||
- secretName: forgejo-tls
|
||||
hosts:
|
||||
- git.svc.hel1.obx
|
||||
|
||||
# service:
|
||||
# ssh:
|
||||
# type: LoadBalancer
|
||||
# port: 22
|
||||
# annotations:
|
||||
# load-balancer.hetzner.cloud/location: hel1
|
||||
# load-balancer.hetzner.cloud/uses-proxyprotocol: 'false'
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 1Gi
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user