devel: Useful kubectl plugins

chore(ingress-nginx): Bump to latest
chore(ingress-nginx): Bump to latest v4.9
2026-01-20 18:17:07 +01:00 · 2026-01-20 17:16:54 +01:00 · 2026-01-20 17:03:03 +01:00 · 2026-01-20 15:45:13 +01:00 · 2026-01-20 14:42:27 +00:00 · 2026-01-20 14:42:20 +00:00
188 changed files with 4925 additions and 2540 deletions
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 # the shebang is ignored, but nice for editors
 watch_file nix/sources.json
+watch_file nix/checks.nix

 # Load .env file if it exists
 dotenv_if_exists
@@ -1,6 +1,7 @@
 *.tgz
 _*/
 .direnv/
+.env
 .pre-commit-config.yaml
 _*.yaml
 backup/
@@ -1,46 +1,54 @@
-image:
-  name: alpine/helm:latest
-  entrypoint: [ "/bin/bash", "-c" ]
+# yaml-language-server: $schema=https://gitlab.com/gitlab-org/gitlab/-/raw/master/app/assets/javascripts/editor/schema/ci.json
+default:
+  tags:
+    - nix

-stages:
-  - release
+include:
+  - project: oceanbox/gitlab-ci
+    ref: v4.5
+    file: template/Base.gitlab-ci.yml
+# stages:
+#   - release

-release:
-  stage: release
-  rules:
-  - if: '$CI_COMMIT_BRANCH =~ /^main/'
-    when: always
-  - when: never
-  script:
-    - |
-      cd $CI_PROJECT_DIR
-      for i in $(git show --pretty="" --name-only | grep '^charts/.*/Chart.yaml' | cut -d/ -f2); do
-        pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
-        if [ ! -z $pack ]; then
-          chart=$(basename $pack)
-          curl --request POST \
-               --user gitlab-ci-token:$CI_JOB_TOKEN \
-               --form "chart=@${chart}" \
-               "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
-        fi
-      done
+# image:
+#   name: alpine/helm:latest
+#   entrypoint: ["/bin/bash", "-c"]

-rebuild:
-  stage: release
-  rules:
-    - when: manual
-      allow_failure: true
-  script:
-    - |
-      cd $CI_PROJECT_DIR
-      for i in $(find ./charts -maxdepth 2 -name Chart.yaml | cut -d/ -f3); do
-        pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
-        if [ ! -z $pack ]; then
-          chart=$(basename $pack)
-          curl --request POST \
-               --user gitlab-ci-token:$CI_JOB_TOKEN \
-               --form "chart=@${chart}" \
-               "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
-        fi
-      done
+# release:
+#   stage: release
+#   rules:
+#   - if: "$CI_COMMIT_BRANCH =~ /^main/"
+#     when: always
+#   - when: never
+#   script:
+#     - |
+#       cd $CI_PROJECT_DIR
+#       for i in $(git show --pretty="" --name-only | grep '^charts/.*/Chart.yaml' | cut -d/ -f2); do
+#         pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
+#         if [ ! -z $pack ]; then
+#           chart=$(basename $pack)
+#           curl --request POST \
+#                --user gitlab-ci-token:$CI_JOB_TOKEN \
+#                --form "chart=@${chart}" \
+#                "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
+#         fi
+#       done

+# rebuild:
+#   stage: release
+#   rules:
+#     - when: manual
+#       allow_failure: true
+#   script:
+#     - |
+#       cd $CI_PROJECT_DIR
+#       for i in $(find ./charts -maxdepth 2 -name Chart.yaml | cut -d/ -f3); do
+#         pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
+#         if [ ! -z $pack ]; then
+#           chart=$(basename $pack)
+#           curl --request POST \
+#                --user gitlab-ci-token:$CI_JOB_TOKEN \
+#                --form "chart=@${chart}" \
+#                "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
+#         fi
+#       done
@@ -6,39 +6,57 @@ let
  values = lib.apps.appValues {
    inherit env;
    base = ../values/atlantis;
-    extraValues = {};
+    extraValues = { };
  };

-  kustomize = r:
+  # Post-render transformer for a single rendered resource `r`:
+  #  - Deployment: every container gets /healthz liveness and readiness probe
+  #    paths and one extra environment variable appended.
+  #  - Service: replaced by an empty attribute set.
+  #  - anything else: returned unchanged.
+  kustomize =
+    r:
    if r.kind == "Deployment" then
      lib.attrsets.recursiveUpdate r {
-        spec.template.spec.containers =
-        builtins.map (x:
-        x // {
+        spec.template.spec.containers = builtins.map (
+          x:
+          x
+          // {
            livenessProbe.httpGet.path = "/healthz";
-            readinessProble.httpGet.path = "/healthz";
-            env = x.env ++ [ { name = "INERNAL_PORT"; value = 8000; } ];
-        }) r.spec.template.spec.containers;
+            # Was misspelled `readinessProble`, which added an unknown field
+            # instead of configuring the readiness probe.
+            readinessProbe.httpGet.path = "/healthz";
+            # Containers without an `env` list would otherwise abort evaluation.
+            env = (x.env or [ ]) ++ [
+              {
+                # NOTE(review): name looks like a typo of INTERNAL_PORT; kept
+                # as-is until the consumer of this variable is confirmed.
+                name = "INERNAL_PORT";
+                # Kubernetes EnvVar.value is a string, so the port is quoted.
+                value = "8000";
+              }
+            ];
+          }
+        ) r.spec.template.spec.containers;
      }
-      else if r.kind == "Service" then
-      {}
-    else r;
+    else if r.kind == "Service" then
+      { }
+    else
+      r;
 in
 {
  options.apps.atlantis = lib.apps.appOptions {
-      revision = lib.mkOption {
-        type = lib.types.str;
-        default = "main";
-        description = "Revision";
-      };
+    revision = lib.mkOption {
+      type = lib.types.str;
+      default = "main";
+      description = "Revision";
+    };

-      hostname = lib.mkOption {
-        type = lib.types.str;
-        default = if env == "prod"
-          then "maps.oceanbox.io"
-          else "atlantis.beta.oceanbox.io";
-        description = "Revision";
-      };
+    hostname = lib.mkOption {
+      type = lib.types.str;
+      default = if env == "prod" then "maps.oceanbox.io" else "atlantis.beta.oceanbox.io";
+      description = "Hostname";
+    };
  };

  config = lib.apps.appConfig cfg "${env}-atlantis" {
@@ -6,34 +6,32 @@ let
  values = lib.apps.appValues {
    inherit env;
    base = ../values/openfga;
-    extraValues = {};
+    extraValues = { };
  };

-  kustomize = r:
-    if r.kind == "Job" then
-      lib.attrsets.recursiveUpdate r { spec.backoffLimit = 2; }
-    else r;
+  kustomize =
+    r: if r.kind == "Job" then lib.attrsets.recursiveUpdate r { spec.backoffLimit = 2; } else r;

 in
-  {
-    options.apps.openfga = lib.apps.appOptions {};
+{
+  options.apps.openfga = lib.apps.appOptions { };

-    config = lib.apps.appConfig cfg "${env}-openfga" {
-        helm.releases."${env}-openfga" = {
-          inherit values;
-          chart = lib.helm.downloadHelmChart {
-            repo = "https://openfga.github.io/helm-charts";
-            chart = "openfga";
-              version = "0.2.12";
-              chartHash = "sha256-7yLcw9/oNPvCePrtTJwKAG88t0Ym5Dl/S83Gz+gQdDU=";
-          };
-          transformer = rs: builtins.map (x: kustomize x) rs;
-        };
-
-        annotations = {};
-        resources = {
-          services.poop.spec = {
-          };
-        };
+  config = lib.apps.appConfig cfg "${env}-openfga" {
+    helm.releases."${env}-openfga" = {
+      inherit values;
+      chart = lib.helm.downloadHelmChart {
+        repo = "https://openfga.github.io/helm-charts";
+        chart = "openfga";
+        version = "0.2.12";
+        chartHash = "sha256-7yLcw9/oNPvCePrtTJwKAG88t0Ym5Dl/S83Gz+gQdDU=";
      };
-  }
+      transformer = rs: builtins.map (x: kustomize x) rs;
+    };
+
+    annotations = { };
+    resources = {
+      services.poop.spec = {
+      };
+    };
+  };
+}
@@ -1,7 +1,4 @@
 {
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
@@ -2,16 +2,16 @@

 server="root@fs1-0"
 path="/vol/brick0/nfs0/k1/pv-oceanbox-dex"
-dest="$server:$path"
+dest="${server}:${path}"

 index=$(basename dist/assets/index-*.js)

-ssh $server -- rm $path/static/js/*.js
-scp dist/assets/*.js $dest/static/js/
+ssh "${server}" -- rm "${path}"/static/js/*.js
+scp dist/assets/*.js "${dest}"/static/js/

-sed -r "s/@index@/$index/" ./dex/templates/login.html > login.html.$$
-scp ./dex/templates/* $dest/templates/
-scp ./dex/static/*.* $dest/static/
-scp login.html.$$ $dest/templates/login.html
+sed -r "s/@index@/${index}/" ./dex/templates/login.html > login.html.$$
+scp ./dex/templates/* "${dest}"/templates/
+scp ./dex/static/*.* "${dest}"/static/
+scp login.html.$$ "${dest}"/templates/login.html
 rm login.html.$$
 ssh admin@k1-0.itpartner.intern -- kubectl rollout restart -n oceanbox deployment/dex
@@ -1,4 +1,5 @@
 #!/usr/bin/env bash
+# shellcheck disable=SC2034  # Unused variables left for readability

 helmfile () {

@@ -10,30 +11,30 @@ bases:
 - ../envs/environments.yaml.gotmpl

 commonLabels:
-  tier: $tier
+  tier: ${tier}

 releases:
- name: $name
-  namespace: {{ .Environment.Name }}-$name
-  chart: ../charts/$name
-  condition: $name.enabled
+- name: ${name}
+  namespace: {{ .Environment.Name }}-${name}
+  chart: ../charts/${name}
+  condition: ${name}.enabled
  values:
-  - ../values/$name/values/values.yaml.gotmpl
-  - ../values/$name/values/values-{{ .Environment.Name }}.yaml
+  - ../values/${name}/values/values.yaml.gotmpl
+  - ../values/${name}/values/values-{{ .Environment.Name }}.yaml
  postRenderer: ../bin/kustomizer
  postRendererArgs:
-  - ../values/$name/kustomize/{{ .Environment.Name }}
+  - ../values/${name}/kustomize/{{ .Environment.Name }}
  missingFileHandler: Info
 - name: manifests
-  namespace: {{ .Environment.Name }}-$name
+  namespace: {{ .Environment.Name }}-${name}
  chart: manifests
-  condition: $name.enabled
+  condition: ${name}.enabled
  missingFileHandler: Info
  values:
  - ../values/env.yaml
  - ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
-  - ../values/$name/env.yaml.gotmpl
-  - ../values/$name/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
+  - ../values/${name}/env.yaml.gotmpl
+  - ../values/${name}/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
  hooks:
  - events: [ prepare, cleanup ]
    showlogs: true
@@ -42,7 +43,7 @@ releases:
    - '{{\`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}\`}}'
    - '{{\`{{ .Release.Chart }}\`}}'
    - '{{\`{{ .Environment.Name }}\`}}'
-    - ../values/$name/manifests
+    - ../values/${name}/manifests
    - manifests
 EOF
 }
@@ -59,10 +60,10 @@ done

 name=$1
 tier=$2
-if [ -n "$ns" ]; then
-    namespace="namespace: {{ .Environment.Name }}-$name"
+if [[ -n "${ns}" ]]; then
+    namespace="namespace: {{ .Environment.Name }}-${name}"
 else
-    namespace="namespace: $name"
+    namespace="namespace: ${name}"
 fi

-helmfile $1 $2
+helmfile "$1" "$2"
@@ -4,39 +4,38 @@ set -o pipefail

 cmd=$1
 chart=$2
-env=$3
 manifests=${4:-manifests}
 outdir=${5:-_manifests}

 build() {
-  mkdir -p $outdir/templates
-  echo "Creating $outdir/templates"
+  mkdir -p "${outdir}"/templates
+  echo "Creating ${outdir}/templates"

-  echo "generating $outdir/Chart.yaml" 1>&2
+  echo "generating ${outdir}/Chart.yaml" 1>&2

-  cat <<EOF > $outdir/Chart.yaml
+  cat <<EOF > "${outdir}"/Chart.yaml
 apiVersion: v1
 appVersion: "1.0"
 # description: A Helm chart for Kubernetes
-name: $chart
+name: ${chart}
 version: 0.1.0
 EOF

-if [ -d $manifests ]; then
-    cp -r $manifests/* $outdir/templates
-elif [ -f $manifests ]; then
-    cp $manifests $outdir/templates
+if [[ -d "${manifests}" ]]; then
+    cp -r "${manifests}"/* "${outdir}"/templates
+elif [[ -f "${manifests}" ]]; then
+    cp "${manifests}" "${outdir}"/templates
 fi
 }

 clean() {
-  echo "cleaning $outdir" 1>&2
-  rm -rf $outdir
+  echo "cleaning ${outdir}" 1>&2
+  rm -rf "${outdir}"
 }

-case "$cmd" in
+case "${cmd}" in
  "build" ) build ;;
  "clean" ) clean ;;
-  * ) echo "unsupported command: $cmd" 1>&2; exit 1 ;;
+  * ) echo "unsupported command: ${cmd}" 1>&2; exit 1 ;;
 esac

@@ -1,13 +1,13 @@
 #!/usr/bin/env bash

-[ $# != 1 ] && exit 1
+[[ $# != 1 ]] && exit 1

 dir=$1
-base=$dir/../base
+base=${dir}/../base

-if [ -f $base/kustomization.yaml -a -f $dir/kustomization.yaml ]; then
-    cat > $base/_manifest.yaml
-    kubectl kustomize $dir
+if [[ -f "${base}"/kustomization.yaml ]] && [[ -f "${dir}"/kustomization.yaml ]]; then
+    cat > "${base}"/_manifest.yaml
+    kubectl kustomize "${dir}"
 else
    cat
 fi
@@ -3,16 +3,16 @@ kind: ClusterRole
 metadata:
  name: argocd-cluster-admin
 rules:
- apiGroups:
-  - '*'
-  resources:
-  - '*'
-  verbs:
-  - '*'
- nonResourceURLs:
-  - '*'
-  verbs:
-  - '*'
+  - apiGroups:
+      - "*"
+    resources:
+      - "*"
+    verbs:
+      - "*"
+  - nonResourceURLs:
+      - "*"
+    verbs:
+      - "*"
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
@@ -23,9 +23,9 @@ roleRef:
  kind: ClusterRole
  name: argocd-cluster-admin
 subjects:
- kind: ServiceAccount
-  name: argocd-cluster-admin
-  namespace: kube-system
+  - kind: ServiceAccount
+    name: argocd-cluster-admin
+    namespace: kube-system
 ---
 apiVersion: v1
 kind: ServiceAccount
@@ -6,5 +6,3 @@ metadata:
  name: cluster-admin-token
  namespace: kube-system
 type: kubernetes.io/service-account-token
-
-
@@ -10,5 +10,3 @@ metadata:
  name: cluster-ekman
  namespace: argocd
 type: Opaque
-
-
@@ -3,5 +3,5 @@
 img=registry.gitlab.com/oceanbox/manifests/helm-kustomize-cmp
 tag=${1:-latest}

-docker build -t $img:$tag .
-docker push $img:$tag
+docker build -t "${img}":"${tag}" .
+docker push "${img}":"${tag}"
@@ -1,14 +1,15 @@
 #!/bin/sh
+# shellcheck disable=SC2154

 export HOME=/plugin

-env > /tmp/$ARGOCD_APP_NAME.env
+env > /tmp/"${ARGOCD_APP_NAME}".env

-echo "$ARGOCD_APP_PARAMETERS" | jq '.[] | select(.name == "helm-parameters") | .map' | yq -P -oy > parameters.yaml
-cp parameters.yaml /tmp/$ARGOCD_APP_NAME-parameters.yaml
+echo "${ARGOCD_APP_PARAMETERS}" | jq '.[] | select(.name == "helm-parameters") | .map' | yq -P -oy > parameters.yaml
+cp parameters.yaml /tmp/"${ARGOCD_APP_NAME}"-parameters.yaml

-if [ -n "$PARAM_CHART" -a "$PARAM_CHART" != "." ]; then
-    CHART=$PARAM_CHART
+if [ -n "${PARAM_CHART}" ] && [ "${PARAM_CHART}" != "." ]; then
+    CHART=${PARAM_CHART}
 elif [ -d chart ]; then
    CHART=chart
 elif [ -f chart ]; then
@@ -18,19 +19,23 @@ else
 fi

 [ -f chart/values.yaml ] && VALUES="-f chart/values.yaml"
-[ -f values-chart.yaml ] && VALUES="$VALUES -f values-chart.yaml"
-[ -f values.yaml ] && VALUES="$VALUES -f values.yaml"
-[ -f values-$PARAM_ENV.yaml ] && VALUES="$VALUES -f values-$PARAM_ENV.yaml"
-VALUES="$VALUES -f parameters.yaml"
+[ -f values-chart.yaml ] && VALUES="${VALUES} -f values-chart.yaml"
+[ -f values.yaml ] && VALUES="${VALUES} -f values.yaml"
+[ -f values-"${PARAM_ENV}".yaml ] && VALUES="${VALUES} -f values-${PARAM_ENV}.yaml"
+VALUES="${VALUES} -f parameters.yaml"

-helm dependency update $CHART >/tmp/$ARGOCD_APP_NAME-helm-dependency-build.out
+helm dependency update "${CHART}" >/tmp/"${ARGOCD_APP_NAME}"-helm-dependency-build.out

 mkdir -p base
-echo "helm template -n $ARGOCD_APP_NAMESPACE $PARAM_FLAGS $VALUES $ARGOCD_APP_NAME $CHART" > /tmp/$ARGOCD_APP_NAME-helm.sh
-helm template -n $ARGOCD_APP_NAMESPACE $PARAM_FLAGS $VALUES $ARGOCD_APP_NAME $CHART > ./base/_manifest.yaml
+echo "helm template -n ${ARGOCD_APP_NAMESPACE} ${PARAM_FLAGS} ${VALUES} ${ARGOCD_APP_NAME} ${CHART}" > /tmp/"${ARGOCD_APP_NAME}"-helm.sh
+# PARAM_FLAGS and VALUES each hold a whitespace-separated list of helm
+# arguments, so they must stay unquoted and undergo word splitting; quoting
+# them passes one (possibly empty) argument and helm rejects the command line.
+# shellcheck disable=SC2086
+helm template -n "${ARGOCD_APP_NAMESPACE}" ${PARAM_FLAGS} ${VALUES} "${ARGOCD_APP_NAME}" "${CHART}" > ./base/_manifest.yaml

-cp ./base/_manifest.yaml /tmp/$ARGOCD_APP_NAME-manifest.yaml
+cp ./base/_manifest.yaml /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml

-[ -d "$PARAM_ENV" ] && kubectl kustomize $PARAM_ENV > /tmp/$ARGOCD_APP_NAME-manifest.yaml
+[ -d "${PARAM_ENV}" ] && kubectl kustomize "${PARAM_ENV}" > /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml

-cat /tmp/$ARGOCD_APP_NAME-manifest.yaml
+cat /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
@@ -18,7 +18,7 @@ EOF
    exit 0
 fi

-yq e -o=p $VALUES | jq --slurp --raw-input '
+yq e -o=p "${VALUES}" | jq --slurp --raw-input '
  [{
    name: "helm-parameters",
    title: "Helm Parameters",
@@ -1,8 +1,9 @@
 #!/bin/sh
+# shellcheck disable=SC2154

 export HOME=/plugin

-helm repo add --username argocd-helm --password "$OCEANBOX_HELM_ACCESS_TOKEN" oceanbox \
+helm repo add --username argocd-helm --password "${OCEANBOX_HELM_ACCESS_TOKEN}" oceanbox \
    https://gitlab.com/api/v4/projects/54396343/packages/helm/stable

 helm repo add bitnami https://charts.bitnami.com/bitnami
@@ -4,9 +4,9 @@ export HOME=/plugin

 helm repo update oceanbox

-if [ -n "$PARAM_CHART" -a "$PARAM_CHART" != "." ]; then
-    helm show values $PARAM_CHART > values-chart.yaml
+if [ -n "${PARAM_CHART}" ] && [ "${PARAM_CHART}" != "." ]; then
+    helm show values "${PARAM_CHART}" > values-chart.yaml
 elif [ -f chart ]; then
    CHART=$(cat chart)
-    helm show values $CHART > values-chart.yaml
+    helm show values "${CHART}" > values-chart.yaml
 fi
@@ -9,7 +9,7 @@ spec:
  init:
    # Init always happens immediately before generate, but its output is not treated as manifests.
    # This is a good place to, for example, download chart dependencies.
-    command: [ /bin/sh ]
+    command: [/bin/sh]
    args:
      - /plugin/init.sh
  # The generate command runs in the Application source directory each time manifests are generated. Standard output
@@ -17,7 +17,7 @@ spec:
  # To write log messages from the command, write them to stderr, it will always be displayed.
  # Error output will be sent to the UI, so avoid printing sensitive information (such as secrets).
  generate:
-    command: [ /bin/sh ]
+    command: [/bin/sh]
    args:
      - /plugin/generate.sh

@@ -27,15 +27,15 @@ spec:
  # Only one of fileName, find.glob, or find.command should be specified. If multiple are specified then only the
  # first (in that order) is evaluated.
  # discover:
-    # fileName is a glob pattern (https://pkg.go.dev/path/filepath#Glob) that is applied to the Application's source
-    # directory. If there is a match, this plugin may be used for the Application.
-    # fileName: "./subdir/s*.yaml"
-    # find:
-      # This does the same thing as fileName, but it supports double-start (nested directory) glob patterns.
-      # glob: "**/Chart.yaml"
-      # The find command runs in the repository's root directory. To match, it must exit with status code 0 _and_
-      # produce non-empty output to standard out.
-      # command: [sh, -c, find . -name env.yaml]
+  # fileName is a glob pattern (https://pkg.go.dev/path/filepath#Glob) that is applied to the Application's source
+  # directory. If there is a match, this plugin may be used for the Application.
+  # fileName: "./subdir/s*.yaml"
+  # find:
+  # This does the same thing as fileName, but it supports double-star (nested directory) glob patterns.
+  # glob: "**/Chart.yaml"
+  # The find command runs in the repository's root directory. To match, it must exit with status code 0 _and_
+  # produce non-empty output to standard out.
+  # command: [sh, -c, find . -name env.yaml]
  # The parameters config describes what parameters the UI should display for an Application. It is up to the user to
  # actually set parameters in the Application manifest (in spec.source.plugin.parameters). The announcements _only_
  # inform the "Parameters" tab in the App Details page of the UI.
@@ -66,22 +66,21 @@ spec:
        itemType: string
        collectionType: string
        string: ""
-      # All the fields above besides "string" apply to both the array and map type parameter announcements.
-      # - name: array-param
-      #   # This field communicates the parameter's default value to the UI. Setting this field is optional.
-      #   array: [default, items]
-      #   collectionType: array
-      # - name: map-param
-      #   # This field communicates the parameter's default value to the UI. Setting this field is optional.
-      #   map:
-      #     some: value
-      #   collectionType: map
+        # All the fields above besides 'string' apply to both the array and map type parameter announcements.
+        # - name: array-param
+        #   # This field communicates the parameter's default value to the UI. Setting this field is optional.
+        #   array: [default, items]
+        #   collectionType: array
+        # - name: map-param
+        #   # This field communicates the parameter's default value to the UI. Setting this field is optional.
+        #   map:
+        #     some: value
+        #   collectionType: map
    # dynamic:
-      # The command is run in an Application's source directory. Standard output must be JSON matching the schema of the
-      # static parameter announcements list.
-      # command: [ /bin/sh, /plugin/get-values.sh ]
+    # The command is run in an Application's source directory. Standard output must be JSON matching the schema of the
+    # static parameter announcements list.
+    # command: [ /bin/sh, /plugin/get-values.sh ]

    # If set to `true` then the plugin receives repository files with original file mode. Dangerous since the repository
    # might have executable files. Set to true only if you trust the CMP plugin authors.
    preserveFileMode: false
-
@@ -1,4 +1,4 @@
-FROM ghcr.io/helmfile/helmfile:v1.0.0
+FROM ghcr.io/helmfile/helmfile:v1.1.9

 RUN mkdir -p /home/argocd/cmp-server/config/
 COPY plugin.yaml /home/argocd/cmp-server/config/
@@ -45,432 +45,432 @@ spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
-          - podAffinityTerm:
-              labelSelector:
-                matchLabels:
-                  app.kubernetes.io/name: argocd-repo-server
-              topologyKey: kubernetes.io/hostname
-            weight: 100
+            - podAffinityTerm:
+                labelSelector:
+                  matchLabels:
+                    app.kubernetes.io/name: argocd-repo-server
+                topologyKey: kubernetes.io/hostname
+              weight: 100
      automountServiceAccountToken: true
      containers:
-      - args:
-        - /usr/local/bin/argocd-repo-server
-        - --port=8081
-        - --metrics-port=8084
-        env:
-        - name: ARGOCD_REPO_SERVER_NAME
-          value: argocd-repo-server
-        - name: ARGOCD_RECONCILIATION_TIMEOUT
-          valueFrom:
-            configMapKeyRef:
-              key: timeout.reconciliation
-              name: argocd-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_LOGFORMAT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.log.format
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_LOGLEVEL
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.log.level
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.parallelism.limit
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.listen.address
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.metrics.listen.address
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_DISABLE_TLS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.disable.tls
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_TLS_MIN_VERSION
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.tls.minversion
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_TLS_MAX_VERSION
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.tls.maxversion
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_TLS_CIPHERS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.tls.ciphers
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_CACHE_EXPIRATION
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.repo.cache.expiration
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: REDIS_SERVER
-          valueFrom:
-            configMapKeyRef:
-              key: redis.server
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: REDIS_COMPRESSION
-          valueFrom:
-            configMapKeyRef:
-              key: redis.compression
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: REDISDB
-          valueFrom:
-            configMapKeyRef:
-              key: redis.db
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: REDIS_USERNAME
-          valueFrom:
-            secretKeyRef:
-              key: redis-username
-              name: argocd-redis
-              optional: true
-        - name: REDIS_PASSWORD
-          valueFrom:
-            secretKeyRef:
-              key: auth
-              name: argocd-redis
-        - name: REDIS_SENTINEL_USERNAME
-          valueFrom:
-            secretKeyRef:
-              key: redis-sentinel-username
-              name: argocd-redis
-              optional: true
-        - name: REDIS_SENTINEL_PASSWORD
-          valueFrom:
-            secretKeyRef:
-              key: redis-sentinel-password
-              name: argocd-redis
-              optional: true
-        - name: ARGOCD_DEFAULT_CACHE_EXPIRATION
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.default.cache.expiration
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_OTLP_ADDRESS
-          valueFrom:
-            configMapKeyRef:
-              key: otlp.address
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_OTLP_INSECURE
-          valueFrom:
-            configMapKeyRef:
-              key: otlp.insecure
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_OTLP_HEADERS
-          valueFrom:
-            configMapKeyRef:
-              key: otlp.headers
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.max.combined.directory.manifests.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.plugin.tar.exclusions
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.allow.oob.symlinks
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.streamed.manifest.max.tar.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.streamed.manifest.max.extracted.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.helm.manifest.max.extracted.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.disable.helm.manifest.max.extracted.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_GIT_MODULES_ENABLED
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.enable.git.submodule
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.git.lsremote.parallelism.limit
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_GIT_REQUEST_TIMEOUT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.git.request.timeout
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.revision.cache.lock.timeout
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_INCLUDE_HIDDEN_DIRECTORIES
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.include.hidden.directories
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: HELM_CACHE_HOME
-          value: /helm-working-dir
-        - name: HELM_CONFIG_HOME
-          value: /helm-working-dir
-        - name: HELM_DATA_HOME
-          value: /helm-working-dir
-        image: quay.io/argoproj/argocd:v2.12.3
-        imagePullPolicy: IfNotPresent
-        livenessProbe:
-          failureThreshold: 3
-          httpGet:
-            path: /healthz?full=true
-            port: metrics
-            scheme: HTTP
-          initialDelaySeconds: 10
-          periodSeconds: 10
-          successThreshold: 1
-          timeoutSeconds: 1
-        name: repo-server
-        ports:
-        - containerPort: 8081
+        - args:
+            - /usr/local/bin/argocd-repo-server
+            - --port=8081
+            - --metrics-port=8084
+          env:
+            - name: ARGOCD_REPO_SERVER_NAME
+              value: argocd-repo-server
+            - name: ARGOCD_RECONCILIATION_TIMEOUT
+              valueFrom:
+                configMapKeyRef:
+                  key: timeout.reconciliation
+                  name: argocd-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_LOGFORMAT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.log.format
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_LOGLEVEL
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.log.level
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.parallelism.limit
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.listen.address
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.metrics.listen.address
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_DISABLE_TLS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.disable.tls
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_TLS_MIN_VERSION
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.tls.minversion
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_TLS_MAX_VERSION
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.tls.maxversion
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_TLS_CIPHERS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.tls.ciphers
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_CACHE_EXPIRATION
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.repo.cache.expiration
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: REDIS_SERVER
+              valueFrom:
+                configMapKeyRef:
+                  key: redis.server
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: REDIS_COMPRESSION
+              valueFrom:
+                configMapKeyRef:
+                  key: redis.compression
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: REDISDB
+              valueFrom:
+                configMapKeyRef:
+                  key: redis.db
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: REDIS_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  key: redis-username
+                  name: argocd-redis
+                  optional: true
+            - name: REDIS_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  key: auth
+                  name: argocd-redis
+            - name: REDIS_SENTINEL_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  key: redis-sentinel-username
+                  name: argocd-redis
+                  optional: true
+            - name: REDIS_SENTINEL_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  key: redis-sentinel-password
+                  name: argocd-redis
+                  optional: true
+            - name: ARGOCD_DEFAULT_CACHE_EXPIRATION
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.default.cache.expiration
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_OTLP_ADDRESS
+              valueFrom:
+                configMapKeyRef:
+                  key: otlp.address
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_OTLP_INSECURE
+              valueFrom:
+                configMapKeyRef:
+                  key: otlp.insecure
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_OTLP_HEADERS
+              valueFrom:
+                configMapKeyRef:
+                  key: otlp.headers
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.max.combined.directory.manifests.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.plugin.tar.exclusions
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.allow.oob.symlinks
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.streamed.manifest.max.tar.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.streamed.manifest.max.extracted.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.helm.manifest.max.extracted.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.disable.helm.manifest.max.extracted.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_GIT_MODULES_ENABLED
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.enable.git.submodule
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.git.lsremote.parallelism.limit
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_GIT_REQUEST_TIMEOUT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.git.request.timeout
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.revision.cache.lock.timeout
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_INCLUDE_HIDDEN_DIRECTORIES
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.include.hidden.directories
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: HELM_CACHE_HOME
+              value: /helm-working-dir
+            - name: HELM_CONFIG_HOME
+              value: /helm-working-dir
+            - name: HELM_DATA_HOME
+              value: /helm-working-dir
+          image: quay.io/argoproj/argocd:v2.12.3
+          imagePullPolicy: IfNotPresent
+          livenessProbe:
+            failureThreshold: 3
+            httpGet:
+              path: /healthz?full=true
+              port: metrics
+              scheme: HTTP
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
          name: repo-server
-          protocol: TCP
-        - containerPort: 8084
-          name: metrics
-          protocol: TCP
-        readinessProbe:
-          failureThreshold: 3
-          httpGet:
-            path: /healthz
-            port: metrics
-            scheme: HTTP
-          initialDelaySeconds: 10
-          periodSeconds: 10
-          successThreshold: 1
-          timeoutSeconds: 1
-        securityContext:
-          allowPrivilegeEscalation: false
-          capabilities:
-            drop:
-            - ALL
-          readOnlyRootFilesystem: true
-          runAsNonRoot: true
-          seccompProfile:
-            type: RuntimeDefault
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /app/config/ssh
-          name: ssh-known-hosts
-        - mountPath: /app/config/tls
-          name: tls-certs
-        - mountPath: /app/config/gpg/source
-          name: gpg-keys
-        - mountPath: /app/config/gpg/keys
-          name: gpg-keyring
-        - mountPath: /app/config/reposerver/tls
-          name: argocd-repo-server-tls
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
-        - mountPath: /home/argocd/cmp-server/plugins
-          name: plugins
-        - mountPath: /tmp
-          name: tmp
-      - command:
-        - /var/run/argocd/argocd-cmp-server
-        image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
-        imagePullPolicy: Always
-        name: kustomize-helm-with-rewrite
-        securityContext:
-          runAsNonRoot: true
-          runAsUser: 999
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /var/run/argocd
-          name: var-files
-        - mountPath: /home/argocd/cmp-server/plugins
-          name: plugins
-        - mountPath: /tmp
-          name: cmp-tmp
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
-      - command:
-        - /var/run/argocd/argocd-cmp-server
-        image: registry.gitlab.com/oceanbox/manifests/helm-kustomize-cmp:latest
-        imagePullPolicy: Always
-        name: helm-kustomize-cmp
-        securityContext:
-          runAsNonRoot: true
-          runAsUser: 999
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /var/run/argocd
-          name: var-files
-        - mountPath: /home/argocd/cmp-server/plugins
-          name: plugins
-        - mountPath: /tmp
-          name: cmp-tmp
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
-      - command:
-        - /var/run/argocd/argocd-cmp-server
-        image: registry.gitlab.com/oceanbox/manifests/helmfile-cmp:latest
-        imagePullPolicy: Always
-        name: helmfile-cmp
-        securityContext:
-          runAsNonRoot: true
-          runAsUser: 999
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /var/run/argocd
-          name: var-files
-        - mountPath: /home/argocd/cmp-server/plugins
-          name: plugins
-        - mountPath: /tmp
-          name: cmp-tmp
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
+          ports:
+            - containerPort: 8081
+              name: repo-server
+              protocol: TCP
+            - containerPort: 8084
+              name: metrics
+              protocol: TCP
+          readinessProbe:
+            failureThreshold: 3
+            httpGet:
+              path: /healthz
+              port: metrics
+              scheme: HTTP
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            seccompProfile:
+              type: RuntimeDefault
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /app/config/ssh
+              name: ssh-known-hosts
+            - mountPath: /app/config/tls
+              name: tls-certs
+            - mountPath: /app/config/gpg/source
+              name: gpg-keys
+            - mountPath: /app/config/gpg/keys
+              name: gpg-keyring
+            - mountPath: /app/config/reposerver/tls
+              name: argocd-repo-server-tls
+            - mountPath: /helm-working-dir
+              name: helm-working-dir
+            - mountPath: /home/argocd/cmp-server/plugins
+              name: plugins
+            - mountPath: /tmp
+              name: tmp
+        - command:  # CMP sidecar 1 of 3; all three share the same layout
+            - /var/run/argocd/argocd-cmp-server  # placed in var-files by the copyutil init container
+          image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest  # mutable tag
+          imagePullPolicy: Always  # registry is consulted on every container start
+          name: kustomize-helm-with-rewrite
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 999
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /var/run/argocd
+              name: var-files  # carries the argocd-cmp-server binary
+            - mountPath: /home/argocd/cmp-server/plugins
+              name: plugins  # also mounted by the repo-server container
+            - mountPath: /tmp
+              name: cmp-tmp  # not the "tmp" volume that repo-server mounts at /tmp
+            - mountPath: /helm-working-dir
+              name: helm-working-dir  # same volume init-helm-repos and repo-server mount
+        - command:  # CMP sidecar 2 of 3
+            - /var/run/argocd/argocd-cmp-server
+          image: registry.gitlab.com/oceanbox/manifests/helm-kustomize-cmp:latest  # mutable tag
+          imagePullPolicy: Always
+          name: helm-kustomize-cmp
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 999
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /var/run/argocd
+              name: var-files
+            - mountPath: /home/argocd/cmp-server/plugins
+              name: plugins
+            - mountPath: /tmp
+              name: cmp-tmp  # shared with the other two sidecars
+            - mountPath: /helm-working-dir
+              name: helm-working-dir
+        - command:  # CMP sidecar 3 of 3
+            - /var/run/argocd/argocd-cmp-server
+          image: registry.gitlab.com/oceanbox/manifests/helmfile-cmp:latest  # mutable tag
+          imagePullPolicy: Always
+          name: helmfile-cmp
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 999
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /var/run/argocd
+              name: var-files
+            - mountPath: /home/argocd/cmp-server/plugins
+              name: plugins
+            - mountPath: /tmp
+              name: cmp-tmp  # shared with the other two sidecars
+            - mountPath: /helm-working-dir
+              name: helm-working-dir
      dnsPolicy: ClusterFirst
      imagePullSecrets:
-      - name: gitlab-pull-secret
+        - name: gitlab-pull-secret
      initContainers:
-      - command:
-        - /bin/cp
-        - -n
-        - /usr/local/bin/argocd
-        - /var/run/argocd/argocd-cmp-server
-        image: quay.io/argoproj/argocd:v2.12.3
-        imagePullPolicy: IfNotPresent
-        name: copyutil
-        securityContext:
-          allowPrivilegeEscalation: false
-          capabilities:
-            drop:
-            - ALL
-          readOnlyRootFilesystem: true
-          runAsNonRoot: true
-          seccompProfile:
-            type: RuntimeDefault
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /var/run/argocd
-          name: var-files
-      - command:
-        - /bin/sh
-        - /plugin/init-helm-repos.sh
-        env:
-        - name: OCEANBOX_HELM_ACCESS_TOKEN
-          valueFrom:
-            secretKeyRef:
-              key: token
-              name: oceanbox-helm
-              optional: false
-        image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
-        imagePullPolicy: Always
-        name: init-helm-repos
-        securityContext:
-          allowPrivilegeEscalation: false
-          capabilities:
-            drop:
-            - ALL
-          readOnlyRootFilesystem: true
-          runAsNonRoot: true
-          runAsUser: 999
-          seccompProfile:
-            type: RuntimeDefault
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
+        - command:  # copies the argocd binary into var-files under the name the sidecars execute
+            - /bin/cp
+            - -n  # no-clobber: an already present target file is left as is
+            - /usr/local/bin/argocd
+            - /var/run/argocd/argocd-cmp-server
+          image: quay.io/argoproj/argocd:v2.12.3  # same image as the repo-server container
+          imagePullPolicy: IfNotPresent
+          name: copyutil
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            readOnlyRootFilesystem: true  # only the mounted volume below is writable
+            runAsNonRoot: true
+            seccompProfile:
+              type: RuntimeDefault
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /var/run/argocd
+              name: var-files
+        - command:  # NOTE(review): script not shown here; presumably registers Helm repos - confirm
+            - /bin/sh
+            - /plugin/init-helm-repos.sh
+          env:
+            - name: OCEANBOX_HELM_ACCESS_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  key: token
+                  name: oceanbox-helm
+                  optional: false  # required: the container cannot start without this secret key
+          image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest  # mutable tag
+          imagePullPolicy: Always
+          name: init-helm-repos
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            readOnlyRootFilesystem: true  # only the mounted volume below is writable
+            runAsNonRoot: true
+            runAsUser: 999  # same UID the CMP sidecars run as
+            seccompProfile:
+              type: RuntimeDefault
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /helm-working-dir
+              name: helm-working-dir  # also mounted by repo-server and the CMP sidecars
      restartPolicy: Always
      schedulerName: default-scheduler
      serviceAccount: argocd-repo-server
      serviceAccountName: argocd-repo-server
      terminationGracePeriodSeconds: 30
      volumes:
-      - name: cmp-tmp
-      - name: helm-working-dir
-      - name: plugins
-      - name: var-files
-      - name: tmp
-      - configMap:
-          defaultMode: 420
-          name: argocd-ssh-known-hosts-cm
-        name: ssh-known-hosts
-      - configMap:
-          defaultMode: 420
-          name: argocd-tls-certs-cm
-        name: tls-certs
-      - configMap:
-          defaultMode: 420
-          name: argocd-gpg-keys-cm
-        name: gpg-keys
-      - name: gpg-keyring
-      - name: argocd-repo-server-tls
-        secret:
-          defaultMode: 420
-          items:
-          - key: tls.crt
-            path: tls.crt
-          - key: tls.key
-            path: tls.key
-          - key: ca.crt
-            path: ca.crt
-          optional: true
-          secretName: argocd-repo-server-tls
+        - name: cmp-tmp  # /tmp of the CMP sidecars; NOTE(review): no volume source shown, presumably emptyDir - confirm
+        - name: helm-working-dir  # mounted by repo-server, the sidecars and init-helm-repos
+        - name: plugins  # /home/argocd/cmp-server/plugins in repo-server and the sidecars
+        - name: var-files  # hands argocd-cmp-server from copyutil to the sidecars
+        - name: tmp  # /tmp of the repo-server container only
+        - configMap:
+            defaultMode: 420  # decimal notation of octal 0644
+            name: argocd-ssh-known-hosts-cm
+          name: ssh-known-hosts
+        - configMap:
+            defaultMode: 420  # decimal notation of octal 0644
+            name: argocd-tls-certs-cm
+          name: tls-certs
+        - configMap:
+            defaultMode: 420  # decimal notation of octal 0644
+            name: argocd-gpg-keys-cm
+          name: gpg-keys
+        - name: gpg-keyring  # mounted at /app/config/gpg/keys in repo-server
+        - name: argocd-repo-server-tls
+          secret:
+            defaultMode: 420  # decimal notation of octal 0644
+            items:
+              - key: tls.crt
+                path: tls.crt
+              - key: tls.key
+                path: tls.key
+              - key: ca.crt
+                path: ca.crt
+            optional: true  # the pod still starts when this secret does not exist
+            secretName: argocd-repo-server-tls
@@ -4,24 +4,27 @@ spec:
  template:
    spec:
      imagePullSecrets:
-      - name: gitlab-pull-secret
+        - name: gitlab-pull-secret
      containers:
-      - command:
-        - /var/run/argocd/argocd-cmp-server
-        image: registry.gitlab.com/oceanbox/manifests/helmfile-cmp:latest
-        imagePullPolicy: Always
-        name: helmfile-cmp
-        securityContext:
-          runAsNonRoot: true
-          runAsUser: 999
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /var/run/argocd
-          name: var-files
-        - mountPath: /home/argocd/cmp-server/plugins
-          name: plugins
-        - mountPath: /tmp
-          name: tmp
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
+        - command:  # helmfile CMP sidecar added to the repo-server pod
+            - /var/run/argocd/argocd-cmp-server
+          image: registry.gitlab.com/oceanbox/manifests/helmfile-cmp:latest  # mutable tag
+          imagePullPolicy: Always
+          name: helmfile-cmp
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 999
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /var/run/argocd
+              name: var-files
+            - mountPath: /home/argocd/cmp-server/plugins
+              name: plugins
+            - mountPath: /tmp
+              name: cmp-tmp  # never the repo-server's own "tmp": separate filesystems limit path traversal
+            - mountPath: /helm-working-dir
+              name: helm-working-dir
+      volumes:
+        - name: cmp-tmp  # declared here so the patch does not depend on another overlay
+          emptyDir: {}
@@ -3,5 +3,6 @@
 img=registry.gitlab.com/oceanbox/manifests/helmfile-cmp
 tag=${1:-latest}

+# Build the image from this directory and publish it as img:tag (tag defaults to "latest").
-docker build -t $img:$tag .
-docker push $img:$tag
+docker build -t "${img}:${tag}" .
+docker push "${img}:${tag}"
@@ -1,4 +1,5 @@
 #!/bin/sh
+# shellcheck disable=SC2154

 # NOTE: Ensure errors are part of exitcode
 # set -o pipefail
@@ -10,7 +11,11 @@ export HELM_CONFIG_HOME=/tmp/helm/config
 export HELMFILE_CACHE_HOME=/tmp/helmfile/cache
 export HELMFILE_TEMPDIR=/tmp/helmfile/tmp

-test -n ARGOCD_ENV_HELMFILE_ENVIRONMENT && export HELMFILE_ENVIRONMENT=$ARGOCD_ENV_HELMFILE_ENVIRONMENT
-test -n ARGOCD_ENV_HELMFILE_FILE_PATH && export HELMFILE_FILE_PATH=$ARGOCD_ENV_HELMFILE_FILE_PATH
+# Forward the optional per-application settings only when they are non-empty.
+# The variable must be expanded: `test -n NAME` checks the literal word and is always true.
+test -n "${ARGOCD_ENV_HELMFILE_ENVIRONMENT:-}" && export HELMFILE_ENVIRONMENT="${ARGOCD_ENV_HELMFILE_ENVIRONMENT}"
+test -n "${ARGOCD_ENV_HELMFILE_FILE_PATH:-}" && export HELMFILE_FILE_PATH="${ARGOCD_ENV_HELMFILE_FILE_PATH}"

-helmfile -n "$ARGOCD_APP_NAMESPACE" $ARGS template --include-crds -q
+# ARGS stays unquoted on purpose: it holds zero or more extra flags, so it must
+# word-split and must vanish when empty instead of becoming an empty argument.
+# shellcheck disable=SC2086
+helmfile -n "${ARGOCD_APP_NAMESPACE}" ${ARGS:-} template -q --include-crds
@@ -4,7 +4,7 @@ metadata:
  name: helmfile-cmp
 spec:
  generate:
-    command: [ "/bin/sh" ]
+    command: ["/bin/sh"]
    args:
      - /plugin/generate.sh
  lockRepo: false
@@ -44,341 +44,341 @@ spec:
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
-          - podAffinityTerm:
-              labelSelector:
-                matchLabels:
-                  app.kubernetes.io/name: argocd-repo-server
-              topologyKey: kubernetes.io/hostname
-            weight: 100
+            - podAffinityTerm:
+                labelSelector:
+                  matchLabels:
+                    app.kubernetes.io/name: argocd-repo-server
+                topologyKey: kubernetes.io/hostname
+              weight: 100
      containers:
-      - args:
-        - /usr/local/bin/argocd-repo-server
-        - --port=8081
-        - --metrics-port=8084
-        env:
-        - name: ARGOCD_REPO_SERVER_NAME
-          value: argocd-repo-server
-        - name: ARGOCD_RECONCILIATION_TIMEOUT
-          valueFrom:
-            configMapKeyRef:
-              key: timeout.reconciliation
-              name: argocd-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_LOGFORMAT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.log.format
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_LOGLEVEL
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.log.level
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.parallelism.limit
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.listen.address
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.metrics.listen.address
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_DISABLE_TLS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.disable.tls
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_TLS_MIN_VERSION
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.tls.minversion
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_TLS_MAX_VERSION
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.tls.maxversion
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_TLS_CIPHERS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.tls.ciphers
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_CACHE_EXPIRATION
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.repo.cache.expiration
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: REDIS_SERVER
-          valueFrom:
-            configMapKeyRef:
-              key: redis.server
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: REDIS_COMPRESSION
-          valueFrom:
-            configMapKeyRef:
-              key: redis.compression
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: REDISDB
-          valueFrom:
-            configMapKeyRef:
-              key: redis.db
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: REDIS_USERNAME
-          valueFrom:
-            secretKeyRef:
-              key: redis-username
-              name: argocd-redis
-              optional: true
-        - name: REDIS_PASSWORD
-          valueFrom:
-            secretKeyRef:
-              key: redis-password
-              name: argocd-redis
-              optional: true
-        - name: ARGOCD_DEFAULT_CACHE_EXPIRATION
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.default.cache.expiration
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_OTLP_ADDRESS
-          valueFrom:
-            configMapKeyRef:
-              key: otlp.address
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_OTLP_INSECURE
-          valueFrom:
-            configMapKeyRef:
-              key: otlp.insecure
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_OTLP_HEADERS
-          valueFrom:
-            configMapKeyRef:
-              key: otlp.headers
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.max.combined.directory.manifests.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.plugin.tar.exclusions
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.allow.oob.symlinks
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.streamed.manifest.max.tar.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.streamed.manifest.max.extracted.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.helm.manifest.max.extracted.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.disable.helm.manifest.max.extracted.size
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_GIT_MODULES_ENABLED
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.enable.git.submodule
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.git.lsremote.parallelism.limit
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: ARGOCD_GIT_REQUEST_TIMEOUT
-          valueFrom:
-            configMapKeyRef:
-              key: reposerver.git.request.timeout
-              name: argocd-cmd-params-cm
-              optional: true
-        - name: HELM_CACHE_HOME
-          value: /helm-working-dir
-        - name: HELM_CONFIG_HOME
-          value: /helm-working-dir
-        - name: HELM_DATA_HOME
-          value: /helm-working-dir
-        image: quay.io/argoproj/argocd:v2.10.4
-        imagePullPolicy: IfNotPresent
-        livenessProbe:
-          failureThreshold: 3
-          httpGet:
-            path: /healthz?full=true
-            port: metrics
-            scheme: HTTP
-          initialDelaySeconds: 10
-          periodSeconds: 10
-          successThreshold: 1
-          timeoutSeconds: 1
-        name: repo-server
-        ports:
-        - containerPort: 8081
+        - args:
+            - /usr/local/bin/argocd-repo-server
+            - --port=8081
+            - --metrics-port=8084
+          env:
+            - name: ARGOCD_REPO_SERVER_NAME
+              value: argocd-repo-server
+            - name: ARGOCD_RECONCILIATION_TIMEOUT
+              valueFrom:
+                configMapKeyRef:
+                  key: timeout.reconciliation
+                  name: argocd-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_LOGFORMAT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.log.format
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_LOGLEVEL
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.log.level
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_PARALLELISM_LIMIT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.parallelism.limit
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_LISTEN_ADDRESS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.listen.address
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_LISTEN_METRICS_ADDRESS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.metrics.listen.address
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_DISABLE_TLS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.disable.tls
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_TLS_MIN_VERSION
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.tls.minversion
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_TLS_MAX_VERSION
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.tls.maxversion
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_TLS_CIPHERS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.tls.ciphers
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_CACHE_EXPIRATION
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.repo.cache.expiration
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: REDIS_SERVER
+              valueFrom:
+                configMapKeyRef:
+                  key: redis.server
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: REDIS_COMPRESSION
+              valueFrom:
+                configMapKeyRef:
+                  key: redis.compression
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: REDISDB
+              valueFrom:
+                configMapKeyRef:
+                  key: redis.db
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: REDIS_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  key: redis-username
+                  name: argocd-redis
+                  optional: true
+            - name: REDIS_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  key: redis-password
+                  name: argocd-redis
+                  optional: true
+            - name: ARGOCD_DEFAULT_CACHE_EXPIRATION
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.default.cache.expiration
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_OTLP_ADDRESS
+              valueFrom:
+                configMapKeyRef:
+                  key: otlp.address
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_OTLP_INSECURE
+              valueFrom:
+                configMapKeyRef:
+                  key: otlp.insecure
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_OTLP_HEADERS
+              valueFrom:
+                configMapKeyRef:
+                  key: otlp.headers
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_MAX_COMBINED_DIRECTORY_MANIFESTS_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.max.combined.directory.manifests.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_PLUGIN_TAR_EXCLUSIONS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.plugin.tar.exclusions
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_ALLOW_OUT_OF_BOUNDS_SYMLINKS
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.allow.oob.symlinks
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_TAR_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.streamed.manifest.max.tar.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_STREAMED_MANIFEST_MAX_EXTRACTED_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.streamed.manifest.max.extracted.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_HELM_MANIFEST_MAX_EXTRACTED_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.helm.manifest.max.extracted.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_REPO_SERVER_DISABLE_HELM_MANIFEST_MAX_EXTRACTED_SIZE
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.disable.helm.manifest.max.extracted.size
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_GIT_MODULES_ENABLED
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.enable.git.submodule
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_GIT_LS_REMOTE_PARALLELISM_LIMIT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.git.lsremote.parallelism.limit
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: ARGOCD_GIT_REQUEST_TIMEOUT
+              valueFrom:
+                configMapKeyRef:
+                  key: reposerver.git.request.timeout
+                  name: argocd-cmd-params-cm
+                  optional: true
+            - name: HELM_CACHE_HOME
+              value: /helm-working-dir
+            - name: HELM_CONFIG_HOME
+              value: /helm-working-dir
+            - name: HELM_DATA_HOME
+              value: /helm-working-dir
+          image: quay.io/argoproj/argocd:v2.10.4
+          imagePullPolicy: IfNotPresent
+          livenessProbe:
+            failureThreshold: 3
+            httpGet:
+              path: /healthz?full=true
+              port: metrics
+              scheme: HTTP
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
          name: repo-server
-          protocol: TCP
-        - containerPort: 8084
-          name: metrics
-          protocol: TCP
-        readinessProbe:
-          failureThreshold: 3
-          httpGet:
-            path: /healthz
-            port: metrics
-            scheme: HTTP
-          initialDelaySeconds: 10
-          periodSeconds: 10
-          successThreshold: 1
-          timeoutSeconds: 1
-        resources: {}
-        securityContext:
-          allowPrivilegeEscalation: false
-          capabilities:
-            drop:
-            - ALL
-          readOnlyRootFilesystem: true
-          runAsNonRoot: true
-          seccompProfile:
-            type: RuntimeDefault
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /app/config/ssh
-          name: ssh-known-hosts
-        - mountPath: /app/config/tls
-          name: tls-certs
-        - mountPath: /app/config/gpg/source
-          name: gpg-keys
-        - mountPath: /app/config/gpg/keys
-          name: gpg-keyring
-        - mountPath: /app/config/reposerver/tls
-          name: argocd-repo-server-tls
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
-        - mountPath: /home/argocd/cmp-server/plugins
-          name: plugins
-        - mountPath: /tmp
-          name: tmp
-      - command:
-        - /var/run/argocd/argocd-cmp-server
-        image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
-        imagePullPolicy: Always
-        name: kustomize-helm-with-rewrite
-        resources: {}
-        securityContext:
-          runAsNonRoot: true
-          runAsUser: 999
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /var/run/argocd
-          name: var-files
-        - mountPath: /home/argocd/cmp-server/plugins
-          name: plugins
-        - mountPath: /tmp
-          name: cmp-tmp
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
+          ports:
+            - containerPort: 8081
+              name: repo-server
+              protocol: TCP
+            - containerPort: 8084
+              name: metrics
+              protocol: TCP
+          readinessProbe:
+            failureThreshold: 3
+            httpGet:
+              path: /healthz
+              port: metrics
+              scheme: HTTP
+            initialDelaySeconds: 10
+            periodSeconds: 10
+            successThreshold: 1
+            timeoutSeconds: 1
+          resources: {}
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            seccompProfile:
+              type: RuntimeDefault
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /app/config/ssh
+              name: ssh-known-hosts
+            - mountPath: /app/config/tls
+              name: tls-certs
+            - mountPath: /app/config/gpg/source
+              name: gpg-keys
+            - mountPath: /app/config/gpg/keys
+              name: gpg-keyring
+            - mountPath: /app/config/reposerver/tls
+              name: argocd-repo-server-tls
+            - mountPath: /helm-working-dir
+              name: helm-working-dir
+            - mountPath: /home/argocd/cmp-server/plugins
+              name: plugins
+            - mountPath: /tmp
+              name: tmp
+        - command:
+            - /var/run/argocd/argocd-cmp-server
+          image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
+          imagePullPolicy: Always
+          name: kustomize-helm-with-rewrite
+          resources: {}
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 999
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /var/run/argocd
+              name: var-files
+            - mountPath: /home/argocd/cmp-server/plugins
+              name: plugins
+            - mountPath: /tmp
+              name: cmp-tmp
+            - mountPath: /helm-working-dir
+              name: helm-working-dir
      dnsPolicy: ClusterFirst
      imagePullSecrets:
-      - name: gitlab-pull-secret
+        - name: gitlab-pull-secret
      initContainers:
-      - command:
-        - /bin/cp
-        - -n
-        - /usr/local/bin/argocd
-        - /var/run/argocd/argocd-cmp-server
-        image: quay.io/argoproj/argocd:v2.10.4
-        imagePullPolicy: IfNotPresent
-        name: copyutil
-        resources: {}
-        securityContext:
-          allowPrivilegeEscalation: false
-          capabilities:
-            drop:
-            - ALL
-          readOnlyRootFilesystem: true
-          runAsNonRoot: true
-          seccompProfile:
-            type: RuntimeDefault
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        volumeMounts:
-        - mountPath: /var/run/argocd
-          name: var-files
-      - command:
-        - /bin/sh
-        - /plugin/init-helm-repos.sh
-        image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
-        imagePullPolicy: Always
-        name: init-helm-repos
-        resources: {}
-        securityContext:
-          allowPrivilegeEscalation: false
-          capabilities:
-            drop:
-            - ALL
-          readOnlyRootFilesystem: true
-          runAsUser: 999
-          runAsNonRoot: true
-          seccompProfile:
-            type: RuntimeDefault
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-        env:
-        - name: OCEANBOX_HELM_ACCESS_TOKEN
-          valueFrom:
-            secretKeyRef:
-              key: token
-              name: oceanbox-helm
-              optional: false
-        volumeMounts:
-        - mountPath: /helm-working-dir
-          name: helm-working-dir
+        - command:
+            - /bin/cp
+            - -n
+            - /usr/local/bin/argocd
+            - /var/run/argocd/argocd-cmp-server
+          image: quay.io/argoproj/argocd:v2.10.4
+          imagePullPolicy: IfNotPresent
+          name: copyutil
+          resources: {}
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            readOnlyRootFilesystem: true
+            runAsNonRoot: true
+            seccompProfile:
+              type: RuntimeDefault
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          volumeMounts:
+            - mountPath: /var/run/argocd
+              name: var-files
+        - command:
+            - /bin/sh
+            - /plugin/init-helm-repos.sh
+          image: registry.gitlab.com/oceanbox/manifests/kustomize-helm-with-rewrite:latest
+          imagePullPolicy: Always
+          name: init-helm-repos
+          resources: {}
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            readOnlyRootFilesystem: true
+            runAsUser: 999
+            runAsNonRoot: true
+            seccompProfile:
+              type: RuntimeDefault
+          terminationMessagePath: /dev/termination-log
+          terminationMessagePolicy: File
+          env:
+            - name: OCEANBOX_HELM_ACCESS_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  key: token
+                  name: oceanbox-helm
+                  optional: false
+          volumeMounts:
+            - mountPath: /helm-working-dir
+              name: helm-working-dir
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
@@ -386,40 +386,39 @@ spec:
      serviceAccountName: argocd-repo-server
      terminationGracePeriodSeconds: 30
      volumes:
-      - emptyDir: {}
-        name: cmp-tmp
-      - emptyDir: {}
-        name: helm-working-dir
-      - emptyDir: {}
-        name: plugins
-      - emptyDir: {}
-        name: var-files
-      - emptyDir: {}
-        name: tmp
-      - configMap:
-          defaultMode: 420
-          name: argocd-ssh-known-hosts-cm
-        name: ssh-known-hosts
-      - configMap:
-          defaultMode: 420
-          name: argocd-tls-certs-cm
-        name: tls-certs
-      - configMap:
-          defaultMode: 420
-          name: argocd-gpg-keys-cm
-        name: gpg-keys
-      - emptyDir: {}
-        name: gpg-keyring
-      - name: argocd-repo-server-tls
-        secret:
-          defaultMode: 420
-          items:
-          - key: tls.crt
-            path: tls.crt
-          - key: tls.key
-            path: tls.key
-          - key: ca.crt
-            path: ca.crt
-          optional: true
-          secretName: argocd-repo-server-tls
-
+        - emptyDir: {}
+          name: cmp-tmp
+        - emptyDir: {}
+          name: helm-working-dir
+        - emptyDir: {}
+          name: plugins
+        - emptyDir: {}
+          name: var-files
+        - emptyDir: {}
+          name: tmp
+        - configMap:
+            defaultMode: 420
+            name: argocd-ssh-known-hosts-cm
+          name: ssh-known-hosts
+        - configMap:
+            defaultMode: 420
+            name: argocd-tls-certs-cm
+          name: tls-certs
+        - configMap:
+            defaultMode: 420
+            name: argocd-gpg-keys-cm
+          name: gpg-keys
+        - emptyDir: {}
+          name: gpg-keyring
+        - name: argocd-repo-server-tls
+          secret:
+            defaultMode: 420
+            items:
+              - key: tls.crt
+                path: tls.crt
+              - key: tls.key
+                path: tls.key
+              - key: ca.crt
+                path: ca.crt
+            optional: true
+            secretName: argocd-repo-server-tls
@@ -13,7 +13,7 @@ kubectl --context ekman apply -f cluster-admin-token.yaml
 # kubectl --context oceanbox apply -f _cluster-ekman.yaml

 token=$(kubectl --context ekman get secret -n kube-system argocd-manager-token -o yaml | grep ' token:' | cut -d' ' -f4 | base64 -d)
-sed "s/@token@/$token/" cluster-ekman.yaml > _cluster-ekman.yaml
+sed "s/@token@/${token}/" cluster-ekman.yaml > _cluster-ekman.yaml
 echo "configure argocd ekman-cluster..."
 cat _cluster-ekman.yaml
 kubectl --context oceanbox apply -f _cluster-ekman.yaml
@@ -13,4 +13,3 @@ stringData:
  name: staging-vcluster
  server: https://staging-vcluster.staging-vcluster
 type: Opaque
-
@@ -19,12 +19,12 @@ applications:
        plugin:
          name: helmfile-cmp
          env:
-          - name: CLUSTER_NAME
-            value: replaceme
-          - name: HELMFILE_ENVIRONMENT
-            value: default
-          - name: HELMFILE_FILE_PATH
-            value: system.yaml.gotmpl
+            - name: CLUSTER_NAME
+              value: replaceme
+            - name: HELMFILE_ENVIRONMENT
+              value: default
+            - name: HELMFILE_FILE_PATH
+              value: system.yaml.gotmpl
 projects:
  sys:
    namespace: argocd
@@ -32,12 +32,12 @@ projects:
    additionalAnnotations: {}
    description: sys components project
    sourceRepos:
-    - '*'
+      - "*"
    destinations:
-    - namespace: '*'
-      server: https://kubernetes.default.svc
+      - namespace: "*"
+        server: https://kubernetes.default.svc
    clusterResourceWhitelist:
-    - group: '*'
-      kind: '*'
+      - group: "*"
+        kind: "*"
    orphanedResources:
      warn: false
@@ -8,3 +8,8 @@ version: v1.35.2
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application.
 appVersion: v1.35.2
+dependencies:
+  - name: diagrid-dashboard
+    version: "0.1.0"
+    repository: "file://../diagrid-dashboard"
+    condition: diagrid-dashboard.enabled
@@ -116,3 +116,6 @@ serviceMonitor:
 nodeSelector: {}
 tolerations: []
 affinity: {}
+
+diagrid-dashboard:
+  enabled: false
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: diagrid-dashboard
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
@@ -0,0 +1,35 @@
+1. Get the application URL by running these commands:
+{{- if .Values.httpRoute.enabled }}
+{{- if .Values.httpRoute.hostnames }}
+    export APP_HOSTNAME={{ .Values.httpRoute.hostnames | first }}
+{{- else }}
+    export APP_HOSTNAME=$(kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o jsonpath="{.spec.listeners[0].hostname}")
+  {{- end }}
+{{- if and .Values.httpRoute.rules (first .Values.httpRoute.rules).matches (first (first .Values.httpRoute.rules).matches).path.value }}
+    echo "Visit http://$APP_HOSTNAME{{ (first (first .Values.httpRoute.rules).matches).path.value }} to use your application"
+
+    NOTE: Your HTTPRoute depends on the listener configuration of your gateway and your HTTPRoute rules.
+    The rules can be set for path, method, header and query parameters.
+    You can check the gateway configuration with 'kubectl get --namespace {{(first .Values.httpRoute.parentRefs).namespace | default .Release.Namespace }} gateway/{{ (first .Values.httpRoute.parentRefs).name }} -o yaml'
+{{- end }}
+{{- else if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "diagrid-dashboard.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "diagrid-dashboard.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "diagrid-dashboard.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "diagrid-dashboard.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "diagrid-dashboard.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "diagrid-dashboard.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "diagrid-dashboard.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "diagrid-dashboard.labels" -}}
+helm.sh/chart: {{ include "diagrid-dashboard.chart" . }}
+{{ include "diagrid-dashboard.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "diagrid-dashboard.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "diagrid-dashboard.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "diagrid-dashboard.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "diagrid-dashboard.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
@@ -0,0 +1,99 @@
+{{- /*
+Deployment running the chart's single container. The container reads the
+component file named by COMPONENT_FILE, which is mounted from the
+<fullname>-statestore ConfigMap.
+*/ -}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "diagrid-dashboard.fullname" . }}
+  labels:
+    {{- include "diagrid-dashboard.labels" . | nindent 4 }}
+spec:
+  {{- if not .Values.autoscaling.enabled }}
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "diagrid-dashboard.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      annotations:
+        {{- /*
+        The statestore component is mounted through subPath, and subPath
+        mounts never receive ConfigMap updates. Hashing the values that feed
+        the ConfigMap changes the pod template whenever they change, so a
+        helm upgrade rolls the pods and they pick up the new component file.
+        */}}
+        checksum/statestore: {{ toYaml .Values.statestore | sha256sum }}
+        {{- with .Values.podAnnotations }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      labels:
+        {{- include "diagrid-dashboard.labels" . | nindent 8 }}
+        {{- with .Values.podLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "diagrid-dashboard.serviceAccountName" . }}
+      {{- with .Values.podSecurityContext }}
+      securityContext:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ .Chart.Name }}
+          {{- with .Values.securityContext }}
+          securityContext:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          env:
+            - name: COMPONENT_FILE
+              value: /app/components/statestore.yaml
+          ports:
+            - name: http
+              containerPort: {{ .Values.service.port }}
+              protocol: TCP
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          volumeMounts:
+            - name: statestore
+              mountPath: /app/components/statestore.yaml
+              subPath: statestore.yaml
+          {{- with .Values.volumeMounts }}
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+      volumes:
+        - name: statestore
+          configMap:
+            name: {{ include "diagrid-dashboard.fullname" . }}-statestore
+      {{- with .Values.volumes }}
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
@@ -0,0 +1,32 @@
+{{- if .Values.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "diagrid-dashboard.fullname" . }}
+  labels:
+    {{- include "diagrid-dashboard.labels" . | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "diagrid-dashboard.fullname" . }}
+  minReplicas: {{ .Values.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:
+    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+    {{- end }}
+    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- end }}
+{{- end }}
@@ -0,0 +1,45 @@
+{{- /*
+HTTPRoute (Gateway API) that sends traffic to the chart's Service.
+Rendered only when .Values.httpRoute.enabled is set. Every entry of
+.Values.httpRoute.rules becomes one rule that targets the Service port;
+matches and filters are copied through when a rule supplies them.
+*/ -}}
+{{- if .Values.httpRoute.enabled -}}
+{{- $fullName := include "diagrid-dashboard.fullname" . -}}
+{{- $svcPort := .Values.service.port -}}
+apiVersion: gateway.networking.k8s.io/v1
+kind: HTTPRoute
+metadata:
+  name: {{ $fullName }}
+  labels:
+    {{- include "diagrid-dashboard.labels" . | nindent 4 }}
+  {{- with .Values.httpRoute.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  parentRefs:
+    {{- with .Values.httpRoute.parentRefs }}
+      {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with .Values.httpRoute.hostnames }}
+  hostnames:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  rules:
+    {{- range .Values.httpRoute.rules }}
+    {{- /* backendRefs opens the list item so a rule without matches still renders as a valid entry. */}}
+    - backendRefs:
+        - name: {{ $fullName }}
+          port: {{ $svcPort }}
+          weight: 1
+      {{- with .matches }}
+      matches:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .filters }}
+      filters:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+    {{- end }}
+{{- end }}
@@ -0,0 +1,47 @@
+{{- /*
+Ingress exposing the chart's Service; rendered only when
+.Values.ingress.enabled is set. One rule is emitted per entry in
+.Values.ingress.hosts, and every path routes to the Service port.
+*/ -}}
+{{- if .Values.ingress.enabled -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "diagrid-dashboard.fullname" . }}
+  labels:
+    {{- include "diagrid-dashboard.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if .Values.ingress.tls }}
+  tls:
+    {{- range .Values.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range .Values.ingress.hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- /* networking.k8s.io/v1 rejects paths without a pathType, so fall back to ImplementationSpecific. */}}
+            pathType: {{ .pathType | default "ImplementationSpecific" }}
+            backend:
+              service:
+                name: {{ include "diagrid-dashboard.fullname" $ }}
+                port:
+                  number: {{ $.Values.service.port }}
+          {{- end }}
+    {{- end }}
+{{- end }}
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "diagrid-dashboard.fullname" . }}
+  labels:
+    {{- include "diagrid-dashboard.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: http
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "diagrid-dashboard.selectorLabels" . | nindent 4 }}
@@ -0,0 +1,13 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "diagrid-dashboard.serviceAccountName" . }}
+  labels:
+    {{- include "diagrid-dashboard.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
+{{- end }}
@@ -0,0 +1,33 @@
+# ConfigMap holding a Dapr Component manifest under the key `statestore.yaml`.
+# The component is a Redis state store whose host comes from
+# `.Values.statestore.redis` and whose `scopes` entry comes from `.Values.statestore.scope`.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "diagrid-dashboard.fullname" . }}-statestore
+  labels:
+    {{- include "diagrid-dashboard.labels" . | nindent 4 }}
+data:
+  # NOTE(review): redisUsername/redisPassword are hard-coded below and stored in
+  # a plain ConfigMap; consider sourcing them from a Secret instead.
+  statestore.yaml: |
+    apiVersion: dapr.io/v1alpha1
+    kind: Component
+    metadata:
+      name: statestore
+    scopes:
+      - {{ .Values.statestore.scope | quote }}
+    spec:
+      metadata:
+        - name: redisHost
+          value: "{{ .Values.statestore.redis }}:6379"
+        - name: redisUsername
+          value: default
+        - name: redisPassword
+          value: secret
+        - name: actorStateStore
+          value: "true"
+        - name: redisDB
+          value: "1"
+      type: state.redis
+      version: v1
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "diagrid-dashboard.fullname" . }}-test-connection"
+  labels:
+    {{- include "diagrid-dashboard.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+spec:
+  containers:
+    - name: wget
+      image: busybox
+      command: ['wget']
+      args: ['{{ include "diagrid-dashboard.fullname" . }}:{{ .Values.service.port }}']
+  restartPolicy: Never
@@ -0,0 +1,160 @@
+# Default values for diagrid-dashboard.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+statestore:
+  scope: my-scope
+  redis: my-redis
+
+# This sets the number of replicas. More information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
+replicaCount: 1
+
+# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/
+image:
+  repository: ghcr.io/diagridio/diagrid-dashboard
+  # This sets the pull policy for images.
+  pullPolicy: IfNotPresent
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: "latest"
+
+# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
+imagePullSecrets: []
+# This is to override the chart name.
+nameOverride: ""
+fullnameOverride: ""
+
+# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # Automatically mount a ServiceAccount's API credentials?
+  automount: true
+  # Annotations to add to the service account
+  annotations: {}
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name: ""
+
+# This is for setting Kubernetes Annotations to a Pod.
+# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
+podAnnotations: {}
+# This is for setting Kubernetes Labels to a Pod.
+# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+podLabels: {}
+
+podSecurityContext: {}
+  # fsGroup: 2000
+
+securityContext: {}
+  # capabilities:
+  #   drop:
+  #   - ALL
+  # readOnlyRootFilesystem: true
+  # runAsNonRoot: true
+  # runAsUser: 1000
+
+# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
+service:
+  # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
+  type: ClusterIP
+  # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
+  port: 8080
+
+# This block sets up the ingress. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
+ingress:
+  enabled: true
+  className: "nginx"
+  annotations:
+    cert-manager.io/cluster-issuer: ca-issuer
+    nginx.ingress.kubernetes.io/backend-protocol: HTTP
+    nginx.ingress.kubernetes.io/ssl-redirect: "true"
+    oceanbox.io/expose: internal
+  hosts:
+    - host: diadash.dev.vtn.obx
+      paths:
+        - path: /
+          pathType: ImplementationSpecific
+  tls:
+    - secretName: diadash-tls
+      hosts:
+        - diadash.dev.vtn.obx
+
+# -- Expose the service via gateway-api HTTPRoute
+# Requires Gateway API resources and suitable controller installed within the cluster
+# (see: https://gateway-api.sigs.k8s.io/guides/)
+httpRoute:
+  # HTTPRoute enabled.
+  enabled: false
+  # HTTPRoute annotations.
+  annotations: {}
+  # Which Gateways this Route is attached to.
+  parentRefs:
+    - name: gateway
+      sectionName: http
+      # namespace: default
+  # Hostnames matching HTTP header.
+  hostnames:
+    - chart-example.local
+  # List of rules and filters applied.
+  rules:
+    - matches:
+        - path:
+            type: PathPrefix
+            value: /headers
+  #   filters:
+  #   - type: RequestHeaderModifier
+  #     requestHeaderModifier:
+  #       set:
+  #       - name: My-Overwrite-Header
+  #         value: this-is-the-only-value
+  #       remove:
+  #       - User-Agent
+  # - matches:
+  #   - path:
+  #       type: PathPrefix
+  #       value: /echo
+  #     headers:
+  #     - name: version
+  #       value: v2
+
+resources: {}
+  # We usually recommend not to specify default resources and to leave this as a conscious
+  # choice for the user. This also increases chances charts run on environments with little
+  # resources, such as Minikube. If you do want to specify resources, uncomment the following
+  # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+  # limits:
+  #   cpu: 100m
+  #   memory: 128Mi
+  # requests:
+  #   cpu: 100m
+  #   memory: 128Mi
+
+# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
+livenessProbe:
+  httpGet:
+    path: /
+    port: http
+readinessProbe:
+  httpGet:
+    path: /
+    port: http
+
+# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
+autoscaling:
+  enabled: false
+  minReplicas: 1
+  maxReplicas: 100
+  targetCPUUtilizationPercentage: 80
+  # targetMemoryUtilizationPercentage: 80
+
+# Additional volumes on the output Deployment definition.
+volumes: []  # a list, like the pod-spec field it feeds; override with a list of volume objects
+
+# Additional volumeMounts on the output Deployment definition.
+volumeMounts: []  # a list, like the container field it feeds; override with a list of mount objects
+
+nodeSelector: {}
+
+tolerations: []
+
+affinity: {}
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
@@ -0,0 +1,21 @@
+apiVersion: v2
+name: fornix
+description: A Helm chart for Kubernetes
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: v1.6.0
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "v1.6.0"
@@ -0,0 +1,22 @@
+1. Get the application URL by running these commands:
+{{- if .Values.ingress.enabled }}
+{{- range $host := .Values.ingress.hosts }}
+  {{- range .paths }}
+  http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
+  {{- end }}
+{{- end }}
+{{- else if contains "NodePort" .Values.service.type }}
+  export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "fornix.fullname" . }})
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.service.type }}
+     NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+           You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "fornix.fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "fornix.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
+  echo http://$SERVICE_IP:{{ .Values.service.port }}
+{{- else if contains "ClusterIP" .Values.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "fornix.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+  export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
+{{- end }}
@@ -0,0 +1,62 @@
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "fornix.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+If release name contains chart name it will be used as a full name.
+*/}}
+{{- define "fornix.fullname" -}}
+{{- if .Values.fullnameOverride }}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $name := default .Chart.Name .Values.nameOverride }}
+{{- if contains $name .Release.Name }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Create chart name and version as used by the chart label.
+*/}}
+{{- define "fornix.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Common labels
+*/}}
+{{- define "fornix.labels" -}}
+helm.sh/chart: {{ include "fornix.chart" . }}
+{{ include "fornix.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels
+*/}}
+{{- define "fornix.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "fornix.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Create the name of the service account to use
+*/}}
+{{- define "fornix.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create }}
+{{- default (include "fornix.fullname" .) .Values.serviceAccount.name }}
+{{- else }}
+{{- default "default" .Values.serviceAccount.name }}
+{{- end }}
+{{- end }}
@@ -0,0 +1,83 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "fornix.fullname" . }}
+  labels:
+    {{- include "fornix.labels" . | nindent 4 }}
+spec:
+  {{- if not .Values.autoscaling.enabled }}
+  replicas: {{ .Values.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "fornix.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "fornix.labels" . | nindent 8 }}
+        {{- with .Values.podLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "fornix.serviceAccountName" . }}
+      {{- with .Values.podSecurityContext }}
+      securityContext:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: {{ .Chart.Name }}
+          {{- with .Values.securityContext }}
+          securityContext:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          ports:
+            - name: http
+              containerPort: {{ .Values.service.port }}
+              protocol: TCP
+          env:
+          - name: DRUPAL_URL
+            value: {{ .Values.drupalUrl | quote }}  # quoted: env var values must be YAML strings
+          - name: BASE_URL
+            value: {{ .Values.baseUrl | quote }}  # quoted: env var values must be YAML strings
+          {{- with .Values.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.volumeMounts }}
+          volumeMounts:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+      {{- with .Values.volumes }}
+      volumes:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
@@ -0,0 +1,43 @@
+{{- if .Values.ingress.enabled -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "fornix.fullname" . }}
+  labels:
+    {{- include "fornix.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if .Values.ingress.tls }}
+  tls:
+    {{- range .Values.ingress.tls }}
+    - hosts:
+        {{- range .hosts }}
+        - {{ . | quote }}
+        {{- end }}
+      secretName: {{ .secretName }}
+    {{- end }}
+  {{- end }}
+  rules:
+    {{- range .Values.ingress.hosts }}
+    - host: {{ .host | quote }}
+      http:
+        paths:
+          {{- range .paths }}
+          - path: {{ .path }}
+            {{- with .pathType }}
+            pathType: {{ . }}
+            {{- end }}
+            backend:
+              service:
+                name: {{ include "fornix.fullname" $ }}
+                port:
+                  number: {{ $.Values.service.port }}
+          {{- end }}
+    {{- end }}
+{{- end }}
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "fornix.fullname" . }}
+  labels:
+    {{- include "fornix.labels" . | nindent 4 }}
+spec:
+  type: {{ .Values.service.type }}
+  ports:
+    - port: {{ .Values.service.port }}
+      targetPort: http
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "fornix.selectorLabels" . | nindent 4 }}
@@ -0,0 +1,13 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "fornix.serviceAccountName" . }}
+  labels:
+    {{- include "fornix.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
+{{- end }}
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "fornix.fullname" . }}-test-connection"
+  labels:
+    {{- include "fornix.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+spec:
+  containers:
+    - name: wget
+      image: busybox
+      command: ['wget']
+      args: ['{{ include "fornix.fullname" . }}:{{ .Values.service.port }}']
+  restartPolicy: Never
@@ -0,0 +1,100 @@
+# Default values for fornix.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+drupalUrl: http://drupal
+baseUrl: https://oceanbox.io
+# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
+replicaCount: 1
+# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/
+image:
+  repository: registry.gitlab.com/oceanbox/fornix
+  # This sets the pull policy for images.
+  pullPolicy: IfNotPresent
+  # Overrides the image tag whose default is the chart appVersion.
+  tag: v1.6.0
+# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
+imagePullSecrets:
+  - name: gitlab-pull-secret
+# This is to override the chart name.
+nameOverride: ""
+fullnameOverride: ""
+# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
+serviceAccount:
+  # Specifies whether a service account should be created
+  create: true
+  # Automatically mount a ServiceAccount's API credentials?
+  automount: true
+  # Annotations to add to the service account
+  annotations: {}
+  # The name of the service account to use.
+  # If not set and create is true, a name is generated using the fullname template
+  name: ""
+# This is for setting Kubernetes Annotations to a Pod.
+# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
+podAnnotations: {}
+# This is for setting Kubernetes Labels to a Pod.
+# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+podLabels: {}
+podSecurityContext:
+  fsGroup: 2000
+securityContext:
+  capabilities:
+    drop:
+      - ALL
+  readOnlyRootFilesystem: false
+  runAsNonRoot: true
+  runAsUser: 1000
+# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
+service:
+  # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
+  type: ClusterIP
+  # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
+  port: 8085
+# This block sets up the ingress. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
+ingress:
+  enabled: false
+resources: {}
+# We usually recommend not to specify default resources and to leave this as a conscious
+# choice for the user. This also increases chances charts run on environments with little
+# resources, such as Minikube. If you do want to specify resources, uncomment the following
+# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+# limits:
+#   cpu: 100m
+#   memory: 128Mi
+# requests:
+#   cpu: 100m
+#   memory: 128Mi
+
+# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
+livenessProbe:
+  httpGet:
+    path: /
+    port: http
+readinessProbe:
+  httpGet:
+    path: /
+    port: http
+# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
+autoscaling:
+  enabled: false
+  minReplicas: 1
+  maxReplicas: 100
+  targetCPUUtilizationPercentage: 80
+  # targetMemoryUtilizationPercentage: 80
+# Additional volumes on the output Deployment definition.
+volumes: []
+# - name: foo
+#   secret:
+#     secretName: mysecret
+#     optional: false
+
+# Additional volumeMounts on the output Deployment definition.
+volumeMounts: []
+# - name: foo
+#   mountPath: "/etc/foo"
+#   readOnly: true
+
+nodeSelector: {}
+tolerations: []
+affinity: {}
@@ -3,7 +3,7 @@
 # Declare variables to be passed into your templates.
 replicaCount: 1
 image:
-  repository: registry.gitlab.com/oceanbox/makai/makai
+  repository: registry.gitlab.com/oceanbox/makai
  tag: v0.1.0
  pullPolicy: IfNotPresent
 init:
@@ -45,6 +45,7 @@ spec:
    persistentVolumeClaimSpec:
      accessModes:
      - ReadWriteOnce
+      storageClassName: {{ .Values.redis.storageClass | default "managed-nfs-storage" | quote }}
      resources:
        requests:
          storage: {{ .Values.redis.size | default "1Gi" }}
@@ -3,7 +3,7 @@
 # Declare variables to be passed into your templates.
 replicaCount: 1
 image:
-  repository: registry.gitlab.com/oceanbox/plume/plume
+  repository: registry.gitlab.com/oceanbox/plume
  tag: v1.6.7
  pullPolicy: IfNotPresent
 init:
@@ -0,0 +1,6 @@
+dependencies:
+- name: diagrid-dashboard
+  repository: file://../diagrid-dashboard
+  version: 0.1.0
+digest: sha256:4fdb3148a2a6439223d7844a3083da2de324dd47e5cb3ac4a5d9c436e6e2c775
+generated: "2025-12-16T19:38:21.939708629+01:00"
@@ -8,3 +8,8 @@ version: v1.35.2
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application.
 appVersion: v1.35.2
+dependencies:
+  - name: diagrid-dashboard
+    version: "0.1.0"
+    repository: "file://../diagrid-dashboard"
+    condition: diagrid-dashboard.enabled
@@ -108,3 +108,9 @@ serviceMonitor:
 nodeSelector: {}
 tolerations: []
 affinity: {}
+
+diagrid-dashboard:
+  enabled: false
+  statestore:
+    scope: sorcerer
+    redis: sorcerer-redis
@@ -5,7 +5,7 @@
 replicaCount: 1
 image:
  repository: registry
-  tag: 2
+  tag: 3
  pullPolicy: IfNotPresent
 init:
  enabled: false
@@ -27,7 +27,7 @@ releases:
 - name: argocd-apps
  namespace: argocd
  chart: argo/argocd-apps
-  version: 0.0.9
+  version: 2.0.3
  condition: argo.apps.enabled
  values:
  - ../values/argo/values/apps.yaml.gotmpl
@@ -3,7 +3,8 @@ bases:

 repositories:
  - name: cert-manager
-    url: 'https://charts.jetstack.io'
+    oci: true
+    url: 'quay.io/jetstack/charts'

 commonLabels:
  tier: system
@@ -12,7 +13,7 @@ releases:
 - name: cert-manager
  namespace: cert-manager
  chart: cert-manager/cert-manager
-  version: 1.12.13
+  version: v1.19.2
  condition: cert_manager.enabled
  values:
  - ../values/cert-manager/values/cert-manager.yaml.gotmpl
@@ -0,0 +1,44 @@
+bases:
+ - ../envs/environments.yaml.gotmpl
+
+repositories:
+- name: forgejo
+  oci: true
+  url: code.forgejo.org/forgejo-helm
+
+commonLabels:
+  tier: system
+
+releases:
+- name: forgejo
+  namespace: forgejo
+  chart: forgejo/forgejo
+  version: 16.0.0
+  condition: forgejo.enabled
+  values:
+  - ../values/forgejo/values/values.yaml
+  - ../values/forgejo/values/values-{{ .Environment.Name }}.yaml
+  postRenderer: ../bin/kustomizer
+  postRendererArgs:
+  - ../values/forgejo/kustomize/{{ .Environment.Name }}
+  missingFileHandler: Info
+- name: manifests
+  namespace: forgejo
+  chart: manifests
+  condition: forgejo.enabled
+  missingFileHandler: Info
+  values:
+  - ../values/env.yaml
+  - ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
+  - ../values/forgejo/env.yaml.gotmpl
+  - ../values/forgejo/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
+  hooks:
+  - events: [ prepare, cleanup ]
+    showlogs: true
+    command: ../bin/helmify
+    args:
+    - '{{`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}`}}'
+    - '{{`{{ .Release.Chart }}`}}'
+    - '{{`{{ .Environment.Name }}`}}'
+    - ../values/forgejo/manifests
+    - manifests
@@ -0,0 +1,37 @@
+bases:
+ - ../envs/environments.yaml.gotmpl
+
+commonLabels:
+  tier: oceanbox
+
+releases:
+- name: fornix
+  namespace: fornix
+  chart: ../charts/fornix
+  condition: fornix.enabled
+  values:
+  - ../values/fornix/values/values.yaml
+  postRenderer: ../bin/kustomizer
+  postRendererArgs:
+  - ../values/fornix/kustomize/{{ .Environment.Name }}
+  missingFileHandler: Info
+- name: manifests
+  namespace: fornix
+  chart: manifests
+  condition: fornix.enabled
+  missingFileHandler: Info
+  values:
+  - ../values/env.yaml
+  - ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
+  - ../values/fornix/env.yaml.gotmpl
+  - ../values/fornix/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
+  hooks:
+  - events: [ prepare, cleanup ]
+    showlogs: true
+    command: ../bin/helmify
+    args:
+    - '{{`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}`}}'
+    - '{{`{{ .Release.Chart }}`}}'
+    - '{{`{{ .Environment.Name }}`}}'
+    - ../values/fornix/manifests
+    - manifests
@@ -12,7 +12,7 @@ releases:
 - name: ingress-nginx
  namespace: ingress-nginx
  chart: ingress-nginx/ingress-nginx
-  version: 4.8.3
+  version: 4.14.1
  condition: nginx.enabled
  values:
  - ../values/ingress-nginx/values/ingress-nginx.yaml.gotmpl
@@ -0,0 +1,40 @@
+bases:
+ - ../envs/environments.yaml.gotmpl
+
+commonLabels:
+  tier: system
+
+releases:
+- name: kueue
+  namespace: kueue-system
+  chart: oci://registry.k8s.io/kueue/charts/kueue
+  version: 0.15.0
+  condition: kueue.enabled
+  values:
+  - ../values/kueue/values/values.yaml
+  - ../values/kueue/values/values-{{ .Environment.Name }}.yaml
+  - ../values/kueue/values/values-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
+  postRenderer: ../bin/kustomizer
+  postRendererArgs:
+  - ../values/kueue/kustomize/{{ .Environment.Name }}
+  missingFileHandler: Info
+- name: manifests
+  namespace: kueue-system
+  chart: manifests
+  condition: kueue.enabled
+  missingFileHandler: Info
+  values:
+  - ../values/env.yaml
+  - ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
+  - ../values/kueue/env.yaml.gotmpl
+  - ../values/kueue/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
+  hooks:
+  - events: [ prepare, cleanup ]
+    showlogs: true
+    command: ../bin/helmify
+    args:
+    - '{{`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}`}}'
+    - '{{`{{ .Release.Chart }}`}}'
+    - '{{`{{ .Environment.Name }}`}}'
+    - ../values/kueue/manifests
+    - manifests
@@ -15,7 +15,7 @@ releases:
 - name: kyverno
  namespace: kyverno
  chart: kyverno/kyverno
-  version: 3.5.1
+  version: 3.6.1
  condition: kyverno.enabled
  values:
  - ../values/kyverno/values/kyverno.yaml.gotmpl
@@ -12,7 +12,7 @@ releases:
 - name: mariadb-operator
  namespace: mariadb-operator
  chart: mariadb-operator/mariadb-operator
-  version: 25.8.4
+  version: 25.10.3
  condition: mariadb_operator.enabled
  values:
  - ../values/mariadb-operator/values/mariadb-operator.yaml.gotmpl
@@ -16,7 +16,7 @@ releases:
  namespace: {{ .Environment.Name }}-openfga
  {{- end }}
  chart: openfga/openfga
-  version: 0.2.45
+  version: 0.2.50
  condition: openfga.enabled
  values:
  - ../values/openfga/values/values.yaml
@@ -12,7 +12,7 @@ releases:
 - name: opentelemetry-collector
  namespace: otel
  chart: open-telemetry/opentelemetry-collector
-  version: 0.134.1
+  version: 0.142.1
  condition: otel.enabled
  values:
  - ../values/opentelemetry-collector/values/values.yaml
@@ -15,7 +15,7 @@ releases:
 - name: postgres-operator
  namespace: cnpg
  chart: cloudnative-pg/cloudnative-pg
-  version: 0.26.1
+  version: 0.27.0
  condition: postgres_operator.enabled
  values:
  - ../values/postgres-operator/values/postgres-operator.yaml.gotmpl
@@ -13,7 +13,7 @@ releases:
 - name: {{ .Environment.Name }}-rabbitmq
  namespace: rabbitmq
  chart: bitnami/rabbitmq
-  version: 12.9.0
+  version: 13.0.3
  condition: rabbitmq.enabled
  values:
  - ../values/rabbitmq/values/values.yaml
@@ -13,7 +13,7 @@ releases:
 - name: slurm-operator
  namespace: slinky
  chart: slurm-operator/slurm-operator
-  version: 0.4.0
+  version: 0.4.1
  condition: slurm_operator.enabled
  values:
  - ../values/slurm-operator/values/slurm-operator.yaml.gotmpl
@@ -13,7 +13,7 @@ releases:
 - name: spegel
  namespace: spegel
  chart: spegel/spegel
-  version: 0.5.1
+  version: 0.6.0
  condition: spegel.enabled
  values:
  - ../values/spegel/values/spegel.yaml.gotmpl
@@ -15,7 +15,7 @@ releases:
 - name: velero
  namespace: velero
  chart: velero/velero
-  version: 11.1.1
+  version: 11.3.2
  condition: velero.enabled
  values:
  - ../values/velero/values/velero.yaml.gotmpl
@@ -0,0 +1,70 @@
+let
+  sources = import ./default.nix;
+  pkgs = import sources.nixpkgs { };
+  pre-commit = import sources.git-hooks;
+
+  globalExcludes = [
+    "nix/default.nix"
+    "attic"
+    "vcluster"
+    ".*vendor"
+    ".*chart/.*"
+    ".*schema.json"
+  ];
+
+in
+pre-commit.run {
+  src = pkgs.nix-gitignore.gitignoreSource [ ] ../.;
+  # Do not run at pre-commit time
+  default_stages = [
+    "pre-push"
+  ];
+  # TODO(mrtz): Remove when default
+  package = pkgs.prek;
+  # Linters From https://github.com/cachix/pre-commit-hooks.nix
+  hooks = {
+    nixfmt-rfc-style = {
+      enable = true;
+      excludes = globalExcludes;
+    };
+
+    trim-trailing-whitespace.enable = true;
+
+    shellcheck = {
+      enable = true;
+      excludes = [
+        "vcluster/"
+        "attic/"
+      ];
+      args = [
+        "-x"
+        "-o"
+        "all"
+      ];
+    };
+
+    yamllint = { # YAML lint hook; paths matching `excludes` are skipped
+      enable = true;
+      excludes = [
+        "attic/"
+        "charts/templates/" # NOTE(review): looks redundant with "charts/" below — confirm and drop
+        "charts/"
+        "values/"
+        "vcluster/"
+      ];
+      settings = {
+        strict = true;
+        # Inline yamllint config: default rules, no document-start check, lines up to 300 chars.
+        configData = ''{ extends: default, rules: { document-start: disable, line-length: {max: 300} } }'';
+      };
+    };
+
+    check-json.enable = true;
+
+    renovate-config-validator = {
+      enable = true;
+      files = "renovate.json$";
+      entry = "renovate-config-validator";
+    };
+
+  };
+}
@@ -9,8 +9,15 @@
 */
 # Generated by npins. Do not modify; will be overwritten regularly
 let
-  data = builtins.fromJSON (builtins.readFile ./sources.json);
-  version = data.version;
+  # Backwards-compatibly make something that previously didn't take any arguments take some
+  # The function must return an attrset, and will unfortunately be eagerly evaluated
+  # Same thing, but it catches eval errors on the default argument so that one may still call it with other arguments
+  mkFunctor =
+    fn:
+    let
+      e = builtins.tryEval (fn { });
+    in
+    (if e.success then e.value else { error = fn { }; }) // { __functor = _self: fn; };

  # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295
  range =
@@ -21,7 +28,6 @@ let

  # https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269
  stringAsChars = f: s: concatStrings (map f (stringToCharacters s));
-  concatMapStrings = f: list: concatStrings (map f list);
  concatStrings = builtins.concatStringsSep "";

  # If the environment variable NPINS_OVERRIDE_${name} is set, then use
@@ -48,41 +54,87 @@ let

+  # Build a single pin: given its name and spec, return a function that takes an optional
+  # `pkgs` and yields `spec` extended with an `outPath` for the fetched source.
  mkSource =
    name: spec:
+    {
+      pkgs ? null,
+    }:
    assert spec ? type;
    let
+      # Unify across builtin and pkgs fetchers.
+      # `fetchGit` requires a wrapper because of slight API differences.
+      fetchers =
+        if pkgs == null then
+          {
+            inherit (builtins) fetchTarball fetchurl;
+            # `builtins.fetchGit` differs from the other builtin fetchers: take the `outPath`
+            # of its result so every fetcher in this set is used the same way.
+            fetchGit = args: (builtins.fetchGit args).outPath;
+          }
+        else
+          {
+            fetchTarball =
+              {
+                url,
+                sha256,
+              }:
+              pkgs.fetchzip {
+                inherit url sha256;
+                extension = "tar";
+              };
+            inherit (pkgs) fetchurl;
+            # Translate the builtin-style arguments to the names `pkgs.fetchgit` expects.
+            fetchGit =
+              {
+                url,
+                submodules,
+                rev,
+                name,
+                narHash,
+              }:
+              pkgs.fetchgit {
+                inherit url rev name;
+                fetchSubmodules = submodules;
+                hash = narHash;
+              };
+          };
+
+      # Dispatch to the correct code path based on the type
      path =
        if spec.type == "Git" then
-          mkGitSource spec
+          mkGitSource fetchers spec
        else if spec.type == "GitRelease" then
-          mkGitSource spec
+          mkGitSource fetchers spec
        else if spec.type == "PyPi" then
-          mkPyPiSource spec
+          mkPyPiSource fetchers spec
        else if spec.type == "Channel" then
-          mkChannelSource spec
+          mkChannelSource fetchers spec
        else if spec.type == "Tarball" then
-          mkTarballSource spec
+          mkTarballSource fetchers spec
+        # Container pins receive `pkgs` itself rather than the fetcher set.
+        else if spec.type == "Container" then
+          mkContainerSource pkgs spec
        else
          builtins.throw "Unknown source type ${spec.type}";
    in
    spec // { outPath = mayOverride name path; };

  mkGitSource =
+    {
+      fetchTarball,
+      fetchGit,
+      ...
+    }:
    {
      repository,
      revision,
      url ? null,
      submodules,
      hash,
-      branch ? null,
      ...
    }:
    assert repository ? type;
    # At the moment, it is either a plain git repository (which has a URL) or a GitHub/GitLab repository.
    # In the latter case, there will always be a URL to the tarball.
    if url != null && !submodules then
-      builtins.fetchTarball {
+      fetchTarball {
        inherit url;
-        sha256 = hash; # FIXME: check nix version & use SRI hashes
+        sha256 = hash;
      }
    else
      let
@@ -93,6 +145,8 @@ let
            "https://github.com/${repository.owner}/${repository.repo}.git"
          else if repository.type == "GitLab" then
            "${repository.server}/${repository.repo_path}.git"
+          else if repository.type == "Forgejo" then
+            "${repository.server}/${repository.owner}/${repository.repo}.git"
          else
            throw "Unrecognized repository type ${repository.type}";
        urlToName =
@@ -107,40 +161,89 @@ let
          "${if matched == null then "source" else builtins.head matched}${appendShort}";
        name = urlToName url revision;
      in
-      builtins.fetchGit {
+      fetchGit {
        rev = revision;
-        inherit name;
-        # hash = hash;
-        inherit url submodules;
+        narHash = hash;
+
+        inherit name submodules url;
      };

+  # PyPi pin: download `url` with the supplied `fetchurl`, verified against `hash`.
  mkPyPiSource =
-    { url, hash, ... }:
-    builtins.fetchurl {
+    { fetchurl, ... }:
+    {
+      url,
+      hash,
+      ...
+    }:
+    fetchurl {
      inherit url;
      sha256 = hash;
    };

+  # Channel pin: fetch the tarball at `url` with the supplied `fetchTarball`, verified against `hash`.
  mkChannelSource =
-    { url, hash, ... }:
-    builtins.fetchTarball {
+    { fetchTarball, ... }:
+    {
+      url,
+      hash,
+      ...
+    }:
+    fetchTarball {
      inherit url;
      sha256 = hash;
    };

+  # Tarball pin: fetch `locked_url` when the spec provides one, otherwise `url`, verified against `hash`.
  mkTarballSource =
+    { fetchTarball, ... }:
    {
      url,
      locked_url ? url,
      hash,
      ...
    }:
-    builtins.fetchTarball {
+    fetchTarball {
      url = locked_url;
      sha256 = hash;
    };
+
+  # Container pin: pulled with `pkgs.dockerTools.pullImage`, so a Nixpkgs value is mandatory.
+  # Without `pkgs`, evaluation aborts with a pointer to the npins README.
+  mkContainerSource =
+    pkgs:
+    {
+      image_name,
+      image_tag,
+      image_digest,
+      ...
+    }:
+    if pkgs != null then
+      pkgs.dockerTools.pullImage {
+        imageName = image_name;
+        imageDigest = image_digest;
+        finalImageTag = image_tag;
+      }
+    else
+      builtins.throw "container sources require passing in a Nixpkgs value: https://github.com/andir/npins/blob/master/README.md#using-the-nixpkgs-fetchers";
 in
-if version == 5 then
-  builtins.mapAttrs mkSource data.pins
-else
-  throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`"
+# Entry point. `input` is either a path to a pins JSON file (default `./sources.json`) or an
+# already-parsed attrset. Only format version 7 is accepted; every pin is wrapped with
+# `mkFunctor`, so it can be used directly or called with arguments such as `pkgs`.
+mkFunctor (
+  {
+    input ? ./sources.json,
+  }:
+  let
+    data =
+      if builtins.isPath input then
+        # while `readFile` will throw an error anyways if the path doesn't exist,
+        # we still need to check beforehand because *our* error can be caught but not the one from the builtin
+        # *piegames sighs*
+        if builtins.pathExists input then
+          builtins.fromJSON (builtins.readFile input)
+        else
+          throw "Input path ${toString input} does not exist"
+      else if builtins.isAttrs input then
+        input
+      else
+        throw "Unsupported input type ${builtins.typeOf input}, must be a path or an attrset";
+    version = data.version;
+  in
+  if version == 7 then
+    builtins.mapAttrs (name: spec: mkFunctor (mkSource name spec)) data.pins
+  else
+    throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`"
+)
@@ -1,11 +1,24 @@
 {
  "pins": {
+    "git-hooks": {
+      "type": "Git",
+      "repository": {
+        "type": "GitHub",
+        "owner": "cachix",
+        "repo": "git-hooks.nix"
+      },
+      "branch": "master",
+      "submodules": false,
+      "revision": "f0927703b7b1c8d97511c4116eb9b4ec6645a0fa",
+      "url": "https://github.com/cachix/git-hooks.nix/archive/f0927703b7b1c8d97511c4116eb9b4ec6645a0fa.tar.gz",
+      "hash": "sha256-6MkqajPICgugsuZ92OMoQcgSHnD6sJHwk8AxvMcIgTE="
+    },
    "nixpkgs": {
      "type": "Channel",
      "name": "nixpkgs-unstable",
-      "url": "https://releases.nixos.org/nixpkgs/nixpkgs-26.05pre903996.59b6c96beacc/nixexprs.tar.xz",
-      "hash": "0b0yr9d1xyfwgpaj68bimsbjjbj7yis4whjvkrfdycfnasdf0gf0"
+      "url": "https://releases.nixos.org/nixpkgs/nixpkgs-26.05pre927565.13868c071cc7/nixexprs.tar.xz",
+      "hash": "sha256-wufp5c0nWh/87f9eK7xy1eZXms5zd4yl6S4SR+LfA08="
    }
  },
-  "version": 5
+  "version": 7
 }
@@ -3,7 +3,7 @@
 # Simple script for uploading a base64 encoded image into our database. For
 # grafana business image panels.

-if [ $# -ne 2 ]
+if [[ $# -ne 2 ]]
 then
 	echo "Usage: $0 <image-name> <file>.png"
 	exit 1
@@ -12,9 +12,9 @@ fi
 filename=$1
 file=$2

-if [ ! -e $file ]
+if [[ ! -e "${file}" ]]
 then
-	echo "file $file does not exist"
+	echo "file ${file} does not exist"
 	exit 1
 fi

@@ -22,9 +22,9 @@ function create_image() {
 	local filename=$1
 	local data=$2
 cat << EOF
-INSERT INTO images VALUES('$filename', '$data');
+INSERT INTO images VALUES('${filename}', '${data}');
 EOF
 }

-data=$(cat $file | base64 -w0)
-create_image $filename $data
+data=$(base64 -w0 < "${file}")
+create_image "${filename}" "${data}"
@@ -1,4 +1,3 @@
-// -*- mode: jsonc -*-
 {
  "$schema": "https://docs.renovatebot.com/renovate-schema.json",
  "extends": [
@@ -1,183 +1,204 @@
 groups:
- name: etcd
-  rules:
-  - alert: etcdMembersDown
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value
-        }}).'
-      summary: etcd cluster members are down.
-    expr: |-
-      max without (endpoint) (
-        sum without (instance) (up{job=~".*etcd.*"} == bool 0)
-      or
-        count without (To) (
-          sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01
-        )
-      )
-      > 0
-    for: 10m
-    labels:
-      severity: critical
-  - alert: etcdInsufficientMembers
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
-        }}).'
-      summary: etcd cluster has insufficient number of members.
-    expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
-      without (instance) + 1) / 2)
-    for: 3m
-    labels:
-      severity: critical
-  - alert: etcdNoLeader
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
-        has no leader.'
-      summary: etcd cluster has no leader.
-    expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
-    for: 1m
-    labels:
-      severity: critical
-  - alert: etcdHighNumberOfLeaderChanges
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
-        within the last 15 minutes. Frequent elections may be a sign of insufficient
-        resources, high network latency, or disruptions by other components and should
-        be investigated.'
-      summary: etcd cluster has high number of leader changes.
-    expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
-      or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m])
-      >= 4
-    for: 5m
-    labels:
-      severity: warning
-  - alert: etcdHighNumberOfFailedGRPCRequests
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
-        {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
-      summary: etcd cluster has high number of failed grpc requests.
-    expr: |-
-      100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
-        /
-      sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code)
-        > 1
-    for: 10m
-    labels:
-      severity: warning
-  - alert: etcdHighNumberOfFailedGRPCRequests
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
-        {{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
-      summary: etcd cluster has high number of failed grpc requests.
-    expr: |-
-      100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
-        /
-      sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code)
-        > 5
-    for: 5m
-    labels:
-      severity: critical
-  - alert: etcdGRPCRequestsSlow
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
-        is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method
-        }} method.'
-      summary: etcd grpc requests are slow
-    expr: |-
-      histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type))
-      > 0.15
-    for: 10m
-    labels:
-      severity: critical
-  - alert: etcdMemberCommunicationSlow
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": member communication with {{
-        $labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance
-        }}.'
-      summary: etcd cluster member communication is slow.
-    expr: |-
-      histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
-      > 0.15
-    for: 10m
-    labels:
-      severity: warning
-  - alert: etcdHighNumberOfFailedProposals
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
-        within the last 30 minutes on etcd instance {{ $labels.instance }}.'
-      summary: etcd cluster has high number of proposal failures.
-    expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
-    for: 15m
-    labels:
-      severity: warning
-  - alert: etcdHighFsyncDurations
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
-        are {{ $value }}s on etcd instance {{ $labels.instance }}.'
-      summary: etcd cluster 99th percentile fsync durations are too high.
-    expr: |-
-      histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
-      > 0.5
-    for: 10m
-    labels:
-      severity: warning
-  - alert: etcdHighFsyncDurations
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
-        are {{ $value }}s on etcd instance {{ $labels.instance }}.'
-      summary: etcd cluster 99th percentile fsync durations are too high.
-    expr: |-
-      histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
-      > 1
-    for: 10m
-    labels:
-      severity: critical
-  - alert: etcdHighCommitDurations
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
-        {{ $value }}s on etcd instance {{ $labels.instance }}.'
-      summary: etcd cluster 99th percentile commit durations are too high.
-    expr: |-
-      histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
-      > 0.25
-    for: 10m
-    labels:
-      severity: warning
-  - alert: etcdDatabaseQuotaLowSpace
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined
-        quota on etcd instance {{ $labels.instance }}, please defrag or increase the
-        quota as the writes to etcd will be disabled when it is full.'
-      summary: etcd cluster database is running full.
-    expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
-      last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 >
-      95
-    for: 10m
-    labels:
-      severity: critical
-  - alert: etcdExcessiveDatabaseGrowth
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk
-        space in the next four hours, based on write observations within the past
-        four hours on etcd instance {{ $labels.instance }}, please check as it might
-        be disruptive.'
-      summary: etcd cluster database growing very fast.
-    expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
-      > etcd_server_quota_backend_bytes{job=~".*etcd.*"}
-    for: 10m
-    labels:
-      severity: warning
-  - alert: etcdDatabaseHighFragmentationRatio
-    annotations:
-      description: 'etcd cluster "{{ $labels.job }}": database size in use on instance
-        {{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual
-        allocated disk space, please run defragmentation (e.g. etcdctl defrag) to
-        retrieve the unused fragmented disk space.'
-      runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
-      summary: etcd database size in use is less than 50% of the actual allocated
-        storage.
-    expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
-      / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5
-      and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
-    for: 10m
-    labels:
-      severity: warning
+  - name: etcd
+    rules:
+      # Critical after 10m: an etcd target is down, or peer send failures exceed 0.01/s (2m rate).
+      - alert: etcdMembersDown
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": members are down ({{ $value }}).
+          summary: etcd cluster members are down.
+        expr: |-
+          max without (endpoint) (
+            sum without (instance) (up{job=~".*etcd.*"} == bool 0)
+          or
+            count without (To) (
+              sum without (instance) (rate(etcd_network_peer_sent_failures_total{job=~".*etcd.*"}[120s])) > 0.01
+            )
+          )
+          > 0
+        for: 10m
+        labels:
+          severity: critical
+      # Critical after 3m: fewer members are up than (member count + 1) / 2.
+      - alert: etcdInsufficientMembers
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": insufficient members ({{ $value }}).
+          summary: etcd cluster has insufficient number of members.
+        expr: >-
+          sum(up{job=~".*etcd.*"} == bool 1) without (instance)
+          < ((count(up{job=~".*etcd.*"}) without (instance) + 1) / 2)
+        for: 3m
+        labels:
+          severity: critical
+      # Critical after 1m: a member reports etcd_server_has_leader == 0.
+      - alert: etcdNoLeader
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": member {{ $labels.instance }} has no leader.
+          summary: etcd cluster has no leader.
+        expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
+        for: 1m
+        labels:
+          severity: critical
+      # Warning after 5m: four or more leader changes counted over a 15m window.
+      - alert: etcdHighNumberOfLeaderChanges
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": {{ $value }} leader changes within the last 15 minutes.
+            Frequent elections may be a sign of insufficient resources, high network latency, or
+            disruptions by other components and should be investigated.
+          summary: etcd cluster has high number of leader changes.
+        expr: >-
+          increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
+          or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m]) >= 4
+        for: 5m
+        labels:
+          severity: warning
+      # Warning after 10m: more than 1% of gRPC requests end in one of the listed error codes (5m rate).
+      - alert: etcdHighNumberOfFailedGRPCRequests
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }}
+            failed on etcd instance {{ $labels.instance }}.
+          summary: etcd cluster has high number of failed grpc requests.
+        expr: |-
+          100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
+            /
+          sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code)
+            > 1
+        for: 10m
+        labels:
+          severity: warning
+      # Critical after 5m: more than 5% of gRPC requests end in one of the listed error codes (5m rate).
+      - alert: etcdHighNumberOfFailedGRPCRequests
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for {{ $labels.grpc_method }}
+            failed on etcd instance {{ $labels.instance }}.
+          summary: etcd cluster has high number of failed grpc requests.
+        expr: |-
+          100 * sum(rate(grpc_server_handled_total{job=~".*etcd.*", grpc_code=~"Unknown|FailedPrecondition|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded"}[5m])) without (grpc_type, grpc_code)
+            /
+          sum(rate(grpc_server_handled_total{job=~".*etcd.*"}[5m])) without (grpc_type, grpc_code)
+            > 5
+        for: 5m
+        labels:
+          severity: critical
+      # Critical after 10m: p99 unary gRPC handling time (Defragment excluded) above 0.15s.
+      - alert: etcdGRPCRequestsSlow
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests is {{ $value }}s
+            on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method }} method.
+          summary: etcd grpc requests are slow
+        expr: |-
+          histogram_quantile(0.99, sum(rate(grpc_server_handling_seconds_bucket{job=~".*etcd.*", grpc_method!="Defragment", grpc_type="unary"}[5m])) without(grpc_type))
+          > 0.15
+        for: 10m
+        labels:
+          severity: critical
+      # Warning after 10m: p99 peer round-trip time above 0.15s.
+      - alert: etcdMemberCommunicationSlow
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": member communication with {{ $labels.To }}
+            is taking {{ $value }}s on etcd instance {{ $labels.instance }}.
+          summary: etcd cluster member communication is slow.
+        expr: |-
+          histogram_quantile(0.99, rate(etcd_network_peer_round_trip_time_seconds_bucket{job=~".*etcd.*"}[5m]))
+          > 0.15
+        for: 10m
+        labels:
+          severity: warning
+      # Warning after 15m: failed-proposal rate (15m window) above 5 per second.
+      # NOTE(review): the description speaks of failures "within the last 30 minutes" while
+      # $value is a per-second rate — confirm the wording against the upstream rule.
+      - alert: etcdHighNumberOfFailedProposals
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures within the last 30 minutes
+            on etcd instance {{ $labels.instance }}.
+          summary: etcd cluster has high number of proposal failures.
+        expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
+        for: 15m
+        labels:
+          severity: warning
+      # Warning after 10m: p99 WAL fsync duration above 0.5s.
+      - alert: etcdHighFsyncDurations
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s
+            on etcd instance {{ $labels.instance }}.
+          summary: etcd cluster 99th percentile fsync durations are too high.
+        expr: |-
+          histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+          > 0.5
+        for: 10m
+        labels:
+          severity: warning
+      # Critical after 10m: p99 WAL fsync duration above 1s.
+      - alert: etcdHighFsyncDurations
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": 99th percentile fsync durations are {{ $value }}s
+            on etcd instance {{ $labels.instance }}.
+          summary: etcd cluster 99th percentile fsync durations are too high.
+        expr: |-
+          histogram_quantile(0.99, rate(etcd_disk_wal_fsync_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+          > 1
+        for: 10m
+        labels:
+          severity: critical
+      # Warning after 10m: p99 backend commit duration above 0.25s.
+      - alert: etcdHighCommitDurations
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": 99th percentile commit durations {{ $value }}s
+            on etcd instance {{ $labels.instance }}.
+          summary: etcd cluster 99th percentile commit durations are too high.
+        expr: |-
+          histogram_quantile(0.99, rate(etcd_disk_backend_commit_duration_seconds_bucket{job=~".*etcd.*"}[5m]))
+          > 0.25
+        for: 10m
+        labels:
+          severity: warning
+      # Critical after 10m: database size above 95% of the backend quota.
+      - alert: etcdDatabaseQuotaLowSpace
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": database size exceeds the defined quota
+            on etcd instance {{ $labels.instance }}, please defrag or increase the quota
+            as the writes to etcd will be disabled when it is full.
+          summary: etcd cluster database is running full.
+        expr: >-
+          (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])
+          / last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 > 95
+        for: 10m
+        labels:
+          severity: critical
+      # Warning after 10m: a linear projection of the last 4h of database size exceeds the backend quota 4h ahead.
+      - alert: etcdExcessiveDatabaseGrowth
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": Predicting running out of disk space in the next four hours,
+            based on write observations within the past four hours on etcd instance {{ $labels.instance }},
+            please check as it might be disruptive.
+          summary: etcd cluster database growing very fast.
+        expr: >-
+          predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
+          > etcd_server_quota_backend_bytes{job=~".*etcd.*"}
+        for: 10m
+        labels:
+          severity: warning
+      # Warning after 10m: in-use size is below 50% of the allocated size while in-use size exceeds 104857600 bytes.
+      - alert: etcdDatabaseHighFragmentationRatio
+        annotations:
+          description: >-
+            etcd cluster "{{ $labels.job }}": database size in use on instance {{ $labels.instance }}
+            is {{ $value | humanizePercentage }} of the actual allocated disk space,
+            please run defragmentation (e.g. etcdctl defrag) to retrieve the unused fragmented disk space.
+          runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
+          summary: >-
+            etcd database size in use is less than 50% of the actual allocated storage.
+        expr: >-
+          (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
+          / last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5
+          and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
+        for: 10m
+        labels:
+          severity: warning
@@ -1,43 +1,47 @@
 groups:
- name: general.rules
-  rules:
-  - alert: TargetDown
-    annotations:
-      description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
-        }} targets in {{ $labels.namespace }} namespace are down.'
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
-      summary: One or more targets are unreachable.
-    expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
-      BY (cluster, job, namespace, service)) > 10
-    for: 10m
-    labels:
-      severity: warning
-  - alert: Watchdog
-    annotations:
-      description: |
-        This is an alert meant to ensure that the entire alerting pipeline is functional.
-        This alert is always firing, therefore it should always be firing in Alertmanager
-        and always fire against a receiver. There are integrations with various notification
-        mechanisms that send a notification when this alert is not firing. For example the
-        "DeadMansSnitch" integration in PagerDuty.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
-      summary: An alert that should always be firing to certify that Alertmanager
-        is working properly.
-    expr: vector(1)
-    labels:
-      severity: none
-  - alert: InfoInhibitor
-    annotations:
-      description: |
-        This is an alert that is used to inhibit info alerts.
-        By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
-        other alerts.
-        This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
-        severity of 'warning' or 'critical' starts firing on the same namespace.
-        This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
-      summary: Info-level alert inhibition.
-    expr: ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
-      "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
-    labels:
-      severity: none
+  - name: general.rules
+    rules:
+      # Warning after 10m: more than 10% of targets per (cluster, job, namespace, service) are down.
+      - alert: TargetDown
+        annotations:
+          description: >-
+            {{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets
+            in {{ $labels.namespace }} namespace are down.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
+          summary: One or more targets are unreachable.
+        expr: >-
+          100 * (count(up == 0) BY (cluster, job, namespace, service)
+          / count(up) BY (cluster, job, namespace, service)) > 10
+        for: 10m
+        labels:
+          severity: warning
+      # Always-firing alert: the expression is the constant vector(1).
+      - alert: Watchdog
+        annotations:
+          description: |
+            This is an alert meant to ensure that the entire alerting pipeline is functional.
+            This alert is always firing, therefore it should always be firing in Alertmanager
+            and always fire against a receiver. There are integrations with various notification
+            mechanisms that send a notification when this alert is not firing. For example the
+            "DeadMansSnitch" integration in PagerDuty.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
+          summary: >-
+            An alert that should always be firing to certify that Alertmanager is working properly.
+        expr: vector(1)
+        labels:
+          severity: none
+      # Fires for info-severity alerts unless a warning/critical alert other than InfoInhibitor
+      # is firing in the same namespace.
+      - alert: InfoInhibitor
+        annotations:
+          description: |
+            This is an alert that is used to inhibit info alerts.
+            By themselves, the info-level alerts are sometimes very noisy, but they are relevant when combined with
+            other alerts.
+            This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
+            severity of 'warning' or 'critical' starts firing on the same namespace.
+            This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
+          summary: Info-level alert inhibition.
+        expr: >-
+          ALERTS{severity = "info"} == 1
+          unless on (namespace)
+          ALERTS{alertname != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
+        labels:
+          severity: none
@@ -1,262 +1,281 @@
 groups:
- name: kubernetes-apps
-  rules:
-  - alert: KubePodCrashLooping
-    annotations:
-      description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
-        }}) is in waiting state (reason: "CrashLoopBackOff").'
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
-      summary: Pod is crash looping.
-    expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
-      job="kube-state-metrics", namespace=~".*"}[5m]) >= 1
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubePodNotReady
-    annotations:
-      description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
-        state for longer than 15 minutes.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
-      summary: Pod has been in a non-ready state for more than 15 minutes.
-    expr: |-
-      sum by (namespace, pod, cluster) (
-        max by (namespace, pod, cluster) (
-          kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"}
-        ) * on (namespace, pod, cluster) group_left(owner_kind) topk by (namespace, pod, cluster) (
-          1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"})
-        )
-      ) > 0
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeDeploymentGenerationMismatch
-    annotations:
-      description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
-        }} does not match, this indicates that the Deployment has failed but has not
-        been rolled back.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
-      summary: Deployment generation mismatch due to possible roll-back
-    expr: |-
-      kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~".*"}
-        !=
-      kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~".*"}
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeDeploymentReplicasMismatch
-    annotations:
-      description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
-        not matched the expected number of replicas for longer than 15 minutes.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
-      summary: Deployment has not matched the expected number of replicas.
-    expr: |-
-      (
-        kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~".*"}
-          >
-        kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~".*"}
-      ) and (
-        changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m])
-          ==
-        0
-      )
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeDeploymentRolloutStuck
-    annotations:
-      description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
-        }} is not progressing for longer than 15 minutes.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
-      summary: Deployment rollout is not progressing.
-    expr: |-
-      kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~".*"}
-      != 0
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeStatefulSetReplicasMismatch
-    annotations:
-      description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
-        not matched the expected number of replicas for longer than 15 minutes.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
-      summary: StatefulSet has not matched the expected number of replicas.
-    expr: |-
-      (
-        kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~".*"}
-          !=
-        kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~".*"}
-      ) and (
-        changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m])
-          ==
-        0
-      )
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeStatefulSetGenerationMismatch
-    annotations:
-      description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
-        }} does not match, this indicates that the StatefulSet has failed but has
-        not been rolled back.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
-      summary: StatefulSet generation mismatch due to possible roll-back
-    expr: |-
-      kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~".*"}
-        !=
-      kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~".*"}
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeStatefulSetUpdateNotRolledOut
-    annotations:
-      description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
-        has not been rolled out.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
-      summary: StatefulSet update has not been rolled out.
-    expr: |-
-      (
-        max by (namespace, statefulset) (
-          kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"}
-            unless
-          kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"}
-        )
-          *
-        (
-          kube_statefulset_replicas{job="kube-state-metrics", namespace=~".*"}
+  - name: kubernetes-apps
+    rules:
+      - alert: KubePodCrashLooping
+        annotations:
+          description:
+            'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
+            }}) is in waiting state (reason: "CrashLoopBackOff").'
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
+          summary: Pod is crash looping.
+        expr:
+          max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
+          job="kube-state-metrics", namespace=~".*"}[5m]) >= 1
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubePodNotReady
+        annotations:
+          description:
+            Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
+            state for longer than 15 minutes.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
+          summary: Pod has been in a non-ready state for more than 15 minutes.
+        expr: |-
+          sum by (namespace, pod, cluster) (
+            max by (namespace, pod, cluster) (
+              kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"}
+            ) * on (namespace, pod, cluster) group_left(owner_kind) topk by (namespace, pod, cluster) (
+              1, max by (namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"})
+            )
+          ) > 0
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeDeploymentGenerationMismatch
+        annotations:
+          description:
+            Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
+            }} does not match, this indicates that the Deployment has failed but has not
+            been rolled back.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
+          summary: Deployment generation mismatch due to possible roll-back
+        expr: |-
+          kube_deployment_status_observed_generation{job="kube-state-metrics", namespace=~".*"}
            !=
-          kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}
-        )
-      )  and (
-        changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[5m])
-          ==
-        0
-      )
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeDaemonSetRolloutStuck
-    annotations:
-      description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
-        finished or progressed for at least 15 minutes.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
-      summary: DaemonSet rollout is stuck.
-    expr: |-
-      (
-        (
-          kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"}
-           !=
+          kube_deployment_metadata_generation{job="kube-state-metrics", namespace=~".*"}
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeDeploymentReplicasMismatch
+        annotations:
+          description:
+            Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
+            not matched the expected number of replicas for longer than 15 minutes.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
+          summary: Deployment has not matched the expected number of replicas.
+        expr: |-
+          (
+            kube_deployment_spec_replicas{job="kube-state-metrics", namespace=~".*"}
+              >
+            kube_deployment_status_replicas_available{job="kube-state-metrics", namespace=~".*"}
+          ) and (
+            changes(kube_deployment_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m])
+              ==
+            0
+          )
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeDeploymentRolloutStuck
+        annotations:
+          description:
+            Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
+            }} is not progressing for longer than 15 minutes.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
+          summary: Deployment rollout is not progressing.
+        expr: |-
+          kube_deployment_status_condition{condition="Progressing", status="false",job="kube-state-metrics", namespace=~".*"}
+          != 0
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeStatefulSetReplicasMismatch
+        annotations:
+          description:
+            StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
+            not matched the expected number of replicas for longer than 15 minutes.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
+          summary: StatefulSet has not matched the expected number of replicas.
+        expr: |-
+          (
+            kube_statefulset_status_replicas_ready{job="kube-state-metrics", namespace=~".*"}
+              !=
+            kube_statefulset_status_replicas{job="kube-state-metrics", namespace=~".*"}
+          ) and (
+            changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[10m])
+              ==
+            0
+          )
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeStatefulSetGenerationMismatch
+        annotations:
+          description:
+            StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
+            }} does not match, this indicates that the StatefulSet has failed but has
+            not been rolled back.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
+          summary: StatefulSet generation mismatch due to possible roll-back
+        expr: |-
+          kube_statefulset_status_observed_generation{job="kube-state-metrics", namespace=~".*"}
+            !=
+          kube_statefulset_metadata_generation{job="kube-state-metrics", namespace=~".*"}
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeStatefulSetUpdateNotRolledOut
+        annotations:
+          description:
+            StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
+            has not been rolled out.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
+          summary: StatefulSet update has not been rolled out.
+        expr: |-
+          (
+            max by (namespace, statefulset) (
+              kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"}
+                unless
+              kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"}
+            )
+              *
+            (
+              kube_statefulset_replicas{job="kube-state-metrics", namespace=~".*"}
+                !=
+              kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}
+            )
+          )  and (
+            changes(kube_statefulset_status_replicas_updated{job="kube-state-metrics", namespace=~".*"}[5m])
+              ==
+            0
+          )
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeDaemonSetRolloutStuck
+        annotations:
+          description:
+            DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
+            finished or progressed for at least 15 minutes.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
+          summary: DaemonSet rollout is stuck.
+        expr: |-
+          (
+            (
+              kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"}
+               !=
+              kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
+            ) or (
+              kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
+               !=
+              0
+            ) or (
+              kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}
+               !=
+              kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
+            ) or (
+              kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~".*"}
+               !=
+              kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
+            )
+          ) and (
+            changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}[5m])
+              ==
+            0
+          )
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeContainerWaiting
+        annotations:
+          description:
+            pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
+            {{ $labels.container}} has been in waiting state for longer than 1 hour.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
+          summary: Pod container waiting longer than 1 hour
+        expr:
+          sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
+          namespace=~".*"}) > 0
+        for: 1h
+        labels:
+          severity: warning
+      - alert: KubeDaemonSetNotScheduled
+        annotations:
+          description:
+            "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
+            }} are not scheduled."
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
+          summary: DaemonSet pods are not scheduled.
+        expr: |-
          kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
-        ) or (
+            -
+          kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} > 0
+        for: 10m
+        labels:
+          severity: warning
+      - alert: KubeDaemonSetMisScheduled
+        annotations:
+          description:
+            "{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
+            }} are running where they are not supposed to run."
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
+          summary: DaemonSet pods are misscheduled.
+        expr:
          kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
-           !=
-          0
-        ) or (
-          kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}
-           !=
-          kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
-        ) or (
-          kube_daemonset_status_number_available{job="kube-state-metrics", namespace=~".*"}
-           !=
-          kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
-        )
-      ) and (
-        changes(kube_daemonset_status_updated_number_scheduled{job="kube-state-metrics", namespace=~".*"}[5m])
-          ==
-        0
-      )
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeContainerWaiting
-    annotations:
-      description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
-        {{ $labels.container}} has been in waiting state for longer than 1 hour.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
-      summary: Pod container waiting longer than 1 hour
-    expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
-      namespace=~".*"}) > 0
-    for: 1h
-    labels:
-      severity: warning
-  - alert: KubeDaemonSetNotScheduled
-    annotations:
-      description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
-        }} are not scheduled.'
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
-      summary: DaemonSet pods are not scheduled.
-    expr: |-
-      kube_daemonset_status_desired_number_scheduled{job="kube-state-metrics", namespace=~".*"}
-        -
-      kube_daemonset_status_current_number_scheduled{job="kube-state-metrics", namespace=~".*"} > 0
-    for: 10m
-    labels:
-      severity: warning
-  - alert: KubeDaemonSetMisScheduled
-    annotations:
-      description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
-        }} are running where they are not supposed to run.'
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
-      summary: DaemonSet pods are misscheduled.
-    expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
-      > 0
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeJobNotCompleted
-    annotations:
-      description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
-        than {{ "43200" | humanizeDuration }} to complete.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
-      summary: Job did not complete in time
-    expr: |-
-      time() - max by (namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"}
-        and
-      kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200
-    labels:
-      severity: warning
-  - alert: KubeJobFailed
-    annotations:
-      description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
-        Removing failed job after investigation should clear this alert.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
-      summary: Job failed to complete.
-    expr: kube_job_failed{job="kube-state-metrics", namespace=~".*"}  > 0
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeHpaReplicasMismatch
-    annotations:
-      description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }}
-        has not matched the desired number of replicas for longer than 15 minutes.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
-      summary: HPA has not matched desired number of replicas.
-    expr: |-
-      (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~".*"}
-        !=
-      kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"})
-        and
-      (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
-        >
-      kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~".*"})
-        and
-      (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
-        <
-      kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"})
-        and
-      changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}[15m]) == 0
-    for: 15m
-    labels:
-      severity: warning
-  - alert: KubeHpaMaxedOut
-    annotations:
-      description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }}
-        has been running at max replicas for longer than 15 minutes.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
-      summary: HPA is running at max replicas
-    expr: |-
-      kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
-        ==
-      kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"}
-    for: 15m
-    labels:
-      severity: warning
+          > 0
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeJobNotCompleted
+        annotations:
+          description:
+            Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
+            than {{ "43200" | humanizeDuration }} to complete.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
+          summary: Job did not complete in time
+        expr: |-
+          time() - max by (namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"}
+            and
+          kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200
+        labels:
+          severity: warning
+      - alert: KubeJobFailed
+        annotations:
+          description:
+            Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
+            Removing failed job after investigation should clear this alert.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
+          summary: Job failed to complete.
+        expr: kube_job_failed{job="kube-state-metrics", namespace=~".*"}  > 0
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeHpaReplicasMismatch
+        annotations:
+          description:
+            HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }}
+            has not matched the desired number of replicas for longer than 15 minutes.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
+          summary: HPA has not matched desired number of replicas.
+        expr: |-
+          (kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics", namespace=~".*"}
+            !=
+          kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"})
+            and
+          (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
+            >
+          kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics", namespace=~".*"})
+            and
+          (kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
+            <
+          kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"})
+            and
+          changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}[15m]) == 0
+        for: 15m
+        labels:
+          severity: warning
+      - alert: KubeHpaMaxedOut
+        annotations:
+          description:
+            HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler  }}
+            has been running at max replicas for longer than 15 minutes.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
+          summary: HPA is running at max replicas
+        expr: |-
+          kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics", namespace=~".*"}
+            ==
+          kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics", namespace=~".*"}
+        for: 15m
+        labels:
+          severity: warning
@@ -1,115 +1,123 @@
 groups:
- name: kubernetes-resources
-  rules:
-  - alert: KubeCPUOvercommit
-    annotations:
-      description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
-        for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
-      summary: Cluster has overcommitted CPU resource requests.
-    expr: |-
-      sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0
-      and
-      (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0
-    for: 10m
-    labels:
-      severity: warning
-  - alert: KubeMemoryOvercommit
-    annotations:
-      description: Cluster {{ $labels.cluster }} has overcommitted memory resource
-        requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node
-        failure.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
-      summary: Cluster has overcommitted memory resource requests.
-    expr: |-
-      sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0
-      and
-      (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0
-    for: 10m
-    labels:
-      severity: warning
-  - alert: KubeCPUQuotaOvercommit
-    annotations:
-      description: Cluster {{ $labels.cluster }}  has overcommitted CPU resource requests
-        for Namespaces.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
-      summary: Cluster has overcommitted CPU resource requests.
-    expr: |-
-      sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by (cluster)
-        /
-      sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by (cluster)
-        > 1.5
-    for: 5m
-    labels:
-      severity: warning
-  - alert: KubeMemoryQuotaOvercommit
-    annotations:
-      description: Cluster {{ $labels.cluster }}  has overcommitted memory resource
-        requests for Namespaces.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
-      summary: Cluster has overcommitted memory resource requests.
-    expr: |-
-      sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by (cluster)
-        /
-      sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)
-        > 1.5
-    for: 5m
-    labels:
-      severity: warning
-  - alert: KubeQuotaAlmostFull
-    annotations:
-      description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
-        }} of its {{ $labels.resource }} quota.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
-      summary: Namespace quota is going to be full.
-    expr: |-
-      kube_resourcequota{job="kube-state-metrics", type="used"}
-        / ignoring(instance, job, type)
-      (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
-        > 0.9 < 1
-    for: 15m
-    labels:
-      severity: info
-  - alert: KubeQuotaFullyUsed
-    annotations:
-      description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
-        }} of its {{ $labels.resource }} quota.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
-      summary: Namespace quota is fully used.
-    expr: |-
-      kube_resourcequota{job="kube-state-metrics", type="used"}
-        / ignoring(instance, job, type)
-      (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
-        == 1
-    for: 15m
-    labels:
-      severity: info
-  - alert: KubeQuotaExceeded
-    annotations:
-      description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
-        }} of its {{ $labels.resource }} quota.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
-      summary: Namespace quota has exceeded the limits.
-    expr: |-
-      kube_resourcequota{job="kube-state-metrics", type="used"}
-        / ignoring(instance, job, type)
-      (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
-        > 1
-    for: 15m
-    labels:
-      severity: warning
-  - alert: CPUThrottlingHigh
-    annotations:
-      description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
-        {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
-        }}.'
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
-      summary: Processes experience elevated CPU throttling.
-    expr: |-
-      sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
-        /
-      sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
-        > ( 25 / 100 )
-    for: 15m
-    labels:
-      severity: info
+  - name: kubernetes-resources
+    rules:
+      - alert: KubeCPUOvercommit
+        annotations:
+          description:
+            Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
+            for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
+          summary: Cluster has overcommitted CPU resource requests.
+        expr: |-
+          sum(namespace_cpu:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0
+          and
+          (sum(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster) - max(kube_node_status_allocatable{job="kube-state-metrics",resource="cpu"}) by (cluster)) > 0
+        for: 10m
+        labels:
+          severity: warning
+      - alert: KubeMemoryOvercommit
+        annotations:
+          description:
+            Cluster {{ $labels.cluster }} has overcommitted memory resource
+            requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node
+            failure.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
+          summary: Cluster has overcommitted memory resource requests.
+        expr: |-
+          sum(namespace_memory:kube_pod_container_resource_requests:sum{}) by (cluster) - (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0
+          and
+          (sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster) - max(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)) > 0
+        for: 10m
+        labels:
+          severity: warning
+      - alert: KubeCPUQuotaOvercommit
+        annotations:
+          description:
+            Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
+            for Namespaces.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
+          summary: Cluster has overcommitted CPU resource requests.
+        expr: |-
+          sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(cpu|requests.cpu)"})) by (cluster)
+            /
+          sum(kube_node_status_allocatable{resource="cpu", job="kube-state-metrics"}) by (cluster)
+            > 1.5
+        for: 5m
+        labels:
+          severity: warning
+      - alert: KubeMemoryQuotaOvercommit
+        annotations:
+          description:
+            Cluster {{ $labels.cluster }} has overcommitted memory resource
+            requests for Namespaces.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
+          summary: Cluster has overcommitted memory resource requests.
+        expr: |-
+          sum(min without(resource) (kube_resourcequota{job="kube-state-metrics", type="hard", resource=~"(memory|requests.memory)"})) by (cluster)
+            /
+          sum(kube_node_status_allocatable{resource="memory", job="kube-state-metrics"}) by (cluster)
+            > 1.5
+        for: 5m
+        labels:
+          severity: warning
+      - alert: KubeQuotaAlmostFull
+        annotations:
+          description:
+            Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+            }} of its {{ $labels.resource }} quota.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
+          summary: Namespace quota is going to be full.
+        expr: |-
+          kube_resourcequota{job="kube-state-metrics", type="used"}
+            / ignoring(instance, job, type)
+          (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
+            > 0.9 < 1
+        for: 15m
+        labels:
+          severity: info
+      - alert: KubeQuotaFullyUsed
+        annotations:
+          description:
+            Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+            }} of its {{ $labels.resource }} quota.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
+          summary: Namespace quota is fully used.
+        expr: |-
+          kube_resourcequota{job="kube-state-metrics", type="used"}
+            / ignoring(instance, job, type)
+          (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
+            == 1
+        for: 15m
+        labels:
+          severity: info
+      - alert: KubeQuotaExceeded
+        annotations:
+          description:
+            Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
+            }} of its {{ $labels.resource }} quota.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
+          summary: Namespace quota has exceeded the limits.
+        expr: |-
+          kube_resourcequota{job="kube-state-metrics", type="used"}
+            / ignoring(instance, job, type)
+          (kube_resourcequota{job="kube-state-metrics", type="hard"} > 0)
+            > 1
+        for: 15m
+        labels:
+          severity: warning
+      - alert: CPUThrottlingHigh
+        annotations:
+          description:
+            "{{ $value | humanizePercentage }} throttling of CPU in namespace
+            {{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
+            }}."
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
+          summary: Processes experience elevated CPU throttling.
+        expr: |-
+          sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
+            /
+          sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
+            > ( 25 / 100 )
+        for: 15m
+        labels:
+          severity: info
@@ -1,109 +1,114 @@
-
 groups:
- name: kubernetes-storage
-  rules:
-  - alert: KubePersistentVolumeFillingUp
-    annotations:
-      description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
-        }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
-        {{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
-      summary: PersistentVolume is filling up.
-    expr: |-
-      (
-        kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
-          /
-        kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
-      ) < 0.03
-      and
-      kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
-      unless on (cluster, namespace, persistentvolumeclaim)
-      kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
-      unless on (cluster, namespace, persistentvolumeclaim)
-      kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
-    for: 1m
-    labels:
-      severity: critical
-  - alert: KubePersistentVolumeFillingUp
-    annotations:
-      description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
-        }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
-        {{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value
-        | humanizePercentage }} is available.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
-      summary: PersistentVolume is filling up.
-    expr: |-
-      (
-        kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
-          /
-        kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
-      ) < 0.15
-      and
-      kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
-      and
-      predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
-      unless on (cluster, namespace, persistentvolumeclaim)
-      kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
-      unless on (cluster, namespace, persistentvolumeclaim)
-      kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
-    for: 1h
-    labels:
-      severity: warning
-  - alert: KubePersistentVolumeInodesFillingUp
-    annotations:
-      description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
-        }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
-        {{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
-      summary: PersistentVolumeInodes are filling up.
-    expr: |-
-      (
-        kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
-          /
-        kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
-      ) < 0.03
-      and
-      kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
-      unless on (cluster, namespace, persistentvolumeclaim)
-      kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
-      unless on (cluster, namespace, persistentvolumeclaim)
-      kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
-    for: 1m
-    labels:
-      severity: critical
-  - alert: KubePersistentVolumeInodesFillingUp
-    annotations:
-      description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
-        }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
-        {{ . }} {{- end }} is expected to run out of inodes within four days. Currently
-        {{ $value | humanizePercentage }} of its inodes are free.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
-      summary: PersistentVolumeInodes are filling up.
-    expr: |-
-      (
-        kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
-          /
-        kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
-      ) < 0.15
-      and
-      kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
-      and
-      predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
-      unless on (cluster, namespace, persistentvolumeclaim)
-      kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
-      unless on (cluster, namespace, persistentvolumeclaim)
-      kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
-    for: 1h
-    labels:
-      severity: warning
-  - alert: KubePersistentVolumeErrors
-    annotations:
-      description: The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
-        -}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
-      summary: PersistentVolume is having issues with provisioning.
-    expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
-      > 0
-    for: 5m
-    labels:
-      severity: critical
+  - name: kubernetes-storage
+    rules:
+      - alert: KubePersistentVolumeFillingUp
+        annotations:
+          description:
+            The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+            }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
+            {{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
+          summary: PersistentVolume is filling up.
+        expr: |-
+          (
+            kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
+              /
+            kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
+          ) < 0.03
+          and
+          kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
+          unless on (cluster, namespace, persistentvolumeclaim)
+          kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
+          unless on (cluster, namespace, persistentvolumeclaim)
+          kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
+        for: 1m
+        labels:
+          severity: critical
+      - alert: KubePersistentVolumeFillingUp
+        annotations:
+          description:
+            Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+            }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
+            {{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value
+            | humanizePercentage }} is available.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
+          summary: PersistentVolume is filling up.
+        expr: |-
+          (
+            kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
+              /
+            kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
+          ) < 0.15
+          and
+          kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
+          and
+          predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
+          unless on (cluster, namespace, persistentvolumeclaim)
+          kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
+          unless on (cluster, namespace, persistentvolumeclaim)
+          kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
+        for: 1h
+        labels:
+          severity: warning
+      - alert: KubePersistentVolumeInodesFillingUp
+        annotations:
+          description:
+            The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+            }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
+            {{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
+          summary: PersistentVolumeInodes are filling up.
+        expr: |-
+          (
+            kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
+              /
+            kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
+          ) < 0.03
+          and
+          kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
+          unless on (cluster, namespace, persistentvolumeclaim)
+          kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
+          unless on (cluster, namespace, persistentvolumeclaim)
+          kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
+        for: 1m
+        labels:
+          severity: critical
+      - alert: KubePersistentVolumeInodesFillingUp
+        annotations:
+          description:
+            Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
+            }} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
+            {{ . }} {{- end }} is expected to run out of inodes within four days. Currently
+            {{ $value | humanizePercentage }} of its inodes are free.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
+          summary: PersistentVolumeInodes are filling up.
+        expr: |-
+          (
+            kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
+              /
+            kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
+          ) < 0.15
+          and
+          kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
+          and
+          predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
+          unless on (cluster, namespace, persistentvolumeclaim)
+          kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
+          unless on (cluster, namespace, persistentvolumeclaim)
+          kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
+        for: 1h
+        labels:
+          severity: warning
+      - alert: KubePersistentVolumeErrors
+        annotations:
+          description:
+            The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
+            -}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
+          summary: PersistentVolume is having issues with provisioning.
+        expr:
+          kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
+          > 0
+        for: 5m
+        labels:
+          severity: critical
@@ -1,340 +1,367 @@
 groups:
- name: node-exporter
-  rules:
-  - alert: NodeFilesystemSpaceFillingUp
-    annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
-        }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
-        space left and is filling up.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
-      summary: Filesystem is predicted to run out of space within the next 24 hours.
-    expr: |-
-      (
-        node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15
-      and
-        predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
-      and
-        node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
-      )
-    for: 1h
-    labels:
-      severity: warning
-  - alert: NodeFilesystemSpaceFillingUp
-    annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
-        }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
-        space left and is filling up fast.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
-      summary: Filesystem is predicted to run out of space within the next 4 hours.
-    expr: |-
-      (
-        node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10
-      and
-        predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
-      and
-        node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
-      )
-    for: 1h
-    labels:
-      severity: critical
-  - alert: NodeFilesystemAlmostOutOfSpace
-    annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
-        }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
-        space left.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
-      summary: Filesystem has less than 5% space left.
-    expr: |-
-      (
-        node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
-      and
-        node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
-      )
-    for: 30m
-    labels:
-      severity: warning
-  - alert: NodeFilesystemAlmostOutOfSpace
-    annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
-        }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
-        space left.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
-      summary: Filesystem has less than 3% space left.
-    expr: |-
-      (
-        node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
-      and
-        node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
-      )
-    for: 30m
-    labels:
-      severity: critical
-  - alert: NodeFilesystemFilesFillingUp
-    annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
-        }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
-        inodes left and is filling up.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
-      summary: Filesystem is predicted to run out of inodes within the next 24 hours.
-    expr: |-
-      (
-        node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40
-      and
-        predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
-      and
-        node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
-      )
-    for: 1h
-    labels:
-      severity: warning
-  - alert: NodeFilesystemFilesFillingUp
-    annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
-        }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
-        inodes left and is filling up fast.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
-      summary: Filesystem is predicted to run out of inodes within the next 4 hours.
-    expr: |-
-      (
-        node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20
-      and
-        predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
-      and
-        node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
-      )
-    for: 1h
-    labels:
-      severity: critical
-  - alert: NodeFilesystemAlmostOutOfFiles
-    annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
-        }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
-        inodes left.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
-      summary: Filesystem has less than 5% inodes left.
-    expr: |-
-      (
-        node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
-      and
-        node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
-      )
-    for: 1h
-    labels:
-      severity: warning
-  - alert: NodeFilesystemAlmostOutOfFiles
-    annotations:
-      description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
-        }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
-        inodes left.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
-      summary: Filesystem has less than 3% inodes left.
-    expr: |-
-      (
-        node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
-      and
-        node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
-      )
-    for: 1h
-    labels:
-      severity: critical
-  - alert: NodeNetworkReceiveErrs
-    annotations:
-      description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
-        {{ printf "%.0f" $value }} receive errors in the last two minutes.'
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
-      summary: Network interface is reporting many receive errors.
-    expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
-      > 0.01
-    for: 1h
-    labels:
-      severity: warning
-  - alert: NodeNetworkTransmitErrs
-    annotations:
-      description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
-        {{ printf "%.0f" $value }} transmit errors in the last two minutes.'
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
-      summary: Network interface is reporting many transmit errors.
-    expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
-      > 0.01
-    for: 1h
-    labels:
-      severity: warning
-  - alert: NodeHighNumberConntrackEntriesUsed
-    annotations:
-      description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
-      summary: Number of conntrack are getting close to the limit.
-    expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
-      > 0.75
-    labels:
-      severity: warning
-  - alert: NodeTextFileCollectorScrapeError
-    annotations:
-      description: Node Exporter text file collector on {{ $labels.instance }} failed
-        to scrape.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
-      summary: Node Exporter text file collector failed to scrape.
-    expr: node_textfile_scrape_error{job="node-exporter"} == 1
-    labels:
-      severity: warning
-  - alert: NodeClockSkewDetected
-    annotations:
-      description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
-        Ensure NTP is configured correctly on this host.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
-      summary: Clock skew detected.
-    expr: |-
-      (
-        node_timex_offset_seconds{job="node-exporter"} > 0.05
-      and
-        deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
-      )
-      or
-      (
-        node_timex_offset_seconds{job="node-exporter"} < -0.05
-      and
-        deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
-      )
-    for: 10m
-    labels:
-      severity: warning
-  - alert: NodeClockNotSynchronising
-    annotations:
-      description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
-        is configured on this host.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
-      summary: Clock not synchronising.
-    expr: |-
-      min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
-      and
-      node_timex_maxerror_seconds{job="node-exporter"} >= 16
-    for: 10m
-    labels:
-      severity: warning
-  - alert: NodeRAIDDegraded
-    annotations:
-      description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
-        in degraded state due to one or more disks failures. Number of spare drives
-        is insufficient to fix issue automatically.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
-      summary: RAID Array is degraded.
-    expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
-      - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"})
-      > 0
-    for: 15m
-    labels:
-      severity: critical
-  - alert: NodeRAIDDiskFailure
-    annotations:
-      description: At least one device in RAID array at {{ $labels.instance }} failed.
-        Array '{{ $labels.device }}' needs attention and possibly a disk swap.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
-      summary: Failed device in RAID array.
-    expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
-      > 0
-    labels:
-      severity: warning
-  - alert: NodeFileDescriptorLimit
-    annotations:
-      description: File descriptors limit at {{ $labels.instance }} is currently at
-        {{ printf "%.2f" $value }}%.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
-      summary: Kernel is predicted to exhaust file descriptors limit soon.
-    expr: |-
-      (
-        node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
-      )
-    for: 15m
-    labels:
-      severity: warning
-  - alert: NodeFileDescriptorLimit
-    annotations:
-      description: File descriptors limit at {{ $labels.instance }} is currently at
-        {{ printf "%.2f" $value }}%.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
-      summary: Kernel is predicted to exhaust file descriptors limit soon.
-    expr: |-
-      (
-        node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
-      )
-    for: 15m
-    labels:
-      severity: critical
-  - alert: NodeCPUHighUsage
-    annotations:
-      description: |
-        CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
-      summary: High CPU usage.
-    expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
-      mode!="idle"}[2m]))) * 100 > 90
-    for: 15m
-    labels:
-      severity: info
-  - alert: NodeSystemSaturation
-    annotations:
-      description: |
-        System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
-        This might indicate this instance resources saturation and can cause it becoming unresponsive.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation
-      summary: System saturated, load per core is very high.
-    expr: |-
-      node_load1{job="node-exporter"}
-      / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2
-    for: 15m
-    labels:
-      severity: warning
-  - alert: NodeMemoryMajorPagesFaults
-    annotations:
-      description: |
-        Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
-        Please check that there is enough memory available at this instance.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults
-      summary: Memory major page faults are occurring at very high rate.
-    expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500
-    for: 15m
-    labels:
-      severity: warning
-  - alert: NodeMemoryHighUtilization
-    annotations:
-      description: |
-        Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization
-      summary: Host is running out of memory.
-    expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
-      * 100) > 90
-    for: 15m
-    labels:
-      severity: warning
-  - alert: NodeDiskIOSaturation
-    annotations:
-      description: |
-        Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
-        This symptom might indicate disk saturation.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation
-      summary: Disk IO queue is high.
-    expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
-      > 10
-    for: 30m
-    labels:
-      severity: warning
-  - alert: NodeSystemdServiceFailed
-    annotations:
-      description: Systemd service {{ $labels.name }} has entered failed state at
-        {{ $labels.instance }}
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed
-      summary: Systemd service has entered failed state.
-    expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1
-    for: 5m
-    labels:
-      severity: warning
-  - alert: NodeBondingDegraded
-    annotations:
-      description: Bonding interface {{ $labels.master }} on {{ $labels.instance }}
-        is in degraded state due to one or more slave failures.
-      runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
-      summary: Bonding interface is degraded
-    expr: (node_bonding_slaves - node_bonding_active) != 0
-    for: 5m
-    labels:
-      severity: warning
+  - name: node-exporter
+    rules:
+      - alert: NodeFilesystemSpaceFillingUp  # warning tier: <15% space free, trending to zero within 24h
+        expr: |-
+          (
+            node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 15
+          and
+            predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
+          and
+            node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+          )
+        for: 1h
+        labels:
+          severity: warning
+        annotations:
+          summary: Filesystem is predicted to run out of space within the next 24 hours.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
+          description: >-
+            Filesystem on {{ $labels.device }}, mounted on
+            {{ $labels.mountpoint }}, at {{ $labels.instance }} has only
+            {{ printf "%.2f" $value }}% available space left and is filling up.
+      - alert: NodeFilesystemSpaceFillingUp  # critical tier: <10% space free, trending to zero within 4h
+        expr: |-
+          (
+            node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 10
+          and
+            predict_linear(node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
+          and
+            node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+          )
+        for: 1h
+        labels:
+          severity: critical
+        annotations:
+          summary: Filesystem is predicted to run out of space within the next 4 hours.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
+          description: >-
+            Filesystem on {{ $labels.device }}, mounted on
+            {{ $labels.mountpoint }}, at {{ $labels.instance }} has only
+            {{ printf "%.2f" $value }}% available space left and is filling up fast.
+      - alert: NodeFilesystemAlmostOutOfSpace  # warning tier: under 5% space free on a writable filesystem
+        expr: |-
+          (
+            node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
+          and
+            node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+          )
+        for: 30m
+        labels:
+          severity: warning
+        annotations:
+          summary: Filesystem has less than 5% space left.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
+          description: >-
+            Filesystem on {{ $labels.device }}, mounted on
+            {{ $labels.mountpoint }}, at {{ $labels.instance }} has only
+            {{ printf "%.2f" $value }}% available space left.
+      - alert: NodeFilesystemAlmostOutOfSpace  # critical tier: under 3% space free on a writable filesystem
+        expr: |-
+          (
+            node_filesystem_avail_bytes{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_size_bytes{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
+          and
+            node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+          )
+        for: 30m
+        labels:
+          severity: critical
+        annotations:
+          summary: Filesystem has less than 3% space left.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
+          description: >-
+            Filesystem on {{ $labels.device }}, mounted on
+            {{ $labels.mountpoint }}, at {{ $labels.instance }} has only
+            {{ printf "%.2f" $value }}% available space left.
+      - alert: NodeFilesystemFilesFillingUp  # warning tier: <40% inodes free, trending to zero within 24h
+        expr: |-
+          (
+            node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 40
+          and
+            predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 24*60*60) < 0
+          and
+            node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+          )
+        for: 1h
+        labels:
+          severity: warning
+        annotations:
+          summary: Filesystem is predicted to run out of inodes within the next 24 hours.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
+          description: >-
+            Filesystem on {{ $labels.device }}, mounted on
+            {{ $labels.mountpoint }}, at {{ $labels.instance }} has only
+            {{ printf "%.2f" $value }}% available inodes left and is filling up.
+      - alert: NodeFilesystemFilesFillingUp  # critical tier: <20% inodes free, trending to zero within 4h
+        expr: |-
+          (
+            node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 20
+          and
+            predict_linear(node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""}[6h], 4*60*60) < 0
+          and
+            node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+          )
+        for: 1h
+        labels:
+          severity: critical
+        annotations:
+          summary: Filesystem is predicted to run out of inodes within the next 4 hours.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
+          description: >-
+            Filesystem on {{ $labels.device }}, mounted on
+            {{ $labels.mountpoint }}, at {{ $labels.instance }} has only
+            {{ printf "%.2f" $value }}% available inodes left and is filling up fast.
+      - alert: NodeFilesystemAlmostOutOfFiles  # warning tier: under 5% inodes free on a writable filesystem
+        expr: |-
+          (
+            node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 5
+          and
+            node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+          )
+        for: 1h
+        labels:
+          severity: warning
+        annotations:
+          summary: Filesystem has less than 5% inodes left.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
+          description: >-
+            Filesystem on {{ $labels.device }}, mounted on
+            {{ $labels.mountpoint }}, at {{ $labels.instance }} has only
+            {{ printf "%.2f" $value }}% available inodes left.
+      - alert: NodeFilesystemAlmostOutOfFiles  # critical tier: under 3% inodes free on a writable filesystem
+        expr: |-
+          (
+            node_filesystem_files_free{job="node-exporter",fstype!="",mountpoint!=""} / node_filesystem_files{job="node-exporter",fstype!="",mountpoint!=""} * 100 < 3
+          and
+            node_filesystem_readonly{job="node-exporter",fstype!="",mountpoint!=""} == 0
+          )
+        for: 1h
+        labels:
+          severity: critical
+        annotations:
+          summary: Filesystem has less than 3% inodes left.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
+          description: >-
+            Filesystem on {{ $labels.device }}, mounted on
+            {{ $labels.mountpoint }}, at {{ $labels.instance }} has only
+            {{ printf "%.2f" $value }}% available inodes left.
+      - alert: NodeNetworkReceiveErrs  # receive-error rate above 1% of the received-packet rate (2m window)
+        expr: >-
+          rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
+          > 0.01
+        for: 1h
+        labels:
+          severity: warning
+        annotations:
+          summary: Network interface is reporting many receive errors.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
+          description: >-
+            {{ $labels.instance }} interface {{ $labels.device }} has encountered
+            {{ printf "%.0f" $value }} receive errors in the last two minutes.
+      - alert: NodeNetworkTransmitErrs  # transmit-error rate above 1% of the transmitted-packet rate (2m window)
+        expr: >-
+          rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
+          > 0.01
+        for: 1h
+        labels:
+          severity: warning
+        annotations:
+          summary: Network interface is reporting many transmit errors.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
+          description: >-
+            {{ $labels.instance }} interface {{ $labels.device }} has encountered
+            {{ printf "%.0f" $value }} transmit errors in the last two minutes.
+      - alert: NodeHighNumberConntrackEntriesUsed  # conntrack entries exceed 75% of the limit; no "for" hold time
+        expr: >-
+          (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
+          > 0.75
+        labels:
+          severity: warning
+        annotations:
+          summary: Number of conntrack are getting close to the limit.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
+          description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
+      - alert: NodeTextFileCollectorScrapeError  # textfile collector reports a scrape error; no "for" hold time
+        expr: node_textfile_scrape_error{job="node-exporter"} == 1
+        labels:
+          severity: warning
+        annotations:
+          summary: Node Exporter text file collector failed to scrape.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
+          description: >-
+            Node Exporter text file collector on {{ $labels.instance }}
+            failed to scrape.
+      - alert: NodeClockSkewDetected  # offset beyond +/-0.05s and not moving back toward zero
+        expr: |-
+          (
+            node_timex_offset_seconds{job="node-exporter"} > 0.05
+          and
+            deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) >= 0
+          )
+          or
+          (
+            node_timex_offset_seconds{job="node-exporter"} < -0.05
+          and
+            deriv(node_timex_offset_seconds{job="node-exporter"}[5m]) <= 0
+          )
+        for: 10m
+        labels:
+          severity: warning
+        annotations:
+          summary: Clock skew detected.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
+          description: >-
+            Clock at {{ $labels.instance }} is out of sync by more than 0.05s. Ensure
+            NTP is configured correctly on this host.
+      - alert: NodeClockNotSynchronising  # sync status hit 0 within the last 5m and max error is >= 16s
+        expr: |-
+          min_over_time(node_timex_sync_status{job="node-exporter"}[5m]) == 0
+          and
+          node_timex_maxerror_seconds{job="node-exporter"} >= 16
+        for: 10m
+        labels:
+          severity: warning
+        annotations:
+          summary: Clock not synchronising.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
+          description: >-
+            Clock at {{ $labels.instance }} is not synchronising. Ensure
+            NTP is configured on this host.
+      - alert: NodeRAIDDegraded  # required md disks minus active md disks is above zero
+        expr: >-
+          node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
+          - ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"})
+          > 0
+        for: 15m
+        labels:
+          severity: critical
+        annotations:
+          summary: RAID Array is degraded.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
+          description: >-
+            RAID array '{{ $labels.device }}' at {{ $labels.instance }} is in degraded
+            state due to one or more disks failures. Number of spare drives is
+            insufficient to fix issue automatically.
+      - alert: NodeRAIDDiskFailure  # at least one md disk is in state "failed"; no "for" hold time
+        expr: >-
+          node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
+          > 0
+        labels:
+          severity: warning
+        annotations:
+          summary: Failed device in RAID array.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
+          description: >-
+            At least one device in RAID array at {{ $labels.instance }} failed. Array
+            '{{ $labels.device }}' needs attention and possibly a disk swap.
+      - alert: NodeFileDescriptorLimit  # warning tier: more than 70% of file descriptors allocated
+        expr: |-
+          (
+            node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 70
+          )
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: Kernel is predicted to exhaust file descriptors limit soon.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
+          description: >-
+            File descriptors limit at {{ $labels.instance }} is
+            currently at {{ printf "%.2f" $value }}%.
+      - alert: NodeFileDescriptorLimit  # critical tier: more than 90% of file descriptors allocated
+        expr: |-
+          (
+            node_filefd_allocated{job="node-exporter"} * 100 / node_filefd_maximum{job="node-exporter"} > 90
+          )
+        for: 15m
+        labels:
+          severity: critical
+        annotations:
+          summary: Kernel is predicted to exhaust file descriptors limit soon.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
+          description: >-
+            File descriptors limit at {{ $labels.instance }} is
+            currently at {{ printf "%.2f" $value }}%.
+      - alert: NodeCPUHighUsage  # non-idle CPU time, averaged over CPUs, above 90%
+        expr: >-
+          sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
+          mode!="idle"}[2m]))) * 100 > 90
+        for: 15m
+        labels:
+          severity: info
+        annotations:
+          summary: High CPU usage.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
+          description: |
+            CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
+      - alert: NodeSystemSaturation  # node_load1 divided by the number of idle-mode CPU series is above 2
+        expr: |-
+          node_load1{job="node-exporter"}
+          / count without (cpu, mode) (node_cpu_seconds_total{job="node-exporter", mode="idle"}) > 2
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: System saturated, load per core is very high.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemsaturation
+          description: |
+            System load per core at {{ $labels.instance }} has been above 2 for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
+            This might indicate this instance resources saturation and can cause it becoming unresponsive.
+      - alert: NodeMemoryMajorPagesFaults  # more than 500 major page faults per second (5m rate)
+        expr: rate(node_vmstat_pgmajfault{job="node-exporter"}[5m]) > 500
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: Memory major page faults are occurring at very high rate.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememorymajorpagesfaults
+          description: |
+            Memory major pages are occurring at very high rate at {{ $labels.instance }}, 500 major page faults per second for the last 15 minutes, is currently at {{ printf "%.2f" $value }}.
+            Please check that there is enough memory available at this instance.
+      - alert: NodeMemoryHighUtilization  # 100 minus the MemAvailable percentage is above 90
+        expr: >-
+          100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
+          * 100) > 90
+        for: 15m
+        labels:
+          severity: warning
+        annotations:
+          summary: Host is running out of memory.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization
+          description: |
+            Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
+      - alert: NodeDiskIOSaturation  # 5m rate of weighted disk I/O time above 10 on matching block devices
+        expr: >-
+          rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
+          > 10
+        for: 30m
+        labels:
+          severity: warning
+        annotations:
+          summary: Disk IO queue is high.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation
+          description: |
+            Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above 10 for the last 30 minutes, is currently at {{ printf "%.2f" $value }}.
+            This symptom might indicate disk saturation.
+      - alert: NodeSystemdServiceFailed  # a systemd unit reports state "failed"
+        expr: node_systemd_unit_state{job="node-exporter", state="failed"} == 1
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: Systemd service has entered failed state.
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed
+          description: >-
+            Systemd service {{ $labels.name }} has entered failed state
+            at {{ $labels.instance }}
+      - alert: NodeBondingDegraded  # slave count and active count of a bond differ
+        expr: (node_bonding_slaves - node_bonding_active) != 0
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: Bonding interface is degraded
+          runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
+          description: >-
+            Bonding interface {{ $labels.master }} on {{ $labels.instance }} is
+            in degraded state due to one or more slave failures.
@@ -1,70 +1,76 @@
 groups:
- name: node-resource-utilization.rules
-  rules:
-  - alert: HostHighCpuLoad
-    annotations:
-      description: |-
-        CPU load is > 90%
-          VALUE = {{ $value }}
-          LABELS = {{ $labels }}
-      summary: Host high CPU load (instance {{ $labels.instance }})
-    expr: (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
-      > 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
-    for: 10m
-    labels:
-      severity: critical
-  - alert: MemoryUtilizationHighWarning
-    annotations:
-      dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
-        $labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
-      description: Node {{ $labels.instance }} has less than 10% available memory.
-      summary: Node Memory utilization warning
-    expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
-    for: 5m
-    labels:
-      severity: critical
-  - alert: MemoryUtilizationHighCritical
-    annotations:
-      dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
-        $labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
-      description: Node {{ $labels.instance }} has less than 5% available memory.
-      summary: Node Memory utilization critical
-    expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5
-    for: 1m
-    labels:
-      severity: critical
-  - alert: NodeNotReady
-    annotations:
-      description: Node {{ $labels.node }} has CPU utilization over 90%.
-      summary: Node has been in not-ready state for longer than 3 minutes
-    expr: (sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
-      <= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"}))
-      > 0
-    for: 5m
-    labels:
-      severity: critical
-  - alert: KubernetesNodeMemoryPressure
-    annotations:
-      description: |-
-        Node {{ $labels.node }} has MemoryPressure condition
-          VALUE = {{ $value }}
-          LABELS = {{ $labels }}
-      summary: Kubernetes Node memory pressure (instance {{ $labels.instance }})
-    expr: kube_node_status_condition{condition="MemoryPressure",status="true"} ==
-      1
-    for: 2m
-    labels:
-      severity: critical
-  - alert: KubernetesContainerOomKiller
-    annotations:
-      description: |-
-        Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
-          VALUE = {{ $value }}
-          LABELS = {{ $labels }}
-      summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
-    expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
-      offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
-      == 1
-    for: 0m
-    labels:
-      severity: warning
+  - name: node-resource-utilization.rules
+    rules:
+      - alert: HostHighCpuLoad  # non-idle CPU above 0.9 per instance, joined with nodename from node_uname_info
+        expr: >-
+          (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
+          > 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
+        for: 10m
+        labels:
+          severity: critical
+        annotations:
+          summary: Host high CPU load (instance {{ $labels.instance }})
+          description: |-
+            CPU load is > 90%
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+      - alert: MemoryUtilizationHighWarning  # warning tier: under 10% of memory available
+        annotations:
+          dashboard: >-  # NOTE(review): "%5{ $labels.instance }}" has a single "{" - URL looks truncated, confirm
+            https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
+            $labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
+          description: Node {{ $labels.instance }} has less than 10% available memory.
+          summary: Node Memory utilization warning
+        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
+        for: 5m
+        labels:
+          severity: warning  # was "critical", which made this tier identical to MemoryUtilizationHighCritical
+      - alert: MemoryUtilizationHighCritical  # critical tier: under 5% of memory available
+        expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 5
+        for: 1m
+        labels:
+          severity: critical
+        annotations:
+          summary: Node Memory utilization critical
+          description: Node {{ $labels.instance }} has less than 5% available memory.
+          dashboard: >-  # NOTE(review): "%5{ $labels.instance }}" has a single "{" - URL looks truncated, confirm
+            https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
+            $labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
+      - alert: NodeNotReady  # Ready=true condition never seen in the last 3m, or the metric is absent
+        annotations:
+          description: Node {{ $labels.node }} has not been in Ready state for more than 3 minutes.
+          summary: Node has been in not-ready state for longer than 3 minutes
+        expr:
+          (sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
+          <= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"}))
+          > 0
+        for: 5m
+        labels:
+          severity: critical
+      - alert: KubernetesNodeMemoryPressure  # node condition MemoryPressure is reported as true
+        expr: >-
+          kube_node_status_condition{condition="MemoryPressure",status="true"} ==
+          1
+        for: 2m
+        labels:
+          severity: critical
+        annotations:
+          summary: Kubernetes Node memory pressure (instance {{ $labels.instance }})
+          description: |-
+            Node {{ $labels.node }} has MemoryPressure condition
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
+      - alert: KubernetesContainerOomKiller  # restart count rose in 10m and last termination reason was OOMKilled
+        expr: >-
+          (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
+          offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
+          == 1
+        for: 0m
+        labels:
+          severity: warning
+        annotations:
+          summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
+          description: |-
+            Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has been OOMKilled {{ $value }} times in the last 10 minutes.
+              VALUE = {{ $value }}
+              LABELS = {{ $labels }}
@@ -1,21 +1,25 @@
 groups:
- name: velero
-  rules:
-  - alert: VeleroBackupPartialFailures
-    annotations:
-      message: Velero backup  {{ $labels.schedule }}  has  {{$value | humanizePercentage}}  partialy
-        failed backups.
-    expr: velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
-      > 0.25
-    for: 15m
-    labels:
-      severity: critical
-  - alert: VeleroBackupFailures
-    annotations:
-      message: Velero backup  {{$labels.schedule}}  has  {{$value | humanizePercentage}}  failed
-        backups.
-    expr: velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
-      > 0.25
-    for: 15m
-    labels:
-      severity: critical
+  - name: velero
+    rules:
+      - alert: VeleroBackupPartialFailures  # more than 25% of backup attempts for a schedule partially failed
+        annotations:
+          message:
+            Velero backup  {{ $labels.schedule }}  has  {{$value | humanizePercentage}}  partially
+            failed backups.
+        expr:
+          velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
+          > 0.25
+        for: 15m
+        labels:
+          severity: critical
+      - alert: VeleroBackupFailures  # more than 25% of backup attempts for a schedule failed
+        expr: >-
+          velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
+          > 0.25
+        for: 15m
+        labels:
+          severity: critical
+        annotations:
+          message: >-
+            Velero backup  {{$labels.schedule}}  has  {{$value | humanizePercentage}}  failed
+            backups.
@@ -1,46 +1,52 @@
 groups:
- name: x509-certificate-exporter.rules
-  rules:
-  - alert: X509ExporterReadErrors
-    annotations:
-      description: Over the last 15 minutes, this x509-certificate-exporter instance
-        has experienced errors reading certificate files or querying the Kubernetes
-        API. This could be caused by a misconfiguration if triggered when the exporter
-        starts.
-      summary: Increasing read errors for x509-certificate-exporter
-    expr: delta(x509_read_errors[15m]) > 0
-    for: 5m
-    labels:
-      severity: warning
-  - alert: CertificateError
-    annotations:
-      description: Certificate could not be decoded {{if $labels.secret_name }} in
-        Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at
-        location "{{ $labels.filepath }}"{{end}}
-      summary: Certificate cannot be decoded
-    expr: x509_cert_error > 0
-    for: 15m
-    labels:
-      severity: warning
-  - alert: CertificateRenewal
-    annotations:
-      description: Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
-        $labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
-        $labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
-      summary: Certificate should be renewed
-    expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
-      issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28
-    for: 15m
-    labels:
-      severity: warning
-  - alert: CertificateExpiration
-    annotations:
-      description: Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
-        $labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
-        $labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
-      summary: Certificate is about to expire
-    expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
-      issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14
-    for: 15m
-    labels:
-      severity: critical
+  # Alerting rules built on the x509_* metrics (read errors, decode errors and
+  # certificate expiry timestamps).
+  - name: x509-certificate-exporter.rules
+    rules:
+      # Fires when x509_read_errors increased over the last 15 minutes and the
+      # condition held for 5 minutes.
+      - alert: X509ExporterReadErrors
+        annotations:
+          description:
+            Over the last 15 minutes, this x509-certificate-exporter instance
+            has experienced errors reading certificate files or querying the Kubernetes
+            API. This could be caused by a misconfiguration if triggered when the exporter
+            starts.
+          summary: Increasing read errors for x509-certificate-exporter
+        expr: delta(x509_read_errors[15m]) > 0
+        for: 5m
+        labels:
+          severity: warning
+      # Fires when x509_cert_error is above zero for 15 minutes. The description
+      # names the Kubernetes secret when the secret_name label is set, otherwise
+      # the file path.
+      - alert: CertificateError
+        annotations:
+          description:
+            Certificate could not be decoded {{if $labels.secret_name }} in
+            Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at
+            location "{{ $labels.filepath }}"{{end}}
+          summary: Certificate cannot be decoded
+        expr: x509_cert_error > 0
+        for: 15m
+        labels:
+          severity: warning
+      # Warning when fewer than 28 days remain: (not_after - now) is in seconds
+      # and is divided by 86400 to get days. The selector skips the
+      # linkerd-identity-issuer secret and the linkerd webhook issuer CN, and
+      # only matches series whose issuer_O label is empty.
+      - alert: CertificateRenewal
+        annotations:
+          description:
+            Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
+            $labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
+            $labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
+          summary: Certificate should be renewed
+        expr:
+          ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
+          issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28
+        for: 15m
+        labels:
+          severity: warning
+      # Same selector and day computation as CertificateRenewal, but critical
+      # and with a 14-day threshold.
+      - alert: CertificateExpiration
+        annotations:
+          description:
+            Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
+            $labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
+            $labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
+          summary: Certificate is about to expire
+        expr:
+          ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
+          issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14
+        for: 15m
+        labels:
+          severity: critical
@@ -6,35 +6,62 @@ let
    config = { };
    overlays = [ ];
  };
+  checks = import ./nix/checks.nix;
 in
 pkgs.mkShellNoCC {
  name = "clstr";

-  packages = with pkgs; [
-    just
-    npins
+  packages =
+    with pkgs;
+    [
+      # dev tools
+      just
+      npins

-    # helm
-    helmfile
-    kubernetes-helm
+      # helm
+      helmfile
+      kubernetes-helm

-    # kubectl tools
-    kubectl-cnpg
-    kubectl-neat
-    kubelogin
-    kubelogin-oidc
-    kubectl-rook-ceph
+      # kubectl tools
+      kubectl-cnpg
+      kubectl-neat
+      kubelogin
+      kubelogin-oidc
+      kubectl-rook-ceph
+      kubectl-graph
+      kubectl-klock
+      graphviz

-    # other tools
-    step-cli
-    linkerd
-    velero
-    cmctl
+      # other tools: uncomment to activate when needed
+      # step-cli
+      # linkerd
+      # cmctl
+      # rclone
+      # velero
+      # renovate

-    # dapr
-    dapr-cli
+      # dapr
+      dapr-cli
+    ]
+    ++ checks.enabledPackages;
+
+  # Environment variables
+  ARGOCD_ENV_CLUSTER_NAME = "hel1";
+  HELM_GIT_ACCESS_TOKEN = "glpat-xxx";
+
+  shellHook = builtins.concatStringsSep "\n" [
+    checks.shellHook
  ];

-  ARGOCD_ENV_CLUSTER_NAME = "rossby";
-  HELM_GIT_ACCESS_TOKEN = "glpat-xxx";
+  # Alternative shells, exposed as attributes under `passthru`.
+  # Every entry of the attrset below is turned into a shell by
+  # pkgs.mkShellNoCC, with the attribute key injected as the shell's `name`.
+  passthru = pkgs.lib.mapAttrs (name: value: pkgs.mkShellNoCC (value // { inherit name; })) {
+    # Shell that only provides npins; its hook exports NPINS_DIRECTORY="nix".
+    ci-shell = {
+      packages = [
+        pkgs.npins
+      ];
+      shellHook = ''
+        export NPINS_DIRECTORY="nix"
+      '';
+    };
+  };
 }
@@ -88,6 +88,8 @@ spec:
    server: https://kubernetes.default.svc
  - namespace: uptime
    server: https://kubernetes.default.svc
+  - namespace: forgejo
+    server: https://kubernetes.default.svc
  sourceRepos:
  - https://argoproj.github.io/argo-helm
  - https://kubernetes-sigs.github.io/metrics-server/
@@ -123,6 +125,7 @@ spec:
  - ghcr.io/slinkyproject/charts/slurm-operator-crds
  - ghcr.io/spegel-org/helm-charts
  - ghcr.io/dragonflydb/dragonfly-operator/helm/dragonfly-operator
+  - code.forgejo.org/forgejo-helm
  - https://operator.mariadb.com/mariadb-enterprise-operator
  - https://operator.mariadb.com
  - https://ot-container-kit.github.io/helm-charts
@@ -73,7 +73,7 @@
    "connString": "Username=postgres;Password=secret;Host=localhost;Port=5432;Database=app;Pooling=true;",
    "sorcerer" : "https://sorcerer.data.oceanbox.io",
    "allowedOrigins": [
-        "https://maps.oceanbox.io",
+        "https://maps.oceanbox.io"
    ],
    "appName": "atlantis",
    "appEnv": "prod",
@@ -79,3 +79,9 @@ resources:
  requests:
    cpu: 500m
    memory: 1Gi
+
+diagrid-dashboard:
+  enabled: false
+  statestore:
+    scope: prod-atlantis
+    redis: prod-atlantis-redis
@@ -1,6 +1,6 @@
 replicaCount: 1
 image:
-  tag: 369127e0-debug
+  tag: 503ccbb2-debug
 podAnnotations:
  dapr.io/app-id: "staging-atlantis"
 env:
@@ -26,12 +26,12 @@ env:
  - name: DB_USER
    valueFrom:
      secretKeyRef:
-        name: staging-atlantis-db-superuser
+        name: staging-atlantis-db-app
        key: username
  - name: DB_PASSWORD
    valueFrom:
      secretKeyRef:
-        name: staging-atlantis-db-superuser
+        name: staging-atlantis-db-app
        key: password
  - name: DAPR_API_TOKEN
    valueFrom:
@@ -116,9 +116,6 @@ cluster:
      db: prod-atlantis-db
      namespace: prod-atlantis
 resources:
-  limits:
-    cpu: 250m
-    memory: 1Gi
  requests:
    cpu: 250m
    memory: 1Gi
@@ -133,3 +130,8 @@ redis:
  resources:
    cpu: 150m
    memory: 256Mi
+diagrid-dashboard:
+  enabled: false
+  statestore:
+    scope: staging-atlantis
+    redis: staging-atlantis-redis
@@ -10,3 +10,4 @@ podAnnotations:
  dapr.io/sidecar-memory-request: "50Mi"
  # dapr.io/sidecar-cpu-limit: "100m"
  # dapr.io/sidecar-memory-limit: "1000Mi"
+
@@ -1,4 +1,8 @@
 codex:
  enabled: false
+  {{- if eq .Environment.Name "prod" }}
  autosync: false
+  {{- else }}
+  autosync: true
+  {{- end }}
  env: {{ .Environment.Name }}
@@ -0,0 +1,67 @@
+{
+    "Logging": {
+        "LogLevel": {
+            "Default": "Information",
+            "Microsoft": "Warning",
+            "Microsoft.Hosting": "Error"
+        }
+    },
+    "Debug": {
+        "LogLevel": {
+            "Default": "Debug"
+        }
+    },
+    "Console": {
+        "IncludeScopes": true,
+        "LogLevel": {
+            "Default": "Debug"
+        }
+    },
+    "OIDC": {
+        "issuer": "https://auth.oceanbox.io/realms/oceanbox",
+        "authorization_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/auth",
+        "token_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/token",
+        "jwks_uri": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/certs",
+        "userinfo_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/userinfo",
+        "end_session_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/logout",
+        "device_authorization_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/auth/device",
+        "clientId": "atlantis",
+        "clientSecret": "",
+        "scopes": [
+            "openid",
+            "email",
+            "offline_access",
+            "profile"
+        ],
+        "audiences": [
+            "atlantis"
+        ]
+    },
+    "SSO": {
+        "cookieDomain": ".oceanbox.io",
+        "cookieName": ".obx.prod",
+        "ttl": 12.0,
+        "signedOutRedirectUri": "https://maps.oceanbox.io/",
+        "realm": "atlantis",
+        "environment": "prod",
+        "keyStore": {
+            "kind": "azure",
+            "uri": "https://atlantis.blob.core.windows.net",
+            "key": "dataprotection-keys"
+        },
+        "keyVault": {
+            "kind": "azure",
+            "uri": "https://atlantisvault.vault.azure.net",
+            "key": "dataencryption-keys"
+        }
+    },
+    "plainAuthUsers": [
+        {
+            "username": "admin",
+            "password": "en-to-tre-fire",
+            "groups": [ "/oceanbox" ],
+            "roles": [ "admin" ]
+        }
+    ]
+}
+
@@ -0,0 +1,66 @@
+# JSON Patch (RFC 6902 style op/path/value) operations on the first container
+# (index 0) of the pod template.
+# NOTE(review): `add` on an object member that already exists replaces its
+# value — confirm the base has no envFrom/env entries that must be kept.
+
+# Load every key of the azure-keyvault secret as environment variables.
+- op: add
+  path: /spec/template/spec/containers/0/envFrom
+  value:
+    - secretRef:
+        name: azure-keyvault
+# Set the container's environment: runtime mode, application database
+# connection and OpenFGA connection.
+- op: add
+  path: /spec/template/spec/containers/0/env
+  value:
+    - name: APP_NAMESPACE
+      value: prod-atlantis
+    - name: DOTNET_ENVIRONMENT
+      value: Production
+    - name: ASPNETCORE_ENVIRONMENT
+      value: Production
+    # Application database settings, all read from the prod-atlantis-db-app
+    # secret.
+    - name: DB_HOST
+      valueFrom:
+        secretKeyRef:
+          name: prod-atlantis-db-app
+          key: host
+    - name: DB_PORT
+      valueFrom:
+        secretKeyRef:
+          name: prod-atlantis-db-app
+          key: port
+    - name: DB_DATABASE
+      valueFrom:
+        secretKeyRef:
+          name: prod-atlantis-db-app
+          key: dbname
+    - name: DB_USER
+      valueFrom:
+        secretKeyRef:
+          name: prod-atlantis-db-app
+          key: user
+    - name: DB_PASSWORD
+      valueFrom:
+        secretKeyRef:
+          name: prod-atlantis-db-app
+          key: password
+    # OpenFGA endpoint and its database settings, read from the
+    # prod-openfga-db-app secret.
+    # NOTE(review): secretKeyRef is resolved in the pod's own namespace, while
+    # FGA_URL targets the `openfga` namespace — confirm prod-openfga-db-app
+    # exists where this pod runs.
+    - name: FGA_URL
+      value: http://prod-openfga.openfga.svc.cluster.local:8080
+    - name: FGA_DB_HOST
+      valueFrom:
+        secretKeyRef:
+          name: prod-openfga-db-app
+          key: host
+    - name: FGA_DB_PORT
+      valueFrom:
+        secretKeyRef:
+          name: prod-openfga-db-app
+          key: port
+    - name: FGA_DB_DATABASE
+      valueFrom:
+        secretKeyRef:
+          name: prod-openfga-db-app
+          key: dbname
+    - name: FGA_DB_USER
+      valueFrom:
+        secretKeyRef:
+          name: prod-openfga-db-app
+          key: user
+    - name: FGA_DB_PASSWORD
+      valueFrom:
+        secretKeyRef:
+          name: prod-openfga-db-app
+          key: password
@@ -0,0 +1,15 @@
+# Kustomize overlay on ../base: generates the appsettings ConfigMap and
+# patches the Deployment.
+generatorOptions:
+  # Keep generated names stable (no content-hash suffix appended).
+  disableNameSuffixHash: true
+configMapGenerator:
+  # ConfigMap built from the appsettings.json file next to this kustomization.
+  - name: prod-codex-appsettings
+    files:
+      - appsettings.json
+patches:
+  # No name selector: the patch targets apps/v1 Deployment resources.
+  - target:
+      group: apps
+      version: v1
+      kind: Deployment
+    path: deployment_patch.yaml
+resources:
+  - ../base
+
--- a/Show More
+++ b/Show More