Compare commits

..

1 Commits

Author SHA1 Message Date
juselius 6e9b1c8f29 feat: add stub sonatype-nexus helmfile 2025-12-16 20:11:20 +01:00
143 changed files with 2480 additions and 3348 deletions
-1
View File
@@ -1,7 +1,6 @@
#!/usr/bin/env bash
# the shebang is ignored, but nice for editors
watch_file nix/sources.json
watch_file nix/checks.nix
# Load .env file if it exists
dotenv_if_exists
-1
View File
@@ -1,7 +1,6 @@
*.tgz
_*/
.direnv/
.env
.pre-commit-config.yaml
_*.yaml
backup/
+42 -50
View File
@@ -1,54 +1,46 @@
# yaml-language-server: $schema=https://gitlab.com/gitlab-org/gitlab/-/raw/master/app/assets/javascripts/editor/schema/ci.json
default:
tags:
- nix
image:
name: alpine/helm:latest
entrypoint: [ "/bin/bash", "-c" ]
include:
- project: oceanbox/gitlab-ci
ref: v4.5
file: template/Base.gitlab-ci.yml
# stages:
# - release
stages:
- release
# image:
# name: alpine/helm:latest
# entrypoint: ["/bin/bash", "-c"]
release:
stage: release
rules:
- if: '$CI_COMMIT_BRANCH =~ /^main/'
when: always
- when: never
script:
- |
cd $CI_PROJECT_DIR
for i in $(git show --pretty="" --name-only | grep '^charts/.*/Chart.yaml' | cut -d/ -f2); do
pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
if [ ! -z $pack ]; then
chart=$(basename $pack)
curl --request POST \
--user gitlab-ci-token:$CI_JOB_TOKEN \
--form "chart=@${chart}" \
"${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
fi
done
# release:
# stage: release
# rules:
# - if: "$CI_COMMIT_BRANCH =~ /^main/"
# when: always
# - when: never
# script:
# - |
# cd $CI_PROJECT_DIR
# for i in $(git show --pretty="" --name-only | grep '^charts/.*/Chart.yaml' | cut -d/ -f2); do
# pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
# if [ ! -z $pack ]; then
# chart=$(basename $pack)
# curl --request POST \
# --user gitlab-ci-token:$CI_JOB_TOKEN \
# --form "chart=@${chart}" \
# "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
# fi
# done
rebuild:
stage: release
rules:
- when: manual
allow_failure: true
script:
- |
cd $CI_PROJECT_DIR
for i in $(find ./charts -maxdepth 2 -name Chart.yaml | cut -d/ -f3); do
pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
if [ ! -z $pack ]; then
chart=$(basename $pack)
curl --request POST \
--user gitlab-ci-token:$CI_JOB_TOKEN \
--form "chart=@${chart}" \
"${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
fi
done
# rebuild:
# stage: release
# rules:
# - when: manual
# allow_failure: true
# script:
# - |
# cd $CI_PROJECT_DIR
# for i in $(find ./charts -maxdepth 2 -name Chart.yaml | cut -d/ -f3); do
# pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
# if [ ! -z $pack ]; then
# chart=$(basename $pack)
# curl --request POST \
# --user gitlab-ci-token:$CI_JOB_TOKEN \
# --form "chart=@${chart}" \
# "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
# fi
# done
+10 -17
View File
@@ -9,29 +9,20 @@ let
extraValues = {};
};
kustomize =
r:
kustomize = r:
if r.kind == "Deployment" then
lib.attrsets.recursiveUpdate r {
spec.template.spec.containers = builtins.map (
x:
x
// {
spec.template.spec.containers =
builtins.map (x:
x // {
livenessProbe.httpGet.path = "/healthz";
readinessProble.httpGet.path = "/healthz";
env = x.env ++ [
{
name = "INERNAL_PORT";
value = 8000;
}
];
}
) r.spec.template.spec.containers;
env = x.env ++ [ { name = "INERNAL_PORT"; value = 8000; } ];
}) r.spec.template.spec.containers;
}
else if r.kind == "Service" then
{}
else
r;
else r;
in
{
options.apps.atlantis = lib.apps.appOptions {
@@ -43,7 +34,9 @@ in
hostname = lib.mkOption {
type = lib.types.str;
default = if env == "prod" then "maps.oceanbox.io" else "atlantis.beta.oceanbox.io";
default = if env == "prod"
then "maps.oceanbox.io"
else "atlantis.beta.oceanbox.io";
description = "Revision";
};
};
+4 -2
View File
@@ -9,8 +9,10 @@ let
extraValues = {};
};
kustomize =
r: if r.kind == "Job" then lib.attrsets.recursiveUpdate r { spec.backoffLimit = 2; } else r;
kustomize = r:
if r.kind == "Job" then
lib.attrsets.recursiveUpdate r { spec.backoffLimit = 2; }
else r;
in
{
+3
View File
@@ -1,4 +1,7 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
+7 -7
View File
@@ -2,16 +2,16 @@
server="root@fs1-0"
path="/vol/brick0/nfs0/k1/pv-oceanbox-dex"
dest="${server}:${path}"
dest="$server:$path"
index=$(basename dist/assets/index-*.js)
ssh "${server}" -- rm "${path}"/static/js/*.js
scp dist/assets/*.js "${dest}"/static/js/
ssh $server -- rm $path/static/js/*.js
scp dist/assets/*.js $dest/static/js/
sed -r "s/@index@/${index}/" ./dex/templates/login.html > login.html.$$
scp ./dex/templates/* "${dest}"/templates/
scp ./dex/static/*.* "${dest}"/static/
scp login.html.$$ "${dest}"/templates/login.html
sed -r "s/@index@/$index/" ./dex/templates/login.html > login.html.$$
scp ./dex/templates/* $dest/templates/
scp ./dex/static/*.* $dest/static/
scp login.html.$$ $dest/templates/login.html
rm login.html.$$
ssh admin@k1-0.itpartner.intern -- kubectl rollout restart -n oceanbox deployment/dex
+17 -18
View File
@@ -1,5 +1,4 @@
#!/usr/bin/env bash
# shellcheck disable=SC2034 # Unused variables left for readability
helmfile () {
@@ -11,30 +10,30 @@ bases:
- ../envs/environments.yaml.gotmpl
commonLabels:
tier: ${tier}
tier: $tier
releases:
- name: ${name}
namespace: {{ .Environment.Name }}-${name}
chart: ../charts/${name}
condition: ${name}.enabled
- name: $name
namespace: {{ .Environment.Name }}-$name
chart: ../charts/$name
condition: $name.enabled
values:
- ../values/${name}/values/values.yaml.gotmpl
- ../values/${name}/values/values-{{ .Environment.Name }}.yaml
- ../values/$name/values/values.yaml.gotmpl
- ../values/$name/values/values-{{ .Environment.Name }}.yaml
postRenderer: ../bin/kustomizer
postRendererArgs:
- ../values/${name}/kustomize/{{ .Environment.Name }}
- ../values/$name/kustomize/{{ .Environment.Name }}
missingFileHandler: Info
- name: manifests
namespace: {{ .Environment.Name }}-${name}
namespace: {{ .Environment.Name }}-$name
chart: manifests
condition: ${name}.enabled
condition: $name.enabled
missingFileHandler: Info
values:
- ../values/env.yaml
- ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
- ../values/${name}/env.yaml.gotmpl
- ../values/${name}/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
- ../values/$name/env.yaml.gotmpl
- ../values/$name/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
hooks:
- events: [ prepare, cleanup ]
showlogs: true
@@ -43,7 +42,7 @@ releases:
- '{{\`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}\`}}'
- '{{\`{{ .Release.Chart }}\`}}'
- '{{\`{{ .Environment.Name }}\`}}'
- ../values/${name}/manifests
- ../values/$name/manifests
- manifests
EOF
}
@@ -60,10 +59,10 @@ done
name=$1
tier=$2
if [[ -n "${ns}" ]]; then
namespace="namespace: {{ .Environment.Name }}-${name}"
if [ -n "$ns" ]; then
namespace="namespace: {{ .Environment.Name }}-$name"
else
namespace="namespace: ${name}"
namespace="namespace: $name"
fi
helmfile "$1" "$2"
helmfile $1 $2
+14 -13
View File
@@ -4,38 +4,39 @@ set -o pipefail
cmd=$1
chart=$2
env=$3
manifests=${4:-manifests}
outdir=${5:-_manifests}
build() {
mkdir -p "${outdir}"/templates
echo "Creating ${outdir}/templates"
mkdir -p $outdir/templates
echo "Creating $outdir/templates"
echo "generating ${outdir}/Chart.yaml" 1>&2
echo "generating $outdir/Chart.yaml" 1>&2
cat <<EOF > "${outdir}"/Chart.yaml
cat <<EOF > $outdir/Chart.yaml
apiVersion: v1
appVersion: "1.0"
# description: A Helm chart for Kubernetes
name: ${chart}
name: $chart
version: 0.1.0
EOF
if [[ -d "${manifests}" ]]; then
cp -r "${manifests}"/* "${outdir}"/templates
elif [[ -f "${manifests}" ]]; then
cp "${manifests}" "${outdir}"/templates
if [ -d $manifests ]; then
cp -r $manifests/* $outdir/templates
elif [ -f $manifests ]; then
cp $manifests $outdir/templates
fi
}
clean() {
echo "cleaning ${outdir}" 1>&2
rm -rf "${outdir}"
echo "cleaning $outdir" 1>&2
rm -rf $outdir
}
case "${cmd}" in
case "$cmd" in
"build" ) build ;;
"clean" ) clean ;;
* ) echo "unsupported command: ${cmd}" 1>&2; exit 1 ;;
* ) echo "unsupported command: $cmd" 1>&2; exit 1 ;;
esac
+5 -5
View File
@@ -1,13 +1,13 @@
#!/usr/bin/env bash
[[ $# != 1 ]] && exit 1
[ $# != 1 ] && exit 1
dir=$1
base=${dir}/../base
base=$dir/../base
if [[ -f "${base}"/kustomization.yaml ]] && [[ -f "${dir}"/kustomization.yaml ]]; then
cat > "${base}"/_manifest.yaml
kubectl kustomize "${dir}"
if [ -f $base/kustomization.yaml -a -f $dir/kustomization.yaml ]; then
cat > $base/_manifest.yaml
kubectl kustomize $dir
else
cat
fi
+5 -5
View File
@@ -4,15 +4,15 @@ metadata:
name: argocd-cluster-admin
rules:
- apiGroups:
- "*"
- '*'
resources:
- "*"
- '*'
verbs:
- "*"
- '*'
- nonResourceURLs:
- "*"
- '*'
verbs:
- "*"
- '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
+2
View File
@@ -6,3 +6,5 @@ metadata:
name: cluster-admin-token
namespace: kube-system
type: kubernetes.io/service-account-token
+2
View File
@@ -10,3 +10,5 @@ metadata:
name: cluster-ekman
namespace: argocd
type: Opaque
+2 -2
View File
@@ -3,5 +3,5 @@
img=registry.gitlab.com/oceanbox/manifests/helm-kustomize-cmp
tag=${1:-latest}
docker build -t "${img}":"${tag}" .
docker push "${img}":"${tag}"
docker build -t $img:$tag .
docker push $img:$tag
+15 -16
View File
@@ -1,15 +1,14 @@
#!/bin/sh
# shellcheck disable=SC2154
export HOME=/plugin
env > /tmp/"${ARGOCD_APP_NAME}".env
env > /tmp/$ARGOCD_APP_NAME.env
echo "${ARGOCD_APP_PARAMETERS}" | jq '.[] | select(.name == "helm-parameters") | .map' | yq -P -oy > parameters.yaml
cp parameters.yaml /tmp/"${ARGOCD_APP_NAME}"-parameters.yaml
echo "$ARGOCD_APP_PARAMETERS" | jq '.[] | select(.name == "helm-parameters") | .map' | yq -P -oy > parameters.yaml
cp parameters.yaml /tmp/$ARGOCD_APP_NAME-parameters.yaml
if [ -n "${PARAM_CHART}" ] && [ "${PARAM_CHART}" != "." ]; then
CHART=${PARAM_CHART}
if [ -n "$PARAM_CHART" -a "$PARAM_CHART" != "." ]; then
CHART=$PARAM_CHART
elif [ -d chart ]; then
CHART=chart
elif [ -f chart ]; then
@@ -19,19 +18,19 @@ else
fi
[ -f chart/values.yaml ] && VALUES="-f chart/values.yaml"
[ -f values-chart.yaml ] && VALUES="${VALUES} -f values-chart.yaml"
[ -f values.yaml ] && VALUES="${VALUES} -f values.yaml"
[ -f values-"${PARAM_ENV}".yaml ] && VALUES="${VALUES} -f values-${PARAM_ENV}.yaml"
VALUES="${VALUES} -f parameters.yaml"
[ -f values-chart.yaml ] && VALUES="$VALUES -f values-chart.yaml"
[ -f values.yaml ] && VALUES="$VALUES -f values.yaml"
[ -f values-$PARAM_ENV.yaml ] && VALUES="$VALUES -f values-$PARAM_ENV.yaml"
VALUES="$VALUES -f parameters.yaml"
helm dependency update "${CHART}" >/tmp/"${ARGOCD_APP_NAME}"-helm-dependency-build.out
helm dependency update $CHART >/tmp/$ARGOCD_APP_NAME-helm-dependency-build.out
mkdir -p base
echo "helm template -n ${ARGOCD_APP_NAMESPACE} ${PARAM_FLAGS} ${VALUES} ${ARGOCD_APP_NAME} ${CHART}" > /tmp/"${ARGOCD_APP_NAME}"-helm.sh
helm template -n "${ARGOCD_APP_NAMESPACE}" "${PARAM_FLAGS}" "${VALUES}" "${ARGOCD_APP_NAME}" "${CHART}" > ./base/_manifest.yaml
echo "helm template -n $ARGOCD_APP_NAMESPACE $PARAM_FLAGS $VALUES $ARGOCD_APP_NAME $CHART" > /tmp/$ARGOCD_APP_NAME-helm.sh
helm template -n $ARGOCD_APP_NAMESPACE $PARAM_FLAGS $VALUES $ARGOCD_APP_NAME $CHART > ./base/_manifest.yaml
cp ./base/_manifest.yaml /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
cp ./base/_manifest.yaml /tmp/$ARGOCD_APP_NAME-manifest.yaml
[ -d "${PARAM_ENV}" ] && kubectl kustomize "${PARAM_ENV}" > /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
[ -d "$PARAM_ENV" ] && kubectl kustomize $PARAM_ENV > /tmp/$ARGOCD_APP_NAME-manifest.yaml
cat /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
cat /tmp/$ARGOCD_APP_NAME-manifest.yaml
+1 -1
View File
@@ -18,7 +18,7 @@ EOF
exit 0
fi
yq e -o=p "${VALUES}" | jq --slurp --raw-input '
yq e -o=p $VALUES | jq --slurp --raw-input '
[{
name: "helm-parameters",
title: "Helm Parameters",
@@ -1,9 +1,8 @@
#!/bin/sh
# shellcheck disable=SC2154
export HOME=/plugin
helm repo add --username argocd-helm --password "${OCEANBOX_HELM_ACCESS_TOKEN}" oceanbox \
helm repo add --username argocd-helm --password "$OCEANBOX_HELM_ACCESS_TOKEN" oceanbox \
https://gitlab.com/api/v4/projects/54396343/packages/helm/stable
helm repo add bitnami https://charts.bitnami.com/bitnami
+3 -3
View File
@@ -4,9 +4,9 @@ export HOME=/plugin
helm repo update oceanbox
if [ -n "${PARAM_CHART}" ] && [ "${PARAM_CHART}" != "." ]; then
helm show values "${PARAM_CHART}" > values-chart.yaml
if [ -n "$PARAM_CHART" -a "$PARAM_CHART" != "." ]; then
helm show values $PARAM_CHART > values-chart.yaml
elif [ -f chart ]; then
CHART=$(cat chart)
helm show values "${CHART}" > values-chart.yaml
helm show values $CHART > values-chart.yaml
fi
+2 -1
View File
@@ -66,7 +66,7 @@ spec:
itemType: string
collectionType: string
string: ""
# All the fields above besides 'string' apply to both the array and map type parameter announcements.
# All the fields above besides "string" apply to both the array and map type parameter announcements.
# - name: array-param
# # This field communicates the parameter's default value to the UI. Setting this field is optional.
# array: [default, items]
@@ -84,3 +84,4 @@ spec:
# If set to `true` then the plugin receives repository files with original file mode. Dangerous since the repository
# might have executable files. Set to true only if you trust the CMP plugin authors.
preserveFileMode: false
+1 -1
View File
@@ -1,4 +1,4 @@
FROM ghcr.io/helmfile/helmfile:v1.1.9
FROM ghcr.io/helmfile/helmfile:v1.0.0
RUN mkdir -p /home/argocd/cmp-server/config/
COPY plugin.yaml /home/argocd/cmp-server/config/
+2 -2
View File
@@ -3,5 +3,5 @@
img=registry.gitlab.com/oceanbox/manifests/helmfile-cmp
tag=${1:-latest}
docker build -t "${img}":"${tag}" .
docker push "${img}":"${tag}"
docker build -t $img:$tag .
docker push $img:$tag
+3 -4
View File
@@ -1,5 +1,4 @@
#!/bin/sh
# shellcheck disable=SC2154
# NOTE: Ensure errors are part of exitcode
# set -o pipefail
@@ -11,7 +10,7 @@ export HELM_CONFIG_HOME=/tmp/helm/config
export HELMFILE_CACHE_HOME=/tmp/helmfile/cache
export HELMFILE_TEMPDIR=/tmp/helmfile/tmp
test -n ARGOCD_ENV_HELMFILE_ENVIRONMENT && export HELMFILE_ENVIRONMENT="${ARGOCD_ENV_HELMFILE_ENVIRONMENT}"
test -n ARGOCD_ENV_HELMFILE_FILE_PATH && export HELMFILE_FILE_PATH="${ARGOCD_ENV_HELMFILE_FILE_PATH}"
# Forward the ArgoCD-provided overrides to helmfile only when they are set.
# The variable has to be expanded: `test -n NAME` checks the literal word
# "NAME", which is always non-empty, so the guard would never skip the export.
test -n "${ARGOCD_ENV_HELMFILE_ENVIRONMENT:-}" && export HELMFILE_ENVIRONMENT="${ARGOCD_ENV_HELMFILE_ENVIRONMENT}"
test -n "${ARGOCD_ENV_HELMFILE_FILE_PATH:-}" && export HELMFILE_FILE_PATH="${ARGOCD_ENV_HELMFILE_FILE_PATH}"
helmfile -n "${ARGOCD_APP_NAMESPACE}" "${ARGS}" template -q --include-crds
helmfile -n "$ARGOCD_APP_NAMESPACE" $ARGS template --include-crds -q
@@ -422,3 +422,4 @@ spec:
path: ca.crt
optional: true
secretName: argocd-repo-server-tls
+1 -1
View File
@@ -13,7 +13,7 @@ kubectl --context ekman apply -f cluster-admin-token.yaml
# kubectl --context oceanbox apply -f _cluster-ekman.yaml
token=$(kubectl --context ekman get secret -n kube-system argocd-manager-token -o yaml | grep ' token:' | cut -d' ' -f4 | base64 -d)
sed "s/@token@/${token}/" cluster-ekman.yaml > _cluster-ekman.yaml
sed "s/@token@/$token/" cluster-ekman.yaml > _cluster-ekman.yaml
echo "configure argocd ekman-cluster..."
cat _cluster-ekman.yaml
kubectl --context oceanbox apply -f _cluster-ekman.yaml
+1
View File
@@ -13,3 +13,4 @@ stringData:
name: staging-vcluster
server: https://staging-vcluster.staging-vcluster
type: Opaque
+4 -4
View File
@@ -32,12 +32,12 @@ projects:
additionalAnnotations: {}
description: sys components project
sourceRepos:
- "*"
- '*'
destinations:
- namespace: "*"
- namespace: '*'
server: https://kubernetes.default.svc
clusterResourceWhitelist:
- group: "*"
kind: "*"
- group: '*'
kind: '*'
orphanedResources:
warn: false
-5
View File
@@ -8,8 +8,3 @@ version: v1.35.2
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: v1.35.2
dependencies:
- name: diagrid-dashboard
version: "0.1.0"
repository: "file://../diagrid-dashboard"
condition: diagrid-dashboard.enabled
-3
View File
@@ -116,6 +116,3 @@ serviceMonitor:
nodeSelector: {}
tolerations: []
affinity: {}
diagrid-dashboard:
enabled: false
@@ -59,18 +59,12 @@ spec:
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
volumeMounts:
- name: statestore
mountPath: /app/components/statestore.yaml
subPath: statestore.yaml
{{- with .Values.volumeMounts }}
volumeMounts:
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
- name: statestore
configMap:
name: {{ include "diagrid-dashboard.fullname" . }}-statestore
{{- with .Values.volumes }}
volumes:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
@@ -1,7 +1,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "diagrid-dashboard.fullname" . }}-statestore
name: diadash-statestore
data:
statestore.yaml: |
apiVersion: dapr.io/v1alpha1
@@ -17,7 +17,10 @@ data:
- name: redisUsername
value: default
- name: redisPassword
value: secret
value: mrtz-password
# secretKeyRef:
# key: redis-password
# name: {{ .Values.statestore.redis }}
- name: actorStateStore
value: "true"
- name: redisDB
+20 -14
View File
@@ -2,10 +2,6 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
statestore:
scope: my-scope
redis: my-redis
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1
@@ -130,14 +126,14 @@ resources: {}
# memory: 128Mi
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
# livenessProbe:
# httpGet:
# path: /
# port: http
# readinessProbe:
# httpGet:
# path: /
# port: http
# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
@@ -148,13 +144,23 @@ autoscaling:
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: {}
volumes:
- name: statestore
configMap:
name: diadash-statestore
# Additional volumeMounts on the output Deployment definition.
volumeMounts: {}
volumeMounts:
- name: statestore
mountPath: /app/components/statestore.yaml
subPath: statestore.yaml
nodeSelector: {}
tolerations: []
affinity: {}
statestore:
scope: mrtz-sorcerer
redis: mrtz-sorcerer-redis
+2 -2
View File
@@ -13,9 +13,9 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: v1.6.0
version: v1.2.4
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "v1.6.0"
appVersion: "v1.2.4"
+1 -1
View File
@@ -12,7 +12,7 @@ image:
# This sets the pull policy for images.
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: v1.6.0
tag: v1.2.4
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets:
- name: gitlab-pull-secret
+1 -1
View File
@@ -3,7 +3,7 @@
# Declare variables to be passed into your templates.
replicaCount: 1
image:
repository: registry.gitlab.com/oceanbox/makai
repository: registry.gitlab.com/oceanbox/makai/makai
tag: v0.1.0
pullPolicy: IfNotPresent
init:
-6
View File
@@ -1,6 +0,0 @@
dependencies:
- name: diagrid-dashboard
repository: file://../diagrid-dashboard
version: 0.1.0
digest: sha256:4fdb3148a2a6439223d7844a3083da2de324dd47e5cb3ac4a5d9c436e6e2c775
generated: "2025-12-16T19:38:21.939708629+01:00"
-5
View File
@@ -8,8 +8,3 @@ version: v1.35.2
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: v1.35.2
dependencies:
- name: diagrid-dashboard
version: "0.1.0"
repository: "file://../diagrid-dashboard"
condition: diagrid-dashboard.enabled
-6
View File
@@ -108,9 +108,3 @@ serviceMonitor:
nodeSelector: {}
tolerations: []
affinity: {}
diagrid-dashboard:
enabled: false
statestore:
scope: sorcerer
redis: sorcerer-redis
+1 -1
View File
@@ -5,7 +5,7 @@
replicaCount: 1
image:
repository: registry
tag: 3
tag: 2
pullPolicy: IfNotPresent
init:
enabled: false
+1 -1
View File
@@ -27,7 +27,7 @@ releases:
- name: argocd-apps
namespace: argocd
chart: argo/argocd-apps
version: 2.0.3
version: 0.0.9
condition: argo.apps.enabled
values:
- ../values/argo/values/apps.yaml.gotmpl
+2 -3
View File
@@ -3,8 +3,7 @@ bases:
repositories:
- name: cert-manager
oci: true
url: 'quay.io/jetstack/charts'
url: 'https://charts.jetstack.io'
commonLabels:
tier: system
@@ -13,7 +12,7 @@ releases:
- name: cert-manager
namespace: cert-manager
chart: cert-manager/cert-manager
version: v1.19.2
version: 1.12.13
condition: cert_manager.enabled
values:
- ../values/cert-manager/values/cert-manager.yaml.gotmpl
-44
View File
@@ -1,44 +0,0 @@
bases:
- ../envs/environments.yaml.gotmpl
repositories:
- name: forgejo
oci: true
url: code.forgejo.org/forgejo-helm
commonLabels:
tier: system
releases:
- name: forgejo
namespace: forgejo
chart: forgejo/forgejo
version: 16.0.0
condition: forgejo.enabled
values:
- ../values/forgejo/values/values.yaml
- ../values/forgejo/values/values-{{ .Environment.Name }}.yaml
postRenderer: ../bin/kustomizer
postRendererArgs:
- ../values/forgejo/kustomize/{{ .Environment.Name }}
missingFileHandler: Info
- name: manifests
namespace: forgejo
chart: manifests
condition: forgejo.enabled
missingFileHandler: Info
values:
- ../values/env.yaml
- ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
- ../values/forgejo/env.yaml.gotmpl
- ../values/forgejo/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
hooks:
- events: [ prepare, cleanup ]
showlogs: true
command: ../bin/helmify
args:
- '{{`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}`}}'
- '{{`{{ .Release.Chart }}`}}'
- '{{`{{ .Environment.Name }}`}}'
- ../values/forgejo/manifests
- manifests
+1 -1
View File
@@ -12,7 +12,7 @@ releases:
- name: ingress-nginx
namespace: ingress-nginx
chart: ingress-nginx/ingress-nginx
version: 4.14.1
version: 4.8.3
condition: nginx.enabled
values:
- ../values/ingress-nginx/values/ingress-nginx.yaml.gotmpl
+1 -1
View File
@@ -15,7 +15,7 @@ releases:
- name: kyverno
namespace: kyverno
chart: kyverno/kyverno
version: 3.6.1
version: 3.5.1
condition: kyverno.enabled
values:
- ../values/kyverno/values/kyverno.yaml.gotmpl
+1 -1
View File
@@ -12,7 +12,7 @@ releases:
- name: mariadb-operator
namespace: mariadb-operator
chart: mariadb-operator/mariadb-operator
version: 25.10.3
version: 25.8.4
condition: mariadb_operator.enabled
values:
- ../values/mariadb-operator/values/mariadb-operator.yaml.gotmpl
+1 -1
View File
@@ -16,7 +16,7 @@ releases:
namespace: {{ .Environment.Name }}-openfga
{{- end }}
chart: openfga/openfga
version: 0.2.50
version: 0.2.45
condition: openfga.enabled
values:
- ../values/openfga/values/values.yaml
@@ -12,7 +12,7 @@ releases:
- name: opentelemetry-collector
namespace: otel
chart: open-telemetry/opentelemetry-collector
version: 0.142.1
version: 0.134.1
condition: otel.enabled
values:
- ../values/opentelemetry-collector/values/values.yaml
+1 -1
View File
@@ -15,7 +15,7 @@ releases:
- name: postgres-operator
namespace: cnpg
chart: cloudnative-pg/cloudnative-pg
version: 0.27.0
version: 0.26.1
condition: postgres_operator.enabled
values:
- ../values/postgres-operator/values/postgres-operator.yaml.gotmpl
+1 -1
View File
@@ -13,7 +13,7 @@ releases:
- name: {{ .Environment.Name }}-rabbitmq
namespace: rabbitmq
chart: bitnami/rabbitmq
version: 13.0.3
version: 12.9.0
condition: rabbitmq.enabled
values:
- ../values/rabbitmq/values/values.yaml
+1 -1
View File
@@ -13,7 +13,7 @@ releases:
- name: slurm-operator
namespace: slinky
chart: slurm-operator/slurm-operator
version: 0.4.1
version: 0.4.0
condition: slurm_operator.enabled
values:
- ../values/slurm-operator/values/slurm-operator.yaml.gotmpl
+42
View File
@@ -0,0 +1,42 @@
# Helmfile stub for Sonatype Nexus.
#
# Defines two releases that are switched on together by the same values key
# (`sonatype-nexus.enabled`):
#   - sonatype-nexus: the upstream chart, post-rendered via ../bin/kustomizer
#   - manifests:      extra manifests turned into a local chart by ../bin/helmify
bases:
  - ../envs/environments.yaml.gotmpl

repositories:
  # NOTE(review): a git+https:// repository URL presumably requires the
  # helm-git plugin wherever helmfile runs - confirm it is installed.
  - name: sonatype-nexus
    url: git+https://github.com/sonatype/helm3-charts@deploy?ref=master

commonLabels:
  tier: system

releases:
  - name: sonatype-nexus
    namespace: sonatype-nexus
    chart: sonatype-nexus/sonatype-nexus
    # TODO(review): no chart `version` is pinned here, so the resolved chart
    # may change between runs - pin one once the stub is filled in.
    condition: sonatype-nexus.enabled
    values:
      - ../values/sonatype-nexus/values/values.yaml
      - ../values/sonatype-nexus/values/values-{{ .Environment.Name }}.yaml.gotmpl
    postRenderer: ../bin/kustomizer
    postRendererArgs:
      - ../values/sonatype-nexus/kustomize/{{ .Environment.Name }}
    missingFileHandler: Info
  - name: manifests
    namespace: sonatype-nexus
    chart: manifests
    # Must be the same key as the chart release above so both are enabled or
    # disabled together.
    condition: sonatype-nexus.enabled
    missingFileHandler: Info
    values:
      - ../values/env.yaml
      - ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
      - ../values/sonatype-nexus/env.yaml.gotmpl
      - ../values/sonatype-nexus/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
    hooks:
      # Runs ../bin/helmify with "build" on prepare and "clean" on cleanup;
      # the inner templates are escaped so they are evaluated per release.
      - events: [ prepare, cleanup ]
        showlogs: true
        command: ../bin/helmify
        args:
          - '{{`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}`}}'
          - '{{`{{ .Release.Chart }}`}}'
          - '{{`{{ .Environment.Name }}`}}'
          - ../values/sonatype-nexus/manifests
          - manifests
+1 -1
View File
@@ -13,7 +13,7 @@ releases:
- name: spegel
namespace: spegel
chart: spegel/spegel
version: 0.6.0
version: 0.5.1
condition: spegel.enabled
values:
- ../values/spegel/values/spegel.yaml.gotmpl
+1 -1
View File
@@ -15,7 +15,7 @@ releases:
- name: velero
namespace: velero
chart: velero/velero
version: 11.3.2
version: 11.1.1
condition: velero.enabled
values:
- ../values/velero/values/velero.yaml.gotmpl
-70
View File
@@ -1,70 +0,0 @@
let
sources = import ./default.nix;
pkgs = import sources.nixpkgs { };
pre-commit = import sources.git-hooks;
globalExcludes = [
"nix/default.nix"
"attic"
"vcluster"
".*vendor"
".*chart/.*"
".*schema.json"
];
in
pre-commit.run {
src = pkgs.nix-gitignore.gitignoreSource [ ] ../.;
# Do not run at pre-commit time
default_stages = [
"pre-push"
];
# TODO(mrtz): Remove when default
package = pkgs.prek;
# Linters From https://github.com/cachix/pre-commit-hooks.nix
hooks = {
nixfmt-rfc-style = {
enable = true;
excludes = globalExcludes;
};
trim-trailing-whitespace.enable = true;
shellcheck = {
enable = true;
excludes = [
"vcluster/"
"attic/"
];
args = [
"-x"
"-o"
"all"
];
};
yamllint = {
enable = true;
excludes = [
"attic/"
"charts/templates/"
"charts/"
"values/"
"vcluster/"
];
settings = {
strict = true;
configData = ''{ extends: default, rules: { document-start: disable, line-length: {max: 300} } }'';
};
};
check-json.enable = true;
renovate-config-validator = {
enable = true;
files = "renovate.json$";
entry = "renovate-config-validator";
};
};
}
+22 -125
View File
@@ -9,15 +9,8 @@
*/
# Generated by npins. Do not modify; will be overwritten regularly
let
# Backwards-compatibly make something that previously didn't take any arguments take some
# The function must return an attrset, and will unfortunately be eagerly evaluated
# Same thing, but it catches eval errors on the default argument so that one may still call it with other arguments
mkFunctor =
fn:
let
e = builtins.tryEval (fn { });
in
(if e.success then e.value else { error = fn { }; }) // { __functor = _self: fn; };
data = builtins.fromJSON (builtins.readFile ./sources.json);
version = data.version;
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295
range =
@@ -28,6 +21,7 @@ let
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269
stringAsChars = f: s: concatStrings (map f (stringToCharacters s));
concatMapStrings = f: list: concatStrings (map f list);
concatStrings = builtins.concatStringsSep "";
# If the environment variable NPINS_OVERRIDE_${name} is set, then use
@@ -54,87 +48,41 @@ let
mkSource =
name: spec:
{
pkgs ? null,
}:
assert spec ? type;
let
# Unify across builtin and pkgs fetchers.
# `fetchGit` requires a wrapper because of slight API differences.
fetchers =
if pkgs == null then
{
inherit (builtins) fetchTarball fetchurl;
# For some fucking reason, fetchGit has a different signature than the other builtin fetchers …
fetchGit = args: (builtins.fetchGit args).outPath;
}
else
{
fetchTarball =
{
url,
sha256,
}:
pkgs.fetchzip {
inherit url sha256;
extension = "tar";
};
inherit (pkgs) fetchurl;
fetchGit =
{
url,
submodules,
rev,
name,
narHash,
}:
pkgs.fetchgit {
inherit url rev name;
fetchSubmodules = submodules;
hash = narHash;
};
};
# Dispatch to the correct code path based on the type
path =
if spec.type == "Git" then
mkGitSource fetchers spec
mkGitSource spec
else if spec.type == "GitRelease" then
mkGitSource fetchers spec
mkGitSource spec
else if spec.type == "PyPi" then
mkPyPiSource fetchers spec
mkPyPiSource spec
else if spec.type == "Channel" then
mkChannelSource fetchers spec
mkChannelSource spec
else if spec.type == "Tarball" then
mkTarballSource fetchers spec
else if spec.type == "Container" then
mkContainerSource pkgs spec
mkTarballSource spec
else
builtins.throw "Unknown source type ${spec.type}";
in
spec // { outPath = mayOverride name path; };
mkGitSource =
{
fetchTarball,
fetchGit,
...
}:
{
repository,
revision,
url ? null,
submodules,
hash,
branch ? null,
...
}:
assert repository ? type;
# At the moment, either it is a plain git repository (which has an url), or it is a GitHub/GitLab repository
# In the latter case, there we will always be an url to the tarball
if url != null && !submodules then
fetchTarball {
builtins.fetchTarball {
inherit url;
sha256 = hash;
sha256 = hash; # FIXME: check nix version & use SRI hashes
}
else
let
@@ -145,8 +93,6 @@ let
"https://github.com/${repository.owner}/${repository.repo}.git"
else if repository.type == "GitLab" then
"${repository.server}/${repository.repo_path}.git"
else if repository.type == "Forgejo" then
"${repository.server}/${repository.owner}/${repository.repo}.git"
else
throw "Unrecognized repository type ${repository.type}";
urlToName =
@@ -161,89 +107,40 @@ let
"${if matched == null then "source" else builtins.head matched}${appendShort}";
name = urlToName url revision;
in
fetchGit {
builtins.fetchGit {
rev = revision;
narHash = hash;
inherit name submodules url;
inherit name;
# hash = hash;
inherit url submodules;
};
mkPyPiSource =
{ fetchurl, ... }:
{
url,
hash,
...
}:
fetchurl {
{ url, hash, ... }:
builtins.fetchurl {
inherit url;
sha256 = hash;
};
mkChannelSource =
{ fetchTarball, ... }:
{
url,
hash,
...
}:
fetchTarball {
{ url, hash, ... }:
builtins.fetchTarball {
inherit url;
sha256 = hash;
};
mkTarballSource =
{ fetchTarball, ... }:
{
url,
locked_url ? url,
hash,
...
}:
fetchTarball {
builtins.fetchTarball {
url = locked_url;
sha256 = hash;
};
mkContainerSource =
pkgs:
{
image_name,
image_tag,
image_digest,
...
}:
if pkgs == null then
builtins.throw "container sources require passing in a Nixpkgs value: https://github.com/andir/npins/blob/master/README.md#using-the-nixpkgs-fetchers"
else
pkgs.dockerTools.pullImage {
imageName = image_name;
imageDigest = image_digest;
finalImageTag = image_tag;
};
in
mkFunctor (
{
input ? ./sources.json,
}:
let
data =
if builtins.isPath input then
# while `readFile` will throw an error anyways if the path doesn't exist,
# we still need to check beforehand because *our* error can be caught but not the one from the builtin
# *piegames sighs*
if builtins.pathExists input then
builtins.fromJSON (builtins.readFile input)
else
throw "Input path ${toString input} does not exist"
else if builtins.isAttrs input then
input
else
throw "Unsupported input type ${builtins.typeOf input}, must be a path or an attrset";
version = data.version;
in
if version == 7 then
builtins.mapAttrs (name: spec: mkFunctor (mkSource name spec)) data.pins
if version == 5 then
builtins.mapAttrs mkSource data.pins
else
throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`"
)
+3 -16
View File
@@ -1,24 +1,11 @@
{
"pins": {
"git-hooks": {
"type": "Git",
"repository": {
"type": "GitHub",
"owner": "cachix",
"repo": "git-hooks.nix"
},
"branch": "master",
"submodules": false,
"revision": "f0927703b7b1c8d97511c4116eb9b4ec6645a0fa",
"url": "https://github.com/cachix/git-hooks.nix/archive/f0927703b7b1c8d97511c4116eb9b4ec6645a0fa.tar.gz",
"hash": "sha256-6MkqajPICgugsuZ92OMoQcgSHnD6sJHwk8AxvMcIgTE="
},
"nixpkgs": {
"type": "Channel",
"name": "nixpkgs-unstable",
"url": "https://releases.nixos.org/nixpkgs/nixpkgs-26.05pre927565.13868c071cc7/nixexprs.tar.xz",
"hash": "sha256-wufp5c0nWh/87f9eK7xy1eZXms5zd4yl6S4SR+LfA08="
"url": "https://releases.nixos.org/nixpkgs/nixpkgs-26.05pre903996.59b6c96beacc/nixexprs.tar.xz",
"hash": "0b0yr9d1xyfwgpaj68bimsbjjbj7yis4whjvkrfdycfnasdf0gf0"
}
},
"version": 7
"version": 5
}
+6 -6
View File
@@ -3,7 +3,7 @@
# Simple script for uploading a base64 encoded image into our database. For
# grafana business image panels.
if [[ $# -ne 2 ]]
if [ $# -ne 2 ]
then
echo "Usage: $0 <image-name> <file>.png"
exit 1
@@ -12,9 +12,9 @@ fi
filename=$1
file=$2
if [[ ! -e "${file}" ]]
if [ ! -e $file ]
then
echo "file ${file} does not exist"
echo "file $file does not exist"
exit 1
fi
@@ -22,9 +22,9 @@ function create_image() {
local filename=$1
local data=$2
cat << EOF
INSERT INTO images VALUES('${filename}', '${data}');
INSERT INTO images VALUES('$filename', '$data');
EOF
}
data=$(base64 -w0 < "${file}")
create_image "${filename}" "${data}"
data=$(cat $file | base64 -w0)
create_image $filename $data
+1
View File
@@ -1,3 +1,4 @@
// -*- mode: jsonc -*-
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
+21 -42
View File
@@ -3,8 +3,7 @@ groups:
rules:
- alert: etcdMembersDown
annotations:
description:
'etcd cluster "{{ $labels.job }}": members are down ({{ $value
description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value
}}).'
summary: etcd cluster members are down.
expr: |-
@@ -21,20 +20,17 @@ groups:
severity: critical
- alert: etcdInsufficientMembers
annotations:
description:
'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
}}).'
summary: etcd cluster has insufficient number of members.
expr:
sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
without (instance) + 1) / 2)
for: 3m
labels:
severity: critical
- alert: etcdNoLeader
annotations:
description:
'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
has no leader.'
summary: etcd cluster has no leader.
expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
@@ -43,14 +39,12 @@ groups:
severity: critical
- alert: etcdHighNumberOfLeaderChanges
annotations:
description:
'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
within the last 15 minutes. Frequent elections may be a sign of insufficient
resources, high network latency, or disruptions by other components and should
be investigated.'
summary: etcd cluster has high number of leader changes.
expr:
increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m])
>= 4
for: 5m
@@ -58,8 +52,7 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
description:
'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of failed grpc requests.
expr: |-
@@ -72,8 +65,7 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
description:
'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of failed grpc requests.
expr: |-
@@ -86,8 +78,7 @@ groups:
severity: critical
- alert: etcdGRPCRequestsSlow
annotations:
description:
'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method
}} method.'
summary: etcd grpc requests are slow
@@ -99,8 +90,7 @@ groups:
severity: critical
- alert: etcdMemberCommunicationSlow
annotations:
description:
'etcd cluster "{{ $labels.job }}": member communication with {{
description: 'etcd cluster "{{ $labels.job }}": member communication with {{
$labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance
}}.'
summary: etcd cluster member communication is slow.
@@ -112,8 +102,7 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedProposals
annotations:
description:
'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
within the last 30 minutes on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of proposal failures.
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
@@ -122,8 +111,7 @@ groups:
severity: warning
- alert: etcdHighFsyncDurations
annotations:
description:
'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
are {{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile fsync durations are too high.
expr: |-
@@ -134,8 +122,7 @@ groups:
severity: warning
- alert: etcdHighFsyncDurations
annotations:
description:
'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
are {{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile fsync durations are too high.
expr: |-
@@ -146,8 +133,7 @@ groups:
severity: critical
- alert: etcdHighCommitDurations
annotations:
description:
'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
{{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile commit durations are too high.
expr: |-
@@ -158,13 +144,11 @@ groups:
severity: warning
- alert: etcdDatabaseQuotaLowSpace
annotations:
description:
'etcd cluster "{{ $labels.job }}": database size exceeds the defined
description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined
quota on etcd instance {{ $labels.instance }}, please defrag or increase the
quota as the writes to etcd will be disabled when it is full.'
summary: etcd cluster database is running full.
expr:
(last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 >
95
for: 10m
@@ -172,31 +156,26 @@ groups:
severity: critical
- alert: etcdExcessiveDatabaseGrowth
annotations:
description:
'etcd cluster "{{ $labels.job }}": Predicting running out of disk
description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk
space in the next four hours, based on write observations within the past
four hours on etcd instance {{ $labels.instance }}, please check as it might
be disruptive.'
summary: etcd cluster database growing very fast.
expr:
predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
> etcd_server_quota_backend_bytes{job=~".*etcd.*"}
for: 10m
labels:
severity: warning
- alert: etcdDatabaseHighFragmentationRatio
annotations:
description:
'etcd cluster "{{ $labels.job }}": database size in use on instance
description: 'etcd cluster "{{ $labels.job }}": database size in use on instance
{{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual
allocated disk space, please run defragmentation (e.g. etcdctl defrag) to
retrieve the unused fragmented disk space.'
runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
summary:
etcd database size in use is less than 50% of the actual allocated
summary: etcd database size in use is less than 50% of the actual allocated
storage.
expr:
(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
/ last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5
and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
for: 10m
+4 -8
View File
@@ -3,13 +3,11 @@ groups:
rules:
- alert: TargetDown
annotations:
description:
'{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
}} targets in {{ $labels.namespace }} namespace are down.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
summary: One or more targets are unreachable.
expr:
100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
BY (cluster, job, namespace, service)) > 10
for: 10m
labels:
@@ -23,8 +21,7 @@ groups:
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
summary:
An alert that should always be firing to certify that Alertmanager
summary: An alert that should always be firing to certify that Alertmanager
is working properly.
expr: vector(1)
labels:
@@ -40,8 +37,7 @@ groups:
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
summary: Info-level alert inhibition.
expr:
ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
expr: ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
"InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
labels:
severity: none
+21 -40
View File
@@ -3,21 +3,18 @@ groups:
rules:
- alert: KubePodCrashLooping
annotations:
description:
'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
}}) is in waiting state (reason: "CrashLoopBackOff").'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
summary: Pod is crash looping.
expr:
max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
job="kube-state-metrics", namespace=~".*"}[5m]) >= 1
for: 15m
labels:
severity: warning
- alert: KubePodNotReady
annotations:
description:
Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes.
@@ -34,8 +31,7 @@ groups:
severity: warning
- alert: KubeDeploymentGenerationMismatch
annotations:
description:
Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
}} does not match, this indicates that the Deployment has failed but has not
been rolled back.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
@@ -49,8 +45,7 @@ groups:
severity: warning
- alert: KubeDeploymentReplicasMismatch
annotations:
description:
Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
summary: Deployment has not matched the expected number of replicas.
@@ -69,8 +64,7 @@ groups:
severity: warning
- alert: KubeDeploymentRolloutStuck
annotations:
description:
Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
}} is not progressing for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
summary: Deployment rollout is not progressing.
@@ -82,8 +76,7 @@ groups:
severity: warning
- alert: KubeStatefulSetReplicasMismatch
annotations:
description:
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
summary: StatefulSet has not matched the expected number of replicas.
@@ -102,8 +95,7 @@ groups:
severity: warning
- alert: KubeStatefulSetGenerationMismatch
annotations:
description:
StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
}} does not match, this indicates that the StatefulSet has failed but has
not been rolled back.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
@@ -117,8 +109,7 @@ groups:
severity: warning
- alert: KubeStatefulSetUpdateNotRolledOut
annotations:
description:
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
has not been rolled out.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
summary: StatefulSet update has not been rolled out.
@@ -145,8 +136,7 @@ groups:
severity: warning
- alert: KubeDaemonSetRolloutStuck
annotations:
description:
DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
finished or progressed for at least 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck.
@@ -179,22 +169,19 @@ groups:
severity: warning
- alert: KubeContainerWaiting
annotations:
description:
pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
{{ $labels.container}} has been in waiting state for longer than 1 hour.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
expr:
sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
namespace=~".*"}) > 0
for: 1h
labels:
severity: warning
- alert: KubeDaemonSetNotScheduled
annotations:
description:
"{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled."
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
summary: DaemonSet pods are not scheduled.
expr: |-
@@ -206,21 +193,18 @@ groups:
severity: warning
- alert: KubeDaemonSetMisScheduled
annotations:
description:
"{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run."
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
summary: DaemonSet pods are misscheduled.
expr:
kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
> 0
for: 15m
labels:
severity: warning
- alert: KubeJobNotCompleted
annotations:
description:
Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
than {{ "43200" | humanizeDuration }} to complete.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
summary: Job did not complete in time
@@ -232,8 +216,7 @@ groups:
severity: warning
- alert: KubeJobFailed
annotations:
description:
Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
Removing failed job after investigation should clear this alert.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
summary: Job failed to complete.
@@ -243,8 +226,7 @@ groups:
severity: warning
- alert: KubeHpaReplicasMismatch
annotations:
description:
HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
has not matched the desired number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
summary: HPA has not matched desired number of replicas.
@@ -267,8 +249,7 @@ groups:
severity: warning
- alert: KubeHpaMaxedOut
annotations:
description:
HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
has been running at max replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
summary: HPA is running at max replicas
+9 -17
View File
@@ -3,8 +3,7 @@ groups:
rules:
- alert: KubeCPUOvercommit
annotations:
description:
Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
summary: Cluster has overcommitted CPU resource requests.
@@ -17,8 +16,7 @@ groups:
severity: warning
- alert: KubeMemoryOvercommit
annotations:
description:
Cluster {{ $labels.cluster }} has overcommitted memory resource
description: Cluster {{ $labels.cluster }} has overcommitted memory resource
requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node
failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
@@ -32,8 +30,7 @@ groups:
severity: warning
- alert: KubeCPUQuotaOvercommit
annotations:
description:
Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests.
@@ -47,8 +44,7 @@ groups:
severity: warning
- alert: KubeMemoryQuotaOvercommit
annotations:
description:
Cluster {{ $labels.cluster }} has overcommitted memory resource
description: Cluster {{ $labels.cluster }} has overcommitted memory resource
requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests.
@@ -62,8 +58,7 @@ groups:
severity: warning
- alert: KubeQuotaAlmostFull
annotations:
description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
summary: Namespace quota is going to be full.
@@ -77,8 +72,7 @@ groups:
severity: info
- alert: KubeQuotaFullyUsed
annotations:
description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
summary: Namespace quota is fully used.
@@ -92,8 +86,7 @@ groups:
severity: info
- alert: KubeQuotaExceeded
annotations:
description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
summary: Namespace quota has exceeded the limits.
@@ -107,10 +100,9 @@ groups:
severity: warning
- alert: CPUThrottlingHigh
annotations:
description:
"{{ $value | humanizePercentage }} throttling of CPU in namespace
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
}}."
}}.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
summary: Processes experience elevated CPU throttling.
expr: |-
+7 -12
View File
@@ -1,10 +1,10 @@
groups:
- name: kubernetes-storage
rules:
- alert: KubePersistentVolumeFillingUp
annotations:
description:
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
@@ -26,8 +26,7 @@ groups:
severity: critical
- alert: KubePersistentVolumeFillingUp
annotations:
description:
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value
| humanizePercentage }} is available.
@@ -52,8 +51,7 @@ groups:
severity: warning
- alert: KubePersistentVolumeInodesFillingUp
annotations:
description:
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
@@ -75,8 +73,7 @@ groups:
severity: critical
- alert: KubePersistentVolumeInodesFillingUp
annotations:
description:
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is expected to run out of inodes within four days. Currently
{{ $value | humanizePercentage }} of its inodes are free.
@@ -101,13 +98,11 @@ groups:
severity: warning
- alert: KubePersistentVolumeErrors
annotations:
description:
The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
description: The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
-}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
summary: PersistentVolume is having issues with provisioning.
expr:
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
> 0
for: 5m
labels:
+28 -55
View File
@@ -3,8 +3,7 @@ groups:
rules:
- alert: NodeFilesystemSpaceFillingUp
annotations:
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
@@ -22,8 +21,7 @@ groups:
severity: warning
- alert: NodeFilesystemSpaceFillingUp
annotations:
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left and is filling up fast.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
@@ -41,8 +39,7 @@ groups:
severity: critical
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
@@ -58,8 +55,7 @@ groups:
severity: warning
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
@@ -75,8 +71,7 @@ groups:
severity: critical
- alert: NodeFilesystemFilesFillingUp
annotations:
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
@@ -94,8 +89,7 @@ groups:
severity: warning
- alert: NodeFilesystemFilesFillingUp
annotations:
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left and is filling up fast.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
@@ -113,8 +107,7 @@ groups:
severity: critical
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
@@ -130,8 +123,7 @@ groups:
severity: warning
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
@@ -147,44 +139,38 @@ groups:
severity: critical
- alert: NodeNetworkReceiveErrs
annotations:
description:
'{{ $labels.instance }} interface {{ $labels.device }} has encountered
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors.
expr:
rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
> 0.01
for: 1h
labels:
severity: warning
- alert: NodeNetworkTransmitErrs
annotations:
description:
'{{ $labels.instance }} interface {{ $labels.device }} has encountered
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors.
expr:
rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
> 0.01
for: 1h
labels:
severity: warning
- alert: NodeHighNumberConntrackEntriesUsed
annotations:
description: "{{ $value | humanizePercentage }} of conntrack entries are used."
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit.
expr:
(node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
> 0.75
labels:
severity: warning
- alert: NodeTextFileCollectorScrapeError
annotations:
description:
Node Exporter text file collector on {{ $labels.instance }} failed
description: Node Exporter text file collector on {{ $labels.instance }} failed
to scrape.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape.
@@ -193,8 +179,7 @@ groups:
severity: warning
- alert: NodeClockSkewDetected
annotations:
description:
Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
Ensure NTP is configured correctly on this host.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
summary: Clock skew detected.
@@ -215,8 +200,7 @@ groups:
severity: warning
- alert: NodeClockNotSynchronising
annotations:
description:
Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
is configured on this host.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
summary: Clock not synchronising.
@@ -229,14 +213,12 @@ groups:
severity: warning
- alert: NodeRAIDDegraded
annotations:
description:
RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
in degraded state due to one or more disks failures. Number of spare drives
is insufficient to fix issue automatically.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
summary: RAID Array is degraded.
expr:
node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
- ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"})
> 0
for: 15m
@@ -244,20 +226,17 @@ groups:
severity: critical
- alert: NodeRAIDDiskFailure
annotations:
description:
At least one device in RAID array at {{ $labels.instance }} failed.
description: At least one device in RAID array at {{ $labels.instance }} failed.
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
summary: Failed device in RAID array.
expr:
node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
> 0
labels:
severity: warning
- alert: NodeFileDescriptorLimit
annotations:
description:
File descriptors limit at {{ $labels.instance }} is currently at
description: File descriptors limit at {{ $labels.instance }} is currently at
{{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
summary: Kernel is predicted to exhaust file descriptors limit soon.
@@ -270,8 +249,7 @@ groups:
severity: warning
- alert: NodeFileDescriptorLimit
annotations:
description:
File descriptors limit at {{ $labels.instance }} is currently at
description: File descriptors limit at {{ $labels.instance }} is currently at
{{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
summary: Kernel is predicted to exhaust file descriptors limit soon.
@@ -288,8 +266,7 @@ groups:
CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
summary: High CPU usage.
expr:
sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
mode!="idle"}[2m]))) * 100 > 90
for: 15m
labels:
@@ -324,8 +301,7 @@ groups:
Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization
summary: Host is running out of memory.
expr:
100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
* 100) > 90
for: 15m
labels:
@@ -337,16 +313,14 @@ groups:
This symptom might indicate disk saturation.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation
summary: Disk IO queue is high.
expr:
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
> 10
for: 30m
labels:
severity: warning
- alert: NodeSystemdServiceFailed
annotations:
description:
Systemd service {{ $labels.name }} has entered failed state at
description: Systemd service {{ $labels.name }} has entered failed state at
{{ $labels.instance }}
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed
summary: Systemd service has entered failed state.
@@ -356,8 +330,7 @@ groups:
severity: warning
- alert: NodeBondingDegraded
annotations:
description:
Bonding interface {{ $labels.master }} on {{ $labels.instance }}
description: Bonding interface {{ $labels.master }} on {{ $labels.instance }}
is in degraded state due to one or more slave failures.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
summary: Bonding interface is degraded
+6 -12
View File
@@ -8,16 +8,14 @@ groups:
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Host high CPU load (instance {{ $labels.instance }})
expr:
(sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
expr: (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
> 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
for: 10m
labels:
severity: critical
- alert: MemoryUtilizationHighWarning
annotations:
dashboard:
https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
description: Node {{ $labels.instance }} has less than 10% available memory.
summary: Node Memory utilization warning
@@ -27,8 +25,7 @@ groups:
severity: critical
- alert: MemoryUtilizationHighCritical
annotations:
dashboard:
https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
description: Node {{ $labels.instance }} has less than 5% available memory.
summary: Node Memory utilization critical
@@ -40,8 +37,7 @@ groups:
annotations:
description: Node {{ $labels.node }} has CPU utilization over 90%.
summary: Node has been in not-ready state for longer than 3 minutes
expr:
(sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
expr: (sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
<= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"}))
> 0
for: 5m
@@ -54,8 +50,7 @@ groups:
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Kubernetes Node memory pressure (instance {{ $labels.instance }})
expr:
kube_node_status_condition{condition="MemoryPressure",status="true"} ==
expr: kube_node_status_condition{condition="MemoryPressure",status="true"} ==
1
for: 2m
labels:
@@ -67,8 +62,7 @@ groups:
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
expr:
(kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
== 1
for: 0m
+4 -8
View File
@@ -3,22 +3,18 @@ groups:
rules:
- alert: VeleroBackupPartialFailures
annotations:
message:
Velero backup {{ $labels.schedule }} has {{$value | humanizePercentage}} partialy
message: Velero backup {{ $labels.schedule }} has {{$value | humanizePercentage}} partialy
failed backups.
expr:
velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
expr: velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
> 0.25
for: 15m
labels:
severity: critical
- alert: VeleroBackupFailures
annotations:
message:
Velero backup {{$labels.schedule}} has {{$value | humanizePercentage}} failed
message: Velero backup {{$labels.schedule}} has {{$value | humanizePercentage}} failed
backups.
expr:
velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
expr: velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
> 0.25
for: 15m
labels:
+6 -12
View File
@@ -3,8 +3,7 @@ groups:
rules:
- alert: X509ExporterReadErrors
annotations:
description:
Over the last 15 minutes, this x509-certificate-exporter instance
description: Over the last 15 minutes, this x509-certificate-exporter instance
has experienced errors reading certificate files or querying the Kubernetes
API. This could be caused by a misconfiguration if triggered when the exporter
starts.
@@ -15,8 +14,7 @@ groups:
severity: warning
- alert: CertificateError
annotations:
description:
Certificate could not be decoded {{if $labels.secret_name }} in
description: Certificate could not be decoded {{if $labels.secret_name }} in
Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at
location "{{ $labels.filepath }}"{{end}}
summary: Certificate cannot be decoded
@@ -26,26 +24,22 @@ groups:
severity: warning
- alert: CertificateRenewal
annotations:
description:
Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
description: Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
summary: Certificate should be renewed
expr:
((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28
for: 15m
labels:
severity: warning
- alert: CertificateExpiration
annotations:
description:
Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
description: Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
summary: Certificate is about to expire
expr:
((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14
for: 15m
labels:
+8 -35
View File
@@ -6,15 +6,11 @@ let
config = { };
overlays = [ ];
};
checks = import ./nix/checks.nix;
in
pkgs.mkShellNoCC {
name = "clstr";
packages =
with pkgs;
[
# dev tools
packages = with pkgs; [
just
npins
@@ -28,40 +24,17 @@ pkgs.mkShellNoCC {
kubelogin
kubelogin-oidc
kubectl-rook-ceph
kubectl-graph
kubectl-klock
graphviz
# other tools activate when needed
# step-cli
# linkerd
# cmctl
# rclone
# velero
# renovate
# other tools
step-cli
linkerd
velero
cmctl
# dapr
dapr-cli
]
++ checks.enabledPackages;
];
# Environment variables
ARGOCD_ENV_CLUSTER_NAME = "hel1";
ARGOCD_ENV_CLUSTER_NAME = "rossby";
HELM_GIT_ACCESS_TOKEN = "glpat-xxx";
shellHook = builtins.concatStringsSep "\n" [
checks.shellHook
];
# Alternative shells
passthru = pkgs.lib.mapAttrs (name: value: pkgs.mkShellNoCC (value // { inherit name; })) {
ci-shell = {
packages = [
pkgs.npins
];
shellHook = ''
export NPINS_DIRECTORY="nix"
'';
};
};
}
-3
View File
@@ -88,8 +88,6 @@ spec:
server: https://kubernetes.default.svc
- namespace: uptime
server: https://kubernetes.default.svc
- namespace: forgejo
server: https://kubernetes.default.svc
sourceRepos:
- https://argoproj.github.io/argo-helm
- https://kubernetes-sigs.github.io/metrics-server/
@@ -125,7 +123,6 @@ spec:
- ghcr.io/slinkyproject/charts/slurm-operator-crds
- ghcr.io/spegel-org/helm-charts
- ghcr.io/dragonflydb/dragonfly-operator/helm/dragonfly-operator
- code.forgejo.org/forgejo-helm
- https://operator.mariadb.com/mariadb-enterprise-operator
- https://operator.mariadb.com
- https://ot-container-kit.github.io/helm-charts
@@ -73,7 +73,7 @@
"connString": "Username=postgres;Password=secret;Host=localhost;Port=5432;Database=app;Pooling=true;",
"sorcerer" : "https://sorcerer.data.oceanbox.io",
"allowedOrigins": [
"https://maps.oceanbox.io"
"https://maps.oceanbox.io",
],
"appName": "atlantis",
"appEnv": "prod",
@@ -79,9 +79,3 @@ resources:
requests:
cpu: 500m
memory: 1Gi
diagrid-dashboard:
enabled: false
statestore:
scope: prod-atlantis
redis: prod-atlantis-redis
@@ -1,6 +1,6 @@
replicaCount: 1
image:
tag: 503ccbb2-debug
tag: faa0a853-debug
podAnnotations:
dapr.io/app-id: "staging-atlantis"
env:
@@ -26,12 +26,12 @@ env:
- name: DB_USER
valueFrom:
secretKeyRef:
name: staging-atlantis-db-app
name: staging-atlantis-db-superuser
key: username
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: staging-atlantis-db-app
name: staging-atlantis-db-superuser
key: password
- name: DAPR_API_TOKEN
valueFrom:
@@ -116,6 +116,9 @@ cluster:
db: prod-atlantis-db
namespace: prod-atlantis
resources:
limits:
cpu: 250m
memory: 1Gi
requests:
cpu: 250m
memory: 1Gi
@@ -130,8 +133,3 @@ redis:
resources:
cpu: 150m
memory: 256Mi
diagrid-dashboard:
enabled: false
statestore:
scope: staging-atlantis
redis: staging-atlantis-redis
@@ -10,4 +10,3 @@ podAnnotations:
dapr.io/sidecar-memory-request: "50Mi"
# dapr.io/sidecar-cpu-limit: "100m"
# dapr.io/sidecar-memory-limit: "1000Mi"
-4
View File
@@ -1,8 +1,4 @@
codex:
enabled: false
{{- if eq .Environment.Name "prod" }}
autosync: false
{{- else }}
autosync: true
{{- end }}
env: {{ .Environment.Name }}
@@ -1,67 +0,0 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting": "Error"
}
},
"Debug": {
"LogLevel": {
"Default": "Debug"
}
},
"Console": {
"IncludeScopes": true,
"LogLevel": {
"Default": "Debug"
}
},
"OIDC": {
"issuer": "https://auth.oceanbox.io/realms/oceanbox",
"authorization_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/auth",
"token_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/token",
"jwks_uri": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/certs",
"userinfo_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/userinfo",
"end_session_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/logout",
"device_authorization_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/auth/device",
"clientId": "atlantis",
"clientSecret": "",
"scopes": [
"openid",
"email",
"offline_access",
"profile"
],
"audiences": [
"atlantis"
]
},
"SSO": {
"cookieDomain": ".oceanbox.io",
"cookieName": ".obx.prod",
"ttl": 12.0,
"signedOutRedirectUri": "https://maps.oceanbox.io/",
"realm": "atlantis",
"environment": "prod",
"keyStore": {
"kind": "azure",
"uri": "https://atlantis.blob.core.windows.net",
"key": "dataprotection-keys"
},
"keyVault": {
"kind": "azure",
"uri": "https://atlantisvault.vault.azure.net",
"key": "dataencryption-keys"
}
},
"plainAuthUsers": [
{
"username": "admin",
"password": "en-to-tre-fire",
"groups": [ "/oceanbox" ],
"roles": [ "admin" ]
}
]
}
@@ -1,66 +0,0 @@
- op: add
path: /spec/template/spec/containers/0/envFrom
value:
- secretRef:
name: azure-keyvault
- op: add
path: /spec/template/spec/containers/0/env
value:
- name: APP_NAMESPACE
value: prod-atlantis
- name: DOTNET_ENVIRONMENT
value: Production
- name: ASPNETCORE_ENVIRONMENT
value: Production
- name: DB_HOST
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: host
- name: DB_PORT
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: port
- name: DB_DATABASE
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: dbname
- name: DB_USER
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: user
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: password
- name: FGA_URL
value: http://prod-openfga.openfga.svc.cluster.local:8080
- name: FGA_DB_HOST
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: host
- name: FGA_DB_PORT
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: port
- name: FGA_DB_DATABASE
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: dbname
- name: FGA_DB_USER
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: user
- name: FGA_DB_PASSWORD
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: password
@@ -1,15 +0,0 @@
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- name: prod-codex-appsettings
files:
- appsettings.json
patches:
- target:
group: apps
version: v1
kind: Deployment
path: deployment_patch.yaml
resources:
- ../base
@@ -62,3 +62,4 @@
secretKeyRef:
name: staging-openfga-db-app
key: password
name: azure-keyvault
@@ -1,14 +0,0 @@
{{- if .Values.clusterConfig.cilium.enabled }}
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: codex-allow-external-services
namespace: {{ .Release.Namespace }}
spec:
egress:
- toFQDNs:
- matchName: cacerts.digicert.com
endpointSelector:
matchLabels: {}
{{- end }}
-27
View File
@@ -1,27 +0,0 @@
replicaCount: 1
ingress:
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
nginx.ingress.kubernetes.io/backend-protocol: HTTP
nginx.ingress.kubernetes.io/ssl-redirect: "true"
oceanbox.io/expose: internal
hosts:
- host: codex.adm.oceanbox.io
paths:
- path: /
pathType: ImplementationSpecific
tls:
- hosts:
- codex.adm.oceanbox.io
secretName: prod-codex-tls
volumes:
- name: appsettings
configMap:
name: prod-codex-appsettings
volumeMounts:
- name: appsettings
mountPath: "/app/appsettings.json"
readOnly: true
subPath: appsettings.json
+2 -2
View File
@@ -1,4 +1,6 @@
replicaCount: 1
image:
tag: 70878e14-debug
ingress:
enabled: true
className: "nginx"
@@ -30,5 +32,3 @@ volumeMounts:
mountPath: "/app/appsettings.Development.json"
readOnly: true
subPath: appsettings.json
image:
tag: 2e1165d9-debug
+2 -7
View File
@@ -34,8 +34,6 @@ spec:
name: http
protocol: TCP
env:
- name: BASE_URL
value: https://fornix.hel1.oceanbox.io
- name: DRUPAL_DATABASE_HOST
value: drupal-db-rw
- name: DRUPAL_DATABASE_PREFIX
@@ -65,15 +63,12 @@ spec:
- mountPath: /opt/drupal/web/sites
name: drupal
subPath: sites
- mountPath: /opt/drupal/patches
name: drupal
subPath: modules/patches
- mountPath: /opt/drupal/composer.json
name: drupal
subPath: modules/composer.json
- mountPath: /opt/drupal/composer.lock
- mountPath: /opt/drupal/patches
name: drupal
subPath: modules/composer.lock
subPath: modules/patches
volumes:
- name: drupal
persistentVolumeClaim:
+1 -2
View File
@@ -2,14 +2,13 @@ clusterConfig:
manifests: https://gitlab.com/oceanbox/manifests.git
env: "prod"
distro: "talos"
domain: "adm.hel1.obx"
domain: "hel1.oceanbox.io"
initca: ""
apiserver: ""
apiserverip: ""
etcd_nodes: ["10.0.1.2, 10.0.1.4, 10.0.1.5"]
k8s_nodes: [""]
cluster: "hel1"
ingress_clusterissuer: "ca-issuer"
ingress_nodes: ["controlplane-1, controlplane-2, controlplane-3"]
ingress_replica_count: 3
ingress_loadbalancer: true
-3
View File
@@ -1,3 +0,0 @@
forgejo:
enabled: true
backup: true
-5
View File
@@ -1,5 +0,0 @@
forgejo:
enabled: false
backup: false
autosync: {{ if eq .Environment.Name "prod" }} false {{ else }} true {{ end }}
env: {{ .Environment.Name }}
-33
View File
@@ -1,33 +0,0 @@
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
name: hel-store
namespace: forgejo
spec:
retentionPolicy: "7d"
configuration:
destinationPath: s3://obx-cnpg/hel1/forgejo-db
endpointURL: https://hel1.your-objectstorage.com
s3Credentials:
accessKeyId:
name: cnpg-s3
key: access_key
secretAccessKey:
name: cnpg-s3
key: access_secret
wal:
compression: snappy
---
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
name: forgejo-db
namespace: forgejo
spec:
schedule: "0 0 1 * * *"
backupOwnerReference: self
cluster:
name: forgejo-db
method: plugin
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
-11
View File
@@ -1,11 +0,0 @@
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: forgejo-db
namespace: forgejo
spec:
instances: 1
imageName: ghcr.io/cloudnative-pg/postgresql:18-minimal-trixie
storage:
resizeInUseVolumes: true
size: 10Gi
-25
View File
@@ -1,25 +0,0 @@
apiVersion: dragonflydb.io/v1alpha1
kind: Dragonfly
metadata:
name: dragonfly-forgejo
namespace: forgejo
spec:
replicas: 1
resources:
requests:
cpu: 150m
memory: 256Mi
limits:
memory: 256Mi
args:
- --dbfilename=dump # Static filename prevents disk exhaustion
- --maxmemory=$(MAX_MEMORY)Mi # Cap memory at the container memory limit (MAX_MEMORY = limits.memory expressed in Mi)
- --proactor_threads=1 # Pin to a single proactor thread (no CPU core auto-detection)
- --cluster_mode=emulated
- --logtostderr
env:
- name: MAX_MEMORY
valueFrom:
resourceFieldRef:
resource: limits.memory
divisor: 1Mi
-11
View File
@@ -1,11 +0,0 @@
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: forgejo-db-monitor
namespace: forgejo
spec:
selector:
matchLabels:
cnpg.io/cluster: forgejo-db
podMetricsEndpoints:
- port: metrics
-134
View File
@@ -1,134 +0,0 @@
replicaCount: 1
image:
registry: code.forgejo.org
repository: forgejo/forgejo
resources:
requests:
cpu: 200m
memory: 512Mi
gitea:
metrics:
enabled: false
serviceMonitor:
enabled: false
config:
APP_NAME: 'Forgejo: With a cup of tea.'
cache:
ENABLED: true
ADAPTER: redis
HOST: redis://dragonfly-forgejo.forgejo.svc:6379/0
session:
PROVIDER: redis
PROVIDER_CONFIG: redis://dragonfly-forgejo.forgejo.svc:6379/1
queue:
TYPE: redis
CONN_STR: redis://dragonfly-forgejo.forgejo.svc:6379/2
storage:
STORAGE_TYPE: minio
MINIO_ENDPOINT: hel1.your-objectstorage.com
MINIO_USE_SSL: true
MINIO_LOCATION: hel1
MINIO_BUCKET: obx-forgejo
security:
INSTALL_LOCK: true
service:
DISABLE_REGISTRATION: false
server:
APP_DATA_PATH: "/data/gitea"
DOMAIN: git.svc.hel1.obx
ROOT_URL: https://git.svc.hel1.obx
SSH_DOMAIN: git.svc.hel1.obx
SSH_PORT: 22
SSH_SERVER_USE_PROXY_PROTOCOL: true
LANDING_PAGE: "explore"
oauth2_client:
ENABLE_AUTO_REGISTRATION: true
UPDATE_AVATAR: true
ACCOUNT_LINKING: auto
database:
DB_TYPE: postgres
MAX_OPEN_CONNS: 90
openid:
ENABLE_OPENID_SIGNIN: false
ENABLE_OPENID_SIGNUP: false
oauth:
- name: 'Oceanbox'
provider: 'openidConnect'
existingSecret: forgejo-oauth-oceanbox
autoDiscoverUrl: 'https://login.microsoftonline.com/3f737008-e9a0-4485-9d27-40329d288089/.well-known/openid-configuration'
scopes: 'openid profile email groups'
groupClaimName: 'groups'
adminGroup: '/oceanbox/devel'
restrictedGroup: ''
additionalConfigFromEnvs:
- name: FORGEJO__STORAGE__MINIO_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: forgejo-s3
key: access_key
- name: FORGEJO__STORAGE__MINIO_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: forgejo-s3
key: secret_key
- name: FORGEJO__DATABASE__PASSWD
valueFrom:
secretKeyRef:
name: forgejo-db-app
key: password
- name: FORGEJO__DATABASE__NAME
valueFrom:
secretKeyRef:
name: forgejo-db-app
key: dbname
- name: FORGEJO__DATABASE__USER
valueFrom:
secretKeyRef:
name: forgejo-db-app
key: user
- name: FORGEJO__DATABASE__HOST
valueFrom:
secretKeyRef:
name: forgejo-db-app
key: host
- name: FORGEJO__DATABASE__DB_TYPE
value: postgres
ingress:
enabled: true
className: nginx
annotations:
cert-manager.io/cluster-issuer: ca-issuer
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "0"
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
nginx.ingress.kubernetes.io/whitelist-source-range: 10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,172.19.255.0/24,100.64.0.0/12
hosts:
- host: git.svc.hel1.obx
paths:
- backend:
service:
name: forgejo-http
port:
number: 3000
path: /
pathType: ImplementationSpecific
tls:
- secretName: forgejo-tls
hosts:
- git.svc.hel1.obx
# service:
# ssh:
# type: LoadBalancer
# port: 22
# annotations:
# load-balancer.hetzner.cloud/location: hel1
# load-balancer.hetzner.cloud/uses-proxyprotocol: 'false'
persistence:
enabled: true
size: 1Gi
+3 -3
View File
@@ -2,7 +2,7 @@ apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
cert-manager.io/cluster-issuer: ca-issuer
cert-manager.io/cluster-issuer: letsencrypt-production
nginx.ingress.kubernetes.io/backend-protocol: HTTP
nginx.ingress.kubernetes.io/cors-allow-headers: Content-Type, x-gatus-cache
nginx.ingress.kubernetes.io/enable-cors: "true"
@@ -15,7 +15,7 @@ metadata:
spec:
ingressClassName: nginx
rules:
- host: uptime.adm.hel1.obx
- host: uptime.hel1.oceanbox.io
http:
paths:
- backend:
@@ -27,5 +27,5 @@ spec:
pathType: ImplementationSpecific
tls:
- hosts:
- uptime.adm.hel1.obx
- uptime.hel1.oceanbox.io
secretName: gatus-tls
+1 -1
View File
@@ -29,7 +29,7 @@ config:
security:
oidc:
issuer-url: "https://login.microsoftonline.com/3f737008-e9a0-4485-9d27-40329d288089/v2.0"
redirect-url: "https://uptime.adm.hel1.obx/authorization-code/callback"
redirect-url: "https://uptime.hel1.oceanbox.io/authorization-code/callback"
client-id: "38af7e63-b097-4f1b-8dac-3d0030dceaf9"
client-secret: "${CLIENT_SECRET}"
scopes: ["openid"]
+3 -17
View File
@@ -103,10 +103,11 @@ configMaps:
"Moritz.Jorg@oceanbox.io",
"simen.kirkvik@oceanbox.io",
"stig.r.jensen@oceanbox.io",
"ole.tytlandsvik@oceanbox.io",
],
"group:devops": [
"radovan.bast@oceanbox.io",
"ole.tytlandsvik@oceanbox.io",
"ismael.abujadur@oceanbox.io",
],
"group:oceanographer": [
"frank.gaardsted@oceanbox.io",
@@ -114,7 +115,6 @@ configMaps:
"helge.avlesen@oceanbox.io",
"isa.rosso@oceanbox.io",
"jonathan.lilly@oceanbox.io",
"faith.iha@oceanbox.io",
],
"group:manager": [
"svenn.hanssen@oceanbox.io",
@@ -124,9 +124,7 @@ configMaps:
"pal.herstad@oceanbox.io",
],
"group:dev": [],
"group:intern": [
"haavahak@stud.ntnu.no",
],
"group:intern": [],
},
// tagOwners in tailscale is an association between a TAG and the people allowed to set this TAG on a server.
// This is documented [here](https://tailscale.com/kb/1068/acl-tags#defining-a-tag)
@@ -218,15 +216,6 @@ configMaps:
"dc.hel1.net:443",
]
},
{
"action": "accept",
"src": [
"group:intern",
],
"dst": [
"tag:hpc:22,80,443",
]
},
{
"action": "accept",
"src": [ "*" ],
@@ -247,7 +236,6 @@ configMaps:
{ "action": "accept", "src": [ "svenn.hanssen@oceanbox.io" ], "dst": [ "svenn.hanssen@oceanbox.io:*" ] },
{ "action": "accept", "src": [ "hilde.iversen@oceanbox.io" ], "dst": [ "hilde.iversen@oceanbox.io:*" ] },
{ "action": "accept", "src": [ "pal.herstad@oceanbox.io" ], "dst": [ "pal.herstad@oceanbox.io:*" ] },
{ "action": "accept", "src": [ "faith.iha@oceanbox.io" ], "dst": [ "faith.iha@oceanbox.io:*" ] },
// s/"\([^"]*\)"/{ "action": "accept", "src": [ "\1" ], "dst": [ "\1:*" ] },
]
}
@@ -260,7 +248,6 @@ configMaps:
{ "name": "maps.beta.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "maps.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "atlantis.beta.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "codex.adm.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "codex.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "auth.oceanbox.io", "type": "A", "value": "10.255.241.11" },
@@ -343,7 +330,6 @@ configMaps:
{ "name": "mrtz-sorcerer.dev.vtn.obx", "type": "A", "value": "172.16.239.221" },
{ "name": "mrtz-plume.ekman.oceanbox.io", "type": "A", "value": "10.255.241.99" },
{ "name": "simkir-atlantis.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "simkir-maps.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "simkir-codex.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "simkir-sorcerer.ekman.oceanbox.io", "type": "A", "value": "10.255.241.99" },
{ "name": "simkir-sorcerer.dev.vtn.obx", "type": "A", "value": "172.16.239.221" },
@@ -1,11 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: custom-error-pages
namespace: ingress-nginx
data:
"404": |
<p>Could not find the page you were looking for.</p>
"503": |
<p>Oceanbox is having a woopsie dupsie. uwu</p>
<img alt="Insert oceanbox Text TV maintanence window here."/>
@@ -113,41 +113,8 @@ controller:
defaultBackend:
enabled: true
image:
registry: registry.k8s.io
image: ingress-nginx/custom-error-pages
## for backwards compatibility consider setting the full image url via the repository value below
## use *either* current default registry/image or repository format or installing chart by providing the values.yaml will fail
## repository:
tag: "v1.2.5"
pullPolicy: IfNotPresent
# nobody user -> uid 65534
runAsUser: 65534
runAsNonRoot: true
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
tolerations:
- key: unschedulable
operator: Exists
effect: NoSchedule
## Additional volumeMounts to the default backend container.
# - name: copy-portal-skins
# mountPath: /var/lib/lemonldap-ng/portal/skins
extraVolumeMounts:
- name: custom-error-pages
mountPath: /www
## Additional volumes to the default backend pod.
# - name: copy-portal-skins
# emptyDir: {}
extraVolumes:
- name: custom-error-pages
configMap:
name: custom-error-pages
items:
- key: "404"
path: "404.html"
- key: "503"
path: "503.html"
+1 -1
View File
@@ -1,6 +1,6 @@
replicaCount: 1
image:
tag: "ce1340fa-debug"
tag: "01ac1d47-debug"
env:
- name: APP_VERSION
value: "0.0.0-staging"
+1 -1
View File
@@ -6,5 +6,5 @@
"appVersion": "1.0.0",
"cacheDir": "/data/archives/cache/prod",
"otelCollector": "http://10.255.241.12:4317",
"sentryUrl": "https://2b68ecf0c4d02e6cc9433c371321ac9d@o4509530141622272.ingest.de.sentry.io/4509910315237456"
"sentryUrl": "https://2b68ecf0c4d02e6cc9433c371321ac9d@o4509530141622272.ingest.de.sentry.io/4509910315237456",
}
-6
View File
@@ -35,10 +35,4 @@ spec:
prune: true
selfHeal: false
{{- end }}
ignoreDifferences:
- group: apps
kind: StatefulSet
name: rabbitmq
jsonPointers:
- /spec/template/metadata/annotations
{{- end }}
-9
View File
@@ -30,12 +30,3 @@ ingress:
secrets: []
selfSigned: false
tls: true
resources:
limits:
ephemeral-storage: 1Gi
memory: 512Mi
requests:
cpu: 250m
ephemeral-storage: 50Mi
memory: 512Mi
@@ -36,12 +36,3 @@ ingress:
secrets: []
selfSigned: false
tls: true
resources:
limits:
ephemeral-storage: 1Gi
memory: 512Mi
requests:
cpu: 250m
ephemeral-storage: 50Mi
memory: 512Mi
+3
View File
@@ -0,0 +1,3 @@
sonatype-nexus:
enabled: true
autosync: true

Some files were not shown because too many files have changed in this diff Show More