Compare commits

...

195 Commits

Author SHA1 Message Date
mrtz 75a5fb5c83 devel: Useful kubectl plugins 2026-01-20 18:17:07 +01:00
mrtz 0eb60de429 chore(ingress-nginx): Bump to latest 2026-01-20 17:16:54 +01:00
mrtz 9d034eea25 chore(ingress-nginx): Bump to latest v4.9 2026-01-20 17:03:03 +01:00
mrtz 6104114404 fix(rabbitmq): Also bump resources for prod 2026-01-20 15:45:13 +01:00
mrtz 1e7126fedb ci: sorcerer 2026-01-20 14:42:27 +00:00
mrtz 0d12907f4c ci: atlantis 2026-01-20 14:42:20 +00:00
mrtz 297e5efd88 fix(rabbitmq): Set proper memory requests 2026-01-20 15:38:30 +01:00
mrtz d09eabd2bd chore(rabbitmq): Bump to latest v13 2026-01-20 15:29:51 +01:00
mrtz 351bb41f80 chore(rabbitmq): Bump to latest v12 2026-01-20 15:15:43 +01:00
mrtz fd773bff9f fix(rabbitmq): Prepare for upgrades 2026-01-20 15:08:19 +01:00
simkir 196d3ed0eb atlantis: Remove limits in staging 2026-01-20 09:04:40 +01:00
mrtz f617f29a50 ci: atlantis 2026-01-19 19:07:24 +00:00
mrtz 0bc45748cf fix(spegel): Typo 2026-01-19 19:27:45 +01:00
mrtz fdbdb138e1 fix(spegel): Double the escape, double the fun 2026-01-19 19:26:55 +01:00
mrtz b2ed367b2a fix(spegel): Whitelist gitlab 2026-01-19 19:25:41 +01:00
stigrj 22cb7bddb6 ci: atlantis 2026-01-19 16:15:47 +00:00
simkir fe1c3db4b2 ci: codex 2026-01-19 15:08:56 +00:00
Radovan Bast 830c44644d ci: makai 2026-01-19 15:03:04 +00:00
simkir 5825a4bbc2 ci: atlantis 2026-01-19 14:50:46 +00:00
simkir a7b3310a10 codex: Remove manual production tag 2026-01-19 15:49:56 +01:00
Radovan Bast ecfa74dddd ci: makai 2026-01-19 14:35:06 +00:00
juselius 8a931d7c03 ci: codex 2026-01-19 13:43:06 +00:00
juselius c7b099cff2 ci: sorcerer 2026-01-19 13:43:00 +00:00
juselius 24276410c1 ci: atlantis 2026-01-19 13:42:53 +00:00
mrtz 5493008cb6 chore(spegel): Bump to 0.6.0 2026-01-18 18:19:45 +01:00
juselius a788539d33 ci: codex 2026-01-16 19:06:46 +00:00
juselius 3e06946d04 Merge branch 'automated/npins-update-20260116' into 'main'
chore: update npins dependencies

See merge request oceanbox/manifests!67
2026-01-16 20:03:00 +01:00
mrtz 29a51653f3 chore: update npins dependencies
Automated update of Nix dependencies via npins.

    Updated packages:
    +      "hash": "sha256-wufp5c0nWh/87f9eK7xy1eZXms5zd4yl6S4SR+LfA08="
2026-01-16 15:00:16 +00:00
mrtz 23b43c9b41 chore(forgejo): Bump to 14.0 2026-01-16 07:03:19 +01:00
mrtz 53ac321316 ci: codex 2026-01-15 17:01:05 +00:00
mrtz c5d42f2266 ci: sorcerer 2026-01-15 17:00:59 +00:00
mrtz a8bbe28137 ci: atlantis 2026-01-15 17:00:50 +00:00
Radovan Bast ed9dd67040 ci: makai 2026-01-15 11:22:39 +00:00
Radovan Bast ef13e1f980 ci: makai 2026-01-15 08:23:47 +00:00
simkir 5d3f57e518 ci: codex 2026-01-15 07:36:32 +00:00
simkir 97ed914338 Enable auto-sync for codex-staging 2026-01-15 08:33:50 +01:00
mrtz aa0ee6ad37 fix: Naming 2026-01-15 00:10:48 +01:00
mrtz 7afc34dbf8 fix(forgejo): HEll 2026-01-15 00:08:08 +01:00
mrtz c77e11f0d2 fix(forgejo): It's getting late us -> eu 2026-01-14 23:58:02 +01:00
mrtz 78892df3fc fix(forgejo): Location matters... 2026-01-14 23:54:52 +01:00
mrtz c3b1cab416 fix(forgejo): Non fully-qualified 2026-01-14 23:52:35 +01:00
mrtz 7227f07b71 fix(forgejo): Remove duplicate storage type 2026-01-14 23:51:54 +01:00
mrtz 683c7f36c3 fix(forgejo): App Path 2026-01-14 23:17:10 +01:00
mrtz 98812a6a3b fix(forgejo): Try us-east 2026-01-14 19:19:34 +01:00
mrtz 8f990cff54 fix(forgejo): Set DB type 2026-01-14 18:49:39 +01:00
mrtz a2678efd78 fix(forgejo): Remove ssl 2026-01-14 18:47:10 +01:00
mrtz cdbacbd34c fix(forgejo): Remove ssl 2026-01-14 18:39:22 +01:00
mrtz 20ca29d5ec fix(forgejo): Hetzner on :443 2026-01-14 18:35:26 +01:00
mrtz 9c42fd665d fix(forgejo): Set storagetype 2026-01-14 18:31:15 +01:00
mrtz 7468b902ce fix(foregjo): Change minio endpoint 2026-01-14 18:29:05 +01:00
simkir 62578486ce ci: sorcerer 2026-01-14 15:55:48 +00:00
simkir 6b17805a42 ci: codex 2026-01-14 15:55:41 +00:00
simkir e35b81b356 ci: atlantis 2026-01-14 15:55:33 +00:00
mrtz 10758b334b chore(forgejo): Bump to 15.1.0 2026-01-14 10:23:46 +01:00
mrtz 8f61e63f29 fix(forgejo): rm location for minio 2026-01-13 16:35:47 +01:00
mrtz 621598dee3 fix(forgejo): Move endpoint to envs 2026-01-13 16:30:46 +01:00
mrtz 0689bd47f2 fix(forgejo): Remove port 2026-01-13 16:23:13 +01:00
mrtz 006efc31c2 fix(forgejo): Use SSL 2026-01-13 16:20:13 +01:00
mrtz 9d45101ed9 fix(forgejo): Valid minio endpoint 2026-01-13 16:15:18 +01:00
mrtz d630bdebef fix(forgejo): Type minioc -> minio 2026-01-13 16:12:06 +01:00
mrtz 8182141bc1 fix(forgejo): Add s3 for packages 2026-01-13 16:08:25 +01:00
Radovan Bast dc67fa2271 ci: makai 2026-01-13 13:16:11 +00:00
simkir 37ea2ad85c ci: atlantis 2026-01-13 12:37:37 +00:00
Radovan Bast 6a5da41480 ci: makai 2026-01-13 12:23:36 +00:00
Radovan Bast cd25aa8a1a ci: makai 2026-01-13 11:56:21 +00:00
Radovan Bast 05a3a69976 ci: makai 2026-01-12 17:03:01 +00:00
simkir 0697a4da10 ci: codex 2026-01-12 16:43:47 +00:00
simkir 8d5443e126 Bump codex 0.0.1-beta.2 -> 0.0.1-beta.3 2026-01-12 17:42:25 +01:00
simkir b32e0643fb ci: codex 2026-01-12 13:48:28 +00:00
mrtz af7f4c8116 ci: codex 2026-01-12 13:44:58 +00:00
mrtz 37bb29b36a ci: atlantis 2026-01-12 13:44:49 +00:00
simkir af04b27c10 Bump codex 0.0.1-beta.1 -> 0.0.1-beta.2 2026-01-12 14:36:23 +01:00
Radovan Bast a42010546f ci: makai 2026-01-12 11:35:36 +00:00
mrtz 7034d20e39 fix(velero): Working version of kubectl 2026-01-12 10:16:32 +01:00
mrtz dbdfcb4f21 fix(velero): Bump plugin and remove legacy kubectl 2026-01-12 10:02:02 +01:00
mrtz 22148fb162 fix(velero): Bump to 11.3.2 2026-01-12 09:58:48 +01:00
mrtz 3086214bac ci: atlantis 2026-01-11 14:27:07 +00:00
mrtz e6c99a8567 chore: Bump nixpkgs 2026-01-11 14:05:38 +01:00
mrtz fa9d45fbb7 fix(forgejo): Remove LB 2026-01-09 15:53:38 +01:00
mrtz 72eb20fb5b fix(forgejo): Change realm 2026-01-09 15:20:17 +01:00
mrtz eb141a7efe fix(forgejo): Add OIDC login 2026-01-09 15:04:38 +01:00
mrtz 773550df56 fix(forgejo): Add back whitelist 2026-01-09 14:34:53 +01:00
mrtz a93173066d fix(forgejo): Remove ssh via LB for now 2026-01-09 14:33:26 +01:00
simkir b39ed6cc54 fix(codex): Set port for fga url 2026-01-09 14:32:23 +01:00
mrtz 685328685b fix(forgejo): Use secrets for DB 2026-01-09 14:28:59 +01:00
simkir 40beab6e4f fix(codex): Remove codex.oceanbox.io from ing hosts 2026-01-09 14:21:39 +01:00
mrtz 46c890c6c3 fix(forgjo): Remove comments in dragonfly 2026-01-09 14:20:53 +01:00
simkir aaa7cf4a6e fix(codex): Rm codex.oceanbox.io from ing tls hosts 2026-01-09 14:19:44 +01:00
simkir 55d385ea6a fix(codex): Set correct openfga url 2026-01-09 14:19:44 +01:00
mrtz 80ebe7c278 fix(forgejo): Limit cpu count dragonfly 2026-01-09 14:19:01 +01:00
mrtz cf5b0273c2 fix(forgejo): Increase dragonfly RAM 2026-01-09 14:13:51 +01:00
mrtz c8ec4161aa fix(forgejo): Move to sys and 2026-01-09 13:55:32 +01:00
simkir 59580b5d29 fix(nginx): Move error page to 503 2026-01-08 14:42:33 +01:00
simkir ddc8c7b253 Add simkir-maps.dev.oceanbox.io to ts dns 2026-01-08 14:32:19 +01:00
simkir 36f0f11ef6 Add codex.adm.oceanbox.io to ts dns 2026-01-08 14:32:02 +01:00
simkir ea1a0a2eb5 Add custom 404.html to nginx default backend 2026-01-08 14:25:22 +01:00
simkir ffb572e762 fix(nginx): Set default backend to custom-error-pages 2026-01-08 14:10:26 +01:00
Radovan Bast f46ca7d2be ci: makai 2026-01-08 11:29:28 +00:00
Radovan Bast 2cd14292d2 ci: makai 2026-01-08 09:48:30 +00:00
mrtz 9470c73e92 fix(forgejo): Correct s3 url 2026-01-08 08:50:33 +01:00
mrtz 922e2fd0ea feat: Add forgejo 2026-01-07 23:22:08 +01:00
Radovan Bast 8c2f6d53c9 ci: makai 2026-01-07 14:22:11 +00:00
simkir 7041b91c45 fix(codex): Add codex.adm.oceanbox.io ing. path 2026-01-07 15:02:20 +01:00
simkir a1c3f766b5 fix(codex): Rename prod ing tls secret to prod 2026-01-07 14:55:50 +01:00
simkir d5e6d86f4b Bump codex 0.0.0-alpha.1 -> 0.0.1-beta.1 2026-01-07 14:33:06 +01:00
simkir 608fae0bf1 ci: codex 2026-01-07 13:29:09 +00:00
simkir d3fd3b7c5b fix(codex): Mount correct cm 2026-01-07 14:20:54 +01:00
simkir 556756d0a0 Rename codex prod appsettings file 2026-01-07 14:18:53 +01:00
simkir d242c23ae3 Rename prod codex cilium network policy 2026-01-07 14:16:14 +01:00
simkir 3255430a3b Add prod codex 2026-01-07 14:10:49 +01:00
Radovan Bast 7594dfe93d ci: makai 2026-01-07 11:28:39 +00:00
mrtz 616a1915f2 fix(atlantis): Staging should use app instead of superuser secret 2026-01-07 11:24:26 +01:00
Radovan Bast de6963de12 ci: makai 2026-01-06 10:34:18 +00:00
juselius 5d8a4056e3 Merge branch 'main' of gitlab.com:oceanbox/manifests 2026-01-06 10:52:31 +01:00
juselius 9c9c87bf2f fix: add intern to headscale acl 2026-01-06 10:52:26 +01:00
Radovan Bast 89a54a995d ci: makai 2026-01-06 09:12:43 +00:00
juselius 707c37b9f1 fix: add faith to headscale acl 2026-01-05 15:45:47 +01:00
Radovan Bast 14ae0e358b ci: makai 2026-01-05 14:44:07 +00:00
simkir ef82ce7bc5 ci: sorcerer 2026-01-05 14:04:34 +00:00
simkir d4d9d9a3b6 ci: atlantis 2026-01-05 14:04:27 +00:00
mrtz f55fd396fc ci: sorcerer 2026-01-05 13:23:48 +00:00
mrtz 91e98e3949 ci: atlantis 2026-01-05 13:23:41 +00:00
mrtz 40eb429c17 Merge branch 'renovate/opentelemetry-collector-0.x' into 'main'
Update Helm release opentelemetry-collector to v0.142.1

See merge request oceanbox/manifests!58
2026-01-05 12:27:26 +01:00
mrtz 432a73a4ba Merge branch 'renovate/kyverno-3.x' into 'main'
Update Helm release kyverno to v3.6.1

See merge request oceanbox/manifests!40
2026-01-05 10:19:45 +01:00
mrtz de4ab27a2d Merge branch 'renovate/argocd-apps-2.x' into 'main'
Update Helm release argocd-apps to v2

See merge request oceanbox/manifests!63
2026-01-05 10:18:15 +01:00
mrtz def3f19dff fix(makai): Correct path for new container reg 2026-01-05 09:05:16 +01:00
Radovan Bast 3336c9782c ci: makai 2026-01-05 07:39:46 +00:00
Radovan Bast b943caef06 ci: makai 2026-01-04 17:56:42 +00:00
mrtz 83a3cece0b Merge branch 'renovate/registry-3.x' into 'main'
Update registry Docker tag to v3

See merge request oceanbox/manifests!66
2026-01-04 11:29:24 +01:00
mrtz 2155c4c654 Merge branch 'renovate/openfga-0.x' into 'main'
Update Helm release openfga to v0.2.50

See merge request oceanbox/manifests!54
2026-01-04 11:02:48 +01:00
mrtz 13e44a495f Merge branch 'renovate/slurm-operator-0.x' into 'main'
Update slurm-operator Docker tag to v0.4.1

See merge request oceanbox/manifests!55
2026-01-04 11:02:03 +01:00
mrtz 923f2b81b9 Merge branch 'renovate/cloudnative-pg-0.x' into 'main'
Update Helm release cloudnative-pg to v0.27.0

See merge request oceanbox/manifests!56
2026-01-04 10:58:21 +01:00
mrtz fad034ca44 Merge branch 'renovate/mariadb-operator-25.x' into 'main'
Update Helm release mariadb-operator to v25.10.3

See merge request oceanbox/manifests!57
2026-01-04 10:54:03 +01:00
Renovate Bot 31d1918b86 Update registry Docker tag to v3 2026-01-04 08:59:02 +00:00
Renovate Bot 34181f92b1 Update Helm release argocd-apps to v2 2026-01-04 08:58:55 +00:00
Renovate Bot 1d8b1bebcd Update Helm release opentelemetry-collector to v0.142.1 2026-01-04 08:58:42 +00:00
Renovate Bot 91fba971e2 Update Helm release mariadb-operator to v25.10.3 2026-01-04 08:58:39 +00:00
Renovate Bot 4bb68c68a8 Update Helm release kyverno to v3.6.1 2026-01-04 08:58:33 +00:00
Renovate Bot 4fe9cfee86 Update Helm release cloudnative-pg to v0.27.0 2026-01-04 08:58:17 +00:00
Renovate Bot c580b22ff5 Update slurm-operator Docker tag to v0.4.1 2026-01-04 08:58:10 +00:00
Renovate Bot 988ba5a4c2 Update Helm release openfga to v0.2.50 2026-01-04 08:58:07 +00:00
mrtz e9e72da86a fix(headscale): Add Ole 2026-01-02 17:02:02 +01:00
juselius a1c1022465 fix: fix ca issuer 2025-12-30 14:37:26 +01:00
mrtz 4de318d814 fix(hel1): Default cluster-ca 2025-12-30 14:14:52 +01:00
mrtz 7402bad7a4 fix(hel1): Add adm to default url 2025-12-30 13:59:48 +01:00
mrtz 113a582649 fix(hel1): Update base url 2025-12-30 13:54:55 +01:00
mrtz 73b8b11088 Merge branch 'automated/npins-update-20251230' into 'main'
chore: update npins dependencies

See merge request oceanbox/manifests!52
2025-12-30 10:40:41 +01:00
mrtz f6854b72c8 chore: update npins dependencies
Automated update of Nix dependencies via npins.

    Updated packages:
    +      "hash": "18hsj84ndffq8dz2nh7mv2xib113lxc83spkg3csgzw0agpmkris"
2025-12-30 09:39:02 +00:00
mrtz bb1078b0f2 fix: Disable old ci 2025-12-30 10:35:19 +01:00
mrtz 983fa68f6a chore: Add ci 2025-12-30 10:34:27 +01:00
mrtz 9876d5bec5 ci(nix): Add CI shell 2025-12-30 10:25:44 +01:00
mrtz b6af70c8ca fix(umami): Disable telemetry 2025-12-29 13:34:11 +01:00
mrtz 957526a6bc fix(rules/bootstrap): Format yaml 2025-12-29 13:23:04 +01:00
mrtz f81a4b2732 treewide: Format with shellcheck, jsonlint and yamllint 2025-12-29 12:41:13 +01:00
mrtz d7e4fb43cb fix(cert-manager): Bump to latest release 2025-12-29 11:10:12 +01:00
mrtz e94ed8155e fix(cert-manager): Switch to oci registry 2025-12-29 11:01:52 +01:00
juselius c8a0a98167 fix: update gatus to adm.hel1.obx 2025-12-28 14:36:33 +01:00
mrtz 9cddd9b404 ci: sorcerer 2025-12-22 12:21:24 +00:00
mrtz 3df44cd4b2 ci: atlantis 2025-12-22 12:21:18 +00:00
Radovan Bast 53ac794bd6 ci: makai 2025-12-22 08:35:04 +00:00
Radovan Bast f1a382c76c ci: makai 2025-12-19 21:01:38 +00:00
Radovan Bast 7a7459db10 ci: makai 2025-12-19 20:49:01 +00:00
Radovan Bast ed3515c752 ci: makai 2025-12-19 15:48:31 +00:00
juselius 19457af158 ci: fornix 2025-12-19 15:20:13 +00:00
Radovan Bast e455612874 ci: makai 2025-12-19 14:44:18 +00:00
Radovan Bast df757cf361 ci: makai 2025-12-19 14:27:49 +00:00
juselius eb8f6e83ca ci: fornix 2025-12-19 10:55:33 +00:00
juselius 1668c8db54 ci: fornix 2025-12-19 10:53:07 +00:00
juselius d739c3d1b1 ci: fornix 2025-12-18 14:42:21 +00:00
juselius 10393587b2 ci: fornix 2025-12-18 09:12:46 +00:00
Radovan Bast 64e5b26352 ci: makai 2025-12-18 09:00:22 +00:00
juselius 49ad715025 fix: argh!!! 2025-12-18 09:51:17 +01:00
juselius ee6f7e1d56 fix: argh... 2025-12-18 09:46:59 +01:00
juselius 468eaeed88 Merge branch 'main' of gitlab.com:oceanbox/manifests 2025-12-18 09:45:42 +01:00
juselius 257a55fab7 fix: add composer.lock to persistent drupal 2025-12-18 09:43:35 +01:00
juselius 6fb44f6ba4 ci: fornix 2025-12-18 08:40:27 +00:00
juselius b456dbc0ff fix: add BASE_URL env to drupal deployment 2025-12-18 09:16:35 +01:00
Radovan Bast c415754e46 ci: makai 2025-12-18 07:46:03 +00:00
Radovan Bast 2688f381ef ci: makai 2025-12-17 14:49:42 +00:00
juselius 10c6708bd4 ci: fornix 2025-12-17 13:58:32 +00:00
juselius a07e19b22c fix: disable diagrid dashboard 2025-12-17 14:36:16 +01:00
juselius 2e9dc96ded fix: disable /data path kustomization in sorcerer 2025-12-17 14:13:18 +01:00
juselius 0348b1d46f fix: fix diagrid dasboard statestore config 2025-12-17 14:06:55 +01:00
juselius 22383f1d88 fix: update helmfile container 2025-12-17 13:52:51 +01:00
juselius e2641b18b6 Merge branch 'main' of gitlab.com:oceanbox/manifests 2025-12-17 13:50:20 +01:00
juselius 86240fc085 fix: enable probes on diagrid-dashboard 2025-12-17 13:50:12 +01:00
Radovan Bast 799b6c2858 ci: makai 2025-12-17 11:46:38 +00:00
Radovan Bast daa5b60c43 ci: makai 2025-12-17 09:58:16 +00:00
juselius ca0a228660 feat: enable diagrid dashboard for staging sorcerer 2025-12-17 10:54:42 +01:00
juselius 621945dbf2 Merge remote-tracking branch 'origin/diadash' 2025-12-17 10:46:53 +01:00
Radovan Bast 847c70b547 ci: makai 2025-12-17 08:59:00 +00:00
juselius 40a04b72ae ci: fornix 2025-12-17 08:40:11 +00:00
juselius 457a260d0e ci: fornix 2025-12-17 07:51:12 +00:00
Radovan Bast 49b2992a41 ci: makai 2025-12-16 19:57:00 +00:00
juselius 605581fc40 feat: add diagrid workflow dashboard subchart sorcerer and atlantis 2025-12-16 19:55:59 +01:00
140 changed files with 3382 additions and 2424 deletions
+1
View File
@@ -1,6 +1,7 @@
#!/usr/bin/env bash
# the shebang is ignored, but nice for editors
watch_file nix/sources.json
watch_file nix/checks.nix
# Load .env file if it exists
dotenv_if_exists
+1
View File
@@ -1,6 +1,7 @@
*.tgz
_*/
.direnv/
.env
.pre-commit-config.yaml
_*.yaml
backup/
+50 -42
View File
@@ -1,46 +1,54 @@
image:
name: alpine/helm:latest
entrypoint: [ "/bin/bash", "-c" ]
# yaml-language-server: $schema=https://gitlab.com/gitlab-org/gitlab/-/raw/master/app/assets/javascripts/editor/schema/ci.json
default:
tags:
- nix
stages:
- release
include:
- project: oceanbox/gitlab-ci
ref: v4.5
file: template/Base.gitlab-ci.yml
# stages:
# - release
release:
stage: release
rules:
- if: '$CI_COMMIT_BRANCH =~ /^main/'
when: always
- when: never
script:
- |
cd $CI_PROJECT_DIR
for i in $(git show --pretty="" --name-only | grep '^charts/.*/Chart.yaml' | cut -d/ -f2); do
pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
if [ ! -z $pack ]; then
chart=$(basename $pack)
curl --request POST \
--user gitlab-ci-token:$CI_JOB_TOKEN \
--form "chart=@${chart}" \
"${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
fi
done
# image:
# name: alpine/helm:latest
# entrypoint: ["/bin/bash", "-c"]
rebuild:
stage: release
rules:
- when: manual
allow_failure: true
script:
- |
cd $CI_PROJECT_DIR
for i in $(find ./charts -maxdepth 2 -name Chart.yaml | cut -d/ -f3); do
pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
if [ ! -z $pack ]; then
chart=$(basename $pack)
curl --request POST \
--user gitlab-ci-token:$CI_JOB_TOKEN \
--form "chart=@${chart}" \
"${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
fi
done
# release:
# stage: release
# rules:
# - if: "$CI_COMMIT_BRANCH =~ /^main/"
# when: always
# - when: never
# script:
# - |
# cd $CI_PROJECT_DIR
# for i in $(git show --pretty="" --name-only | grep '^charts/.*/Chart.yaml' | cut -d/ -f2); do
# pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
# if [ ! -z $pack ]; then
# chart=$(basename $pack)
# curl --request POST \
# --user gitlab-ci-token:$CI_JOB_TOKEN \
# --form "chart=@${chart}" \
# "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
# fi
# done
# rebuild:
# stage: release
# rules:
# - when: manual
# allow_failure: true
# script:
# - |
# cd $CI_PROJECT_DIR
# for i in $(find ./charts -maxdepth 2 -name Chart.yaml | cut -d/ -f3); do
# pack=$(helm package ./charts/$i | sed 's/Success.*: \(.*\)/\1/')
# if [ ! -z $pack ]; then
# chart=$(basename $pack)
# curl --request POST \
# --user gitlab-ci-token:$CI_JOB_TOKEN \
# --form "chart=@${chart}" \
# "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/helm/api/stable/charts"
# fi
# done
+19 -12
View File
@@ -6,23 +6,32 @@ let
values = lib.apps.appValues {
inherit env;
base = ../values/atlantis;
extraValues = {};
extraValues = { };
};
kustomize = r:
kustomize =
r:
if r.kind == "Deployment" then
lib.attrsets.recursiveUpdate r {
spec.template.spec.containers =
builtins.map (x:
x // {
spec.template.spec.containers = builtins.map (
x:
x
// {
livenessProbe.httpGet.path = "/healthz";
readinessProble.httpGet.path = "/healthz";
env = x.env ++ [ { name = "INERNAL_PORT"; value = 8000; } ];
}) r.spec.template.spec.containers;
env = x.env ++ [
{
name = "INERNAL_PORT";
value = 8000;
}
];
}
) r.spec.template.spec.containers;
}
else if r.kind == "Service" then
{}
else r;
{ }
else
r;
in
{
options.apps.atlantis = lib.apps.appOptions {
@@ -34,9 +43,7 @@ in
hostname = lib.mkOption {
type = lib.types.str;
default = if env == "prod"
then "maps.oceanbox.io"
else "atlantis.beta.oceanbox.io";
default = if env == "prod" then "maps.oceanbox.io" else "atlantis.beta.oceanbox.io";
description = "Revision";
};
};
+7 -9
View File
@@ -6,17 +6,15 @@ let
values = lib.apps.appValues {
inherit env;
base = ../values/openfga;
extraValues = {};
extraValues = { };
};
kustomize = r:
if r.kind == "Job" then
lib.attrsets.recursiveUpdate r { spec.backoffLimit = 2; }
else r;
kustomize =
r: if r.kind == "Job" then lib.attrsets.recursiveUpdate r { spec.backoffLimit = 2; } else r;
in
{
options.apps.openfga = lib.apps.appOptions {};
{
options.apps.openfga = lib.apps.appOptions { };
config = lib.apps.appConfig cfg "${env}-openfga" {
helm.releases."${env}-openfga" = {
@@ -30,10 +28,10 @@ in
transformer = rs: builtins.map (x: kustomize x) rs;
};
annotations = {};
annotations = { };
resources = {
services.poop.spec = {
};
};
};
}
}
-3
View File
@@ -1,7 +1,4 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
+7 -7
View File
@@ -2,16 +2,16 @@
server="root@fs1-0"
path="/vol/brick0/nfs0/k1/pv-oceanbox-dex"
dest="$server:$path"
dest="${server}:${path}"
index=$(basename dist/assets/index-*.js)
ssh $server -- rm $path/static/js/*.js
scp dist/assets/*.js $dest/static/js/
ssh "${server}" -- rm "${path}"/static/js/*.js
scp dist/assets/*.js "${dest}"/static/js/
sed -r "s/@index@/$index/" ./dex/templates/login.html > login.html.$$
scp ./dex/templates/* $dest/templates/
scp ./dex/static/*.* $dest/static/
scp login.html.$$ $dest/templates/login.html
sed -r "s/@index@/${index}/" ./dex/templates/login.html > login.html.$$
scp ./dex/templates/* "${dest}"/templates/
scp ./dex/static/*.* "${dest}"/static/
scp login.html.$$ "${dest}"/templates/login.html
rm login.html.$$
ssh admin@k1-0.itpartner.intern -- kubectl rollout restart -n oceanbox deployment/dex
+18 -17
View File
@@ -1,4 +1,5 @@
#!/usr/bin/env bash
# shellcheck disable=SC2034 # Unused variables left for readability
helmfile () {
@@ -10,30 +11,30 @@ bases:
- ../envs/environments.yaml.gotmpl
commonLabels:
tier: $tier
tier: ${tier}
releases:
- name: $name
namespace: {{ .Environment.Name }}-$name
chart: ../charts/$name
condition: $name.enabled
- name: ${name}
namespace: {{ .Environment.Name }}-${name}
chart: ../charts/${name}
condition: ${name}.enabled
values:
- ../values/$name/values/values.yaml.gotmpl
- ../values/$name/values/values-{{ .Environment.Name }}.yaml
- ../values/${name}/values/values.yaml.gotmpl
- ../values/${name}/values/values-{{ .Environment.Name }}.yaml
postRenderer: ../bin/kustomizer
postRendererArgs:
- ../values/$name/kustomize/{{ .Environment.Name }}
- ../values/${name}/kustomize/{{ .Environment.Name }}
missingFileHandler: Info
- name: manifests
namespace: {{ .Environment.Name }}-$name
namespace: {{ .Environment.Name }}-${name}
chart: manifests
condition: $name.enabled
condition: ${name}.enabled
missingFileHandler: Info
values:
- ../values/env.yaml
- ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
- ../values/$name/env.yaml.gotmpl
- ../values/$name/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
- ../values/${name}/env.yaml.gotmpl
- ../values/${name}/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
hooks:
- events: [ prepare, cleanup ]
showlogs: true
@@ -42,7 +43,7 @@ releases:
- '{{\`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}\`}}'
- '{{\`{{ .Release.Chart }}\`}}'
- '{{\`{{ .Environment.Name }}\`}}'
- ../values/$name/manifests
- ../values/${name}/manifests
- manifests
EOF
}
@@ -59,10 +60,10 @@ done
name=$1
tier=$2
if [ -n "$ns" ]; then
namespace="namespace: {{ .Environment.Name }}-$name"
if [[ -n "${ns}" ]]; then
namespace="namespace: {{ .Environment.Name }}-${name}"
else
namespace="namespace: $name"
namespace="namespace: ${name}"
fi
helmfile $1 $2
helmfile "$1" "$2"
+13 -14
View File
@@ -4,39 +4,38 @@ set -o pipefail
cmd=$1
chart=$2
env=$3
manifests=${4:-manifests}
outdir=${5:-_manifests}
build() {
mkdir -p $outdir/templates
echo "Creating $outdir/templates"
mkdir -p "${outdir}"/templates
echo "Creating ${outdir}/templates"
echo "generating $outdir/Chart.yaml" 1>&2
echo "generating ${outdir}/Chart.yaml" 1>&2
cat <<EOF > $outdir/Chart.yaml
cat <<EOF > "${outdir}"/Chart.yaml
apiVersion: v1
appVersion: "1.0"
# description: A Helm chart for Kubernetes
name: $chart
name: ${chart}
version: 0.1.0
EOF
if [ -d $manifests ]; then
cp -r $manifests/* $outdir/templates
elif [ -f $manifests ]; then
cp $manifests $outdir/templates
if [[ -d "${manifests}" ]]; then
cp -r "${manifests}"/* "${outdir}"/templates
elif [[ -f "${manifests}" ]]; then
cp "${manifests}" "${outdir}"/templates
fi
}
clean() {
echo "cleaning $outdir" 1>&2
rm -rf $outdir
echo "cleaning ${outdir}" 1>&2
rm -rf "${outdir}"
}
case "$cmd" in
case "${cmd}" in
"build" ) build ;;
"clean" ) clean ;;
* ) echo "unsupported command: $cmd" 1>&2; exit 1 ;;
* ) echo "unsupported command: ${cmd}" 1>&2; exit 1 ;;
esac
+5 -5
View File
@@ -1,13 +1,13 @@
#!/usr/bin/env bash
[ $# != 1 ] && exit 1
[[ $# != 1 ]] && exit 1
dir=$1
base=$dir/../base
base=${dir}/../base
if [ -f $base/kustomization.yaml -a -f $dir/kustomization.yaml ]; then
cat > $base/_manifest.yaml
kubectl kustomize $dir
if [[ -f "${base}"/kustomization.yaml ]] && [[ -f "${dir}"/kustomization.yaml ]]; then
cat > "${base}"/_manifest.yaml
kubectl kustomize "${dir}"
else
cat
fi
+8 -8
View File
@@ -3,16 +3,16 @@ kind: ClusterRole
metadata:
name: argocd-cluster-admin
rules:
- apiGroups:
- '*'
- apiGroups:
- "*"
resources:
- '*'
- "*"
verbs:
- '*'
- nonResourceURLs:
- '*'
- "*"
- nonResourceURLs:
- "*"
verbs:
- '*'
- "*"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
@@ -23,7 +23,7 @@ roleRef:
kind: ClusterRole
name: argocd-cluster-admin
subjects:
- kind: ServiceAccount
- kind: ServiceAccount
name: argocd-cluster-admin
namespace: kube-system
---
-2
View File
@@ -6,5 +6,3 @@ metadata:
name: cluster-admin-token
namespace: kube-system
type: kubernetes.io/service-account-token
-2
View File
@@ -10,5 +10,3 @@ metadata:
name: cluster-ekman
namespace: argocd
type: Opaque
+2 -2
View File
@@ -3,5 +3,5 @@
img=registry.gitlab.com/oceanbox/manifests/helm-kustomize-cmp
tag=${1:-latest}
docker build -t $img:$tag .
docker push $img:$tag
docker build -t "${img}":"${tag}" .
docker push "${img}":"${tag}"
+16 -15
View File
@@ -1,14 +1,15 @@
#!/bin/sh
# shellcheck disable=SC2154
export HOME=/plugin
env > /tmp/$ARGOCD_APP_NAME.env
env > /tmp/"${ARGOCD_APP_NAME}".env
echo "$ARGOCD_APP_PARAMETERS" | jq '.[] | select(.name == "helm-parameters") | .map' | yq -P -oy > parameters.yaml
cp parameters.yaml /tmp/$ARGOCD_APP_NAME-parameters.yaml
echo "${ARGOCD_APP_PARAMETERS}" | jq '.[] | select(.name == "helm-parameters") | .map' | yq -P -oy > parameters.yaml
cp parameters.yaml /tmp/"${ARGOCD_APP_NAME}"-parameters.yaml
if [ -n "$PARAM_CHART" -a "$PARAM_CHART" != "." ]; then
CHART=$PARAM_CHART
if [ -n "${PARAM_CHART}" ] && [ "${PARAM_CHART}" != "." ]; then
CHART=${PARAM_CHART}
elif [ -d chart ]; then
CHART=chart
elif [ -f chart ]; then
@@ -18,19 +19,19 @@ else
fi
[ -f chart/values.yaml ] && VALUES="-f chart/values.yaml"
[ -f values-chart.yaml ] && VALUES="$VALUES -f values-chart.yaml"
[ -f values.yaml ] && VALUES="$VALUES -f values.yaml"
[ -f values-$PARAM_ENV.yaml ] && VALUES="$VALUES -f values-$PARAM_ENV.yaml"
VALUES="$VALUES -f parameters.yaml"
[ -f values-chart.yaml ] && VALUES="${VALUES} -f values-chart.yaml"
[ -f values.yaml ] && VALUES="${VALUES} -f values.yaml"
[ -f values-"${PARAM_ENV}".yaml ] && VALUES="${VALUES} -f values-${PARAM_ENV}.yaml"
VALUES="${VALUES} -f parameters.yaml"
helm dependency update $CHART >/tmp/$ARGOCD_APP_NAME-helm-dependency-build.out
helm dependency update "${CHART}" >/tmp/"${ARGOCD_APP_NAME}"-helm-dependency-build.out
mkdir -p base
echo "helm template -n $ARGOCD_APP_NAMESPACE $PARAM_FLAGS $VALUES $ARGOCD_APP_NAME $CHART" > /tmp/$ARGOCD_APP_NAME-helm.sh
helm template -n $ARGOCD_APP_NAMESPACE $PARAM_FLAGS $VALUES $ARGOCD_APP_NAME $CHART > ./base/_manifest.yaml
echo "helm template -n ${ARGOCD_APP_NAMESPACE} ${PARAM_FLAGS} ${VALUES} ${ARGOCD_APP_NAME} ${CHART}" > /tmp/"${ARGOCD_APP_NAME}"-helm.sh
helm template -n "${ARGOCD_APP_NAMESPACE}" "${PARAM_FLAGS}" "${VALUES}" "${ARGOCD_APP_NAME}" "${CHART}" > ./base/_manifest.yaml
cp ./base/_manifest.yaml /tmp/$ARGOCD_APP_NAME-manifest.yaml
cp ./base/_manifest.yaml /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
[ -d "$PARAM_ENV" ] && kubectl kustomize $PARAM_ENV > /tmp/$ARGOCD_APP_NAME-manifest.yaml
[ -d "${PARAM_ENV}" ] && kubectl kustomize "${PARAM_ENV}" > /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
cat /tmp/$ARGOCD_APP_NAME-manifest.yaml
cat /tmp/"${ARGOCD_APP_NAME}"-manifest.yaml
+1 -1
View File
@@ -18,7 +18,7 @@ EOF
exit 0
fi
yq e -o=p $VALUES | jq --slurp --raw-input '
yq e -o=p "${VALUES}" | jq --slurp --raw-input '
[{
name: "helm-parameters",
title: "Helm Parameters",
@@ -1,8 +1,9 @@
#!/bin/sh
# shellcheck disable=SC2154
export HOME=/plugin
helm repo add --username argocd-helm --password "$OCEANBOX_HELM_ACCESS_TOKEN" oceanbox \
helm repo add --username argocd-helm --password "${OCEANBOX_HELM_ACCESS_TOKEN}" oceanbox \
https://gitlab.com/api/v4/projects/54396343/packages/helm/stable
helm repo add bitnami https://charts.bitnami.com/bitnami
+3 -3
View File
@@ -4,9 +4,9 @@ export HOME=/plugin
helm repo update oceanbox
if [ -n "$PARAM_CHART" -a "$PARAM_CHART" != "." ]; then
helm show values $PARAM_CHART > values-chart.yaml
if [ -n "${PARAM_CHART}" ] && [ "${PARAM_CHART}" != "." ]; then
helm show values "${PARAM_CHART}" > values-chart.yaml
elif [ -f chart ]; then
CHART=$(cat chart)
helm show values $CHART > values-chart.yaml
helm show values "${CHART}" > values-chart.yaml
fi
+3 -4
View File
@@ -9,7 +9,7 @@ spec:
init:
# Init always happens immediately before generate, but its output is not treated as manifests.
# This is a good place to, for example, download chart dependencies.
command: [ /bin/sh ]
command: [/bin/sh]
args:
- /plugin/init.sh
# The generate command runs in the Application source directory each time manifests are generated. Standard output
@@ -17,7 +17,7 @@ spec:
# To write log messages from the command, write them to stderr, it will always be displayed.
# Error output will be sent to the UI, so avoid printing sensitive information (such as secrets).
generate:
command: [ /bin/sh ]
command: [/bin/sh]
args:
- /plugin/generate.sh
@@ -66,7 +66,7 @@ spec:
itemType: string
collectionType: string
string: ""
# All the fields above besides "string" apply to both the array and map type parameter announcements.
# All the fields above besides 'string' apply to both the array and map type parameter announcements.
# - name: array-param
# # This field communicates the parameter's default value to the UI. Setting this field is optional.
# array: [default, items]
@@ -84,4 +84,3 @@ spec:
# If set to `true` then the plugin receives repository files with original file mode. Dangerous since the repository
# might have executable files. Set to true only if you trust the CMP plugin authors.
preserveFileMode: false
+1 -1
View File
@@ -1,4 +1,4 @@
FROM ghcr.io/helmfile/helmfile:v1.0.0
FROM ghcr.io/helmfile/helmfile:v1.1.9
RUN mkdir -p /home/argocd/cmp-server/config/
COPY plugin.yaml /home/argocd/cmp-server/config/
+2 -2
View File
@@ -3,5 +3,5 @@
img=registry.gitlab.com/oceanbox/manifests/helmfile-cmp
tag=${1:-latest}
docker build -t $img:$tag .
docker push $img:$tag
docker build -t "${img}":"${tag}" .
docker push "${img}":"${tag}"
+4 -3
View File
@@ -1,4 +1,5 @@
#!/bin/sh
# shellcheck disable=SC2154
# NOTE: Ensure errors are part of exitcode
# set -o pipefail
@@ -10,7 +11,7 @@ export HELM_CONFIG_HOME=/tmp/helm/config
export HELMFILE_CACHE_HOME=/tmp/helmfile/cache
export HELMFILE_TEMPDIR=/tmp/helmfile/tmp
test -n ARGOCD_ENV_HELMFILE_ENVIRONMENT && export HELMFILE_ENVIRONMENT=$ARGOCD_ENV_HELMFILE_ENVIRONMENT
test -n ARGOCD_ENV_HELMFILE_FILE_PATH && export HELMFILE_FILE_PATH=$ARGOCD_ENV_HELMFILE_FILE_PATH
test -n ARGOCD_ENV_HELMFILE_ENVIRONMENT && export HELMFILE_ENVIRONMENT="${ARGOCD_ENV_HELMFILE_ENVIRONMENT}"
test -n ARGOCD_ENV_HELMFILE_FILE_PATH && export HELMFILE_FILE_PATH="${ARGOCD_ENV_HELMFILE_FILE_PATH}"
helmfile -n "$ARGOCD_APP_NAMESPACE" $ARGS template --include-crds -q
helmfile -n "${ARGOCD_APP_NAMESPACE}" "${ARGS}" template -q --include-crds
+1 -1
View File
@@ -4,7 +4,7 @@ metadata:
name: helmfile-cmp
spec:
generate:
command: [ "/bin/sh" ]
command: ["/bin/sh"]
args:
- /plugin/generate.sh
lockRepo: false
@@ -422,4 +422,3 @@ spec:
path: ca.crt
optional: true
secretName: argocd-repo-server-tls
+1 -1
View File
@@ -13,7 +13,7 @@ kubectl --context ekman apply -f cluster-admin-token.yaml
# kubectl --context oceanbox apply -f _cluster-ekman.yaml
token=$(kubectl --context ekman get secret -n kube-system argocd-manager-token -o yaml | grep ' token:' | cut -d' ' -f4 | base64 -d)
sed "s/@token@/$token/" cluster-ekman.yaml > _cluster-ekman.yaml
sed "s/@token@/${token}/" cluster-ekman.yaml > _cluster-ekman.yaml
echo "configure argocd ekman-cluster..."
cat _cluster-ekman.yaml
kubectl --context oceanbox apply -f _cluster-ekman.yaml
-1
View File
@@ -13,4 +13,3 @@ stringData:
name: staging-vcluster
server: https://staging-vcluster.staging-vcluster
type: Opaque
+4 -4
View File
@@ -32,12 +32,12 @@ projects:
additionalAnnotations: {}
description: sys components project
sourceRepos:
- '*'
- "*"
destinations:
- namespace: '*'
- namespace: "*"
server: https://kubernetes.default.svc
clusterResourceWhitelist:
- group: '*'
kind: '*'
- group: "*"
kind: "*"
orphanedResources:
warn: false
+5
View File
@@ -8,3 +8,8 @@ version: v1.35.2
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: v1.35.2
dependencies:
- name: diagrid-dashboard
version: "0.1.0"
repository: "file://../diagrid-dashboard"
condition: diagrid-dashboard.enabled
+3
View File
@@ -116,3 +116,6 @@ serviceMonitor:
nodeSelector: {}
tolerations: []
affinity: {}
diagrid-dashboard:
enabled: false
@@ -59,12 +59,18 @@ spec:
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumeMounts }}
volumeMounts:
- name: statestore
mountPath: /app/components/statestore.yaml
subPath: statestore.yaml
{{- with .Values.volumeMounts }}
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.volumes }}
volumes:
- name: statestore
configMap:
name: {{ include "diagrid-dashboard.fullname" . }}-statestore
{{- with .Values.volumes }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.nodeSelector }}
@@ -1,7 +1,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: diadash-statestore
name: {{ include "diagrid-dashboard.fullname" . }}-statestore
data:
statestore.yaml: |
apiVersion: dapr.io/v1alpha1
@@ -17,10 +17,7 @@ data:
- name: redisUsername
value: default
- name: redisPassword
value: mrtz-password
# secretKeyRef:
# key: redis-password
# name: {{ .Values.statestore.redis }}
value: secret
- name: actorStateStore
value: "true"
- name: redisDB
+14 -20
View File
@@ -2,6 +2,10 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
statestore:
scope: my-scope
redis: my-redis
# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/
replicaCount: 1
@@ -126,14 +130,14 @@ resources: {}
# memory: 128Mi
# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
# livenessProbe:
# httpGet:
# path: /
# port: http
# readinessProbe:
# httpGet:
# path: /
# port: http
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
@@ -144,23 +148,13 @@ autoscaling:
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes:
- name: statestore
configMap:
name: diadash-statestore
volumes: {}
# Additional volumeMounts on the output Deployment definition.
volumeMounts:
- name: statestore
mountPath: /app/components/statestore.yaml
subPath: statestore.yaml
volumeMounts: {}
nodeSelector: {}
tolerations: []
affinity: {}
statestore:
scope: mrtz-sorcerer
redis: mrtz-sorcerer-redis
+2 -2
View File
@@ -13,9 +13,9 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: v1.2.4
version: v1.6.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "v1.2.4"
appVersion: "v1.6.0"
+1 -1
View File
@@ -12,7 +12,7 @@ image:
# This sets the pull policy for images.
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: v1.2.4
tag: v1.6.0
# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/
imagePullSecrets:
- name: gitlab-pull-secret
+1 -1
View File
@@ -3,7 +3,7 @@
# Declare variables to be passed into your templates.
replicaCount: 1
image:
repository: registry.gitlab.com/oceanbox/makai/makai
repository: registry.gitlab.com/oceanbox/makai
tag: v0.1.0
pullPolicy: IfNotPresent
init:
+6
View File
@@ -0,0 +1,6 @@
dependencies:
- name: diagrid-dashboard
repository: file://../diagrid-dashboard
version: 0.1.0
digest: sha256:4fdb3148a2a6439223d7844a3083da2de324dd47e5cb3ac4a5d9c436e6e2c775
generated: "2025-12-16T19:38:21.939708629+01:00"
+5
View File
@@ -8,3 +8,8 @@ version: v1.35.2
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: v1.35.2
dependencies:
- name: diagrid-dashboard
version: "0.1.0"
repository: "file://../diagrid-dashboard"
condition: diagrid-dashboard.enabled
+6
View File
@@ -108,3 +108,9 @@ serviceMonitor:
nodeSelector: {}
tolerations: []
affinity: {}
diagrid-dashboard:
enabled: false
statestore:
scope: sorcerer
redis: sorcerer-redis
+1 -1
View File
@@ -5,7 +5,7 @@
replicaCount: 1
image:
repository: registry
tag: 2
tag: 3
pullPolicy: IfNotPresent
init:
enabled: false
+1 -1
View File
@@ -27,7 +27,7 @@ releases:
- name: argocd-apps
namespace: argocd
chart: argo/argocd-apps
version: 0.0.9
version: 2.0.3
condition: argo.apps.enabled
values:
- ../values/argo/values/apps.yaml.gotmpl
+3 -2
View File
@@ -3,7 +3,8 @@ bases:
repositories:
- name: cert-manager
url: 'https://charts.jetstack.io'
oci: true
url: 'quay.io/jetstack/charts'
commonLabels:
tier: system
@@ -12,7 +13,7 @@ releases:
- name: cert-manager
namespace: cert-manager
chart: cert-manager/cert-manager
version: 1.12.13
version: v1.19.2
condition: cert_manager.enabled
values:
- ../values/cert-manager/values/cert-manager.yaml.gotmpl
+44
View File
@@ -0,0 +1,44 @@
bases:
- ../envs/environments.yaml.gotmpl
repositories:
- name: forgejo
oci: true
url: code.forgejo.org/forgejo-helm
commonLabels:
tier: system
releases:
- name: forgejo
namespace: forgejo
chart: forgejo/forgejo
version: 16.0.0
condition: forgejo.enabled
values:
- ../values/forgejo/values/values.yaml
- ../values/forgejo/values/values-{{ .Environment.Name }}.yaml
postRenderer: ../bin/kustomizer
postRendererArgs:
- ../values/forgejo/kustomize/{{ .Environment.Name }}
missingFileHandler: Info
- name: manifests
namespace: forgejo
chart: manifests
condition: forgejo.enabled
missingFileHandler: Info
values:
- ../values/env.yaml
- ../values/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml
- ../values/forgejo/env.yaml.gotmpl
- ../values/forgejo/env-{{ requiredEnv "ARGOCD_ENV_CLUSTER_NAME" }}.yaml.gotmpl
hooks:
- events: [ prepare, cleanup ]
showlogs: true
command: ../bin/helmify
args:
- '{{`{{ if eq .Event.Name "prepare" }}build{{ else }}clean{{ end }}`}}'
- '{{`{{ .Release.Chart }}`}}'
- '{{`{{ .Environment.Name }}`}}'
- ../values/forgejo/manifests
- manifests
+1 -1
View File
@@ -12,7 +12,7 @@ releases:
- name: ingress-nginx
namespace: ingress-nginx
chart: ingress-nginx/ingress-nginx
version: 4.8.3
version: 4.14.1
condition: nginx.enabled
values:
- ../values/ingress-nginx/values/ingress-nginx.yaml.gotmpl
+1 -1
View File
@@ -15,7 +15,7 @@ releases:
- name: kyverno
namespace: kyverno
chart: kyverno/kyverno
version: 3.5.1
version: 3.6.1
condition: kyverno.enabled
values:
- ../values/kyverno/values/kyverno.yaml.gotmpl
+1 -1
View File
@@ -12,7 +12,7 @@ releases:
- name: mariadb-operator
namespace: mariadb-operator
chart: mariadb-operator/mariadb-operator
version: 25.8.4
version: 25.10.3
condition: mariadb_operator.enabled
values:
- ../values/mariadb-operator/values/mariadb-operator.yaml.gotmpl
+1 -1
View File
@@ -16,7 +16,7 @@ releases:
namespace: {{ .Environment.Name }}-openfga
{{- end }}
chart: openfga/openfga
version: 0.2.45
version: 0.2.50
condition: openfga.enabled
values:
- ../values/openfga/values/values.yaml
@@ -12,7 +12,7 @@ releases:
- name: opentelemetry-collector
namespace: otel
chart: open-telemetry/opentelemetry-collector
version: 0.134.1
version: 0.142.1
condition: otel.enabled
values:
- ../values/opentelemetry-collector/values/values.yaml
+1 -1
View File
@@ -15,7 +15,7 @@ releases:
- name: postgres-operator
namespace: cnpg
chart: cloudnative-pg/cloudnative-pg
version: 0.26.1
version: 0.27.0
condition: postgres_operator.enabled
values:
- ../values/postgres-operator/values/postgres-operator.yaml.gotmpl
+1 -1
View File
@@ -13,7 +13,7 @@ releases:
- name: {{ .Environment.Name }}-rabbitmq
namespace: rabbitmq
chart: bitnami/rabbitmq
version: 12.9.0
version: 13.0.3
condition: rabbitmq.enabled
values:
- ../values/rabbitmq/values/values.yaml
+1 -1
View File
@@ -13,7 +13,7 @@ releases:
- name: slurm-operator
namespace: slinky
chart: slurm-operator/slurm-operator
version: 0.4.0
version: 0.4.1
condition: slurm_operator.enabled
values:
- ../values/slurm-operator/values/slurm-operator.yaml.gotmpl
+1 -1
View File
@@ -13,7 +13,7 @@ releases:
- name: spegel
namespace: spegel
chart: spegel/spegel
version: 0.5.1
version: 0.6.0
condition: spegel.enabled
values:
- ../values/spegel/values/spegel.yaml.gotmpl
+1 -1
View File
@@ -15,7 +15,7 @@ releases:
- name: velero
namespace: velero
chart: velero/velero
version: 11.1.1
version: 11.3.2
condition: velero.enabled
values:
- ../values/velero/values/velero.yaml.gotmpl
+70
View File
@@ -0,0 +1,70 @@
let
sources = import ./default.nix;
pkgs = import sources.nixpkgs { };
pre-commit = import sources.git-hooks;
globalExcludes = [
"nix/default.nix"
"attic"
"vcluster"
".*vendor"
".*chart/.*"
".*schema.json"
];
in
pre-commit.run {
src = pkgs.nix-gitignore.gitignoreSource [ ] ../.;
# Do not run at pre-commit time
default_stages = [
"pre-push"
];
# TODO(mrtz): Remove when default
package = pkgs.prek;
# Linters From https://github.com/cachix/pre-commit-hooks.nix
hooks = {
nixfmt-rfc-style = {
enable = true;
excludes = globalExcludes;
};
trim-trailing-whitespace.enable = true;
shellcheck = {
enable = true;
excludes = [
"vcluster/"
"attic/"
];
args = [
"-x"
"-o"
"all"
];
};
yamllint = {
enable = true;
excludes = [
"attic/"
"charts/templates/"
"charts/"
"values/"
"vcluster/"
];
settings = {
strict = true;
configData = ''{ extends: default, rules: { document-start: disable, line-length: {max: 300} } }'';
};
};
check-json.enable = true;
renovate-config-validator = {
enable = true;
files = "renovate.json$";
entry = "renovate-config-validator";
};
};
}
+126 -23
View File
@@ -9,8 +9,15 @@
*/
# Generated by npins. Do not modify; will be overwritten regularly
let
data = builtins.fromJSON (builtins.readFile ./sources.json);
version = data.version;
# Backwards-compatibly make something that previously didn't take any arguments take some
# The function must return an attrset, and will unfortunately be eagerly evaluated
# Same thing, but it catches eval errors on the default argument so that one may still call it with other arguments
mkFunctor =
fn:
let
e = builtins.tryEval (fn { });
in
(if e.success then e.value else { error = fn { }; }) // { __functor = _self: fn; };
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/lists.nix#L295
range =
@@ -21,7 +28,6 @@ let
# https://github.com/NixOS/nixpkgs/blob/0258808f5744ca980b9a1f24fe0b1e6f0fecee9c/lib/strings.nix#L269
stringAsChars = f: s: concatStrings (map f (stringToCharacters s));
concatMapStrings = f: list: concatStrings (map f list);
concatStrings = builtins.concatStringsSep "";
# If the environment variable NPINS_OVERRIDE_${name} is set, then use
@@ -48,41 +54,87 @@ let
mkSource =
name: spec:
{
pkgs ? null,
}:
assert spec ? type;
let
# Unify across builtin and pkgs fetchers.
# `fetchGit` requires a wrapper because of slight API differences.
fetchers =
if pkgs == null then
{
inherit (builtins) fetchTarball fetchurl;
# For some fucking reason, fetchGit has a different signature than the other builtin fetchers …
fetchGit = args: (builtins.fetchGit args).outPath;
}
else
{
fetchTarball =
{
url,
sha256,
}:
pkgs.fetchzip {
inherit url sha256;
extension = "tar";
};
inherit (pkgs) fetchurl;
fetchGit =
{
url,
submodules,
rev,
name,
narHash,
}:
pkgs.fetchgit {
inherit url rev name;
fetchSubmodules = submodules;
hash = narHash;
};
};
# Dispatch to the correct code path based on the type
path =
if spec.type == "Git" then
mkGitSource spec
mkGitSource fetchers spec
else if spec.type == "GitRelease" then
mkGitSource spec
mkGitSource fetchers spec
else if spec.type == "PyPi" then
mkPyPiSource spec
mkPyPiSource fetchers spec
else if spec.type == "Channel" then
mkChannelSource spec
mkChannelSource fetchers spec
else if spec.type == "Tarball" then
mkTarballSource spec
mkTarballSource fetchers spec
else if spec.type == "Container" then
mkContainerSource pkgs spec
else
builtins.throw "Unknown source type ${spec.type}";
in
spec // { outPath = mayOverride name path; };
mkGitSource =
{
fetchTarball,
fetchGit,
...
}:
{
repository,
revision,
url ? null,
submodules,
hash,
branch ? null,
...
}:
assert repository ? type;
# At the moment, either it is a plain git repository (which has an url), or it is a GitHub/GitLab repository
# In the latter case, there we will always be an url to the tarball
if url != null && !submodules then
builtins.fetchTarball {
fetchTarball {
inherit url;
sha256 = hash; # FIXME: check nix version & use SRI hashes
sha256 = hash;
}
else
let
@@ -93,6 +145,8 @@ let
"https://github.com/${repository.owner}/${repository.repo}.git"
else if repository.type == "GitLab" then
"${repository.server}/${repository.repo_path}.git"
else if repository.type == "Forgejo" then
"${repository.server}/${repository.owner}/${repository.repo}.git"
else
throw "Unrecognized repository type ${repository.type}";
urlToName =
@@ -107,40 +161,89 @@ let
"${if matched == null then "source" else builtins.head matched}${appendShort}";
name = urlToName url revision;
in
builtins.fetchGit {
fetchGit {
rev = revision;
inherit name;
# hash = hash;
inherit url submodules;
narHash = hash;
inherit name submodules url;
};
mkPyPiSource =
{ url, hash, ... }:
builtins.fetchurl {
{ fetchurl, ... }:
{
url,
hash,
...
}:
fetchurl {
inherit url;
sha256 = hash;
};
mkChannelSource =
{ url, hash, ... }:
builtins.fetchTarball {
{ fetchTarball, ... }:
{
url,
hash,
...
}:
fetchTarball {
inherit url;
sha256 = hash;
};
mkTarballSource =
{ fetchTarball, ... }:
{
url,
locked_url ? url,
hash,
...
}:
builtins.fetchTarball {
fetchTarball {
url = locked_url;
sha256 = hash;
};
mkContainerSource =
pkgs:
{
image_name,
image_tag,
image_digest,
...
}:
if pkgs == null then
builtins.throw "container sources require passing in a Nixpkgs value: https://github.com/andir/npins/blob/master/README.md#using-the-nixpkgs-fetchers"
else
pkgs.dockerTools.pullImage {
imageName = image_name;
imageDigest = image_digest;
finalImageTag = image_tag;
};
in
if version == 5 then
builtins.mapAttrs mkSource data.pins
else
mkFunctor (
{
input ? ./sources.json,
}:
let
data =
if builtins.isPath input then
# while `readFile` will throw an error anyways if the path doesn't exist,
# we still need to check beforehand because *our* error can be caught but not the one from the builtin
# *piegames sighs*
if builtins.pathExists input then
builtins.fromJSON (builtins.readFile input)
else
throw "Input path ${toString input} does not exist"
else if builtins.isAttrs input then
input
else
throw "Unsupported input type ${builtins.typeOf input}, must be a path or an attrset";
version = data.version;
in
if version == 7 then
builtins.mapAttrs (name: spec: mkFunctor (mkSource name spec)) data.pins
else
throw "Unsupported format version ${toString version} in sources.json. Try running `npins upgrade`"
)
+16 -3
View File
@@ -1,11 +1,24 @@
{
"pins": {
"git-hooks": {
"type": "Git",
"repository": {
"type": "GitHub",
"owner": "cachix",
"repo": "git-hooks.nix"
},
"branch": "master",
"submodules": false,
"revision": "f0927703b7b1c8d97511c4116eb9b4ec6645a0fa",
"url": "https://github.com/cachix/git-hooks.nix/archive/f0927703b7b1c8d97511c4116eb9b4ec6645a0fa.tar.gz",
"hash": "sha256-6MkqajPICgugsuZ92OMoQcgSHnD6sJHwk8AxvMcIgTE="
},
"nixpkgs": {
"type": "Channel",
"name": "nixpkgs-unstable",
"url": "https://releases.nixos.org/nixpkgs/nixpkgs-26.05pre903996.59b6c96beacc/nixexprs.tar.xz",
"hash": "0b0yr9d1xyfwgpaj68bimsbjjbj7yis4whjvkrfdycfnasdf0gf0"
"url": "https://releases.nixos.org/nixpkgs/nixpkgs-26.05pre927565.13868c071cc7/nixexprs.tar.xz",
"hash": "sha256-wufp5c0nWh/87f9eK7xy1eZXms5zd4yl6S4SR+LfA08="
}
},
"version": 5
"version": 7
}
+6 -6
View File
@@ -3,7 +3,7 @@
# Simple script for uploading a base64 encoded image into our database. For
# grafana business image panels.
if [ $# -ne 2 ]
if [[ $# -ne 2 ]]
then
echo "Usage: $0 <image-name> <file>.png"
exit 1
@@ -12,9 +12,9 @@ fi
filename=$1
file=$2
if [ ! -e $file ]
if [[ ! -e "${file}" ]]
then
echo "file $file does not exist"
echo "file ${file} does not exist"
exit 1
fi
@@ -22,9 +22,9 @@ function create_image() {
local filename=$1
local data=$2
cat << EOF
INSERT INTO images VALUES('$filename', '$data');
INSERT INTO images VALUES('${filename}', '${data}');
EOF
}
data=$(cat $file | base64 -w0)
create_image $filename $data
data=$(base64 -w0 < "${file}")
create_image "${filename}" "${data}"
-1
View File
@@ -1,4 +1,3 @@
// -*- mode: jsonc -*-
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": [
+43 -22
View File
@@ -1,9 +1,10 @@
groups:
- name: etcd
- name: etcd
rules:
- alert: etcdMembersDown
annotations:
description: 'etcd cluster "{{ $labels.job }}": members are down ({{ $value
description:
'etcd cluster "{{ $labels.job }}": members are down ({{ $value
}}).'
summary: etcd cluster members are down.
expr: |-
@@ -20,17 +21,20 @@ groups:
severity: critical
- alert: etcdInsufficientMembers
annotations:
description: 'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
description:
'etcd cluster "{{ $labels.job }}": insufficient members ({{ $value
}}).'
summary: etcd cluster has insufficient number of members.
expr: sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
expr:
sum(up{job=~".*etcd.*"} == bool 1) without (instance) < ((count(up{job=~".*etcd.*"})
without (instance) + 1) / 2)
for: 3m
labels:
severity: critical
- alert: etcdNoLeader
annotations:
description: 'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
description:
'etcd cluster "{{ $labels.job }}": member {{ $labels.instance }}
has no leader.'
summary: etcd cluster has no leader.
expr: etcd_server_has_leader{job=~".*etcd.*"} == 0
@@ -39,12 +43,14 @@ groups:
severity: critical
- alert: etcdHighNumberOfLeaderChanges
annotations:
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
description:
'etcd cluster "{{ $labels.job }}": {{ $value }} leader changes
within the last 15 minutes. Frequent elections may be a sign of insufficient
resources, high network latency, or disruptions by other components and should
be investigated.'
summary: etcd cluster has high number of leader changes.
expr: increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
expr:
increase((max without (instance) (etcd_server_leader_changes_seen_total{job=~".*etcd.*"})
or 0*absent(etcd_server_leader_changes_seen_total{job=~".*etcd.*"}))[15m:1m])
>= 4
for: 5m
@@ -52,7 +58,8 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
description:
'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of failed grpc requests.
expr: |-
@@ -65,7 +72,8 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedGRPCRequests
annotations:
description: 'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
description:
'etcd cluster "{{ $labels.job }}": {{ $value }}% of requests for
{{ $labels.grpc_method }} failed on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of failed grpc requests.
expr: |-
@@ -78,7 +86,8 @@ groups:
severity: critical
- alert: etcdGRPCRequestsSlow
annotations:
description: 'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
description:
'etcd cluster "{{ $labels.job }}": 99th percentile of gRPC requests
is {{ $value }}s on etcd instance {{ $labels.instance }} for {{ $labels.grpc_method
}} method.'
summary: etcd grpc requests are slow
@@ -90,7 +99,8 @@ groups:
severity: critical
- alert: etcdMemberCommunicationSlow
annotations:
description: 'etcd cluster "{{ $labels.job }}": member communication with {{
description:
'etcd cluster "{{ $labels.job }}": member communication with {{
$labels.To }} is taking {{ $value }}s on etcd instance {{ $labels.instance
}}.'
summary: etcd cluster member communication is slow.
@@ -102,7 +112,8 @@ groups:
severity: warning
- alert: etcdHighNumberOfFailedProposals
annotations:
description: 'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
description:
'etcd cluster "{{ $labels.job }}": {{ $value }} proposal failures
within the last 30 minutes on etcd instance {{ $labels.instance }}.'
summary: etcd cluster has high number of proposal failures.
expr: rate(etcd_server_proposals_failed_total{job=~".*etcd.*"}[15m]) > 5
@@ -111,7 +122,8 @@ groups:
severity: warning
- alert: etcdHighFsyncDurations
annotations:
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
description:
'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
are {{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile fsync durations are too high.
expr: |-
@@ -122,7 +134,8 @@ groups:
severity: warning
- alert: etcdHighFsyncDurations
annotations:
description: 'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
description:
'etcd cluster "{{ $labels.job }}": 99th percentile fsync durations
are {{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile fsync durations are too high.
expr: |-
@@ -133,7 +146,8 @@ groups:
severity: critical
- alert: etcdHighCommitDurations
annotations:
description: 'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
description:
'etcd cluster "{{ $labels.job }}": 99th percentile commit durations
{{ $value }}s on etcd instance {{ $labels.instance }}.'
summary: etcd cluster 99th percentile commit durations are too high.
expr: |-
@@ -144,11 +158,13 @@ groups:
severity: warning
- alert: etcdDatabaseQuotaLowSpace
annotations:
description: 'etcd cluster "{{ $labels.job }}": database size exceeds the defined
description:
'etcd cluster "{{ $labels.job }}": database size exceeds the defined
quota on etcd instance {{ $labels.instance }}, please defrag or increase the
quota as the writes to etcd will be disabled when it is full.'
summary: etcd cluster database is running full.
expr: (last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
expr:
(last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m]) /
last_over_time(etcd_server_quota_backend_bytes{job=~".*etcd.*"}[5m]))*100 >
95
for: 10m
@@ -156,26 +172,31 @@ groups:
severity: critical
- alert: etcdExcessiveDatabaseGrowth
annotations:
description: 'etcd cluster "{{ $labels.job }}": Predicting running out of disk
description:
'etcd cluster "{{ $labels.job }}": Predicting running out of disk
space in the next four hours, based on write observations within the past
four hours on etcd instance {{ $labels.instance }}, please check as it might
be disruptive.'
summary: etcd cluster database growing very fast.
expr: predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
expr:
predict_linear(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[4h], 4*60*60)
> etcd_server_quota_backend_bytes{job=~".*etcd.*"}
for: 10m
labels:
severity: warning
- alert: etcdDatabaseHighFragmentationRatio
annotations:
description: 'etcd cluster "{{ $labels.job }}": database size in use on instance
description:
'etcd cluster "{{ $labels.job }}": database size in use on instance
{{ $labels.instance }} is {{ $value | humanizePercentage }} of the actual
allocated disk space, please run defragmentation (e.g. etcdctl defrag) to
retrieve the unused fragmented disk space.'
runbook_url: https://etcd.io/docs/v3.5/op-guide/maintenance/#defragmentation
summary: etcd database size in use is less than 50% of the actual allocated
summary:
etcd database size in use is less than 50% of the actual allocated
storage.
expr: (last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
expr:
(last_over_time(etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"}[5m])
/ last_over_time(etcd_mvcc_db_total_size_in_bytes{job=~".*etcd.*"}[5m])) < 0.5
and etcd_mvcc_db_total_size_in_use_in_bytes{job=~".*etcd.*"} > 104857600
for: 10m
+9 -5
View File
@@ -1,13 +1,15 @@
groups:
- name: general.rules
- name: general.rules
rules:
- alert: TargetDown
annotations:
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
description:
'{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
}} targets in {{ $labels.namespace }} namespace are down.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
summary: One or more targets are unreachable.
expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
expr:
100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
BY (cluster, job, namespace, service)) > 10
for: 10m
labels:
@@ -21,7 +23,8 @@ groups:
mechanisms that send a notification when this alert is not firing. For example the
"DeadMansSnitch" integration in PagerDuty.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog
summary: An alert that should always be firing to certify that Alertmanager
summary:
An alert that should always be firing to certify that Alertmanager
is working properly.
expr: vector(1)
labels:
@@ -37,7 +40,8 @@ groups:
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
summary: Info-level alert inhibition.
expr: ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
expr:
ALERTS{severity = "info"} == 1 unless on (namespace) ALERTS{alertname !=
"InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
labels:
severity: none
+41 -22
View File
@@ -1,20 +1,23 @@
groups:
- name: kubernetes-apps
- name: kubernetes-apps
rules:
- alert: KubePodCrashLooping
annotations:
description: 'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
description:
'Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container
}}) is in waiting state (reason: "CrashLoopBackOff").'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodcrashlooping
summary: Pod is crash looping.
expr: max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
expr:
max_over_time(kube_pod_container_status_waiting_reason{reason="CrashLoopBackOff",
job="kube-state-metrics", namespace=~".*"}[5m]) >= 1
for: 15m
labels:
severity: warning
- alert: KubePodNotReady
annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
description:
Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes.
@@ -31,7 +34,8 @@ groups:
severity: warning
- alert: KubeDeploymentGenerationMismatch
annotations:
description: Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
description:
Deployment generation for {{ $labels.namespace }}/{{ $labels.deployment
}} does not match, this indicates that the Deployment has failed but has not
been rolled back.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentgenerationmismatch
@@ -45,7 +49,8 @@ groups:
severity: warning
- alert: KubeDeploymentReplicasMismatch
annotations:
description: Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
description:
Deployment {{ $labels.namespace }}/{{ $labels.deployment }} has
not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentreplicasmismatch
summary: Deployment has not matched the expected number of replicas.
@@ -64,7 +69,8 @@ groups:
severity: warning
- alert: KubeDeploymentRolloutStuck
annotations:
description: Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
description:
Rollout of deployment {{ $labels.namespace }}/{{ $labels.deployment
}} is not progressing for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedeploymentrolloutstuck
summary: Deployment rollout is not progressing.
@@ -76,7 +82,8 @@ groups:
severity: warning
- alert: KubeStatefulSetReplicasMismatch
annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
description:
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} has
not matched the expected number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetreplicasmismatch
summary: StatefulSet has not matched the expected number of replicas.
@@ -95,7 +102,8 @@ groups:
severity: warning
- alert: KubeStatefulSetGenerationMismatch
annotations:
description: StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
description:
StatefulSet generation for {{ $labels.namespace }}/{{ $labels.statefulset
}} does not match, this indicates that the StatefulSet has failed but has
not been rolled back.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetgenerationmismatch
@@ -109,7 +117,8 @@ groups:
severity: warning
- alert: KubeStatefulSetUpdateNotRolledOut
annotations:
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
description:
StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }} update
has not been rolled out.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
summary: StatefulSet update has not been rolled out.
@@ -136,7 +145,8 @@ groups:
severity: warning
- alert: KubeDaemonSetRolloutStuck
annotations:
description: DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
description:
DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset }} has not
finished or progressed for at least 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetrolloutstuck
summary: DaemonSet rollout is stuck.
@@ -169,19 +179,22 @@ groups:
severity: warning
- alert: KubeContainerWaiting
annotations:
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
description:
pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on container
{{ $labels.container}} has been in waiting state for longer than 1 hour.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
expr:
sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
namespace=~".*"}) > 0
for: 1h
labels:
severity: warning
- alert: KubeDaemonSetNotScheduled
annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled.'
description:
"{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are not scheduled."
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetnotscheduled
summary: DaemonSet pods are not scheduled.
expr: |-
@@ -193,18 +206,21 @@ groups:
severity: warning
- alert: KubeDaemonSetMisScheduled
annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run.'
description:
"{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
}} are running where they are not supposed to run."
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubedaemonsetmisscheduled
summary: DaemonSet pods are misscheduled.
expr: kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
expr:
kube_daemonset_status_number_misscheduled{job="kube-state-metrics", namespace=~".*"}
> 0
for: 15m
labels:
severity: warning
- alert: KubeJobNotCompleted
annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
description:
Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking more
than {{ "43200" | humanizeDuration }} to complete.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
summary: Job did not complete in time
@@ -216,7 +232,8 @@ groups:
severity: warning
- alert: KubeJobFailed
annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
description:
Job {{ $labels.namespace }}/{{ $labels.job_name }} failed to complete.
Removing failed job after investigation should clear this alert.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobfailed
summary: Job failed to complete.
@@ -226,7 +243,8 @@ groups:
severity: warning
- alert: KubeHpaReplicasMismatch
annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
description:
HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
has not matched the desired number of replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpareplicasmismatch
summary: HPA has not matched desired number of replicas.
@@ -249,7 +267,8 @@ groups:
severity: warning
- alert: KubeHpaMaxedOut
annotations:
description: HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
description:
HPA {{ $labels.namespace }}/{{ $labels.horizontalpodautoscaler }}
has been running at max replicas for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubehpamaxedout
summary: HPA is running at max replicas
+18 -10
View File
@@ -1,9 +1,10 @@
groups:
- name: kubernetes-resources
- name: kubernetes-resources
rules:
- alert: KubeCPUOvercommit
annotations:
description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
description:
Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
for Pods by {{ $value }} CPU shares and cannot tolerate node failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuovercommit
summary: Cluster has overcommitted CPU resource requests.
@@ -16,7 +17,8 @@ groups:
severity: warning
- alert: KubeMemoryOvercommit
annotations:
description: Cluster {{ $labels.cluster }} has overcommitted memory resource
description:
Cluster {{ $labels.cluster }} has overcommitted memory resource
requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node
failure.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryovercommit
@@ -30,7 +32,8 @@ groups:
severity: warning
- alert: KubeCPUQuotaOvercommit
annotations:
description: Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
description:
Cluster {{ $labels.cluster }} has overcommitted CPU resource requests
for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecpuquotaovercommit
summary: Cluster has overcommitted CPU resource requests.
@@ -44,7 +47,8 @@ groups:
severity: warning
- alert: KubeMemoryQuotaOvercommit
annotations:
description: Cluster {{ $labels.cluster }} has overcommitted memory resource
description:
Cluster {{ $labels.cluster }} has overcommitted memory resource
requests for Namespaces.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubememoryquotaovercommit
summary: Cluster has overcommitted memory resource requests.
@@ -58,7 +62,8 @@ groups:
severity: warning
- alert: KubeQuotaAlmostFull
annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaalmostfull
summary: Namespace quota is going to be full.
@@ -72,7 +77,8 @@ groups:
severity: info
- alert: KubeQuotaFullyUsed
annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotafullyused
summary: Namespace quota is fully used.
@@ -86,7 +92,8 @@ groups:
severity: info
- alert: KubeQuotaExceeded
annotations:
description: Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
description:
Namespace {{ $labels.namespace }} is using {{ $value | humanizePercentage
}} of its {{ $labels.resource }} quota.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubequotaexceeded
summary: Namespace quota has exceeded the limits.
@@ -100,9 +107,10 @@ groups:
severity: warning
- alert: CPUThrottlingHigh
annotations:
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
description:
"{{ $value | humanizePercentage }} throttling of CPU in namespace
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{ $labels.pod
}}.'
}}."
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
summary: Processes experience elevated CPU throttling.
expr: |-
+13 -8
View File
@@ -1,10 +1,10 @@
groups:
- name: kubernetes-storage
- name: kubernetes-storage
rules:
- alert: KubePersistentVolumeFillingUp
annotations:
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
description:
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is only {{ $value | humanizePercentage }} free.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumefillingup
@@ -26,7 +26,8 @@ groups:
severity: critical
- alert: KubePersistentVolumeFillingUp
annotations:
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
description:
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is expected to fill up within four days. Currently {{ $value
| humanizePercentage }} is available.
@@ -51,7 +52,8 @@ groups:
severity: warning
- alert: KubePersistentVolumeInodesFillingUp
annotations:
description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
description:
The PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} only has {{ $value | humanizePercentage }} free inodes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeinodesfillingup
@@ -73,7 +75,8 @@ groups:
severity: critical
- alert: KubePersistentVolumeInodesFillingUp
annotations:
description: Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
description:
Based on recent sampling, the PersistentVolume claimed by {{ $labels.persistentvolumeclaim
}} in Namespace {{ $labels.namespace }} {{ with $labels.cluster -}} on Cluster
{{ . }} {{- end }} is expected to run out of inodes within four days. Currently
{{ $value | humanizePercentage }} of its inodes are free.
@@ -98,11 +101,13 @@ groups:
severity: warning
- alert: KubePersistentVolumeErrors
annotations:
description: The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
description:
The persistent volume {{ $labels.persistentvolume }} {{ with $labels.cluster
-}} on Cluster {{ . }} {{- end }} has status {{ $labels.phase }}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepersistentvolumeerrors
summary: PersistentVolume is having issues with provisioning.
expr: kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
expr:
kube_persistentvolume_status_phase{phase=~"Failed|Pending",job="kube-state-metrics"}
> 0
for: 5m
labels:
+56 -29
View File
@@ -1,9 +1,10 @@
groups:
- name: node-exporter
- name: node-exporter
rules:
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
@@ -21,7 +22,8 @@ groups:
severity: warning
- alert: NodeFilesystemSpaceFillingUp
annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left and is filling up fast.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemspacefillingup
@@ -39,7 +41,8 @@ groups:
severity: critical
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
@@ -55,7 +58,8 @@ groups:
severity: warning
- alert: NodeFilesystemAlmostOutOfSpace
annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
space left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutofspace
@@ -71,7 +75,8 @@ groups:
severity: critical
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left and is filling up.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
@@ -89,7 +94,8 @@ groups:
severity: warning
- alert: NodeFilesystemFilesFillingUp
annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left and is filling up fast.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemfilesfillingup
@@ -107,7 +113,8 @@ groups:
severity: critical
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
@@ -123,7 +130,8 @@ groups:
severity: warning
- alert: NodeFilesystemAlmostOutOfFiles
annotations:
description: Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
description:
Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint
}}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available
inodes left.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefilesystemalmostoutoffiles
@@ -139,38 +147,44 @@ groups:
severity: critical
- alert: NodeNetworkReceiveErrs
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
description:
'{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} receive errors in the last two minutes.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworkreceiveerrs
summary: Network interface is reporting many receive errors.
expr: rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
expr:
rate(node_network_receive_errs_total{job="node-exporter"}[2m]) / rate(node_network_receive_packets_total{job="node-exporter"}[2m])
> 0.01
for: 1h
labels:
severity: warning
- alert: NodeNetworkTransmitErrs
annotations:
description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered
description:
'{{ $labels.instance }} interface {{ $labels.device }} has encountered
{{ printf "%.0f" $value }} transmit errors in the last two minutes.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodenetworktransmiterrs
summary: Network interface is reporting many transmit errors.
expr: rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
expr:
rate(node_network_transmit_errs_total{job="node-exporter"}[2m]) / rate(node_network_transmit_packets_total{job="node-exporter"}[2m])
> 0.01
for: 1h
labels:
severity: warning
- alert: NodeHighNumberConntrackEntriesUsed
annotations:
description: '{{ $value | humanizePercentage }} of conntrack entries are used.'
description: "{{ $value | humanizePercentage }} of conntrack entries are used."
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodehighnumberconntrackentriesused
summary: Number of conntrack are getting close to the limit.
expr: (node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
expr:
(node_nf_conntrack_entries{job="node-exporter"} / node_nf_conntrack_entries_limit)
> 0.75
labels:
severity: warning
- alert: NodeTextFileCollectorScrapeError
annotations:
description: Node Exporter text file collector on {{ $labels.instance }} failed
description:
Node Exporter text file collector on {{ $labels.instance }} failed
to scrape.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodetextfilecollectorscrapeerror
summary: Node Exporter text file collector failed to scrape.
@@ -179,7 +193,8 @@ groups:
severity: warning
- alert: NodeClockSkewDetected
annotations:
description: Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
description:
Clock at {{ $labels.instance }} is out of sync by more than 0.05s.
Ensure NTP is configured correctly on this host.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclockskewdetected
summary: Clock skew detected.
@@ -200,7 +215,8 @@ groups:
severity: warning
- alert: NodeClockNotSynchronising
annotations:
description: Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
description:
Clock at {{ $labels.instance }} is not synchronising. Ensure NTP
is configured on this host.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodeclocknotsynchronising
summary: Clock not synchronising.
@@ -213,12 +229,14 @@ groups:
severity: warning
- alert: NodeRAIDDegraded
annotations:
description: RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
description:
RAID array '{{ $labels.device }}' at {{ $labels.instance }} is
in degraded state due to one or more disks failures. Number of spare drives
is insufficient to fix issue automatically.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddegraded
summary: RAID Array is degraded.
expr: node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
expr:
node_md_disks_required{job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
- ignoring (state) (node_md_disks{state="active",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"})
> 0
for: 15m
@@ -226,17 +244,20 @@ groups:
severity: critical
- alert: NodeRAIDDiskFailure
annotations:
description: At least one device in RAID array at {{ $labels.instance }} failed.
description:
At least one device in RAID array at {{ $labels.instance }} failed.
Array '{{ $labels.device }}' needs attention and possibly a disk swap.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/noderaiddiskfailure
summary: Failed device in RAID array.
expr: node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
expr:
node_md_disks{state="failed",job="node-exporter",device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}
> 0
labels:
severity: warning
- alert: NodeFileDescriptorLimit
annotations:
description: File descriptors limit at {{ $labels.instance }} is currently at
description:
File descriptors limit at {{ $labels.instance }} is currently at
{{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
summary: Kernel is predicted to exhaust file descriptors limit soon.
@@ -249,7 +270,8 @@ groups:
severity: warning
- alert: NodeFileDescriptorLimit
annotations:
description: File descriptors limit at {{ $labels.instance }} is currently at
description:
File descriptors limit at {{ $labels.instance }} is currently at
{{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodefiledescriptorlimit
summary: Kernel is predicted to exhaust file descriptors limit soon.
@@ -266,7 +288,8 @@ groups:
CPU usage at {{ $labels.instance }} has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodecpuhighusage
summary: High CPU usage.
expr: sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
expr:
sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{job="node-exporter",
mode!="idle"}[2m]))) * 100 > 90
for: 15m
labels:
@@ -301,7 +324,8 @@ groups:
Memory is filling up at {{ $labels.instance }}, has been above 90% for the last 15 minutes, is currently at {{ printf "%.2f" $value }}%.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodememoryhighutilization
summary: Host is running out of memory.
expr: 100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
expr:
100 - (node_memory_MemAvailable_bytes{job="node-exporter"} / node_memory_MemTotal_bytes{job="node-exporter"}
* 100) > 90
for: 15m
labels:
@@ -313,14 +337,16 @@ groups:
This symptom might indicate disk saturation.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodediskiosaturation
summary: Disk IO queue is high.
expr: rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
expr:
rate(node_disk_io_time_weighted_seconds_total{job="node-exporter", device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"}[5m])
> 10
for: 30m
labels:
severity: warning
- alert: NodeSystemdServiceFailed
annotations:
description: Systemd service {{ $labels.name }} has entered failed state at
description:
Systemd service {{ $labels.name }} has entered failed state at
{{ $labels.instance }}
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodesystemdservicefailed
summary: Systemd service has entered failed state.
@@ -330,7 +356,8 @@ groups:
severity: warning
- alert: NodeBondingDegraded
annotations:
description: Bonding interface {{ $labels.master }} on {{ $labels.instance }}
description:
Bonding interface {{ $labels.master }} on {{ $labels.instance }}
is in degraded state due to one or more slave failures.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/node/nodebondingdegraded
summary: Bonding interface is degraded
+13 -7
View File
@@ -1,5 +1,5 @@
groups:
- name: node-resource-utilization.rules
- name: node-resource-utilization.rules
rules:
- alert: HostHighCpuLoad
annotations:
@@ -8,14 +8,16 @@ groups:
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Host high CPU load (instance {{ $labels.instance }})
expr: (sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
expr:
(sum by (instance) (avg by (mode, instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m])))
> 0.9) * on(instance) group_left (nodename) node_uname_info{nodename=~".+"}
for: 10m
labels:
severity: critical
- alert: MemoryUtilizationHighWarning
annotations:
dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
dashboard:
https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
description: Node {{ $labels.instance }} has less than 10% available memory.
summary: Node Memory utilization warning
@@ -25,7 +27,8 @@ groups:
severity: critical
- alert: MemoryUtilizationHighCritical
annotations:
dashboard: https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
dashboard:
https://grafana.ads1.itpartner.no/explore?orgId=1&left=%7B%22datasource%22:%22Prometheus%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22instant%22:true,%22range%22:true,%22exemplar%22:false,%22expr%22:%22topk(10,%20sum(container_memory_usage_bytes%7Bcontainer!%3D%5C%22%5C%22,%20container!%3D%5C%22POD%5{
$labels.instance }}%5C%22%7D)%20by%20(container,%20pod,%20namespace))%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D
description: Node {{ $labels.instance }} has less than 5% available memory.
summary: Node Memory utilization critical
@@ -37,7 +40,8 @@ groups:
annotations:
description: Node {{ $labels.node }} has CPU utilization over 90%.
summary: Node has been in not-ready state for longer than 3 minutes
expr: (sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
expr:
(sum(max_over_time(kube_node_status_condition{condition="Ready",status="true"}[3m])
<= 0) by (node)) or (absent(kube_node_status_condition{condition="Ready",status="true"}))
> 0
for: 5m
@@ -50,7 +54,8 @@ groups:
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Kubernetes Node memory pressure (instance {{ $labels.instance }})
expr: kube_node_status_condition{condition="MemoryPressure",status="true"} ==
expr:
kube_node_status_condition{condition="MemoryPressure",status="true"} ==
1
for: 2m
labels:
@@ -62,7 +67,8 @@ groups:
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Kubernetes Container oom killer (instance {{ $labels.instance }})
expr: (kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
expr:
(kube_pod_container_status_restarts_total - kube_pod_container_status_restarts_total
offset 10m >= 1) and ignoring (reason) min_over_time(kube_pod_container_status_last_terminated_reason{reason="OOMKilled"}[10m])
== 1
for: 0m
+9 -5
View File
@@ -1,20 +1,24 @@
groups:
- name: velero
- name: velero
rules:
- alert: VeleroBackupPartialFailures
annotations:
message: Velero backup {{ $labels.schedule }} has {{$value | humanizePercentage}} partialy
message:
Velero backup {{ $labels.schedule }} has {{$value | humanizePercentage}} partialy
failed backups.
expr: velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
expr:
velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
> 0.25
for: 15m
labels:
severity: critical
- alert: VeleroBackupFailures
annotations:
message: Velero backup {{$labels.schedule}} has {{$value | humanizePercentage}} failed
message:
Velero backup {{$labels.schedule}} has {{$value | humanizePercentage}} failed
backups.
expr: velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
expr:
velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""}
> 0.25
for: 15m
labels:
+13 -7
View File
@@ -1,9 +1,10 @@
groups:
- name: x509-certificate-exporter.rules
- name: x509-certificate-exporter.rules
rules:
- alert: X509ExporterReadErrors
annotations:
description: Over the last 15 minutes, this x509-certificate-exporter instance
description:
Over the last 15 minutes, this x509-certificate-exporter instance
has experienced errors reading certificate files or querying the Kubernetes
API. This could be caused by a misconfiguration if triggered when the exporter
starts.
@@ -14,7 +15,8 @@ groups:
severity: warning
- alert: CertificateError
annotations:
description: Certificate could not be decoded {{if $labels.secret_name }} in
description:
Certificate could not be decoded {{if $labels.secret_name }} in
Kubernetes secret "{{ $labels.secret_namespace }}/{{ $labels.secret_name }}"{{else}}at
location "{{ $labels.filepath }}"{{end}}
summary: Certificate cannot be decoded
@@ -24,22 +26,26 @@ groups:
severity: warning
- alert: CertificateRenewal
annotations:
description: Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
description:
Certificate for "{{ $labels.subject_CN }}" should be renewed {{if
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
summary: Certificate should be renewed
expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
expr:
((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 28
for: 15m
labels:
severity: warning
- alert: CertificateExpiration
annotations:
description: Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
description:
Certificate for "{{ $labels.subject_CN }}" is about to expire {{if
$labels.secret_name }}in Kubernetes secret "{{ $labels.secret_namespace }}/{{
$labels.secret_name }}"{{else}}at location "{{ $labels.filepath }}"{{end}}
summary: Certificate is about to expire
expr: ((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
expr:
((x509_cert_not_after{secret_name!="linkerd-identity-issuer", issuer_O="",
issuer_CN!="webhook.linkerd.cluster.local"} - time()) / 86400) < 14
for: 15m
labels:
+35 -8
View File
@@ -6,11 +6,15 @@ let
config = { };
overlays = [ ];
};
checks = import ./nix/checks.nix;
in
pkgs.mkShellNoCC {
name = "clstr";
packages = with pkgs; [
packages =
with pkgs;
[
# dev tools
just
npins
@@ -24,17 +28,40 @@ pkgs.mkShellNoCC {
kubelogin
kubelogin-oidc
kubectl-rook-ceph
kubectl-graph
kubectl-klock
graphviz
# other tools
step-cli
linkerd
velero
cmctl
# other tools activate when needed
# step-cli
# linkerd
# cmctl
# rclone
# velero
# renovate
# dapr
dapr-cli
]
++ checks.enabledPackages;
# Environment variables
ARGOCD_ENV_CLUSTER_NAME = "hel1";
HELM_GIT_ACCESS_TOKEN = "glpat-xxx";
shellHook = builtins.concatStringsSep "\n" [
checks.shellHook
];
ARGOCD_ENV_CLUSTER_NAME = "rossby";
HELM_GIT_ACCESS_TOKEN = "glpat-xxx";
# Alternative shells
passthru = pkgs.lib.mapAttrs (name: value: pkgs.mkShellNoCC (value // { inherit name; })) {
ci-shell = {
packages = [
pkgs.npins
];
shellHook = ''
export NPINS_DIRECTORY="nix"
'';
};
};
}
+3
View File
@@ -88,6 +88,8 @@ spec:
server: https://kubernetes.default.svc
- namespace: uptime
server: https://kubernetes.default.svc
- namespace: forgejo
server: https://kubernetes.default.svc
sourceRepos:
- https://argoproj.github.io/argo-helm
- https://kubernetes-sigs.github.io/metrics-server/
@@ -123,6 +125,7 @@ spec:
- ghcr.io/slinkyproject/charts/slurm-operator-crds
- ghcr.io/spegel-org/helm-charts
- ghcr.io/dragonflydb/dragonfly-operator/helm/dragonfly-operator
- code.forgejo.org/forgejo-helm
- https://operator.mariadb.com/mariadb-enterprise-operator
- https://operator.mariadb.com
- https://ot-container-kit.github.io/helm-charts
@@ -73,7 +73,7 @@
"connString": "Username=postgres;Password=secret;Host=localhost;Port=5432;Database=app;Pooling=true;",
"sorcerer" : "https://sorcerer.data.oceanbox.io",
"allowedOrigins": [
"https://maps.oceanbox.io",
"https://maps.oceanbox.io"
],
"appName": "atlantis",
"appEnv": "prod",
@@ -79,3 +79,9 @@ resources:
requests:
cpu: 500m
memory: 1Gi
diagrid-dashboard:
enabled: false
statestore:
scope: prod-atlantis
redis: prod-atlantis-redis
@@ -1,6 +1,6 @@
replicaCount: 1
image:
tag: faa0a853-debug
tag: 503ccbb2-debug
podAnnotations:
dapr.io/app-id: "staging-atlantis"
env:
@@ -26,12 +26,12 @@ env:
- name: DB_USER
valueFrom:
secretKeyRef:
name: staging-atlantis-db-superuser
name: staging-atlantis-db-app
key: username
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: staging-atlantis-db-superuser
name: staging-atlantis-db-app
key: password
- name: DAPR_API_TOKEN
valueFrom:
@@ -116,9 +116,6 @@ cluster:
db: prod-atlantis-db
namespace: prod-atlantis
resources:
limits:
cpu: 250m
memory: 1Gi
requests:
cpu: 250m
memory: 1Gi
@@ -133,3 +130,8 @@ redis:
resources:
cpu: 150m
memory: 256Mi
diagrid-dashboard:
enabled: false
statestore:
scope: staging-atlantis
redis: staging-atlantis-redis
@@ -10,3 +10,4 @@ podAnnotations:
dapr.io/sidecar-memory-request: "50Mi"
# dapr.io/sidecar-cpu-limit: "100m"
# dapr.io/sidecar-memory-limit: "1000Mi"
+4
View File
@@ -1,4 +1,8 @@
codex:
enabled: false
{{- if eq .Environment.Name "prod" }}
autosync: false
{{- else }}
autosync: true
{{- end }}
env: {{ .Environment.Name }}
@@ -0,0 +1,67 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting": "Error"
}
},
"Debug": {
"LogLevel": {
"Default": "Debug"
}
},
"Console": {
"IncludeScopes": true,
"LogLevel": {
"Default": "Debug"
}
},
"OIDC": {
"issuer": "https://auth.oceanbox.io/realms/oceanbox",
"authorization_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/auth",
"token_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/token",
"jwks_uri": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/certs",
"userinfo_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/userinfo",
"end_session_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/logout",
"device_authorization_endpoint": "https://auth.oceanbox.io/realms/oceanbox/protocol/openid-connect/auth/device",
"clientId": "atlantis",
"clientSecret": "",
"scopes": [
"openid",
"email",
"offline_access",
"profile"
],
"audiences": [
"atlantis"
]
},
"SSO": {
"cookieDomain": ".oceanbox.io",
"cookieName": ".obx.prod",
"ttl": 12.0,
"signedOutRedirectUri": "https://maps.oceanbox.io/",
"realm": "atlantis",
"environment": "prod",
"keyStore": {
"kind": "azure",
"uri": "https://atlantis.blob.core.windows.net",
"key": "dataprotection-keys"
},
"keyVault": {
"kind": "azure",
"uri": "https://atlantisvault.vault.azure.net",
"key": "dataencryption-keys"
}
},
"plainAuthUsers": [
{
"username": "admin",
"password": "en-to-tre-fire",
"groups": [ "/oceanbox" ],
"roles": [ "admin" ]
}
]
}
@@ -0,0 +1,66 @@
- op: add
path: /spec/template/spec/containers/0/envFrom
value:
- secretRef:
name: azure-keyvault
- op: add
path: /spec/template/spec/containers/0/env
value:
- name: APP_NAMESPACE
value: prod-atlantis
- name: DOTNET_ENVIRONMENT
value: Production
- name: ASPNETCORE_ENVIRONMENT
value: Production
- name: DB_HOST
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: host
- name: DB_PORT
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: port
- name: DB_DATABASE
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: dbname
- name: DB_USER
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: user
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: prod-atlantis-db-app
key: password
- name: FGA_URL
value: http://prod-openfga.openfga.svc.cluster.local:8080
- name: FGA_DB_HOST
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: host
- name: FGA_DB_PORT
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: port
- name: FGA_DB_DATABASE
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: dbname
- name: FGA_DB_USER
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: user
- name: FGA_DB_PASSWORD
valueFrom:
secretKeyRef:
name: prod-openfga-db-app
key: password
@@ -0,0 +1,15 @@
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- name: prod-codex-appsettings
files:
- appsettings.json
patches:
- target:
group: apps
version: v1
kind: Deployment
path: deployment_patch.yaml
resources:
- ../base
@@ -62,4 +62,3 @@
secretKeyRef:
name: staging-openfga-db-app
key: password
name: azure-keyvault
@@ -0,0 +1,14 @@
{{- if .Values.clusterConfig.cilium.enabled }}
apiVersion: cilium.io/v2
kind: CiliumNetworkPolicy
metadata:
name: codex-allow-external-services
namespace: {{ .Release.Namespace }}
spec:
egress:
- toFQDNs:
- matchName: cacerts.digicert.com
endpointSelector:
matchLabels: {}
{{- end }}
+27
View File
@@ -0,0 +1,27 @@
replicaCount: 1
ingress:
enabled: true
className: "nginx"
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
nginx.ingress.kubernetes.io/backend-protocol: HTTP
nginx.ingress.kubernetes.io/ssl-redirect: "true"
oceanbox.io/expose: internal
hosts:
- host: codex.adm.oceanbox.io
paths:
- path: /
pathType: ImplementationSpecific
tls:
- hosts:
- codex.adm.oceanbox.io
secretName: prod-codex-tls
volumes:
- name: appsettings
configMap:
name: prod-codex-appsettings
volumeMounts:
- name: appsettings
mountPath: "/app/appsettings.json"
readOnly: true
subPath: appsettings.json
+2 -2
View File
@@ -1,6 +1,4 @@
replicaCount: 1
image:
tag: 70878e14-debug
ingress:
enabled: true
className: "nginx"
@@ -32,3 +30,5 @@ volumeMounts:
mountPath: "/app/appsettings.Development.json"
readOnly: true
subPath: appsettings.json
image:
tag: 2e1165d9-debug
+8 -3
View File
@@ -34,6 +34,8 @@ spec:
name: http
protocol: TCP
env:
- name: BASE_URL
value: https://fornix.hel1.oceanbox.io
- name: DRUPAL_DATABASE_HOST
value: drupal-db-rw
- name: DRUPAL_DATABASE_PREFIX
@@ -63,12 +65,15 @@ spec:
- mountPath: /opt/drupal/web/sites
name: drupal
subPath: sites
- mountPath: /opt/drupal/composer.json
name: drupal
subPath: modules/composer.json
- mountPath: /opt/drupal/patches
name: drupal
subPath: modules/patches
- mountPath: /opt/drupal/composer.json
name: drupal
subPath: modules/composer.json
- mountPath: /opt/drupal/composer.lock
name: drupal
subPath: modules/composer.lock
volumes:
- name: drupal
persistentVolumeClaim:
+2 -1
View File
@@ -2,13 +2,14 @@ clusterConfig:
manifests: https://gitlab.com/oceanbox/manifests.git
env: "prod"
distro: "talos"
domain: "hel1.oceanbox.io"
domain: "adm.hel1.obx"
initca: ""
apiserver: ""
apiserverip: ""
etcd_nodes: ["10.0.1.2, 10.0.1.4, 10.0.1.5"]
k8s_nodes: [""]
cluster: "hel1"
ingress_clusterissuer: "ca-issuer"
ingress_nodes: ["controlplane-1, controlplane-2, controlplane-3"]
ingress_replica_count: 3
ingress_loadbalancer: true
+3
View File
@@ -0,0 +1,3 @@
forgejo:
enabled: true
backup: true
+5
View File
@@ -0,0 +1,5 @@
forgejo:
enabled: false
backup: false
autosync: {{ if eq .Environment.Name "prod" }} false {{ else }} true {{ end }}
env: {{ .Environment.Name }}
+33
View File
@@ -0,0 +1,33 @@
apiVersion: barmancloud.cnpg.io/v1
kind: ObjectStore
metadata:
name: hel-store
namespace: forgejo
spec:
retentionPolicy: "7d"
configuration:
destinationPath: s3://obx-cnpg/hel1/forgejo-db
endpointURL: https://hel1.your-objectstorage.com
s3Credentials:
accessKeyId:
name: cnpg-s3
key: access_key
secretAccessKey:
name: cnpg-s3
key: access_secret
wal:
compression: snappy
---
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
name: forgejo-db
namespace: forgejo
spec:
schedule: "0 0 1 * * *"
backupOwnerReference: self
cluster:
name: forgejo-db
method: plugin
pluginConfiguration:
name: barman-cloud.cloudnative-pg.io
+11
View File
@@ -0,0 +1,11 @@
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: forgejo-db
namespace: forgejo
spec:
instances: 1
imageName: ghcr.io/cloudnative-pg/postgresql:18-minimal-trixie
storage:
resizeInUseVolumes: true
size: 10Gi
+25
View File
@@ -0,0 +1,25 @@
apiVersion: dragonflydb.io/v1alpha1
kind: Dragonfly
metadata:
name: dragonfly-forgejo
namespace: forgejo
spec:
replicas: 1
resources:
requests:
cpu: 150m
memory: 256Mi
limits:
memory: 256Mi
args:
- --dbfilename=dump # Static filename prevents disk exhaustion
- --maxmemory=$(MAX_MEMORY)Mi # Graceful memory management (90% of limit)
- --proactor_threads=1 # Auto-detect CPU cores (optimal threading)
- --cluster_mode=emulated
- --logtostderr
env:
- name: MAX_MEMORY
valueFrom:
resourceFieldRef:
resource: limits.memory
divisor: 1Mi
+42
View File
@@ -0,0 +1,42 @@
{{- if .Values.clusterConfig.argo.enabled }}
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: forgejo
namespace: argocd
annotations:
argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
destination:
namespace: forgejo
server: https://kubernetes.default.svc
project: sys
sources:
- repoURL: {{ .Values.clusterConfig.manifests }}
targetRevision: HEAD
path: helmfile.d
plugin:
name: helmfile-cmp
env:
- name: CLUSTER_NAME
value: {{ .Values.clusterConfig.cluster }}
- name: HELMFILE_ENVIRONMENT
value: {{ .Values.forgejo.env }}
- name: HELMFILE_FILE_PATH
value: forgejo.yaml.gotmpl
syncPolicy:
managedNamespaceMetadata:
labels:
component: sys
syncOptions:
- CreateNamespace=true
- ApplyOutOfSyncOnly=true
# - ServerSideApply=true
{{- if .Values.forgejo.autosync }}
automated:
prune: true
# selfHeal: false
{{- end }}
{{- end }}
+11
View File
@@ -0,0 +1,11 @@
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: forgejo-db-monitor
namespace: forgejo
spec:
selector:
matchLabels:
cnpg.io/cluster: forgejo-db
podMetricsEndpoints:
- port: metrics
+134
View File
@@ -0,0 +1,134 @@
replicaCount: 1
image:
registry: code.forgejo.org
repository: forgejo/forgejo
resources:
requests:
cpu: 200m
memory: 512Mi
gitea:
metrics:
enabled: false
serviceMonitor:
enabled: false
config:
APP_NAME: 'Forgejo: With a cup of tea.'
cache:
ENABLED: true
ADAPTER: redis
HOST: redis://dragonfly-forgejo.forgejo.svc:6379/0
session:
PROVIDER: redis
PROVIDER_CONFIG: redis://dragonfly-forgejo.forgejo.svc:6379/1
queue:
TYPE: redis
CONN_STR: redis://dragonfly-forgejo.forgejo.svc:6379/2
storage:
STORAGE_TYPE: minio
MINIO_ENDPOINT: hel1.your-objectstorage.com
MINIO_USE_SSL: true
MINIO_LOCATION: hel1
MINIO_BUCKET: obx-forgejo
security:
INSTALL_LOCK: true
service:
DISABLE_REGISTRATION: false
server:
APP_DATA_PATH: "/data/gitea"
DOMAIN: git.svc.hel1.obx
ROOT_URL: https://git.svc.hel1.obx
SSH_DOMAIN: git.svc.hel1.obx
SSH_PORT: 22
SSH_SERVER_USE_PROXY_PROTOCOL: true
LANDING_PAGE: "explore"
oauth2_client:
ENABLE_AUTO_REGISTRATION: true
UPDATE_AVATAR: true
ACCOUNT_LINKING: auto
database:
DB_TYPE: postgres
MAX_OPEN_CONNS: 90
openid:
ENABLE_OPENID_SIGNIN: false
ENABLE_OPENID_SIGNUP: false
oauth:
- name: 'Oceanbox'
provider: 'openidConnect'
existingSecret: forgejo-oauth-oceanbox
autoDiscoverUrl: 'https://login.microsoftonline.com/3f737008-e9a0-4485-9d27-40329d288089/.well-known/openid-configuration'
scopes: 'openid profile email groups'
groupClaimName: 'groups'
adminGroup: '/oceanbox/devel'
restrictedGroup: ''
additionalConfigFromEnvs:
- name: FORGEJO__STORAGE__MINIO_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: forgejo-s3
key: access_key
- name: FORGEJO__STORAGE__MINIO_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: forgejo-s3
key: secret_key
- name: FORGEJO__DATABASE__PASSWD
valueFrom:
secretKeyRef:
name: forgejo-db-app
key: password
- name: FORGEJO__DATABASE__NAME
valueFrom:
secretKeyRef:
name: forgejo-db-app
key: dbname
- name: FORGEJO__DATABASE__USER
valueFrom:
secretKeyRef:
name: forgejo-db-app
key: user
- name: FORGEJO__DATABASE__HOST
valueFrom:
secretKeyRef:
name: forgejo-db-app
key: host
- name: FORGEJO__DATABASE__DB_TYPE
value: postgres
ingress:
enabled: true
className: nginx
annotations:
cert-manager.io/cluster-issuer: ca-issuer
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/proxy-body-size: "0"
nginx.ingress.kubernetes.io/proxy-read-timeout: "600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "600"
nginx.ingress.kubernetes.io/whitelist-source-range: 10.0.0.0/8,172.16.0.0/12,192.168.0.0/16,172.19.255.0/24,100.64.0.0/12
hosts:
- host: git.svc.hel1.obx
paths:
- backend:
service:
name: forgejo-http
port:
number: 3000
path: /
pathType: ImplementationSpecific
tls:
- secretName: forgejo-tls
hosts:
- git.svc.hel1.obx
# service:
# ssh:
# type: LoadBalancer
# port: 22
# annotations:
# load-balancer.hetzner.cloud/location: hel1
# load-balancer.hetzner.cloud/uses-proxyprotocol: 'false'
persistence:
enabled: true
size: 1Gi
+3 -3
View File
@@ -2,7 +2,7 @@ apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
cert-manager.io/cluster-issuer: ca-issuer
nginx.ingress.kubernetes.io/backend-protocol: HTTP
nginx.ingress.kubernetes.io/cors-allow-headers: Content-Type, x-gatus-cache
nginx.ingress.kubernetes.io/enable-cors: "true"
@@ -15,7 +15,7 @@ metadata:
spec:
ingressClassName: nginx
rules:
- host: uptime.hel1.oceanbox.io
- host: uptime.adm.hel1.obx
http:
paths:
- backend:
@@ -27,5 +27,5 @@ spec:
pathType: ImplementationSpecific
tls:
- hosts:
- uptime.hel1.oceanbox.io
- uptime.adm.hel1.obx
secretName: gatus-tls
+1 -1
View File
@@ -29,7 +29,7 @@ config:
security:
oidc:
issuer-url: "https://login.microsoftonline.com/3f737008-e9a0-4485-9d27-40329d288089/v2.0"
redirect-url: "https://uptime.hel1.oceanbox.io/authorization-code/callback"
redirect-url: "https://uptime.adm.hel1.obx/authorization-code/callback"
client-id: "38af7e63-b097-4f1b-8dac-3d0030dceaf9"
client-secret: "${CLIENT_SECRET}"
scopes: ["openid"]
+17 -3
View File
@@ -103,11 +103,10 @@ configMaps:
"Moritz.Jorg@oceanbox.io",
"simen.kirkvik@oceanbox.io",
"stig.r.jensen@oceanbox.io",
"ole.tytlandsvik@oceanbox.io",
],
"group:devops": [
"radovan.bast@oceanbox.io",
"ole.tytlandsvik@oceanbox.io",
"ismael.abujadur@oceanbox.io",
],
"group:oceanographer": [
"frank.gaardsted@oceanbox.io",
@@ -115,6 +114,7 @@ configMaps:
"helge.avlesen@oceanbox.io",
"isa.rosso@oceanbox.io",
"jonathan.lilly@oceanbox.io",
"faith.iha@oceanbox.io",
],
"group:manager": [
"svenn.hanssen@oceanbox.io",
@@ -124,7 +124,9 @@ configMaps:
"pal.herstad@oceanbox.io",
],
"group:dev": [],
"group:intern": [],
"group:intern": [
"haavahak@stud.ntnu.no",
],
},
// tagOwners in tailscale is an association between a TAG and the people allowed to set this TAG on a server.
// This is documented [here](https://tailscale.com/kb/1068/acl-tags#defining-a-tag)
@@ -216,6 +218,15 @@ configMaps:
"dc.hel1.net:443",
]
},
{
"action": "accept",
"src": [
"group:intern",
],
"dst": [
"tag:hpc:22,80,443",
]
},
{
"action": "accept",
"src": [ "*" ],
@@ -236,6 +247,7 @@ configMaps:
{ "action": "accept", "src": [ "svenn.hanssen@oceanbox.io" ], "dst": [ "svenn.hanssen@oceanbox.io:*" ] },
{ "action": "accept", "src": [ "hilde.iversen@oceanbox.io" ], "dst": [ "hilde.iversen@oceanbox.io:*" ] },
{ "action": "accept", "src": [ "pal.herstad@oceanbox.io" ], "dst": [ "pal.herstad@oceanbox.io:*" ] },
{ "action": "accept", "src": [ "faith.iha@oceanbox.io" ], "dst": [ "faith.iha@oceanbox.io:*" ] },
// s/"\([^"]*\)"/{ "action": "accept", "src": [ "\1" ], "dst": [ "\1:*" ] },
]
}
@@ -248,6 +260,7 @@ configMaps:
{ "name": "maps.beta.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "maps.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "atlantis.beta.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "codex.adm.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "codex.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "auth.oceanbox.io", "type": "A", "value": "10.255.241.11" },
@@ -330,6 +343,7 @@ configMaps:
{ "name": "mrtz-sorcerer.dev.vtn.obx", "type": "A", "value": "172.16.239.221" },
{ "name": "mrtz-plume.ekman.oceanbox.io", "type": "A", "value": "10.255.241.99" },
{ "name": "simkir-atlantis.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "simkir-maps.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "simkir-codex.dev.oceanbox.io", "type": "A", "value": "10.255.241.11" },
{ "name": "simkir-sorcerer.ekman.oceanbox.io", "type": "A", "value": "10.255.241.99" },
{ "name": "simkir-sorcerer.dev.vtn.obx", "type": "A", "value": "172.16.239.221" },
@@ -0,0 +1,11 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: custom-error-pages
namespace: ingress-nginx
data:
"404": |
<p>Could not find the page you were looking for.</p>
"503": |
<p>Oceanbox is having a woopsie dupsie. uwu</p>
<img alt="Insert oceanbox Text TV maintanence window here."/>
@@ -113,8 +113,41 @@ controller:
defaultBackend:
enabled: true
image:
registry: registry.k8s.io
image: ingress-nginx/custom-error-pages
## for backwards compatibility consider setting the full image url via the repository value below
## use *either* current default registry/image or repository format or installing chart by providing the values.yaml will fail
## repository:
tag: "v1.2.5"
pullPolicy: IfNotPresent
# nobody user -> uid 65534
runAsUser: 65534
runAsNonRoot: true
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
tolerations:
- key: unschedulable
operator: Exists
effect: NoSchedule
## Additional volumeMounts to the default backend container.
# - name: copy-portal-skins
# mountPath: /var/lib/lemonldap-ng/portal/skins
extraVolumeMounts:
- name: custom-error-pages
mountPath: /www
## Additional volumes to the default backend pod.
# - name: copy-portal-skins
# emptyDir: {}
extraVolumes:
- name: custom-error-pages
configMap:
name: custom-error-pages
items:
- key: "404"
path: "404.html"
- key: "503"
path: "503.html"
+1 -1
View File
@@ -1,6 +1,6 @@
replicaCount: 1
image:
tag: "01ac1d47-debug"
tag: "ce1340fa-debug"
env:
- name: APP_VERSION
value: "0.0.0-staging"
+1 -1
View File
@@ -6,5 +6,5 @@
"appVersion": "1.0.0",
"cacheDir": "/data/archives/cache/prod",
"otelCollector": "http://10.255.241.12:4317",
"sentryUrl": "https://2b68ecf0c4d02e6cc9433c371321ac9d@o4509530141622272.ingest.de.sentry.io/4509910315237456",
"sentryUrl": "https://2b68ecf0c4d02e6cc9433c371321ac9d@o4509530141622272.ingest.de.sentry.io/4509910315237456"
}
+6
View File
@@ -35,4 +35,10 @@ spec:
prune: true
selfHeal: false
{{- end }}
ignoreDifferences:
- group: apps
kind: StatefulSet
name: rabbitmq
jsonPointers:
- /spec/template/metadata/annotations
{{- end }}
+9
View File
@@ -30,3 +30,12 @@ ingress:
secrets: []
selfSigned: false
tls: true
resources:
limits:
ephemeral-storage: 1Gi
memory: 512Mi
requests:
cpu: 250m
ephemeral-storage: 50Mi
memory: 512Mi
@@ -36,3 +36,12 @@ ingress:
secrets: []
selfSigned: false
tls: true
resources:
limits:
ephemeral-storage: 1Gi
memory: 512Mi
requests:
cpu: 250m
ephemeral-storage: 50Mi
memory: 512Mi

Some files were not shown because too many files have changed in this diff Show More