chore: Cleanup old nix code and add helmfile experiments

This commit is contained in:
Moritz Jörg
2025-06-02 12:32:14 +02:00
parent 96debd5181
commit 65d65ea126
38 changed files with 1109 additions and 1078 deletions
@@ -0,0 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- _manifest.yaml
@@ -0,0 +1,4 @@
generatorOptions:
disableNameSuffixHash: true
resources:
- ../base
+39
View File
@@ -0,0 +1,39 @@
{{- if .Values.clusterConfig.argo.enabled }}
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: kyverno
namespace: argocd
spec:
destination:
namespace: kyverno
server: 'https://kubernetes.default.svc'
sources:
- repoURL: {{ .Values.clusterConfig.manifests }}
targetRevision: HEAD
path: helmfiles/kyverno
plugin:
name: helmfile
env:
- name: CLUSTER_NAME
value: {{ .Values.clusterConfig.cluster }}
project: sys
syncPolicy:
managedNamespaceMetadata:
labels:
component: sys
syncOptions:
- CreateNamespace=true
- ApplyOutOfSyncOnly=true
# - ServerSideApply=true
{{- if .Values.kyverno.autosync }}
automated:
prune: true
# selfHeal: false
{{- end }}
ignoreDifferences:
- group: batch
kind: CronJob
jqPathExpressions:
- '.spec.jobTemplate.spec.template.spec.containers[]?.resources'
{{- end }}
+11
View File
@@ -0,0 +1,11 @@
kyverno:
enabled: true
autosync: false
metrics: false
resources:
cleanupController:
memory: "128Mi"
reportsController:
memory: "128Mi"
backgroundController:
memory: "128Mi"
+41
View File
@@ -0,0 +1,41 @@
replicaCount: 3
{{ if .Values.kyverno.metrics }}
admissionController:
serviceMonitor:
enabled: true
metricsService:
create: true
backgroundController:
serviceMonitor:
enabled: true
metricsService:
create: true
cleanupController:
serviceMonitor:
enabled: true
metricsService:
create: true
reportsController:
serviceMonitor:
enabled: true
metricsService:
create: true
{{ end }}
cleanupController:
resources:
limits:
memory: {{ .Values.kyverno.resources.cleanupController.memory }}
requests:
memory: {{ .Values.kyverno.resources.cleanupController.memory }}
reportsController:
resources:
limits:
memory: {{ .Values.kyverno.resources.reportsController.memory }}
requests:
memory: {{ .Values.kyverno.resources.reportsController.memory }}
backgroundController:
resources:
limits:
memory: {{ .Values.kyverno.resources.backgroundController.memory }}
requests:
memory: {{ .Values.kyverno.resources.backgroundController.memory }}
@@ -0,0 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- _manifest.yaml
@@ -0,0 +1,4 @@
generatorOptions:
disableNameSuffixHash: true
resources:
- ../base
@@ -0,0 +1,34 @@
{{- if .Values.clusterConfig.argo.enabled }}
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: postgres-operator
namespace: argocd
spec:
destination:
namespace: cnpg
server: 'https://kubernetes.default.svc'
sources:
- repoURL: {{ .Values.clusterConfig.manifests }}
targetRevision: HEAD
path: helmfiles/postgres-operator
plugin:
name: helmfile
env:
- name: CLUSTER_NAME
value: {{ .Values.clusterConfig.cluster }}
project: sys
syncPolicy:
managedNamespaceMetadata:
labels:
component: sys
syncOptions:
- CreateNamespace=true
- ApplyOutOfSyncOnly=true
# - ServerSideApply=true
{{- if .Values.postgres_operator.autosync }}
automated:
prune: true
# selfHeal: false
{{- end }}
{{- end }}
@@ -0,0 +1,3 @@
postgres_operator:
enabled: true
autosync: false
@@ -0,0 +1,74 @@
{{- if .Values.clusterConfig.argo.enabled }}
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: prometheus
namespace: argocd
spec:
destination:
namespace: prometheus
server: 'https://kubernetes.default.svc'
sources:
- repoURL: {{ .Values.clusterConfig.manifests }}
targetRevision: HEAD
path: helmfiles/cilium
plugin:
name: helmfile
env:
- name: CLUSTER_NAME
value: {{ .Values.clusterConfig.cluster }}
project: sys
syncPolicy:
syncOptions:
- ServerSideApply=true
{{- if .Values.prometheus.autosync }}
automated:
prune: true
# selfHeal: false
{{- end }}
ignoreDifferences:
- group: apps
kind: Deployment
jqPathExpressions:
- '.spec.template.spec.containers[]?.resources'
- group: monitoring.coreos.com
kind: ServiceMonitor
jqPathExpressions:
- '.spec.endpoints[]?.relabelings'
- group: admissionregistration.k8s.io
kind: MutatingWebhookConfiguration
jqPathExpressions:
- '.webhooks[]?.clientConfig.caBundle'
- group: admissionregistration.k8s.io
kind: ValidatingWebhookConfiguration
jqPathExpressions:
- '.webhooks[]?.clientConfig.caBundle'
---
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: prometheus-crd
namespace: argocd
annotations:
argocd.argoproj.io/sync-wave: "-1"
finalizers:
- resources-finalizer.argocd.argoproj.io
spec:
destination:
namespace: prometheus
server: 'https://kubernetes.default.svc'
source:
repoURL: 'https://prometheus-community.github.io/helm-charts'
targetRevision: '14.0.0'
chart: prometheus-operator-crds
project: sys
syncPolicy:
managedNamespaceMetadata:
labels:
component: sys
automated: {}
syncOptions:
- ServerSideApply=true
- CreateNamespace=true
- ApplyOutOfSyncOnly=true
{{- end }}
@@ -0,0 +1,15 @@
cilium:
enabled: true
nodePort:
enable: true
l2announcement:
enable: true
loadbalancerPool:
enabled: true
cidr:
- 10.255.241.11/32
- 10.255.241.12/32
- 10.255.241.13/32
- 10.255.241.14/32
- 10.255.241.15/32
+32
View File
@@ -0,0 +1,32 @@
cilium:
enabled: false
autosync: true
spire:
enabled: false
envoy:
enabled: false
hubble:
ui: true
enabled: false
encryption:
enabled: true
type: wireguard
kubeProxyReplacement: true
l2announcement:
enabled: false
nodePort:
enabled: false
gatewayAPI:
enabled: false
ingressController:
enabled: false
defaultClass: false
loadbalancerMode: shared
policyAuditMode: false
upgradeCompatability: 1.15
k8sServiceHost: localhost
k8sServicePort: 7445
loadbalancerPool:
enabled: false
cidr: []
@@ -0,0 +1,106 @@
authentication:
mutual:
spire:
enabled: {{ .Values.cilium.spire.enabled }}
cgroup:
autoMount:
enabled: false
hostRoot: /sys/fs/cgroup
dashboards:
enabled: true
namespace: prometheus
enableXTSocketFallback: false
encryption:
enabled: {{ .Values.cilium.encryption.enabled }}
type: {{ .Values.cilium.encryption.type}}
envoy:
enabled: {{ .Values.cilium.envoy.enabled }}
prometheus:
serviceMonitor:
enabled: {{ .Values.cilium.envoy.enabled }}
extraConfig:
enable-envoy-config: "true"
hubble:
enabled: true
tls:
auto:
method: cronJob
metrics:
dashboards:
enabled: true
namespace: prometheus
enabled:
- dns:query;ignoreAAAA
- drop
- tcp
- flow
- icmp
- policy:sourceContext=app|workload-name|pod|reserved-identity;destinationContext=app|workload-name|pod|dns|reserved-identity;labelsContext=source_namespace,destination_namespace
- httpV2:exemplars=false;labelsContext=source_ip,source_namespace,source_workload,destination_ip,destination_namespace,destination_workload,traffic_direction
port: 12304
serviceMonitor:
enabled: true
redact:
enabled: true
relay:
enabled: true
prometheus:
enabled: true
serviceMonitor:
enabled: true
ui:
enabled: {{ .Values.cilium.hubble.ui }}
ipam:
mode: kubernetes
kubeProxyReplacement: {{ .Values.cilium.kubeProxyReplacement }}
l2announcements:
enabled: {{ .Values.cilium.l2announcement.enabled }}
k8sServiceHost: {{ .Values.cilium.k8sServiceHost }}
k8sServicePort: {{ .Values.cilium.k8sServicePort }}
nodePort:
enabled: {{ .Values.cilium.nodePort.enabled }}
gatewayAPI:
enabled: {{ .Values.cilium.gatewayAPI.enabled }}
ingressController:
enabled: {{ .Values.cilium.ingressController.enabled }}
default: {{ .Values.cilium.ingressController.defaultClass }}
loadbalancerMode: {{ .Values.cilium.ingressController.loadbalancerMode }}
operator:
dashboards:
enabled: true
namespace: prometheus
prometheus:
enabled: true
port: 12301
serviceMointor:
enabled: true
port: 12302
rollOutPods: true
policyAuditMode: {{ .Values.cilium.policyAuditMode }}
prometheus:
enabled: true
port: 12300
serviceMonitor:
enabled: true
rollOutCiliumPods: true
securityContext:
capabilities:
ciliumAgent:
- CHOWN
- KILL
- NET_ADMIN
- NET_RAW
- IPC_LOCK
- SYS_ADMIN
- SYS_RESOURCE
- DAC_OVERRIDE
- FOWNER
- SETGID
- SETUID
cleanCiliumState:
- NET_ADMIN
- SYS_ADMIN
- SYS_RESOURCE
{{- with .Values.cilium.upgradeCompatability}}
upgradeCompatability: {{ . }}
{{- end }}
@@ -0,0 +1,4 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- _manifest.yaml
@@ -0,0 +1,4 @@
generatorOptions:
disableNameSuffixHash: true
resources:
- ../base
+34
View File
@@ -0,0 +1,34 @@
{{- if .Values.clusterConfig.argo.enabled }}
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
name: velero
namespace: argocd
spec:
destination:
namespace: velero
server: 'https://kubernetes.default.svc'
sources:
- repoURL: {{ .Values.clusterConfig.manifests }}
targetRevision: HEAD
path: helmfiles/velero
plugin:
name: helmfile
env:
- name: CLUSTER_NAME
value: {{ .Values.clusterConfig.cluster }}
project: sys
syncPolicy:
managedNamespaceMetadata:
labels:
component: sys
syncOptions:
- CreateNamespace=true
- ApplyOutOfSyncOnly=true
# - ServerSideApply=true
{{- if .Values.velero.autosync }}
automated:
prune: true
# selfHeal: false
{{- end }}
{{- end }}
+28
View File
@@ -0,0 +1,28 @@
velero:
enabled: true
autosync: true
kubeletRootDir: "/var/lib/kubernetes/pods"
bucket: velero-backup
bsl: default
# Opt-in or opt-out pvc backup
# https://velero.io/docs/main/file-system-backup/#to-back-up
backupAllVolumes: true
credentials:
secretName: "s3-credentials"
s3:
region: us-east-1
url: "https://nutanix-obj-s3.kube-system"
insecureSkipTLSVerify: true
resources:
velero:
request:
cpu: 500m
memory: 1Gi
limit:
memory: 2Gi
nodeAgent:
request:
cpu: 500m
memory: 1Gi
limit:
memory: 2Gi
+74
View File
@@ -0,0 +1,74 @@
initContainers:
- name: velero-plugin-for-aws
image: velero/velero-plugin-for-aws:v1.9.0
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /target
name: plugins
credentials:
useSecret: true
existingSecret: {{ .Values.velero.credentials.secretName }}
deployNodeAgent: true
nodeAgent:
podVolumePath: {{ .Values.velero.kubeletRootDir }}
resources:
requests:
cpu: {{ .Values.velero.resources.nodeAgent.request.cpu | default "500m" }}
memory: {{ .Values.velero.resources.nodeAgent.request.memory | default "1Gi" }}
limits:
memory: {{ .Values.velero.resources.nodeAgent.limit.memory | default "2Gi" }}
snapshotsEnabled: false
resources:
requests:
cpu: {{ .Values.velero.resources.velero.request.cpu | default "500m" }}
memory: {{ .Values.velero.resources.velero.request.memory | default "1Gi" }}
limits:
memory: {{ .Values.velero.resources.velero.limit.memory | default "1Gi" }}
configuration:
uploaderType: kopia
# Backup all volumes by default
defaultVolumesToFsBackup: {{ .Values.velero.backupAllVolumes }}
## https://velero.io/docs/v1.6/api-types/backupstoragelocation/
backupStorageLocation:
- name: {{ .Values.velero.bsl }}
bucket: {{ .Values.velero.bucket }}
provider: aws
default: true
## prefix is the directory under which all Velero data should be stored within the bucket. Optional.
prefix: {{ .Values.cluster_config.cluster }}/velero
accessMode: ReadWrite
config:
## ONLY us-east-1 region is supported by nutanix
region: {{ .Values.velero.s3.region }}
s3ForcePathStyle: "true"
s3Url: {{ .Values.velero.s3.url }}
{{- if .Values.velero.s3.insecureSkipTLSVerify }}
insecureSkipTLSVerify: true
{{- end }}
metrics:
serviceMonitor:
enabled: true
additionalLabels:
k8s-app: "velero"
release: "prometheus"
prometheusRule:
enabled: true
# namespace: ""
# Rules to be deployed
spec:
- alert: VeleroBackupPartialFailures
annotations:
message: Velero backup {{` {{ $labels.schedule }} `}} has {{` {{$value | humanizePercentage}} `}} partialy failed backups.
expr: |-
velero_backup_partial_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""} > 0.25
for: 15m
labels:
severity: critical
- alert: VeleroBackupFailures
annotations:
message: Velero backup {{` {{$labels.schedule}} `}} has {{` {{$value | humanizePercentage}} `}} failed backups.
expr: |-
velero_backup_failure_total{schedule!=""} / velero_backup_attempt_total{schedule!=""} > 0.25
for: 15m
labels:
severity: critical