feat: Add Kueue and JobSet to ekman

This commit is contained in:
2026-03-16 14:52:43 +01:00
parent cc8a121bf6
commit be7954d499
8 changed files with 41 additions and 146 deletions
+1 -1
View File
@@ -8,7 +8,7 @@ releases:
- name: kueue
namespace: kueue-system
chart: oci://registry.k8s.io/kueue/charts/kueue
version: 0.15.0
version: 0.16.2
condition: kueue.enabled
values:
- ../values/kueue/values/values.yaml
+1 -1
View File
@@ -38,7 +38,7 @@ spec:
- group: ""
kind: Secret
name: jobset-webhook-server-cert
namespace: default
namespace: jobset-system
jsonPointers:
- /data
{{- end }}
-3
View File
@@ -1,3 +0,0 @@
kueue:
enabled: true
autosync: false
+32 -27
View File
@@ -2,50 +2,55 @@ apiVersion: kueue.x-k8s.io/v1beta1
kind: ResourceFlavor
metadata:
name: compute # Just needs to exist, can be managed with taints/tolerations
spec:
nodeLabels:
node-role.kubernetes.io/compute: compute
topology.kubernetes.io/group: c1 # Only run on C1 for now
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: ClusterQueue
metadata:
name: jobs
name: cluster-queue
spec:
cohort: general
namespaceSelector: {} # Accept workloads from any namespace
preemption:
withinClusterQueue: "LowerPriority" # Allow higher priority to preempt lower
queueingStrategy: BestEffortFIFO
# preemption:
# withinClusterQueue: "LowerPriority" # Allow higher priority to preempt lower
resourceGroups:
- coveredResources: ["cpu", "memory"] # Cover both memory and cpu resources
flavors:
- name: compute
resources:
- name: "cpu"
nominalQuota: '4'
nominalQuota: '32'
- name: "memory"
nominalQuota: 8Gi
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: LocalQueue
metadata:
name: prod-queue
namespace: prod-sorcerer
spec:
clusterQueue: jobs
nominalQuota: 64Gi
# ---
# apiVersion: kueue.x-k8s.io/v1beta1
# kind: LocalQueue
# metadata:
# name: prod-queue
# namespace: prod-queue
# spec:
# clusterQueue: cluster-queue
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: LocalQueue
metadata:
name: staging-queue
namespace: staging-sorcerer
namespace: dev-queue
spec:
clusterQueue: jobs
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: WorkloadPriorityClass
metadata:
name: "normal"
value: 100
---
apiVersion: kueue.x-k8s.io/v1beta1
kind: WorkloadPriorityClass
metadata:
name: "high"
value: 200 # Higher value = higher priority
clusterQueue: cluster-queue
# ---
# apiVersion: kueue.x-k8s.io/v1beta1
# kind: WorkloadPriorityClass
# metadata:
# name: "normal"
# value: 100
# ---
# apiVersion: kueue.x-k8s.io/v1beta1
# kind: WorkloadPriorityClass
# metadata:
# name: "high"
# value: 200 # Higher value = higher priority
-89
View File
@@ -1,89 +0,0 @@
{{- if eq .Values.clusterConfig.cluster "ekman"}}
# Ingress for the KueueViz dashboard; rendered only when
# .Values.clusterConfig.cluster is "ekman".
# Host kueue.dev.tos.obx is routed as follows:
#   /ws and /api... -> kueue-kueueviz-backend:8080
#   /               -> kueue-kueueviz-frontend:8080
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: kueueviz-ingress
namespace: kueue-system
annotations:
# Certificate for the tls section below is requested from the "ca-issuer" ClusterIssuer.
cert-manager.io/cluster-issuer: ca-issuer
nginx.ingress.kubernetes.io/backend-protocol: HTTP
nginx.ingress.kubernetes.io/proxy-buffer-size: 128k
# Timeouts are quoted because annotation values must be strings.
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
# NOTE(review): ssl-passthrough is enabled together with backend-protocol HTTP
# and a tls section; passthrough normally hands TLS straight to the backend,
# which looks contradictory here. The "rossby" variant of this Ingress does not
# set it. Confirm this is intended.
nginx.ingress.kubernetes.io/ssl-passthrough: "true"
nginx.ingress.kubernetes.io/ssl-redirect: "true"
nginx.ingress.kubernetes.io/websocket-services: kueue-kueueviz-backend
spec:
rules:
- host: kueue.dev.tos.obx
http:
paths:
- path: /ws
pathType: Prefix
backend:
service:
name: kueue-kueueviz-backend
port:
number: 8080
# NOTE(review): this path uses regex syntax but is declared with
# pathType: Prefix and no use-regex annotation is visible in this manifest.
# Verify that the controller actually matches /api requests with it.
- path: /api(/|$)(.*)
pathType: Prefix
backend:
service:
name: kueue-kueueviz-backend
port:
number: 8080
# Catch-all: everything else goes to the frontend service.
- path: /
pathType: Prefix
backend:
service:
name: kueue-kueueviz-frontend
port:
number: 8080
tls:
- hosts:
- kueue.dev.tos.obx
secretName: kueueviz-tls
{{- end}}
---
{{- if eq .Values.clusterConfig.cluster "rossby"}}
# Ingress for the KueueViz dashboard; rendered only when
# .Values.clusterConfig.cluster is "rossby".
# Host kueue.dev.vtn.obx is routed as follows:
#   /ws and /api... -> kueue-kueueviz-backend:8080
#   /               -> kueue-kueueviz-frontend:8080
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: kueueviz-ingress
namespace: kueue-system
annotations:
# Certificate for the tls section below is requested from the "ca-issuer" ClusterIssuer.
cert-manager.io/cluster-issuer: ca-issuer
nginx.ingress.kubernetes.io/websocket-services: kueue-kueueviz-backend
# Timeouts are quoted because annotation values must be strings.
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
spec:
rules:
- host: kueue.dev.vtn.obx
http:
paths:
- path: /ws
pathType: Prefix
backend:
service:
name: kueue-kueueviz-backend
port:
number: 8080
# NOTE(review): this path uses regex syntax but is declared with
# pathType: Prefix and no use-regex annotation is visible in this manifest.
# Verify that the controller actually matches /api requests with it.
- path: /api(/|$)(.*)
pathType: Prefix
backend:
service:
name: kueue-kueueviz-backend
port:
number: 8080
# Catch-all: everything else goes to the frontend service.
- path: /
pathType: Prefix
backend:
service:
name: kueue-kueueviz-frontend
port:
number: 8080
tls:
- hosts:
- kueue.dev.vtn.obx
secretName: kueueviz-tls
{{- end}}
-9
View File
@@ -1,9 +0,0 @@
# KueueViz value overrides for the kueue.dev.tos.obx host: sets
# KUEUEVIZ_ALLOWED_ORIGINS on the backend and REACT_APP_WEBSOCKET_URL on the
# frontend. Both values use the same hostname (https:// and wss:// schemes).
kueueViz:
backend:
env:
- name: KUEUEVIZ_ALLOWED_ORIGINS
value: "https://kueue.dev.tos.obx"
frontend:
env:
- name: REACT_APP_WEBSOCKET_URL
value: "wss://kueue.dev.tos.obx"
-9
View File
@@ -1,9 +0,0 @@
# KueueViz value overrides for the kueue.dev.vtn.obx host: sets
# KUEUEVIZ_ALLOWED_ORIGINS on the backend and REACT_APP_WEBSOCKET_URL on the
# frontend. Both values use the same hostname (https:// and wss:// schemes).
kueueViz:
backend:
env:
- name: KUEUEVIZ_ALLOWED_ORIGINS
value: "https://kueue.dev.vtn.obx"
frontend:
env:
- name: REACT_APP_WEBSOCKET_URL
value: "wss://kueue.dev.vtn.obx"
+7 -7
View File
@@ -1,9 +1,9 @@
controllerManager:
featureGates:
- name: TopologyAwareScheduling
enabled: true
- name: LocalQueueMetrics
enabled: true
# featureGates:
# - name: TopologyAwareScheduling
# enabled: true
# - name: LocalQueueMetrics
# enabled: true
managerConfig:
controllerManagerConfigYaml: |
apiVersion: config.kueue.x-k8s.io/v1beta1
@@ -14,8 +14,8 @@ controllerManager:
- jobset.x-k8s.io/jobset
internalCertManagement:
enable: false
enableCertManager: false
enableCertManager: true
enablePrometheus: true
metrics:
prometheusNamespace: prometheus
enableKueueViz: true
enableKueueViz: false