feat: Add Kueue and JobSet to ekman
This commit is contained in:
@@ -8,7 +8,7 @@ releases:
|
||||
- name: kueue
|
||||
namespace: kueue-system
|
||||
chart: oci://registry.k8s.io/kueue/charts/kueue
|
||||
version: 0.15.0
|
||||
version: 0.16.2
|
||||
condition: kueue.enabled
|
||||
values:
|
||||
- ../values/kueue/values/values.yaml
|
||||
|
||||
@@ -38,7 +38,7 @@ spec:
|
||||
- group: ""
|
||||
kind: Secret
|
||||
name: jobset-webhook-server-cert
|
||||
namespace: default
|
||||
namespace: jobset-system
|
||||
jsonPointers:
|
||||
- /data
|
||||
{{- end }}
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
kueue:
|
||||
enabled: true
|
||||
autosync: false
|
||||
@@ -2,50 +2,55 @@ apiVersion: kueue.x-k8s.io/v1beta1
|
||||
kind: ResourceFlavor
|
||||
metadata:
|
||||
name: compute # Just needs to exist, can be managed with taints/tolerations
|
||||
spec:
|
||||
nodeLabels:
|
||||
node-role.kubernetes.io/compute: compute
|
||||
topology.kubernetes.io/group: c1 # Only run on C1 for now
|
||||
---
|
||||
apiVersion: kueue.x-k8s.io/v1beta1
|
||||
kind: ClusterQueue
|
||||
metadata:
|
||||
name: jobs
|
||||
name: cluster-queue
|
||||
spec:
|
||||
cohort: general
|
||||
namespaceSelector: {} # Accept workloads from any namespace
|
||||
preemption:
|
||||
withinClusterQueue: "LowerPriority" # Allow higher priority to preempt lower
|
||||
queueingStrategy: BestEffortFIFO
|
||||
# preemption:
|
||||
# withinClusterQueue: "LowerPriority" # Allow higher priority to preempt lower
|
||||
resourceGroups:
|
||||
- coveredResources: ["cpu", "memory"] # Cover both memory and cpu resources
|
||||
flavors:
|
||||
- name: compute
|
||||
resources:
|
||||
- name: "cpu"
|
||||
nominalQuota: '4'
|
||||
nominalQuota: '32'
|
||||
- name: "memory"
|
||||
nominalQuota: 8Gi
|
||||
---
|
||||
apiVersion: kueue.x-k8s.io/v1beta1
|
||||
kind: LocalQueue
|
||||
metadata:
|
||||
name: prod-queue
|
||||
namespace: prod-sorcerer
|
||||
spec:
|
||||
clusterQueue: jobs
|
||||
nominalQuota: 64Gi
|
||||
# ---
|
||||
# apiVersion: kueue.x-k8s.io/v1beta1
|
||||
# kind: LocalQueue
|
||||
# metadata:
|
||||
# name: prod-queue
|
||||
# namespace: prod-queue
|
||||
# spec:
|
||||
# clusterQueue: cluster-queue
|
||||
---
|
||||
apiVersion: kueue.x-k8s.io/v1beta1
|
||||
kind: LocalQueue
|
||||
metadata:
|
||||
name: staging-queue
|
||||
namespace: staging-sorcerer
|
||||
namespace: dev-queue
|
||||
spec:
|
||||
clusterQueue: jobs
|
||||
---
|
||||
apiVersion: kueue.x-k8s.io/v1beta1
|
||||
kind: WorkloadPriorityClass
|
||||
metadata:
|
||||
name: "normal"
|
||||
value: 100
|
||||
---
|
||||
apiVersion: kueue.x-k8s.io/v1beta1
|
||||
kind: WorkloadPriorityClass
|
||||
metadata:
|
||||
name: "high"
|
||||
value: 200 # Higher value = higher priority
|
||||
clusterQueue: cluster-queue
|
||||
# ---
|
||||
# apiVersion: kueue.x-k8s.io/v1beta1
|
||||
# kind: WorkloadPriorityClass
|
||||
# metadata:
|
||||
# name: "normal"
|
||||
# value: 100
|
||||
# ---
|
||||
# apiVersion: kueue.x-k8s.io/v1beta1
|
||||
# kind: WorkloadPriorityClass
|
||||
# metadata:
|
||||
# name: "high"
|
||||
# value: 200 # Higher value = higher priority
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
{{- if eq .Values.clusterConfig.cluster "ekman"}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: kueueviz-ingress
|
||||
namespace: kueue-system
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: ca-issuer
|
||||
nginx.ingress.kubernetes.io/backend-protocol: HTTP
|
||||
nginx.ingress.kubernetes.io/proxy-buffer-size: 128k
|
||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
|
||||
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
|
||||
nginx.ingress.kubernetes.io/ssl-passthrough: "true"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/websocket-services: kueue-kueueviz-backend
|
||||
spec:
|
||||
rules:
|
||||
- host: kueue.dev.tos.obx
|
||||
http:
|
||||
paths:
|
||||
- path: /ws
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: kueue-kueueviz-backend
|
||||
port:
|
||||
number: 8080
|
||||
- path: /api(/|$)(.*)
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: kueue-kueueviz-backend
|
||||
port:
|
||||
number: 8080
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: kueue-kueueviz-frontend
|
||||
port:
|
||||
number: 8080
|
||||
tls:
|
||||
- hosts:
|
||||
- kueue.dev.tos.obx
|
||||
secretName: kueueviz-tls
|
||||
{{- end}}
|
||||
---
|
||||
{{- if eq .Values.clusterConfig.cluster "rossby"}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: kueueviz-ingress
|
||||
namespace: kueue-system
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: ca-issuer
|
||||
nginx.ingress.kubernetes.io/websocket-services: kueue-kueueviz-backend
|
||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
|
||||
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
|
||||
spec:
|
||||
rules:
|
||||
- host: kueue.dev.vtn.obx
|
||||
http:
|
||||
paths:
|
||||
- path: /ws
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: kueue-kueueviz-backend
|
||||
port:
|
||||
number: 8080
|
||||
- path: /api(/|$)(.*)
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: kueue-kueueviz-backend
|
||||
port:
|
||||
number: 8080
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
backend:
|
||||
service:
|
||||
name: kueue-kueueviz-frontend
|
||||
port:
|
||||
number: 8080
|
||||
tls:
|
||||
- hosts:
|
||||
- kueue.dev.vtn.obx
|
||||
secretName: kueueviz-tls
|
||||
{{- end}}
|
||||
@@ -1,9 +0,0 @@
|
||||
kueueViz:
|
||||
backend:
|
||||
env:
|
||||
- name: KUEUEVIZ_ALLOWED_ORIGINS
|
||||
value: "https://kueue.dev.tos.obx"
|
||||
frontend:
|
||||
env:
|
||||
- name: REACT_APP_WEBSOCKET_URL
|
||||
value: "wss://kueue.dev.tos.obx"
|
||||
@@ -1,9 +0,0 @@
|
||||
kueueViz:
|
||||
backend:
|
||||
env:
|
||||
- name: KUEUEVIZ_ALLOWED_ORIGINS
|
||||
value: "https://kueue.dev.vtn.obx"
|
||||
frontend:
|
||||
env:
|
||||
- name: REACT_APP_WEBSOCKET_URL
|
||||
value: "wss://kueue.dev.vtn.obx"
|
||||
@@ -1,9 +1,9 @@
|
||||
controllerManager:
|
||||
featureGates:
|
||||
- name: TopologyAwareScheduling
|
||||
enabled: true
|
||||
- name: LocalQueueMetrics
|
||||
enabled: true
|
||||
# featureGates:
|
||||
# - name: TopologyAwareScheduling
|
||||
# enabled: true
|
||||
# - name: LocalQueueMetrics
|
||||
# enabled: true
|
||||
managerConfig:
|
||||
controllerManagerConfigYaml: |
|
||||
apiVersion: config.kueue.x-k8s.io/v1beta1
|
||||
@@ -14,8 +14,8 @@ controllerManager:
|
||||
- jobset.x-k8s.io/jobset
|
||||
internalCertManagement:
|
||||
enable: false
|
||||
enableCertManager: false
|
||||
enableCertManager: true
|
||||
enablePrometheus: true
|
||||
metrics:
|
||||
prometheusNamespace: prometheus
|
||||
enableKueueViz: true
|
||||
enableKueueViz: false
|
||||
|
||||
Reference in New Issue
Block a user