feat: Add Kueue and JobSet to ekman
This commit is contained in:
@@ -8,7 +8,7 @@ releases:
|
|||||||
- name: kueue
|
- name: kueue
|
||||||
namespace: kueue-system
|
namespace: kueue-system
|
||||||
chart: oci://registry.k8s.io/kueue/charts/kueue
|
chart: oci://registry.k8s.io/kueue/charts/kueue
|
||||||
version: 0.15.0
|
version: 0.16.2
|
||||||
condition: kueue.enabled
|
condition: kueue.enabled
|
||||||
values:
|
values:
|
||||||
- ../values/kueue/values/values.yaml
|
- ../values/kueue/values/values.yaml
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ spec:
|
|||||||
- group: ""
|
- group: ""
|
||||||
kind: Secret
|
kind: Secret
|
||||||
name: jobset-webhook-server-cert
|
name: jobset-webhook-server-cert
|
||||||
namespace: default
|
namespace: jobset-system
|
||||||
jsonPointers:
|
jsonPointers:
|
||||||
- /data
|
- /data
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
kueue:
|
|
||||||
enabled: true
|
|
||||||
autosync: false
|
|
||||||
@@ -2,50 +2,55 @@ apiVersion: kueue.x-k8s.io/v1beta1
|
|||||||
kind: ResourceFlavor
|
kind: ResourceFlavor
|
||||||
metadata:
|
metadata:
|
||||||
name: compute # Just needs to exist, can be managed with taints/tolerations
|
name: compute # Just needs to exist, can be managed with taints/tolerations
|
||||||
|
spec:
|
||||||
|
nodeLabels:
|
||||||
|
node-role.kubernetes.io/compute: compute
|
||||||
|
topology.kubernetes.io/group: c1 # Only run on C1 for now
|
||||||
---
|
---
|
||||||
apiVersion: kueue.x-k8s.io/v1beta1
|
apiVersion: kueue.x-k8s.io/v1beta1
|
||||||
kind: ClusterQueue
|
kind: ClusterQueue
|
||||||
metadata:
|
metadata:
|
||||||
name: jobs
|
name: cluster-queue
|
||||||
spec:
|
spec:
|
||||||
cohort: general
|
cohort: general
|
||||||
namespaceSelector: {} # Accept workloads from any namespace
|
namespaceSelector: {} # Accept workloads from any namespace
|
||||||
preemption:
|
queueingStrategy: BestEffortFIFO
|
||||||
withinClusterQueue: "LowerPriority" # Allow higher priority to preempt lower
|
# preemption:
|
||||||
|
# withinClusterQueue: "LowerPriority" # Allow higher priority to preempt lower
|
||||||
resourceGroups:
|
resourceGroups:
|
||||||
- coveredResources: ["cpu", "memory"] # Cover both memory and cpu resources
|
- coveredResources: ["cpu", "memory"] # Cover both memory and cpu resources
|
||||||
flavors:
|
flavors:
|
||||||
- name: compute
|
- name: compute
|
||||||
resources:
|
resources:
|
||||||
- name: "cpu"
|
- name: "cpu"
|
||||||
nominalQuota: '4'
|
nominalQuota: '32'
|
||||||
- name: "memory"
|
- name: "memory"
|
||||||
nominalQuota: 8Gi
|
nominalQuota: 64Gi
|
||||||
---
|
# ---
|
||||||
apiVersion: kueue.x-k8s.io/v1beta1
|
# apiVersion: kueue.x-k8s.io/v1beta1
|
||||||
kind: LocalQueue
|
# kind: LocalQueue
|
||||||
metadata:
|
# metadata:
|
||||||
name: prod-queue
|
# name: prod-queue
|
||||||
namespace: prod-sorcerer
|
# namespace: prod-queue
|
||||||
spec:
|
# spec:
|
||||||
clusterQueue: jobs
|
# clusterQueue: cluster-queue
|
||||||
---
|
---
|
||||||
apiVersion: kueue.x-k8s.io/v1beta1
|
apiVersion: kueue.x-k8s.io/v1beta1
|
||||||
kind: LocalQueue
|
kind: LocalQueue
|
||||||
metadata:
|
metadata:
|
||||||
name: staging-queue
|
name: staging-queue
|
||||||
namespace: staging-sorcerer
|
namespace: dev-queue
|
||||||
spec:
|
spec:
|
||||||
clusterQueue: jobs
|
clusterQueue: cluster-queue
|
||||||
---
|
# ---
|
||||||
apiVersion: kueue.x-k8s.io/v1beta1
|
# apiVersion: kueue.x-k8s.io/v1beta1
|
||||||
kind: WorkloadPriorityClass
|
# kind: WorkloadPriorityClass
|
||||||
metadata:
|
# metadata:
|
||||||
name: "normal"
|
# name: "normal"
|
||||||
value: 100
|
# value: 100
|
||||||
---
|
# ---
|
||||||
apiVersion: kueue.x-k8s.io/v1beta1
|
# apiVersion: kueue.x-k8s.io/v1beta1
|
||||||
kind: WorkloadPriorityClass
|
# kind: WorkloadPriorityClass
|
||||||
metadata:
|
# metadata:
|
||||||
name: "high"
|
# name: "high"
|
||||||
value: 200 # Higher value = higher priority
|
# value: 200 # Higher value = higher priority
|
||||||
|
|||||||
@@ -1,89 +0,0 @@
|
|||||||
{{- if eq .Values.clusterConfig.cluster "ekman"}}
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
name: kueueviz-ingress
|
|
||||||
namespace: kueue-system
|
|
||||||
annotations:
|
|
||||||
cert-manager.io/cluster-issuer: ca-issuer
|
|
||||||
nginx.ingress.kubernetes.io/backend-protocol: HTTP
|
|
||||||
nginx.ingress.kubernetes.io/proxy-buffer-size: 128k
|
|
||||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
|
|
||||||
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
|
|
||||||
nginx.ingress.kubernetes.io/ssl-passthrough: "true"
|
|
||||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
|
||||||
nginx.ingress.kubernetes.io/websocket-services: kueue-kueueviz-backend
|
|
||||||
spec:
|
|
||||||
rules:
|
|
||||||
- host: kueue.dev.tos.obx
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
- path: /ws
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: kueue-kueueviz-backend
|
|
||||||
port:
|
|
||||||
number: 8080
|
|
||||||
- path: /api(/|$)(.*)
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: kueue-kueueviz-backend
|
|
||||||
port:
|
|
||||||
number: 8080
|
|
||||||
- path: /
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: kueue-kueueviz-frontend
|
|
||||||
port:
|
|
||||||
number: 8080
|
|
||||||
tls:
|
|
||||||
- hosts:
|
|
||||||
- kueue.dev.tos.obx
|
|
||||||
secretName: kueueviz-tls
|
|
||||||
{{- end}}
|
|
||||||
---
|
|
||||||
{{- if eq .Values.clusterConfig.cluster "rossby"}}
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
name: kueueviz-ingress
|
|
||||||
namespace: kueue-system
|
|
||||||
annotations:
|
|
||||||
cert-manager.io/cluster-issuer: ca-issuer
|
|
||||||
nginx.ingress.kubernetes.io/websocket-services: kueue-kueueviz-backend
|
|
||||||
nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
|
|
||||||
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
|
|
||||||
spec:
|
|
||||||
rules:
|
|
||||||
- host: kueue.dev.vtn.obx
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
- path: /ws
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: kueue-kueueviz-backend
|
|
||||||
port:
|
|
||||||
number: 8080
|
|
||||||
- path: /api(/|$)(.*)
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: kueue-kueueviz-backend
|
|
||||||
port:
|
|
||||||
number: 8080
|
|
||||||
- path: /
|
|
||||||
pathType: Prefix
|
|
||||||
backend:
|
|
||||||
service:
|
|
||||||
name: kueue-kueueviz-frontend
|
|
||||||
port:
|
|
||||||
number: 8080
|
|
||||||
tls:
|
|
||||||
- hosts:
|
|
||||||
- kueue.dev.vtn.obx
|
|
||||||
secretName: kueueviz-tls
|
|
||||||
{{- end}}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
kueueViz:
|
|
||||||
backend:
|
|
||||||
env:
|
|
||||||
- name: KUEUEVIZ_ALLOWED_ORIGINS
|
|
||||||
value: "https://kueue.dev.tos.obx"
|
|
||||||
frontend:
|
|
||||||
env:
|
|
||||||
- name: REACT_APP_WEBSOCKET_URL
|
|
||||||
value: "wss://kueue.dev.tos.obx"
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
kueueViz:
|
|
||||||
backend:
|
|
||||||
env:
|
|
||||||
- name: KUEUEVIZ_ALLOWED_ORIGINS
|
|
||||||
value: "https://kueue.dev.vtn.obx"
|
|
||||||
frontend:
|
|
||||||
env:
|
|
||||||
- name: REACT_APP_WEBSOCKET_URL
|
|
||||||
value: "wss://kueue.dev.vtn.obx"
|
|
||||||
@@ -1,9 +1,9 @@
|
|||||||
controllerManager:
|
controllerManager:
|
||||||
featureGates:
|
# featureGates:
|
||||||
- name: TopologyAwareScheduling
|
# - name: TopologyAwareScheduling
|
||||||
enabled: true
|
# enabled: true
|
||||||
- name: LocalQueueMetrics
|
# - name: LocalQueueMetrics
|
||||||
enabled: true
|
# enabled: true
|
||||||
managerConfig:
|
managerConfig:
|
||||||
controllerManagerConfigYaml: |
|
controllerManagerConfigYaml: |
|
||||||
apiVersion: config.kueue.x-k8s.io/v1beta1
|
apiVersion: config.kueue.x-k8s.io/v1beta1
|
||||||
@@ -14,8 +14,8 @@ controllerManager:
|
|||||||
- jobset.x-k8s.io/jobset
|
- jobset.x-k8s.io/jobset
|
||||||
internalCertManagement:
|
internalCertManagement:
|
||||||
enable: false
|
enable: false
|
||||||
enableCertManager: false
|
enableCertManager: true
|
||||||
enablePrometheus: true
|
enablePrometheus: true
|
||||||
metrics:
|
metrics:
|
||||||
prometheusNamespace: prometheus
|
prometheusNamespace: prometheus
|
||||||
enableKueueViz: true
|
enableKueueViz: false
|
||||||
|
|||||||
Reference in New Issue
Block a user