feat: use central, off-site slurmdbd

This commit is contained in:
Jonas Juselius
2025-09-27 15:57:48 +02:00
parent 680330d569
commit 30d0180b59
6 changed files with 40 additions and 15 deletions

View File

@@ -39,7 +39,7 @@ let
address = host.address;
};
os.externalInterface = "eno33np0";
hpc.compute = true;
hpc.computeNode = true;
# k8s = { inherit etcdCluster; };
};

View File

@@ -100,6 +100,7 @@ let
};
networking = {
useDHCP = false;
domain = mkDefault "cluster.local";
defaultGateway = mkDefault "10.255.241.1";
nameservers = mkDefault [ "8.8.8.8" ];
@@ -144,12 +145,13 @@ let
features.hpc.slurm = {
enable = true;
client = true;
# clusterName = "ekman";
mungeKey = ./munge.key;
# jwtKey = ./jwt_hs256.key;
mungeUid = mkDefault 996; # hack
# pkey = "0x7666";
clusterName = "ekman";
controlMachine = "ekman-manage";
dbdHost = "10.255.241.15";
mungeKey = ./munge.key;
jwtKey = ./jwt_hs256.key;
slurmKey = ./slurm.key;
# pkey = "0x7666";
mailDomain = "oceanbox.io";
nodeName = [
"c0-[1-18] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"

View File

@@ -69,7 +69,6 @@ in {
};
};
features.hpc.slurm.mungeUid = 991;
features.mft.enable = lib.mkForce true;
features = {

View File

@@ -46,8 +46,6 @@ in {
};
};
features.hpc.slurm.mungeUid = 994;
features = {
host = {
inherit address;

View File

@@ -51,8 +51,8 @@ in
hpc = {
slurm.server = false;
slurm.slurmrestd = false;
manage = false;
login = true;
manageNode = false;
loginNode = true;
knem = false;
};

View File

@@ -86,9 +86,9 @@ in {
hpc = {
slurm.server = true;
slurm.slurmrestd = false;
slurm.mungeUid = 996;
manage = true;
slurm.slurmrestd = true;
slurm.dbdServer = false;
manageNode = true;
};
k8s = {
@@ -283,6 +283,18 @@ in {
'';
};
# Enable dnsmasq and configure it for the "cluster.local" domain: forward
# obx.hs lookups to 100.100.100.100, answer for slurmctld.cluster.local, and
# publish an SRV record for the _slurmctld._tcp service.
services.dnsmasq.enable = true;
services.dnsmasq.settings = {
domain = [ "cluster.local" ];
# Queries for names under obx.hs go to the upstream server listed here.
server = [
"/obx.hs/100.100.100.100" # headscale dns
];
# slurmctld.cluster.local is answered with the loopback address.
# NOTE(review): presumably slurmctld runs on this same host; any other machine
# resolving through this dnsmasq would be pointed at its own loopback —
# confirm this server is only queried locally.
address = [
"/slurmctld.cluster.local/127.0.0.1"
];
# SRV record: target slurmctld.cluster.local, port 6817, then priority 0 and
# weight 5 (field order per the dnsmasq srv-host syntax).
srv-host = "_slurmctld._tcp.cluster.local,slurmctld.cluster.local,6817,0,5";
};
# ssh-rsa is deprecated, but putty/winscp users use it
services.openssh.extraConfig = ''
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
@@ -323,11 +335,25 @@ in {
useRoutingFeatures = "both"; # for exit-node usage
extraUpFlags = [
"--login-server=https://headscale.svc.oceanbox.io"
"--accept-dns=false"
"--accept-dns=true" # see dnsmasq
"--accept-routes=true"
"--advertise-exit-node"
"--advertise-routes=10.255.241.0/24"
];
};
# Enable networkd-dispatcher with one rule, "tailscale-router", which runs an
# ethtool command when the "routable" state is reported.
services.networkd-dispatcher = {
enable = true;
rules = {
"tailscale-router" = {
onState = [ "routable" ];
# The script turns rx-udp-gro-forwarding on and rx-gro-list off for
# enp65s0np0, then always exits 0, so an ethtool failure is not reported
# through the exit status.
# NOTE(review): the interface name is hard-coded and the script does not check
# which interface triggered the event, so it presumably runs for every
# interface that becomes routable — confirm that is intended.
script = ''
#!${pkgs.runtimeShell}
${pkgs.ethtool}/bin/ethtool -K enp65s0np0 rx-udp-gro-forwarding on rx-gro-list off
exit 0
'';
};
};
};
imports = [
./hardware-configuration.nix