# NixOS module `features.hpc.slurm`.
#
# Wires up MUNGE and Slurm for a cluster node. Depending on the option flags
# it additionally enables the server role (slurmctld + slurmdbd backed by
# MariaDB), the client role (slurmd), a `hipster` job-runner account and a
# slurmrestd instance that is exposed on TCP port 6822 through a socat proxy.
{ pkgs, lib, config, ... }:
with lib;
let
  cfg = config.features.hpc.slurm;

  # Settings shared by every node that enables this feature.
  configuration = {
    services.munge.enable = true;

    # Install the MUNGE key only when one was supplied; `mungeKey` defaults
    # to null, and a null `source` cannot be turned into a file.
    environment.etc."munge/munge.key" = mkIf (cfg.mungeKey != null) {
      source = cfg.mungeKey;
      mode = "0400";
      uid = cfg.mungeUid;
      gid = 0;
    };

    services.slurm = {
      # Forwarded unchanged (may be null).
      # NOTE(review): confirm that `services.slurm.controlMachine` accepts
      # null in the nixpkgs revision in use.
      controlMachine = cfg.controlMachine;
      nodeName = cfg.nodeName;
      partitionName = cfg.partitionName;
      # The MailDomain line is emitted only when a domain is configured;
      # interpolating null into a string would abort evaluation.
      extraConfig = ''
        # AccountingStorageType=accounting_storage/none
        AccountingStorageType=accounting_storage/slurmdbd
        JobAcctGatherType=jobacct_gather/linux
        ${optionalString (cfg.mailDomain != null) "MailDomain=${cfg.mailDomain}"}
        MailProg=/run/wrappers/bin/sendmail
        SelectType=select/cons_tres
        SelectTypeParameters=CR_Core
        # AuthAltTypes=auth/jwt
        # AuthAltParameters=jwt_key=/var/spool/slurm/statesave/jwt_hs256.key
      '';
    };

    networking.firewall.allowedTCPPorts = [ 6818 ];
    nixpkgs.overlays = [ slurm-ucx ];
  };

  # Server role: Slurm controller, slurmdbd and the MariaDB accounting database.
  slurmServer = {
    services.mysql = {
      enable = true;
      package = pkgs.mariadb;
      ensureUsers = [
        {
          name = "slurm";
          ensurePermissions = {
            "slurm_acct_db.*" = "ALL PRIVILEGES";
          };
        }
      ];
      initialDatabases = [
        { name = "slurm_acct_db"; }
      ];
    };

    services.slurm = {
      server.enable = true;
      # extraConfig = ''
      #   MailDomain=itpartner.no
      #   MailProg=${pkgs.ssmtp}/bin/ssmtp
      # '';
      dbdserver = {
        enable = true;
        # dbdHost = cfg.controlMachine;
        # storagePass = cfg.storagePass;
      };
    };

    networking.firewall.allowedTCPPorts = [ 6817 ];
  };

  # Client role: run slurmd and restart it when it fails.
  slurmClient = {
    services.slurm.client.enable = true;
    systemd.services.slurmd.serviceConfig = {
      Restart = "on-failure";
    };
  };

  # Overlay that rebuilds slurm with UCX, http-parser and slurmrestd enabled,
  # then wraps slurmstepd and srun with the environment they need.
  slurm-ucx = self: super: with super.pkgs; {
    slurm = super.slurm.overrideAttrs (attrs: {
      # `or` fallbacks keep the override working even if the base derivation
      # does not define one of these attributes.
      buildInputs = (attrs.buildInputs or [ ]) ++ [ ucx http-parser ];
      # pkg-config and makeWrapper are build-time tools, so they are listed
      # as native build inputs.
      nativeBuildInputs = (attrs.nativeBuildInputs or [ ]) ++ [ makeWrapper pkg-config ];
      configureFlags = (attrs.configureFlags or [ ]) ++ [
        "--with-ucx=${ucx.dev}"
        "--with-http-parser=${http-parser}"
        "--enable-slurmrestd"
      ];
      # Append to any postFixup of the base derivation instead of replacing it.
      postFixup = (attrs.postFixup or "") + ''
        wrapProgram $out/bin/slurmstepd --set LD_LIBRARY_PATH ${ucx}/lib
        wrapProgram $out/bin/srun --set SLURM_MPI_TYPE "pmix"
      '';
      # Further wrapper flags that are currently disabled:
      #   --set PSM3_PKEY "${cfg.pkey}" \
      #   --set PMIX_MCA_gds "^ds12" \
    });
  };

  # Dedicated account (uid/gid 2001) that owns and runs slurmrestd.
  hipster = {
    users.groups.hipster.gid = 2001;
    users.users.hipster = {
      description = "Job runner";
      home = "/work/hipster";
      group = "hipster";
      extraGroups = [ "users" ];
      uid = 2001;
      isNormalUser = true;
      createHome = false;
      useDefaultShell = true;
    };
  };

  # slurmrestd listening on a unix socket, plus a socket-activated socat
  # proxy that forwards TCP port 6822 to that socket.
  slurmrestd = {
    systemd.tmpfiles.rules = [ "d /run/slurmrestd 0750 hipster hipster -" ];

    systemd.services.slurmrestd = {
      description = "Slurm REST API service";
      wantedBy = [ "multi-user.target" ];
      after = [ "slurmd.service" ];
      # Single definition; the original split these keys over two separate
      # `serviceConfig` attribute sets in the same unit.
      serviceConfig = {
        Type = "simple";
        User = "hipster";
        Group = "hipster";
        RuntimeDirectory = "slurmrestd";
      };
      environment = {
        # SLURM_JWT = "daemon";
      };
      # Remove a stale socket from a previous run before starting the daemon.
      script = ''
        rm -f /run/slurmrestd/hipster.socket
        /run/current-system/sw/bin/slurmrestd -v -a rest_auth/local unix:/run/slurmrestd/hipster.socket
      '';
    };

    systemd.sockets.slurm-http-proxy = {
      enable = true;
      description = "Proxy slurmrestd unix socket to port 6822";
      listenStreams = [ "0.0.0.0:6822" ];
      wantedBy = [ "sockets.target" ];
      # If 'true', allow multiple instances of corresponding service (fails)
      # NOTE(review): with Accept = false systemd activates
      # `slurm-http-proxy.service`, but only the template unit
      # `slurm-http-proxy@` is defined below — confirm this pairing works
      # as intended on a live system.
      socketConfig.Accept = false;
    };

    systemd.services."slurm-http-proxy@" = {
      enable = true;
      description = "Proxy slurmrestd unix socket to port 6822";
      serviceConfig = {
        # Leading "-" tells systemd to ignore a non-zero exit status of socat.
        ExecStart = "-${pkgs.socat}/bin/socat STDIO UNIX-CONNECT:/run/slurmrestd/hipster.socket";
        StandardInput = "socket";
        User = "hipster";
        Group = "hipster";
      };
    };
  };
in
{
  options.features.hpc.slurm = {
    enable = mkEnableOption "SLURM batch system";

    mungeKey = mkOption {
      type = types.nullOr types.path;
      default = null;
      description = ''
        MUNGE key installed as /etc/munge/munge.key. When null, this module
        does not install a key file.
      '';
    };

    mungeUid = mkOption {
      type = types.int;
      default = 997;
      description = "Numeric uid that owns /etc/munge/munge.key.";
    };

    pkey = mkOption {
      type = types.str;
      default = "0x7fff";
      description = ''
        Partition key. Currently only referenced by a disabled wrapper flag
        in the slurm overlay.
      '';
    };

    controlMachine = mkOption {
      type = types.nullOr types.str;
      default = null;
      description = "Value forwarded to services.slurm.controlMachine.";
    };

    server = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Enable the server role: services.slurm.server, the slurmdbd server
        and a MariaDB instance holding the slurm_acct_db database.
      '';
    };

    client = mkOption {
      type = types.bool;
      default = false;
      description = "Enable the client role (services.slurm.client).";
    };

    hipster = mkOption {
      type = types.bool;
      default = true;
      description = "Create the hipster user and group (uid/gid 2001).";
    };

    slurmrestd = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Run slurmrestd together with its TCP proxy on port 6822. Only takes
        effect when the hipster account is enabled; hosts with both the
        server role and the hipster account run it regardless of this flag.
      '';
    };

    nodeName = mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = "Value forwarded to services.slurm.nodeName.";
    };

    partitionName = mkOption {
      type = types.listOf types.str;
      default = [ ];
      description = "Value forwarded to services.slurm.partitionName.";
    };

    storagePass = mkOption {
      type = types.nullOr types.str;
      default = null;
      description = ''
        Accounting storage password. Currently only referenced by a disabled
        dbdserver setting.
      '';
    };

    mailDomain = mkOption {
      type = types.nullOr types.str;
      default = null;
      description = ''
        Written as MailDomain= into the Slurm configuration. The line is
        omitted when this is null.
      '';
    };
  };

  config = mkIf cfg.enable (mkMerge [
    configuration
    (mkIf cfg.server slurmServer)
    (mkIf cfg.client slurmClient)
    (mkIf cfg.hipster hipster)
    # One entry covering both triggers (server role or explicit flag).
    # Listing the fragment under each condition separately merged it twice
    # when both held, which duplicates its list-valued settings.
    (mkIf (cfg.hipster && (cfg.server || cfg.slurmrestd)) slurmrestd)
  ]);
}