# NixOS module `features.hpc.slurm`.
#
# Declares the options under `features.hpc.slurm` and, when `enable` is set,
# merges the following configuration fragments:
#   * configuration - always: munge, fixed slurm/munge uids and gids, key
#                     files under /etc, the shared `services.slurm` settings
#                     and the `slurm-ucx` package overlay;
#   * slurmServer   - when `server` is set: MariaDB, the slurm server role and
#                     (optionally) slurmdbd;
#   * slurmClient   - when `client` is set: the slurm client role;
#   * hipster       - when `hipster` is set: the `hipster` job-runner account;
#   * slurmrestd    - when `hipster` is set together with `server` or
#                     `slurmrestd`: a systemd unit running slurmrestd as the
#                     `hipster` user.
{ pkgs, lib, config, ... }:

with lib;

let
  cfg = config.features.hpc.slurm;

  # Settings applied on every host that enables this feature.
  configuration = {
    services.munge.enable = true;

    # Pin the numeric ids of the slurm and munge accounts; mkForce makes these
    # values win over definitions coming from other modules.
    users = {
      groups = {
        slurm = { gid = lib.mkForce 401; };
        munge = { gid = lib.mkForce 402; };
      };
      users.slurm = { group = "slurm"; uid = lib.mkForce 401; };
      users.munge = { group = "munge"; uid = lib.mkForce 402; };
    };

    # Key material installed below /etc, readable only by the owning uid
    # (402 = munge, 401 = slurm, matching the ids pinned above).
    environment.etc."munge/munge.key" = {
      source = cfg.mungeKey;
      mode = "0400";
      uid = 402;
      gid = 0;
    };

    environment.etc."slurm/jwt_hs256.key" = {
      source = cfg.jwtKey;
      mode = "0400";
      uid = 401;
      gid = 0;
    };

    # Only install slurm.key when a key is configured. The option type is
    # `nullOr path`, and `source` does not accept null, so an unconditional
    # definition would abort evaluation for `slurmKey = null`.
    environment.etc."slurm/slurm.key" = mkIf (cfg.slurmKey != null) {
      source = cfg.slurmKey;
      mode = "0400";
      uid = 401;
      gid = 0;
    };

    services.slurm = {
      clusterName = cfg.clusterName;
      controlMachine = cfg.controlMachine;
      nodeName = cfg.nodeName;
      partitionName = cfg.partitionName;
      extraConfig = ''
        # AccountingStorageType=accounting_storage/none
        AccountingStorageType=accounting_storage/slurmdbd
        AccountingStorageHost=${cfg.dbdHost}
        JobAcctGatherType=jobacct_gather/linux
        MailDomain=${cfg.mailDomain}
        MailProg=/run/wrappers/bin/sendmail
        SelectType=select/cons_tres
        SelectTypeParameters=CR_Core
        AuthAltTypes=auth/jwt
        AuthAltParameters=jwt_key=/etc/slurm/jwt_hs256.key
        ${slurmAuth}
      '';
      extraConfigPaths = [ slurmKey ];
    };

    # 6818 is Slurm's default SlurmdPort.
    networking.firewall.allowedTCPPorts = [ 6818 ];

    nixpkgs.overlays = [ slurm-ucx ];
  };

  # Extra slurm.conf line selecting auth/slurm; empty when no key is set.
  slurmAuth = if cfg.slurmKey != null then "AuthType=auth/slurm" else "";

  # Store directory containing a single empty file named `slurm-key`.
  # Used as the placeholder entry for `extraConfigPaths` when no key is set,
  # and as the default value of the `slurmKey` option.
  emptyKey = pkgs.writeTextDir "slurm-key" "";

  # Directory handed to `services.slurm.extraConfigPaths`. With a key
  # configured it holds a `slurm.key` symlink pointing at the file installed
  # as /etc/slurm/slurm.key; otherwise it is the `emptyKey` placeholder.
  slurmKey =
    if cfg.slurmKey == null then
      emptyKey
    else
      pkgs.stdenv.mkDerivation {
        name = "slurm-key";
        buildCommand = ''
          mkdir -p $out
          ln -s /etc/slurm/slurm.key $out/slurm.key
        '';
      };

  # Controller role: accounting database, slurm server and optional slurmdbd.
  slurmServer = {
    services.mysql = {
      enable = true;
      package = pkgs.mariadb;
      ensureUsers = [
        {
          name = "slurm";
          ensurePermissions = {
            "slurm_acct_db.*" = "ALL PRIVILEGES";
          };
        }
      ];
      initialDatabases = [
        { name = "slurm_acct_db"; }
      ];
    };

    services.slurm = {
      server.enable = true;
      # extraConfig = ''
      #   MailDomain=itpartner.no
      #   MailProg=${pkgs.ssmtp}/bin/ssmtp
      # '';
    };

    services.slurm.dbdserver = {
      enable = cfg.dbdServer;
      dbdHost = cfg.controlMachine;
      # storagePass = cfg.storagePass;
    };

    # 6817 is Slurm's default SlurmctldPort.
    networking.firewall.allowedTCPPorts = [ 6817 ];
  };

  # Compute-node role: slurm client, restarted by systemd when it fails.
  slurmClient = {
    services.slurm.client.enable = true;
    systemd.services.slurmd.serviceConfig = {
      Restart = "on-failure";
    };
  };

  # Overlay rebuilding slurm with UCX and http-parser and with slurmrestd
  # enabled, then wrapping two of its binaries with extra environment.
  slurm-ucx = self: super: with super.pkgs; {
    slurm = super.slurm.overrideAttrs (attrs: {
      buildInputs = attrs.buildInputs ++ [ ucx http-parser pkg-config ];
      # makeWrapper provides the `wrapProgram` used in postFixup.
      nativeBuildInputs = attrs.nativeBuildInputs ++ [ makeWrapper ];
      configureFlags = attrs.configureFlags ++ [
        "--with-ucx=${ucx.dev}"
        "--with-http-parser=${http-parser}"
        "--enable-slurmrestd"
      ];
      postFixup = ''
        wrapProgram $out/bin/slurmstepd --set LD_LIBRARY_PATH ${ucx}/lib
        wrapProgram $out/bin/srun --set SLURM_MPI_TYPE "pmix"
      '';
      # --set PSM3_PKEY "${cfg.pkey}" \
      # --set PMIX_MCA_gds "^ds12" \
    });
  };

  # Account with fixed uid/gid 2001 used to run jobs and the slurmrestd unit.
  hipster = {
    users.groups.hipster.gid = 2001;
    users.users.hipster = {
      description = "Job runner";
      home = "/work/hipster";
      group = "hipster";
      extraGroups = [ "users" ];
      uid = 2001;
      isNormalUser = true;
      createHome = false;
      useDefaultShell = true;
    };
  };

  # systemd unit running slurmrestd with JWT authentication on port 6822 as
  # the `hipster` user.
  slurmrestd = {
    systemd.services.slurmrestd = {
      description = "Slurm REST API service";
      wantedBy = [ "multi-user.target" ];
      after = [ "slurmd.service" ];
      serviceConfig = {
        Type = "simple";
        User = "hipster";
        Group = "hipster";
        RuntimeDirectory = "slurmrestd";
      };
      environment = {
        SLURM_JWT = "daemon";
      };
      script = ''
        /run/current-system/sw/bin/slurmrestd -v -a rest_auth/jwt :6822
      '';
    };
  };

in
{
  # NOTE(review): several options below declare `default = null` with a type
  # (`str` / `path`) that does not accept null, so they effectively have to be
  # set by the user whenever the configuration reads them. Kept as-is to leave
  # the option interface untouched.
  options.features.hpc.slurm = {
    # mkEnableOption already prefixes the text with "Whether to enable".
    enable = mkEnableOption "SLURM batch system";

    jwtKey = mkOption {
      type = types.path;
      default = null;
    };

    mungeKey = mkOption {
      type = types.path;
      default = null;
    };

    clusterName = mkOption {
      type = types.str;
      default = null;
    };

    pkey = mkOption {
      type = types.str;
      default = "0x7fff";
    };

    controlMachine = mkOption {
      type = types.str;
      default = null;
    };

    dbdHost = mkOption {
      type = types.str;
      default = "localhost";
    };

    server = mkOption {
      type = types.bool;
      default = false;
    };

    dbdServer = mkOption {
      type = types.bool;
      default = false;
    };

    client = mkOption {
      type = types.bool;
      default = false;
    };

    hipster = mkOption {
      type = types.bool;
      default = true;
    };

    slurmrestd = mkOption {
      type = types.bool;
      default = false;
    };

    nodeName = mkOption {
      type = types.listOf types.str;
      default = [];
    };

    partitionName = mkOption {
      type = types.listOf types.str;
      default = [];
    };

    storagePass = mkOption {
      type = types.str;
      default = null;
    };

    mailDomain = mkOption {
      type = types.str;
      default = null;
    };

    # NOTE(review): the default is the `emptyKey` placeholder rather than
    # null, so `AuthType=auth/slurm` is emitted unless this is explicitly set
    # to null - confirm that this is intended.
    slurmKey = mkOption {
      type = types.nullOr types.path;
      default = emptyKey;
      description = ''File containing the slurm.key to be used for auth/slurm.'';
    };
  };

  config = mkIf cfg.enable (
    mkMerge [
      configuration
      (mkIf cfg.server slurmServer)
      (mkIf cfg.client slurmClient)
      (mkIf cfg.hipster hipster)
      # Merge the slurmrestd unit exactly once. Two separate mkIf entries
      # would both fire when `server` and `slurmrestd` are set together,
      # duplicating the unit's `script` lines and list-valued settings.
      (mkIf (cfg.hipster && (cfg.server || cfg.slurmrestd)) slurmrestd)
    ]
  );
}