Files
platform/modules/hpc/slurm.nix

235 lines
5.1 KiB
Nix

{ pkgs, lib, config, ... }:
with lib;
let
# Shorthand for the values of this module's own options (features.hpc.slurm.*).
cfg = config.features.hpc.slurm;
# Settings applied to every host that enables features.hpc.slurm:
# the slurm package overlay, MUNGE, the two key files under /etc,
# the shared slurm.conf fragment and one firewall port.
configuration = {
  # Replace pkgs.slurm with the customised build defined by slurm-ucx.
  nixpkgs.overlays = [ slurm-ucx ];

  # NOTE(review): 6818 is presumably the slurmd port — confirm.
  networking.firewall.allowedTCPPorts = [ 6818 ];

  environment.etc = {
    # MUNGE key: mode 0400, owned by cfg.mungeUid, group 0.
    "munge/munge.key" = {
      source = cfg.mungeKey;
      mode = "0400";
      uid = cfg.mungeUid;
      gid = 0;
    };

    # JWT signing key; this is the path named by AuthAltParameters below.
    "slurmctld/jwt_hs256.key" = {
      source = cfg.jwtKey;
      mode = "0400";
      uid = cfg.slurmUid;
      gid = 0;
    };
  };

  services = {
    munge.enable = true;

    slurm = {
      # Passed through unchanged from this module's options.
      inherit (cfg) controlMachine nodeName partitionName;

      # Extra slurm.conf lines: slurmdbd accounting, mail settings,
      # core-based cons_tres selection and JWT as alternative auth.
      extraConfig = ''
        # AccountingStorageType=accounting_storage/none
        AccountingStorageType=accounting_storage/slurmdbd
        AccountingStorageExternalHost=${cfg.dbdHosts}
        JobAcctGatherType=jobacct_gather/linux
        MailDomain=${cfg.mailDomain}
        MailProg=/run/wrappers/bin/sendmail
        SelectType=select/cons_tres
        SelectTypeParameters=CR_Core
        AuthAltTypes=auth/jwt
        AuthAltParameters=jwt_key=/etc/slurmctld/jwt_hs256.key
      '';
    };
  };
};
# Controller role: a MariaDB instance holding the accounting database,
# the Slurm server and the Slurm database daemon, plus one firewall port.
slurmServer = {
  # NOTE(review): 6817 is presumably the slurmctld port — confirm.
  networking.firewall.allowedTCPPorts = [ 6817 ];

  services = {
    mysql = {
      enable = true;
      package = pkgs.mariadb;

      # Accounting database and the "slurm" account with full rights on it.
      initialDatabases = [ { name = "slurm_acct_db"; } ];
      ensureUsers = [
        {
          name = "slurm";
          ensurePermissions."slurm_acct_db.*" = "ALL PRIVILEGES";
        }
      ];
    };

    slurm = {
      server.enable = true;

      # Disabled server-side mail override, kept for reference:
      # extraConfig = ''
      # MailDomain=itpartner.no
      # MailProg=${pkgs.ssmtp}/bin/ssmtp
      # '';

      # The database daemon is declared on the control machine itself.
      dbdserver = {
        enable = true;
        dbdHost = cfg.controlMachine;
        # storagePass = cfg.storagePass;
      };
    };
  };
};
# Compute-node role: enable the Slurm client and let systemd restart
# slurmd whenever it exits with a failure.
slurmClient = {
  services.slurm.client = {
    enable = true;
  };

  systemd.services.slurmd = {
    serviceConfig.Restart = "on-failure";
  };
};
# Nixpkgs overlay that rebuilds `slurm` with UCX and http-parser support,
# enables slurmrestd, and wraps two of the installed binaries:
#   - slurmstepd gets LD_LIBRARY_PATH pointing at the UCX libraries
#   - srun gets SLURM_MPI_TYPE=pmix
#
# Arguments follow the usual overlay signature (self: super:); only
# `super` is used.
slurm-ucx = self: super:
  let
    # Taken explicitly from the package set instead of `with`, so it is
    # obvious where each name comes from.
    inherit (super.pkgs) ucx http-parser pkg-config makeWrapper;
  in
  {
    slurm = super.slurm.overrideAttrs (attrs: {
      buildInputs = (attrs.buildInputs or [ ]) ++ [ ucx http-parser pkg-config ];
      # makeWrapper provides the wrapProgram shell function used in postFixup.
      nativeBuildInputs = (attrs.nativeBuildInputs or [ ]) ++ [ makeWrapper ];
      configureFlags =
        (attrs.configureFlags or [ ]) ++ [
          "--with-ucx=${ucx.dev}"
          "--with-http-parser=${http-parser}"
          "--enable-slurmrestd"
        ];
      # Append to any postFixup the overridden derivation already has
      # rather than replacing it. When there is none, the resulting string
      # is exactly the two wrapProgram lines.
      postFixup =
        (if attrs ? postFixup then attrs.postFixup + "\n" else "")
        + ''
          wrapProgram $out/bin/slurmstepd --set LD_LIBRARY_PATH ${ucx}/lib
          wrapProgram $out/bin/srun --set SLURM_MPI_TYPE "pmix"
        '';
      # Disabled wrapper flags, kept for reference:
      # --set PSM3_PKEY "${cfg.pkey}" \
      # --set PMIX_MCA_gds "^ds12" \
    });
  };
# The "hipster" account and its group, both with id 2001. The slurmrestd
# unit in this module runs under this user. Its home is /work/hipster,
# which NixOS is told not to create (createHome = false).
hipster = {
  users = {
    groups.hipster = {
      gid = 2001;
    };

    users.hipster = {
      uid = 2001;
      group = "hipster";
      extraGroups = [ "users" ];
      isNormalUser = true;
      description = "Job runner";
      home = "/work/hipster";
      createHome = false;
      useDefaultShell = true;
    };
  };
};
# systemd unit for the Slurm REST API daemon. It runs as the hipster
# user/group, listens on port 6822 and uses the rest_auth/jwt plugin.
slurmrestd = {
  systemd.services.slurmrestd = {
    description = "Slurm REST API service";
    wantedBy = [ "multi-user.target" ];
    # NOTE(review): ordered after slurmd.service only; on a controller
    # host slurmctld.service may be the intended dependency — confirm.
    after = [ "slurmd.service" ];

    # NOTE(review): presumably selects slurmrestd's JWT daemon mode —
    # confirm against the Slurm JWT documentation.
    environment.SLURM_JWT = "daemon";

    # Single serviceConfig definition (previously split across two
    # separate attrsets within this unit).
    serviceConfig = {
      Type = "simple";
      User = "hipster";
      Group = "hipster";
      RuntimeDirectory = "slurmrestd";
    };

    # The binary is taken from the system profile rather than a store path.
    script = ''
      /run/current-system/sw/bin/slurmrestd -v -a rest_auth/jwt :6822
    '';
  };
};
in
{
# Option declarations for features.hpc.slurm.
#
# Options that the configuration always reads (jwtKey, mungeKey,
# controlMachine, dbdHosts, mailDomain) have no default: a null default is
# not a valid value for types.str / types.path, so leaving them unset now
# reports "used but not defined" instead of a type error.
options.features.hpc.slurm = {
  enable = mkEnableOption "the SLURM batch system";

  jwtKey = mkOption {
    type = types.path;
    description = "Key file installed as /etc/slurmctld/jwt_hs256.key and used as the jwt_key in AuthAltParameters.";
  };

  mungeKey = mkOption {
    type = types.path;
    description = "Key file installed as /etc/munge/munge.key.";
  };

  mungeUid = mkOption {
    type = types.int;
    default = 997;
    description = "Numeric uid that owns /etc/munge/munge.key.";
  };

  slurmUid = mkOption {
    type = types.int;
    default = 307;
    description = "Numeric uid that owns /etc/slurmctld/jwt_hs256.key.";
  };

  pkey = mkOption {
    type = types.str;
    default = "0x7fff";
    description = "Value for the PSM3_PKEY wrapper setting, which is currently commented out in this module.";
  };

  controlMachine = mkOption {
    type = types.str;
    description = "Passed to services.slurm.controlMachine and, on servers, used as services.slurm.dbdserver.dbdHost.";
  };

  dbdHosts = mkOption {
    type = types.str;
    description = "Value written to AccountingStorageExternalHost in the Slurm configuration.";
  };

  server = mkOption {
    type = types.bool;
    default = false;
    description = "Enable the controller role: MariaDB with the slurm_acct_db database, the Slurm server and the Slurm database daemon.";
  };

  client = mkOption {
    type = types.bool;
    default = false;
    description = "Enable the Slurm client (slurmd) on this host.";
  };

  hipster = mkOption {
    type = types.bool;
    default = true;
    description = "Create the hipster user and group (uid and gid 2001). The slurmrestd service is only set up when this is true.";
  };

  slurmrestd = mkOption {
    type = types.bool;
    default = false;
    description = "Run the slurmrestd service on this host. Hosts with server set to true run it regardless of this option. Requires hipster to be true.";
  };

  nodeName = mkOption {
    type = types.listOf types.str;
    default = [ ];
    description = "Passed to services.slurm.nodeName.";
  };

  partitionName = mkOption {
    type = types.listOf types.str;
    default = [ ];
    description = "Passed to services.slurm.partitionName.";
  };

  # Not read by any active code in this module (its only use is commented
  # out), so it stays optional; nullOr makes the null default type-correct.
  storagePass = mkOption {
    type = types.nullOr types.str;
    default = null;
    description = "Storage password for the Slurm database daemon. Currently not consumed by this module.";
  };

  mailDomain = mkOption {
    type = types.str;
    description = "Value written to MailDomain in the Slurm configuration.";
  };
};
# Assemble the final configuration from the role fragments. Nothing is
# applied unless features.hpc.slurm.enable is set.
config = mkIf cfg.enable (
  mkMerge [
    configuration
    (mkIf cfg.server slurmServer)
    (mkIf cfg.client slurmClient)
    (mkIf cfg.hipster hipster)
    # slurmrestd runs as the hipster user, so it requires cfg.hipster.
    # It is enabled on every server and on hosts that opt in through
    # cfg.slurmrestd. The fragment is listed once: merging it twice would
    # duplicate its list values and repeat the command in the unit's script.
    (mkIf (cfg.hipster && (cfg.server || cfg.slurmrestd)) slurmrestd)
  ]);
}