235 lines
5.6 KiB
Nix
235 lines
5.6 KiB
Nix
{ pkgs, lib, config, ... }:
|
|
with lib;
|
|
let
|
|
# Shorthand for this module's own option values (config.features.hpc.slurm).
cfg = config.features.hpc.slurm;
|
|
|
|
# Base configuration merged in whenever the feature is enabled: munge with
# its key file, the common Slurm settings, TCP port 6818 and the slurm-ucx
# package overlay.
configuration = {
  services.munge.enable = true;

  # Install the munge key from cfg.mungeKey; only the owning uid may read it.
  environment.etc."munge/munge.key" = {
    source = cfg.mungeKey;
    uid = cfg.mungeUid;
    gid = 0;
    mode = "0400";
  };

  services.slurm = {
    # Forwarded unchanged from this module's own options.
    inherit (cfg) controlMachine nodeName partitionName;

    # Extra text for the generated Slurm configuration. Lines starting with
    # '#' are part of that text (disabled Slurm settings), not Nix comments.
    extraConfig = ''
      # AccountingStorageType=accounting_storage/none
      AccountingStorageType=accounting_storage/slurmdbd
      JobAcctGatherType=jobacct_gather/linux
      MailDomain=${cfg.mailDomain}
      MailProg=/run/wrappers/bin/sendmail
      SelectType=select/cons_tres
      SelectTypeParameters=CR_Core
      # AuthAltTypes=auth/jwt
      # AuthAltParameters=jwt_key=/var/spool/slurm/statesave/jwt_hs256.key
    '';
  };

  # NOTE(review): 6818 is presumably the slurmd port -- confirm.
  networking.firewall.allowedTCPPorts = [ 6818 ];

  # Replace pkgs.slurm with the UCX-enabled build defined by slurm-ucx.
  nixpkgs.overlays = [ slurm-ucx ];
};
|
|
|
|
|
|
# Configuration applied when cfg.server is set: a MariaDB instance with the
# slurm_acct_db database, the Slurm server and dbdserver, and TCP port 6817.
slurmServer = {
  services.mysql = {
    enable = true;
    package = pkgs.mariadb;

    # Create the accounting database and a "slurm" user with full access to it.
    initialDatabases = [ { name = "slurm_acct_db"; } ];
    ensureUsers = [
      {
        name = "slurm";
        ensurePermissions."slurm_acct_db.*" = "ALL PRIVILEGES";
      }
    ];
  };

  services.slurm.server.enable = true;
  # Disabled alternative mail settings, kept for reference:
  # services.slurm.extraConfig = ''
  #   MailDomain=itpartner.no
  #   MailProg=${pkgs.ssmtp}/bin/ssmtp
  # '';

  services.slurm.dbdserver.enable = true;
  # Disabled dbdserver settings, kept for reference:
  # services.slurm.dbdserver.dbdHost = cfg.controlMachine;
  # services.slurm.dbdserver.storagePass = cfg.storagePass;

  # NOTE(review): 6817 is presumably the slurmctld port -- confirm.
  networking.firewall.allowedTCPPorts = [ 6817 ];
};
|
|
|
|
# Configuration applied when cfg.client is set: enable the Slurm client and
# let systemd restart the slurmd unit whenever it exits with a failure.
slurmClient = {
  services.slurm.client.enable = true;
  systemd.services.slurmd.serviceConfig.Restart = "on-failure";
};
|
|
|
|
# Nixpkgs overlay that rebuilds slurm with UCX and http-parser support and
# slurmrestd enabled, then wraps two of the installed programs with extra
# environment variables.
slurm-ucx = final: prev:
  let
    # Packages used by the override, taken from the same set the original
    # `with super.pkgs;` scope resolved them from.
    inherit (prev.pkgs) ucx http-parser pkg-config makeWrapper;
  in
  {
    slurm = prev.slurm.overrideAttrs (old: {
      buildInputs = old.buildInputs ++ [ ucx http-parser pkg-config ];

      # makeWrapper provides the wrapProgram helper used in postFixup.
      nativeBuildInputs = old.nativeBuildInputs ++ [ makeWrapper ];

      configureFlags = old.configureFlags ++ [
        "--with-ucx=${ucx.dev}"
        "--with-http-parser=${http-parser}"
        "--enable-slurmrestd"
      ];

      # slurmstepd gets the UCX libraries on LD_LIBRARY_PATH; srun gets
      # SLURM_MPI_TYPE set to "pmix".
      postFixup = ''
        wrapProgram $out/bin/slurmstepd --set LD_LIBRARY_PATH ${ucx}/lib
        wrapProgram $out/bin/srun --set SLURM_MPI_TYPE "pmix"
      '';
      # Disabled wrapper flags, kept for reference:
      # --set PSM3_PKEY "${cfg.pkey}" \
      # --set PMIX_MCA_gds "^ds12" \
    });
  };
|
|
|
|
# The "hipster" account and its group, both with the fixed id 2001. The
# slurmrestd units in this file run as this user.
hipster = {
  users = {
    groups.hipster.gid = 2001;

    users.hipster = {
      isNormalUser = true;
      uid = 2001;
      group = "hipster";
      extraGroups = [ "users" ];
      description = "Job runner";
      # The home directory is not created by NixOS (createHome = false).
      home = "/work/hipster";
      createHome = false;
      useDefaultShell = true;
    };
  };
};
|
|
|
|
# slurmrestd running as the hipster user on a unix socket, plus a
# socket-activated socat proxy that forwards TCP port 6822 to that socket.
slurmrestd = {
  systemd = {
    tmpfiles.rules = [ "d /run/slurmrestd 0750 hipster hipster -" ];

    services.slurmrestd = {
      description = "Slurm REST API service";
      wantedBy = [ "multi-user.target" ];
      after = [ "slurmd.service" ];

      environment = {
        # SLURM_JWT = "daemon";
      };

      # One serviceConfig set; the original spelled it as two separate
      # definitions that Nix merged.
      serviceConfig = {
        Type = "simple";
        User = "hipster";
        Group = "hipster";
        RuntimeDirectory = "slurmrestd";
      };

      # Remove any stale socket, then run slurmrestd in the foreground on it.
      script = ''
        rm -f /run/slurmrestd/hipster.socket
        /run/current-system/sw/bin/slurmrestd -v -a rest_auth/local unix:/run/slurmrestd/hipster.socket
      '';
    };

    sockets.slurm-http-proxy = {
      enable = true;
      description = "Proxy slurmrestd unix socket to port 6822";
      wantedBy = [ "sockets.target" ];
      listenStreams = [ "0.0.0.0:6822" ];
      # Allow multiple instances of corresponding service.
      socketConfig.Accept = true;
    };

    # Template unit started once per accepted connection; socat relays the
    # connection (passed as stdin via StandardInput=socket) to the unix socket.
    services."slurm-http-proxy@" = {
      enable = true;
      description = "Proxy slurmrestd unix socket to port 6822";
      serviceConfig = {
        User = "hipster";
        Group = "hipster";
        StandardInput = "socket";
        ExecStart = "-${pkgs.socat}/bin/socat STDIO UNIX-CONNECT:/run/slurmrestd/hipster.socket";
      };
    };
  };
};
|
|
in
|
|
{
|
|
# Options of the SLURM feature module.
#
# Options that the configuration reads unconditionally (mungeKey,
# controlMachine, mailDomain) deliberately have no default: `null` is not a
# valid value of types.str / types.path, so a `default = null` only turned a
# missing setting into a confusing type error. Without a default the module
# system reports the option as "used but not defined" instead.
options.features.hpc.slurm = {
  # mkEnableOption prepends "Whether to enable", so pass only the noun phrase.
  enable = mkEnableOption "SLURM batch system";

  mungeKey = mkOption {
    type = types.path;
    description = "Munge key file, installed as /etc/munge/munge.key with mode 0400.";
  };

  mungeUid = mkOption {
    type = types.int;
    default = 997;
    description = "Numeric uid that owns the installed munge key.";
  };

  pkey = mkOption {
    type = types.str;
    default = "0x7fff";
    description = ''
      Partition key. Currently only referenced from a commented-out
      PSM3_PKEY wrapper flag in the slurm overlay.
    '';
  };

  controlMachine = mkOption {
    type = types.str;
    description = "Value passed to services.slurm.controlMachine.";
  };

  server = mkOption {
    type = types.bool;
    default = false;
    description = ''
      Apply the server configuration on this host: MariaDB with the
      accounting database, the Slurm server and dbdserver, and TCP port 6817.
    '';
  };

  client = mkOption {
    type = types.bool;
    default = false;
    description = "Enable the Slurm client (slurmd) on this host.";
  };

  hipster = mkOption {
    type = types.bool;
    default = true;
    description = "Create the hipster user and group (uid/gid 2001).";
  };

  slurmrestd = mkOption {
    type = types.bool;
    default = false;
    description = ''
      Run slurmrestd and its TCP proxy on this host. Only takes effect when
      the hipster option is enabled; hosts with the server option enabled
      get slurmrestd regardless of this setting.
    '';
  };

  nodeName = mkOption {
    type = types.listOf types.str;
    default = [];
    description = "Value passed to services.slurm.nodeName.";
  };

  partitionName = mkOption {
    type = types.listOf types.str;
    default = [];
    description = "Value passed to services.slurm.partitionName.";
  };

  # Nothing reads this option at present (its only use is commented out), so
  # it stays optional; nullOr makes the null default valid for the type.
  storagePass = mkOption {
    type = types.nullOr types.str;
    default = null;
    description = ''
      Accounting storage password. Currently unused: only referenced from
      commented-out dbdserver settings.
    '';
  };

  mailDomain = mkOption {
    type = types.str;
    description = "Value of MailDomain in the generated Slurm configuration.";
  };
};
|
|
|
|
# Assemble the final configuration from the fragments defined in the `let`
# block. Nothing is applied unless cfg.enable is set.
config = mkIf cfg.enable (mkMerge [
  configuration
  (mkIf cfg.server slurmServer)
  (mkIf cfg.client slurmClient)
  (mkIf cfg.hipster hipster)
  # slurmrestd runs as the hipster user, so it needs cfg.hipster, and is
  # wanted on server hosts as well as on hosts that ask for it explicitly.
  # This must be a single entry: listing the fragment once per condition
  # merged it twice when both flags were set, duplicating its list options
  # and concatenating the unit script with itself.
  (mkIf ((cfg.server || cfg.slurmrestd) && cfg.hipster) slurmrestd)
]);
|
|
}
|