fix: add k8s and hpc modules to main repo
This commit is contained in:
234
modules/hpc/slurm.nix
Normal file
234
modules/hpc/slurm.nix
Normal file
@@ -0,0 +1,234 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
with lib;
|
||||
let
|
||||
# Shorthand for the values set under features.hpc.slurm, the option tree this module declares.
cfg = config.features.hpc.slurm;
|
||||
|
||||
# Settings applied on every host that enables this module: munge
# authentication with the shared key, the common slurm.conf fragment,
# TCP port 6818 in the firewall, and the overlay that rebuilds slurm.
configuration = {
  nixpkgs.overlays = [ slurm-ucx ];

  networking.firewall.allowedTCPPorts = [ 6818 ];

  # Install the key given in cfg.mungeKey as /etc/munge/munge.key,
  # readable only by its owning uid.
  environment.etc."munge/munge.key" = {
    source = cfg.mungeKey;
    uid = cfg.mungeUid;
    gid = 0;
    mode = "0400";
  };

  services = {
    munge.enable = true;

    slurm = {
      # Cluster topology is passed through unchanged from this module's options.
      inherit (cfg) controlMachine nodeName partitionName;

      # Appended verbatim to slurm.conf; lines starting with '#' are
      # slurm.conf comments and are part of the generated file.
      extraConfig = ''
        # AccountingStorageType=accounting_storage/none
        AccountingStorageType=accounting_storage/slurmdbd
        JobAcctGatherType=jobacct_gather/linux
        MailDomain=${cfg.mailDomain}
        MailProg=/run/wrappers/bin/sendmail
        SelectType=select/cons_tres
        SelectTypeParameters=CR_Core
        # AuthAltTypes=auth/jwt
        # AuthAltParameters=jwt_key=/var/spool/slurm/statesave/jwt_hs256.key
      '';
    };
  };
};
|
||||
|
||||
|
||||
# Server-side settings: a MariaDB instance holding the slurm_acct_db
# accounting database, services.slurm.server together with the dbd server,
# and TCP port 6817 in the firewall.
slurmServer = {
  networking.firewall.allowedTCPPorts = [ 6817 ];

  services = {
    mysql = {
      enable = true;
      package = pkgs.mariadb;
      initialDatabases = [ { name = "slurm_acct_db"; } ];
      # The "slurm" database user gets full rights on the accounting database only.
      ensureUsers = [
        {
          name = "slurm";
          ensurePermissions."slurm_acct_db.*" = "ALL PRIVILEGES";
        }
      ];
    };

    slurm = {
      server.enable = true;

      # Disabled server-only override, kept for reference:
      #   extraConfig = ''
      #     MailDomain=itpartner.no
      #     MailProg=${pkgs.ssmtp}/bin/ssmtp
      #   '';

      dbdserver.enable = true;
      # Not set here (kept for reference):
      #   dbdserver.dbdHost = cfg.controlMachine;
      #   dbdserver.storagePass = cfg.storagePass;
    };
  };
};
|
||||
|
||||
# Client-side settings: enable services.slurm.client and have systemd
# restart the slurmd unit when it exits with a failure.
slurmClient = {
  services.slurm.client.enable = true;
  systemd.services.slurmd.serviceConfig.Restart = "on-failure";
};
|
||||
|
||||
# nixpkgs overlay that rebuilds slurm against UCX and http-parser, turns on
# slurmrestd at configure time, and wraps two installed binaries so they
# run with extra environment variables.
slurm-ucx = self: super:
  let
    # Build dependencies are taken from the previous package set (super).
    inherit (super.pkgs) ucx http-parser pkg-config makeWrapper;
  in
  {
    slurm = super.slurm.overrideAttrs (old: {
      nativeBuildInputs = old.nativeBuildInputs ++ [ makeWrapper ];

      buildInputs = old.buildInputs ++ [ ucx http-parser pkg-config ];

      configureFlags = old.configureFlags ++ [
        "--with-ucx=${ucx.dev}"
        "--with-http-parser=${http-parser}"
        "--enable-slurmrestd"
      ];

      # slurmstepd runs with LD_LIBRARY_PATH pointing at the UCX libraries;
      # srun runs with SLURM_MPI_TYPE set to pmix.
      # Further wrapper flags that are currently disabled:
      #   --set PSM3_PKEY "${cfg.pkey}" \
      #   --set PMIX_MCA_gds "^ds12" \
      postFixup = ''
        wrapProgram $out/bin/slurmstepd --set LD_LIBRARY_PATH ${ucx}/lib
        wrapProgram $out/bin/srun --set SLURM_MPI_TYPE "pmix"
      '';
    });
  };
|
||||
|
||||
# The "hipster" job-runner account and its primary group, both pinned to
# id 2001.
hipster = {
  users = {
    groups.hipster.gid = 2001;

    users.hipster = {
      isNormalUser = true;
      uid = 2001;
      group = "hipster";
      extraGroups = [ "users" ];
      description = "Job runner";
      # The home directory is only referenced here; this module does not create it.
      home = "/work/hipster";
      createHome = false;
      useDefaultShell = true;
    };
  };
};
|
||||
|
||||
# slurmrestd running as the hipster user on a unix socket, plus a
# socket-activated socat proxy that exposes that socket on TCP port 6822.
slurmrestd = {
  systemd.tmpfiles.rules = [ "d /run/slurmrestd 0750 hipster hipster -" ];

  systemd.services.slurmrestd = {
    description = "Slurm REST API service";
    wantedBy = [ "multi-user.target" ];
    after = [ "slurmd.service" ];

    # All unit settings live in one serviceConfig attribute set, so nothing
    # depends on the parser merging two same-named definitions.
    serviceConfig = {
      Type = "simple";
      User = "hipster";
      Group = "hipster";
      RuntimeDirectory = "slurmrestd";
    };

    # No extra environment is set. JWT setting kept for reference:
    #   environment.SLURM_JWT = "daemon";

    # Remove a socket left over from a previous run, then serve the REST
    # API on the unix socket using local authentication.
    script = ''
      rm -f /run/slurmrestd/hipster.socket
      /run/current-system/sw/bin/slurmrestd -v -a rest_auth/local unix:/run/slurmrestd/hipster.socket
    '';
  };

  systemd.sockets.slurm-http-proxy = {
    enable = true;
    description = "Proxy slurmrestd unix socket to port 6822";
    listenStreams = [ "0.0.0.0:6822" ];
    wantedBy = [ "sockets.target" ];
    # Allow multiple instances of the corresponding service
    # (one slurm-http-proxy@ instance per accepted connection).
    socketConfig.Accept = true;
  };

  systemd.services."slurm-http-proxy@" = {
    enable = true;
    description = "Proxy slurmrestd unix socket to port 6822";
    serviceConfig = {
      # socat bridges the accepted TCP connection (handed over as stdin) to
      # the slurmrestd unix socket. The leading "-" makes systemd treat a
      # failing exit status of socat as success.
      ExecStart = "-${pkgs.socat}/bin/socat STDIO UNIX-CONNECT:/run/slurmrestd/hipster.socket";
      StandardInput = "socket";
      User = "hipster";
      Group = "hipster";
    };
  };
};
|
||||
in
|
||||
{
|
||||
# Option declarations for features.hpc.slurm.
#
# mungeKey, controlMachine and mailDomain are read by the common
# configuration whenever the module is enabled, so they are declared
# without a default: leaving one unset fails with "option is used but not
# defined" rather than a type error on a null default.
options.features.hpc.slurm = {
  enable = mkEnableOption "SLURM batch system";

  mungeKey = mkOption {
    type = types.path;
    description = "Munge key file; installed as /etc/munge/munge.key.";
  };

  mungeUid = mkOption {
    type = types.int;
    default = 997;
    description = "Numeric uid that owns /etc/munge/munge.key.";
  };

  pkey = mkOption {
    type = types.str;
    default = "0x7fff";
    description = "Value intended for PSM3_PKEY; not consumed by the active configuration at present.";
  };

  controlMachine = mkOption {
    type = types.str;
    description = "Passed to services.slurm.controlMachine.";
  };

  server = mkOption {
    type = types.bool;
    default = false;
    description = "Configure this host as Slurm server: services.slurm.server, the dbd server, a MariaDB accounting database, and TCP port 6817 in the firewall.";
  };

  client = mkOption {
    type = types.bool;
    default = false;
    description = "Configure this host as Slurm client (services.slurm.client, slurmd restarted on failure).";
  };

  hipster = mkOption {
    type = types.bool;
    default = true;
    description = "Create the hipster job-runner user and group (uid and gid 2001).";
  };

  slurmrestd = mkOption {
    type = types.bool;
    default = false;
    description = "Run slurmrestd and its TCP proxy on port 6822 on this host. Only takes effect when hipster is enabled; also applied when server is enabled.";
  };

  nodeName = mkOption {
    type = types.listOf types.str;
    default = [];
    description = "Passed to services.slurm.nodeName.";
  };

  partitionName = mkOption {
    type = types.listOf types.str;
    default = [];
    description = "Passed to services.slurm.partitionName.";
  };

  # Nullable so that the null default is a valid value of the declared type.
  storagePass = mkOption {
    type = types.nullOr types.str;
    default = null;
    description = "Accounting storage password; not consumed by the active configuration at present.";
  };

  mailDomain = mkOption {
    type = types.str;
    description = "Written as MailDomain into the generated slurm configuration.";
  };
};
|
||||
|
||||
# Assemble the final configuration. The common fragment applies whenever
# the module is enabled; the role fragments are added according to flags.
config = mkIf cfg.enable (mkMerge [
  configuration
  (mkIf cfg.server slurmServer)
  (mkIf cfg.client slurmClient)
  (mkIf cfg.hipster hipster)
  # slurmrestd runs on servers and on hosts that request it explicitly,
  # and always needs the hipster user. The fragment is merged once under a
  # combined condition: merging it twice (server and slurmrestd both set)
  # would duplicate its list-valued settings and concatenate the unit's
  # start script with itself.
  (mkIf ((cfg.server || cfg.slurmrestd) && cfg.hipster) slurmrestd)
]);
|
||||
}
|
||||
Reference in New Issue
Block a user