unified cluster setup using colmena
This commit is contained in:
302
cluster/cluster.nix
Normal file
302
cluster/cluster.nix
Normal file
@@ -0,0 +1,302 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
with lib;
|
||||
let
|
||||
# Per-host settings; `name` and `address` are read from here when building
# the kubelet SAN list for a k8s node.
cfg = config.features.host;

# Compute node records. This file reads `idx` and `address` from each element.
computeNodes = import ./compute/nodes.nix;
|
||||
|
||||
# Build a subject-alternative-name list for `host`: its name, its address,
# and the loopback address, in that order.
#
# `host` is an attribute set providing `name` and `address`.
mkSANs = host:
  let
    inherit (host) name address;
  in
  [ name address "127.0.0.1" ];
|
||||
|
||||
# Baseline settings. This set is merged unconditionally into `config` by the
# module body, so it applies to every host that imports this file.
configuration = {
  system.autoUpgrade.enable = lib.mkForce false;

  nixpkgs.overlays = [
    (import ./overlays.nix)
  ];

  boot = {
    loader.systemd-boot.enable = true;
    loader.efi.canTouchEfiVariables = true;
    # kernelPackages = pkgs.linuxPackages_5_4;
    kernelModules = [ "ib_umad" "ib_ipoib" ];
    # kernelParams = [
    #   "console=ttyS0,115200"
    #   "console=tty0"
    # ];
  };

  console = {
    font = "Lat2-Terminus16";
    keyMap = "us";
  };

  i18n = {
    defaultLocale = "en_US.UTF-8";
    # Every overridden category uses en_DK.UTF-8 while the default locale
    # stays en_US.UTF-8.
    extraLocaleSettings = {
      LC_CTYPE = "en_DK.UTF-8";
      LC_TIME = "en_DK.UTF-8";
      LC_PAPER = "en_DK.UTF-8";
      LC_NAME = "en_DK.UTF-8";
      LC_ADDRESS = "en_DK.UTF-8";
      LC_TELEPHONE = "en_DK.UTF-8";
      LC_MEASUREMENT = "en_DK.UTF-8";
      LC_IDENTIFICATION = "en_DK.UTF-8";
    };
  };

  time.timeZone = "Europe/Oslo";

  # Outgoing mail goes through an unauthenticated, unencrypted relay.
  programs.msmtp = {
    enable = true;
    accounts = {
      default = {
        auth = false;
        tls = false;
        tls_starttls = false;
        # NOTE(review): 24 is an unusual port for an SMTP relay -- confirm.
        port = 24;
        from = "ekman@oceanbox.io";
        host = "smtpgw.itpartner.no";
        # user = "utvikling";
        # NOTE(review): a plaintext password used to be kept here in a
        # comment. It has been removed; treat it as compromised and rotate
        # it. If authentication is ever enabled, load the secret at runtime
        # (e.g. msmtp's `passwordeval`) instead of writing it in this file.
      };
    };
    defaults = {
      aliases = "/etc/aliases";
    };
  };

  environment.etc = {
    # The alias file referenced by the msmtp defaults above.
    "aliases" = {
      text = ''
        root: jonas.juselius@oceanbox.io
      '';
      mode = "0644";
    };
  };

  features = {
    os = {
      # boot.uefi = true;
      adminAuthorizedKeys = [
        "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKiAS30ZO+wgfAqDE9Y7VhRunn2QszPHA5voUwo+fGOf jonas"
        "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDULdlLC8ZLu9qBZUYsjhpr6kv5RH4yPkekXQdD7prkqapyoptUkO1nOTDwy7ZsKDxmp9Zc6OtdhgoJbowhGW3VIZPmooWO8twcaYDpkxEBLUehY/n8SlAwBtiHJ4mTLLcynJMVrjmTQLF3FeWVof0Aqy6UtZceFpLp1eNkiHTCM3anwtb9+gfr91dX1YsAOqxqv7ooRDu5rCRUvOi4OvRowepyuBcCjeWpTkJHkC9WGxuESvDV3CySWkGC2fF2LHkAu6SFsFE39UA5ZHo0b1TK+AFqRFiBAb7ULmtuno1yxhpBxbozf8+Yyc7yLfMNCyBpL1ci7WnjKkghQv7yM1xN2XMJLpF56v0slSKMoAs7ThoIlmkRm/6o3NCChgu0pkpNg/YP6A3HfYiEDgChvA6rAHX6+to50L9xF3ajqk4BUzWd/sCk7Q5Op2lzj31L53Ryg8vMP8hjDjYcgEcCCsGOcjUVgcsmfC9LupwRIEz3aF14AWg66+3zAxVho8ozjes= jonas.juselius@juselius.io"
        "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCk5EKXxo/KLogjqSxSf/GkQdZ30UxB3wXc5k6Y6RRKQ/5iJ+XyYTbuqYOUp30p54apZzbayU2icahE/upr754lQicQwJtOXW/Iut57VRhSpq4P+mKCIdT58xCUkAZYr8Aja8UjHlYeJgFvp023K/fqmwbapu8R1gh4bzXm7uU1XeJoYfuOb+Cb8NGMn1ICrw2aztA0yVOXZ7tyJd2qyr1+6PuM/Ca2nKN4wLIX2vwyN3vZjR15nkIaHQGlTaJlNk2NEG1YTxsIQ9axDjNtyL80kjUr5M8zxW6s0h3451zr1b21EetP1i+1POIjS9uWXv5iabF+1Qb1GaS4FAYzzpqNY+moLzY7Zqfi05MPsMYkNoZ1Kg5aj0IuZb0OM9i6ZJrFs9nYAGG0uLSUTfrs957f9nokFyILGYg5xY46YN3uQrqfZifvcR0KaEdxEKvnfq0qrNG3uYLR/OYm2yblRcNbWgDoQ1hH7qa9uJM2JrPM07s4sJGkqfAib8Hwz9+l7jMrL6KIGUOA4aX0B1KZaIKKiZa42WlgdbeA17aW3laIqS5mZCkI3pLMYZAxe+A6rQi+V8ZAvDSyOL/Vws3lboXaN5QLu17R8uCY7MkIAvRBiZSpdWNeX3JO5m6zexkxkrFlxyEBf+ott4ATSw+eMYMs8i5xQRqPjgO1cABWkUdGpw== martin.moe.carstens@itpartner.no"
      ];
      docker.enable = false;
    };
    cachix.enable = false;
    monitoring.nodeExporter.enable = false;
  };

  # Every value below except `extraHosts` and the firewall rule is wrapped in
  # mkDefault, so another module can override it without mkForce.
  networking = {
    domain = mkDefault "cluster.local";
    defaultGateway = mkDefault "10.255.241.1";
    nameservers = mkDefault [ "8.8.8.8" ];
    search = mkDefault [ "local" ];
    extraHosts = import ./hosts.nix;
    # Accept all inbound traffic originating from 10.255.241.0/24.
    firewall.extraCommands = ''
      iptables -I INPUT -s 10.255.241.0/24 -j ACCEPT
    '';
  };

  environment.variables = {};

  # systemd.services."serial-getty@ttyS0".enable = true;

  # environment.etc."beegfs/connauthfile" = {
  #   source = ./connauthfile;
  #   mode = "0400";
  #   uid = 0;
  #   gid = 0;
  # };

  nix = {
    maxJobs = 32;
    trustedUsers = [ "@wheel" ];
    binaryCachePublicKeys = [
      "ekman.local:2NsTThGkZVJtOs3NVQYjEZ4NLscXlbjqA8Fi7HnAreA="
    ];
  };
};
|
||||
|
||||
# Settings applied only when `cluster.compute` is enabled: the /work
# directory, the HPC (Slurm / BeeGFS) feature flags, and the NFS mounts.
compute =
  let
    # Every NFS export below is mounted with the same type and options; only
    # the `server:/export` device string differs.
    nfsMount = device: {
      inherit device;
      fsType = "nfs";
      options = [ "soft" "rdma" "defaults" "vers=4.2" ];
    };
  in
  {
    # Create the /work directory during system activation.
    system.activationScripts.work.text = ''
      mkdir -p /work
    '';

    features.hpc = {
      enable = true;

      slurm = {
        client = true;
        mungeKey = ./compute/munge.key;
        mungeUid = 996; # hack
        pkey = "0x7666";
        controlMachine = "ekman";
        nodeName = [
          "c0-[1-16] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"
          "ekman Sockets=2 CoresPerSocket=64 ThreadsPerCore=2 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"
        ];
        partitionName = [
          "batch Nodes=c0-[1-16] Default=YES MaxTime=INFINITE State=UP"
          "frontend Nodes=ekman MaxTime=1:00:00 State=UP"
        ];
      };

      # BeeGFS is declared but switched off, both as a feature and as a client.
      beegfs = {
        enable = false;
        beegfs.work = {
          mgmtdHost = "ibbeegfs0";
          connAuthFile = "/etc/beegfs/connauthfile";
          client = {
            enable = false;
            mountPoint = "/work";
          };
        };
      };
    };

    fileSystems = {
      "/opt" = nfsMount "10.255.243.80:/opt";
      "/data" = nfsMount "10.255.243.80:/data";
      "/work" = nfsMount "10.255.243.90:/work";
    };
  };
|
||||
|
||||
# Settings applied only when `cluster.k8sNode` is enabled: joins the host to
# the "ekman" Kubernetes cluster as a node and sets the kubelet SANs.
k8s-node = {
  features = {
    k8s = {
      enable = true;
      node.enable = true;
      clusterName = "ekman";
      initca = ./ca;
      cidr = "10.100.0.0/16";
      master = {
        name = "ekman";
        address = "10.255.241.100";
        extraSANs = [ "ekman.local" "ekman.oceanbox.io" ];
      };
      ingressNodes = [
        "ekman.oceanbox.io"
      ];
      fileserver = "ibnfs0";
      charts = {
        acme_email = "acme@oceanbox.io";
        # grafana_smtp_user = "utvikling";
        # NOTE(review): a plaintext SMTP password used to be kept here in a
        # comment. It has been removed; treat it as compromised and rotate
        # it. Supply chart secrets from outside this file if they are needed.
      };
    };
  };

  # The kubelet SANs are this host's own name and address plus 127.0.0.1.
  services.kubernetes.kubelet.extraSANs = mkSANs {
    inherit (cfg) name address;
  };
};
|
||||
|
||||
# Host-based SSH trust between cluster machines: the shosts.equiv list, the
# known host keys, the rebuilt OpenSSH package and the ssh-keysign wrapper.
# This set is merged unconditionally into `config` by the module body.
shosts =
  let
    # Hosts with a hand-maintained known_hosts entry.
    staticKnownHosts = {
      ekman = {
        hostNames = [
          "ekman" "ekman.cluster.local" "ekman.oceanbox.io" "10.255.241.100" "10.255.243.100"
        ];
        publicKeyFile = ../ekman.pub;
      };
      nfs0 = {
        hostNames = [
          "nfs0" "nfs0.cluster.local" "10.255.241.80" "10.255.243.80"
        ];
        publicKeyFile = ./nfs0/nfs0.pub;
      };
      nfs1 = {
        hostNames = [
          "nfs1" "nfs1.cluster.local" "10.255.241.90" "10.255.243.90"
        ];
        publicKeyFile = ./nfs1/nfs1.pub;
      };
    };

    # known_hosts entry for one compute node, as a name/value pair keyed
    # "c0-<idx>". The last address octet is the node's `idx` plus 100.
    computeKnownHost = node:
      let
        n = toString node.idx;
        octet = toString (node.idx + 100);
      in
      {
        name = "c0-${n}";
        value = {
          hostNames = [
            "c0-${n}"
            "c0-${n}.cluster.local"
            "10.255.241.${octet}"
            "10.255.243.${octet}"
          ];
          publicKeyFile = ./compute/pubkeys/c0-${n}.pub;
        };
      };
  in
  {
    environment = {
      etc."ssh/shosts.equiv" = {
        mode = "0644";
        uid = 0;
        gid = 0;
        # Two fixed addresses followed by one line per compute node address.
        text = ''
          10.255.241.80
          10.255.241.90
        '' + lib.concatMapStrings (node: "${node.address}\n") computeNodes;
      };

      systemPackages = [ openssh-shosts ];
    };

    programs.ssh.knownHosts =
      staticKnownHosts
      // builtins.listToAttrs (map computeKnownHost computeNodes);

    # Wrapper for ssh-keysign, taken from the rebuilt OpenSSH package.
    security.wrappers.ssh-keysign = {
      source = "${openssh-shosts}/libexec/ssh-keysign";
      owner = "root";
      group = "root";
      permissions = "u+rs,g+rx,o+rx";
    };
  };
|
||||
|
||||
# OpenSSH rebuilt with SSH_KEYSIGN pointing at the ssh-keysign binary under
# /run/wrappers/bin instead of the package default.
openssh-shosts =
  let
    keysignWrapper = "/run/wrappers/bin/ssh-keysign";
  in
  pkgs.openssh.overrideAttrs (_previous: {
    buildFlags = [ "SSH_KEYSIGN=${keysignWrapper}" ];
    # the tests take hours
    doCheck = false;
  });
|
||||
|
||||
in {
  # Role switches for this module. mkEnableOption already prefixes the text
  # with "Whether to enable ", so each description names only the feature.
  options.cluster = {
    compute = mkEnableOption "compute node configs";
    k8sNode = mkEnableOption "k8s node";
  };

  # `configuration` and `shosts` always apply; the role-specific sets are
  # merged in only when their switch is on.
  config = mkMerge [
    configuration
    shosts
    (mkIf config.cluster.compute compute)
    (mkIf config.cluster.k8sNode k8s-node)
  ];

  imports = [
    ../modules
    ../nixos
    ./users.nix
  ];
}
|
||||
|
||||
Reference in New Issue
Block a user