Files
platform/rossby/manage/default.nix
2025-09-25 12:28:59 +02:00

355 lines
9.7 KiB
Nix

{ pkgs, ...}:
let
computeNodes =
  let
    # Build one node record. `address` and `ipoib` share the same final
    # octet, taken from the node's index.
    mkNode = idx: name: pubkey: {
      inherit idx name pubkey;
      address = "172.16.239.${toString idx}";
      ipoib = "10.1.6.${toString idx}";
    };
  in
  # Nodes imported from ../c0/nodes.nix, followed by the two extra hosts.
  import ../c0/nodes.nix ++ [
    (mkNode 222 "rossby" ../login/ssh_host_key.pub)
    (mkNode 210 "fs-work" ../fs-work/ssh_host_key.pub)
  ];
etcdCluster = import ../etcdCluster.nix;
# Identity of this host; both addresses end in the same host index.
name = "rossby-manage";
hostIdx = 221;
address = "172.16.239.${toString hostIdx}";
# Only referenced from the commented-out `interfaces.ibs2f0` block below.
ipoib = "10.1.6.${toString hostIdx}";
in {
# Disable every sleep-related systemd target on this host.
systemd.targets.sleep.enable = false;
systemd.targets.suspend.enable = false;
systemd.targets.hibernate.enable = false;
systemd.targets.hybrid-sleep.enable = false;
# services.udev.extraRules = ''
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
# '';
# Extra packages installed system-wide.
environment.systemPackages = [
  pkgs.rdma-core
  pkgs.hwloc
  pkgs.headscale
];
# Cluster role flags and shared mounts for this host.
# NOTE(review): the `cluster` options are presumably declared by one of the
# imported project modules — check there for their exact meaning.
cluster.k8sNode = true;
cluster.compute = false;
cluster.slurm = true;
cluster.mounts = {
  rdma.enable = false;
  automount.enable = true;
  users = false;
  opt = true;
  work = true;
  data = true;
  ceph = true;
};
# Host feature switches.
# NOTE(review): the `features` options are presumably declared by the imported
# project modules — confirm their exact semantics there.
features = {
  desktop.enable = false;
  cachix.enable = false;
  host = {
    inherit address name;
  };
  myvnc.enable = false;
  os = {
    externalInterface = "enp65s0np0";
    # NFS is switched off here; the export definition is kept next to it.
    nfs.enable = false;
    nfs.exports = ''
      /exports 172.16.239.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
    '';
  };
  hpc = {
    slurm.server = true;
    slurm.slurmrestd = true;
    slurm.mungeUid = 993;
    slurm.dbdHost = "ekman-manage.obx.hs";
    manageNode = true;
  };
  k8s = {
    master.enable = true;
    node.enable = true;
    nodes = computeNodes;
    inherit etcdCluster;
  };
  monitoring = {
    server = {
      enable = false;
      # computeNodes already contains "fs-work", so the combined list is
      # de-duplicated; lib.unique keeps the first occurrence of each name
      # and otherwise preserves order.
      scrapeHosts = pkgs.lib.unique (
        [
          "rossby-login"
          "rossby-manage"
          "fs-work"
        ]
        ++ (builtins.map (x: x.name) computeNodes)
      );
      defaultAlertReceiver = {
        email_configs = [
          { to = "jonas.juselius@oceanbox.io"; }
        ];
      };
      pageAlertReceiver = {
        webhook_configs = [
          {
            url = "https://prometheus-msteams.k2.itpartner.no/ekman";
            http_config = {
              # NOTE(review): certificate verification is disabled for this
              # webhook — confirm this is still required.
              tls_config = { insecure_skip_verify = true; };
            };
          }
        ];
      };
    };
    webUI.enable = false;
    webUI.acmeEmail = "acme@oceanbox.io";
    webUI.allow = [
      "10.1.2.0/24"
      "172.19.254.0/24"
      "172.19.255.0/24"
    ];
    infiniband-exporter = {
      enable = true;
      # The map currently holds only a commented-out example entry.
      nameMap = ''
        # 0xe8ebd3030024981e "c0-1"
      '';
    };
    slurm-exporter = {
      enable = true;
      port = 6080;
    };
  };
};
programs.singularity.enable = true;
# services.udev.extraRules = ''
#   KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
# '';
# OIDC flags for the API server, joined with single spaces into one string.
services.kubernetes.apiserver.extraOpts = builtins.concatStringsSep " " [
  "--oidc-client-id=9b6daef0-02fa-4574-8949-f7c1b5fccd15"
  "--oidc-groups-claim=roles"
  "--oidc-issuer-url=https://login.microsoftonline.com/3f737008-e9a0-4485-9d27-40329d288089/v2.0"
];
services.flannel.iface = "enp65s0np0";
# Static network configuration: one address on enp65s0np0, a fixed default
# gateway, and firewall rules including source NAT for the 172.16.239.0/24 net.
networking = {
  useDHCP = false;
  hostName = name;
  interfaces.enp65s0np0 = {
    useDHCP = false;
    ipv4.addresses = [
      {
        inherit address;
        prefixLength = 24;
      }
    ];
  };
  # interfaces.eno2 = {
  #   useDHCP = false;
  #   ipv4.addresses = [
  #     {
  #       inherit address;
  #       prefixLength = 24;
  #     }
  #   ];
  # };
  # interfaces.ens2f1np1 = {
  #   useDHCP = false;
  #   ipv4.addresses = [
  #     {
  #       address = "172.16.239.99";
  #       prefixLength = 24;
  #     }
  #   ];
  # };
  # interfaces.ibs2f0 = {
  #   useDHCP = false;
  #   ipv4.addresses = [
  #     {
  #       address = ipoib;
  #       prefixLength = 24;
  #     }
  #   ];
  # };
  defaultGateway = "172.16.239.1";
  firewall = {
    # 4725 is the port of the commented-out headscale service in this file.
    # NOTE(review): confirm it still needs to be open while that service is disabled.
    allowedTCPPorts = [ 6443 4725 ];
    # These commands run each time the firewall script is (re)started and
    # `iptables -A` appends unconditionally, so the MASQUERADE rule is only
    # added when `iptables -C` reports that it is not already present.
    extraCommands = ''
      # needed for nodeport access on k1 and k2
      # iptables -t nat -A POSTROUTING -s 172.16.239.0/24 ! -d 10.255.0.0/16 -j SNAT --to-source 10.255.242.3
      iptables -t nat -C POSTROUTING -s 172.16.239.0/24 -j MASQUERADE 2>/dev/null || iptables -t nat -A POSTROUTING -s 172.16.239.0/24 -j MASQUERADE
      # iptables -t nat -A POSTROUTING -s 100.64.0.0/24 -j MASQUERADE
      # iptables -t nat -A POSTROUTING -d 172.16.239.0/24 -j MASQUERADE
      # iptables -t nat -A POSTROUTING -s 172.16.239.0/24 -d 10.255.241.0/16 -j SNAT --to-source 10.255.241.99
      # iptables -t nat -A POSTROUTING -s 172.16.239.0/24 -j SNAT --to-source 10.255.242.3
    '';
  };
};
# Bind-mount /srv/public at /exports/public.
fileSystems."/exports/public" = {
  device = "/srv/public";
  options = [ "bind" ];
};
# The extra Nix options currently consist of a single commented-out setting.
nix.extraOptions = ''
  # secret-key-files = /etc/nix/ekman.key
'';
# msmtp default account: relays through smtpgw.itpartner.no with
# authentication and TLS switched off.
programs.msmtp = {
enable = true;
accounts = {
default = {
auth = false;
tls = false;
tls_starttls = false;
# NOTE(review): 24 is not the usual SMTP port (25) — confirm the gateway listens here.
port = 24;
from = "rossby@oceanbox.io";
host = "smtpgw.itpartner.no";
# Credentials are intentionally not kept in this file (auth is off). If auth is
# ever enabled, supply the password from a secret file (e.g. msmtp `passwordeval`)
# and rotate any password that was previously committed here.
};
};
defaults = {
aliases = "/etc/aliases";
};
};
# Global SMTP settings for Alertmanager e-mail notifications.
services.prometheus.alertmanager.configuration.global = {
  # Alertmanager requires smtp_smarthost in "host:port" form. The port matches
  # the one programs.msmtp uses for the same gateway in this file.
  # NOTE(review): confirm the gateway port, and whether smtp_require_tls must be
  # disabled given that msmtp talks to this gateway without TLS.
  smtp_smarthost = "smtpgw.itpartner.no:24";
  # SMTP auth is not configured. If it becomes necessary, load the password from
  # a secret file (smtp_auth_password_file) rather than committing it here, and
  # rotate any password that was previously committed in this comment.
  smtp_hello = "rossby.oceanbox.io";
  smtp_from = "noreply@rossby.oceanbox.io";
};
security.pam =
  let
    # One hard limit entry for the `@users` domain.
    hardLimit = item: value: {
      domain = "@users";
      type = "hard";
      inherit item value;
    };
  in
  {
    # Enable the Google Authenticator PAM module for sshd logins.
    services.sshd.googleAuthenticator.enable = true;
    loginLimits = [
      # NOTE(review): limits.conf(5) documents `rss` as ignored on Linux 2.4.30
      # and later — confirm this entry has the intended effect.
      (hardLimit "rss" 16000000)
      (hardLimit "cpu" 180)
    ];
  };
# Set mode 755 on the listed home directories at activation time.
# Directories that do not exist are skipped, so a missing home no longer makes
# chmod fail and the activation script report an error.
system.activationScripts.home-permissions.text = ''
  for user in olean frankgaa jonas stig bast mrtz avle simenlk ole; do
    if [ -d "/home/$user" ]; then
      chmod 755 "/home/$user"
    fi
  done
'';
# dnsmasq: local domain plus upstream servers.
# NOTE(review): the domain is "hbx.hs" while the rest of this file uses
# "obx.hs" — confirm this is not a typo.
services.dnsmasq.settings.domain = [ "hbx.hs" ];
services.dnsmasq.settings.server = [
  "8.8.8.8"
  "/obx.hs/100.100.100.100" # headscale dns
];
services.dnsmasq.settings.address = [ ];
# Print an nvd package diff between the running system and the one being
# activated.
system.activationScripts.system-diff =
  let
    nixBinPath = pkgs.lib.makeBinPath [ pkgs.nixVersions.latest ];
    nvdExe = pkgs.lib.getExe pkgs.nvd;
  in
  {
    supportsDryActivation = true; # safe: only outputs to stdout
    # The diff is best-effort: `|| true` keeps a failing nvd from failing activation.
    text = ''
      export PATH="${nixBinPath}:$PATH"
      if [ -e /run/current-system ]; then
      ${nvdExe} diff '/run/current-system' "$systemConfig" || true
      fi
    '';
  };
# Extra lines appended to the sshd configuration.
# ssh-rsa is deprecated, but PuTTY/WinSCP users use it; the algorithm list that
# would re-allow it is kept commented out inside the string, so only the
# PubkeyAuthOptions line is currently active.
services.openssh.extraConfig = ''
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
PubkeyAuthOptions verify-required
'';
# boot.kernelPackages = pkgs.linuxKernel.packages.linux_6_1;
# Enable Docker; mkForce gives this definition priority over other definitions
# of the same option.
virtualisation.docker.enable = pkgs.lib.mkForce true;
# Configuration for the coordination server for a tailscale network run using headscale.
#
# We can set it up to provide several exit nodes through which traffic can be routed.
#
# Servers can join using this command:
# `tailscale up --login-server net.b0.itpartner.no --accept-dns=false --advertise-exit-node`
#
# with the following config:
#
# services.tailscale = {
# enable = true;
# useRoutingFeatures = "server"; # for exit-node usage
# };
#
# Clients can join using this command:
# `tailscale up --login-server net.b0.itpartner.no --accept-dns=false`
#
# services.headscale = {
# enable = true;
# address = "0.0.0.0";
# port = 4725; # hscl
# settings = import ./headscale/settings.nix;
# };
services.tailscale =
  let
    # Subnets passed to `--advertise-routes`, joined with commas below.
    advertisedRoutes = [
      "172.16.239.0/24"
      "172.16.238.0/24"
    ];
  in
  {
    enable = true;
    authKeyFile = "/var/lib/secrets/tailscale.key";
    useRoutingFeatures = "both"; # for exit-node usage
    extraUpFlags = [
      "--login-server=https://headscale.svc.oceanbox.io"
      # "--accept-dns=true" # see dnsmasq
      "--advertise-exit-node"
      "--advertise-routes=${builtins.concatStringsSep "," advertisedRoutes}"
    ];
  };
services.networkd-dispatcher =
  let
    # Interface whose offload settings are changed by the rule below.
    uplink = "enp65s0np0";
  in
  {
    enable = true;
    # On the "routable" state, turn rx-udp-gro-forwarding on and rx-gro-list
    # off for the uplink interface.
    rules."tailscale-router" = {
      onState = [ "routable" ];
      script = ''
        #!${pkgs.runtimeShell}
        ${pkgs.ethtool}/bin/ethtool -K ${uplink} rx-udp-gro-forwarding on rx-gro-list off
        exit 0
      '';
    };
  };
imports = [
./hardware-configuration.nix
../default.nix
../mounts.nix
../myvnc.nix
];
}