major: initial rossby cluster and biggish refactor
This commit is contained in:
331
rossby/manage/default.nix
Normal file
331
rossby/manage/default.nix
Normal file
@@ -0,0 +1,331 @@
|
||||
{ pkgs, ...}:
|
||||
let
|
||||
computeNodes =  # node list: entries from ../c0/nodes.nix plus the login node declared inline; used below for k8s.nodes and monitoring scrapeHosts
|
||||
import ../c0/nodes.nix ++
|
||||
[
|
||||
rec {  # rec so that address and ipoib can interpolate idx from this same set
|
||||
idx = 222;  # becomes the last octet of both addresses below
|
||||
name = "rossby-login";
|
||||
address = "172.16.239.${toString idx}";  # evaluates to 172.16.239.222
|
||||
ipoib = "172.16.240.${toString idx}";  # evaluates to 172.16.240.222; presumably the IP-over-InfiniBand address — confirm
|
||||
pubkey = ../login/ssh_host_key.pub;  # path literal; presumably the login node's SSH host public key — confirm
|
||||
}
|
||||
# rec {
|
||||
# idx = 210;
|
||||
# name = "fs-work";
|
||||
# address = "172.16.239.${toString idx}";
|
||||
# ipoib = "172.16.240.${toString idx}";
|
||||
# pubkey = ../fs-work/ssh_host_key.pub;
|
||||
# }
|
||||
];
|
||||
etcdCluster = import ../etcdCluster.nix;
|
||||
name = "rossby-manage";
|
||||
address = "172.16.239.221";
|
||||
ipoib = "172.16.240.221";
|
||||
in {
|
||||
systemd.targets = {
|
||||
sleep.enable = false;
|
||||
suspend.enable = false;
|
||||
hibernate.enable = false;
|
||||
hybrid-sleep.enable = false;
|
||||
};
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
rdma-core
|
||||
hwloc
|
||||
headscale
|
||||
];
|
||||
|
||||
cluster = {
|
||||
k8sNode = true;
|
||||
compute = false;
|
||||
slurm = true;
|
||||
mounts = {
|
||||
rdma.enable = false;
|
||||
automount.enable = true;
|
||||
users = false;
|
||||
opt = false;
|
||||
work = false;
|
||||
data = false;
|
||||
ceph = false;
|
||||
};
|
||||
};
|
||||
|
||||
features = {
|
||||
desktop.enable = false;
|
||||
cachix.enable = false;
|
||||
|
||||
host = {
|
||||
inherit address;
|
||||
inherit name;
|
||||
};
|
||||
|
||||
myvnc.enable = false;
|
||||
|
||||
os = {
|
||||
externalInterface = "enp129s0f0";
|
||||
nfs.enable = false;
|
||||
nfs.exports = ''
|
||||
/exports 172.16.239.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
'';
|
||||
};
|
||||
|
||||
hpc = {
|
||||
slurm.server = true;
|
||||
slurm.slurmrestd = true;
|
||||
slurm.mungeUid = 993;
|
||||
manager = true;
|
||||
};
|
||||
|
||||
k8s = {
|
||||
master.enable = true;
|
||||
node.enable = true;
|
||||
nodes = computeNodes;
|
||||
inherit etcdCluster;
|
||||
};
|
||||
|
||||
monitoring = {
|
||||
server = {
|
||||
enable = false;
|
||||
scrapeHosts = [
|
||||
"rossby-login"
|
||||
"rossby-manage"
|
||||
"fs-work"
|
||||
] ++ (builtins.map (x: x.name) computeNodes);
|
||||
defaultAlertReceiver = {
|
||||
email_configs = [
|
||||
{ to = "jonas.juselius@oceanbox.io"; }
|
||||
];
|
||||
};
|
||||
pageAlertReceiver = {
|
||||
webhook_configs = [
|
||||
{
|
||||
url = "https://prometheus-msteams.k2.itpartner.no/ekman";
|
||||
http_config = {
|
||||
tls_config = { insecure_skip_verify = true; };
|
||||
};
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
webUI.enable = false;
|
||||
webUI.acmeEmail = "acme@oceanbox.io";
|
||||
webUI.allow = [
|
||||
"10.1.2.0/24"
|
||||
"172.19.254.0/24"
|
||||
"172.19.255.0/24"
|
||||
];
|
||||
infiniband-exporter = {
|
||||
enable = true;
|
||||
nameMap = ''
|
||||
# 0xe8ebd3030024981e "c0-1"
|
||||
'';
|
||||
};
|
||||
slurm-exporter = {
|
||||
enable = true;
|
||||
port = 6080;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
programs.singularity.enable = true;
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
services.kubernetes.apiserver.extraOpts = ''--oidc-client-id=9b6daef0-02fa-4574-8949-f7c1b5fccd15 --oidc-groups-claim=roles --oidc-issuer-url=https://login.microsoftonline.com/3f737008-e9a0-4485-9d27-40329d288089/v2.0'';
|
||||
|
||||
services.flannel.iface = "enp129s0f0";
|
||||
|
||||
networking = {
|
||||
useDHCP = false;
|
||||
hostName = name;
|
||||
interfaces.enp129s0f0 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
inherit address;
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
# interfaces.eno2 = {
|
||||
# useDHCP = false;
|
||||
# ipv4.addresses = [
|
||||
# {
|
||||
# inherit address;
|
||||
# prefixLength = 24;
|
||||
# }
|
||||
# ];
|
||||
# };
|
||||
# interfaces.ens2f1np1 = {
|
||||
# useDHCP = false;
|
||||
# ipv4.addresses = [
|
||||
# {
|
||||
# address = "172.16.239.99";
|
||||
# prefixLength = 24;
|
||||
# }
|
||||
# ];
|
||||
# };
|
||||
# interfaces.ibs2f0 = {
|
||||
# useDHCP = false;
|
||||
# ipv4.addresses = [
|
||||
# {
|
||||
# address = ipoib;
|
||||
# prefixLength = 24;
|
||||
# }
|
||||
# ];
|
||||
# };
|
||||
defaultGateway = "172.16.239.1";
|
||||
firewall = {  # opens two TCP ports and appends shell commands to the firewall setup; only the MASQUERADE rule for 172.16.239.0/24 is active, the other iptables lines are commented out
|
||||
allowedTCPPorts = [ 4443 4725 ];  # 4725 matches the port in the commented-out services.headscale example in this file; purpose of 4443 is not shown here
|
||||
extraCommands = ''
|
||||
# needed for nodeport access on k1 and k2
|
||||
# iptables -t nat -A POSTROUTING -s 172.16.239.0/24 ! -d 10.255.0.0/16 -j SNAT --to-source 10.255.242.3
|
||||
iptables -t nat -A POSTROUTING -s 172.16.239.0/24 -j MASQUERADE
|
||||
# iptables -t nat -A POSTROUTING -s 100.64.0.0/24 -j MASQUERADE
|
||||
# iptables -t nat -A POSTROUTING -d 172.16.239.0/24 -j MASQUERADE
|
||||
# iptables -t nat -A POSTROUTING -s 172.16.239.0/24 -d 10.255.241.0/16 -j SNAT --to-source 10.255.241.99
|
||||
# iptables -t nat -A POSTROUTING -s 172.16.239.0/24 -j SNAT --to-source 10.255.242.3
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
fileSystems = {
|
||||
"/exports/public" = {
|
||||
device = "/srv/public";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
};
|
||||
|
||||
nix.extraOptions = ''
|
||||
# secret-key-files = /etc/nix/ekman.key
|
||||
'';
|
||||
|
||||
programs.msmtp = {
|
||||
enable = true;
|
||||
accounts = {
|
||||
default = {
|
||||
auth = false;
|
||||
tls = false;
|
||||
tls_starttls = false;
|
||||
port = 24;
|
||||
from = "rossby@oceanbox.io";
|
||||
host = "smtpgw.itpartner.no";
|
||||
# user = "utvikling";
|
||||
# password = "<redacted>"; # never commit credentials; load from a secrets file outside the Nix store (e.g. msmtp passwordeval)
|
||||
};
|
||||
};
|
||||
defaults = {
|
||||
aliases = "/etc/aliases";
|
||||
};
|
||||
};
|
||||
|
||||
services.prometheus.alertmanager.configuration.global = {
|
||||
smtp_smarthost = "smtpgw.itpartner.no";
|
||||
# smtp_auth_username = "utvikling";
|
||||
# smtp_auth_password = "<redacted>"; # never commit credentials; supply via a secrets file outside the Nix store
|
||||
smtp_hello = "rossby.oceanbox.io";
|
||||
smtp_from = "noreply@rossby.oceanbox.io";
|
||||
};
|
||||
|
||||
security.pam = {  # PAM settings: Google Authenticator for sshd plus two hard login limits for "@users"
|
||||
services.sshd.googleAuthenticator.enable = true;
|
||||
loginLimits = [
|
||||
{
|
||||
domain = "@users";  # NOTE(review): in limits.conf syntax a leading @ denotes a group — confirm the "users" group is the intended target
|
||||
item = "rss";
|
||||
type = "hard";
|
||||
value = 16000000;  # NOTE(review): limits.conf documents rss in KB (about 16 GB) and notes it is ignored on Linux >= 2.4.30 — confirm this limit has any effect
|
||||
}
|
||||
{
|
||||
domain = "@users";
|
||||
item = "cpu";
|
||||
type = "hard";
|
||||
value = 180;  # NOTE(review): limits.conf documents cpu in minutes, i.e. 3 hours of CPU time per process — confirm
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
system.activationScripts = {
|
||||
home-permissions.text = ''
|
||||
chmod 755 /home/olean
|
||||
chmod 755 /home/frankgaa
|
||||
chmod 755 /home/jonas
|
||||
chmod 755 /home/stig
|
||||
chmod 755 /home/bast
|
||||
chmod 755 /home/mrtz
|
||||
chmod 755 /home/avle
|
||||
chmod 755 /home/simenlk
|
||||
chmod 755 /home/ole
|
||||
'';
|
||||
};
|
||||
|
||||
# Use nvd to get package diff before apply
|
||||
system.activationScripts.system-diff = {
|
||||
supportsDryActivation = true; # safe: only outputs to stdout
|
||||
text = ''
|
||||
export PATH="${pkgs.lib.makeBinPath [ pkgs.nixVersions.latest ]}:$PATH"
|
||||
if [ -e /run/current-system ]; then
|
||||
${pkgs.lib.getExe pkgs.nvd} diff '/run/current-system' "$systemConfig" || true
|
||||
fi
|
||||
'';
|
||||
};
|
||||
|
||||
# ssh-rsa is deprecated, but putty/winscp users use it
|
||||
services.openssh.extraConfig = ''
|
||||
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
|
||||
PubkeyAuthOptions verify-required
|
||||
'';
|
||||
|
||||
# boot.kernelPackages = pkgs.linuxKernel.packages.linux_6_1;
|
||||
|
||||
virtualisation.docker.enable = pkgs.lib.mkForce true;
|
||||
|
||||
# Configuration for the coordination server for a tailscale network run using headscale.
|
||||
#
|
||||
# We can set it up to provide several exit nodes through which traffic can be routed.
|
||||
#
|
||||
# Servers can join using this command:
|
||||
# `tailscale up --login-server net.b0.itpartner.no --accept-dns=false --advertise-exit-node`
|
||||
#
|
||||
# with the following config:
|
||||
#
|
||||
# services.tailscale = {
|
||||
# enable = true;
|
||||
# useRoutingFeatures = "server"; # for exit-node usage
|
||||
# };
|
||||
#
|
||||
# Clients can join using this command:
|
||||
# `tailscale up --login-server net.b0.itpartner.no --accept-dns=false`
|
||||
#
|
||||
# services.headscale = {
|
||||
# enable = true;
|
||||
# address = "0.0.0.0";
|
||||
# port = 4725; # hscl
|
||||
# settings = import ./headscale/settings.nix;
|
||||
# };
|
||||
|
||||
services.tailscale = {
|
||||
enable = true;
|
||||
authKeyFile = "/var/lib/secrets/tailscale.key";
|
||||
useRoutingFeatures = "both"; # for exit-node usage
|
||||
extraUpFlags = [
|
||||
"--login-server=https://headscale.svc.oceanbox.io"
|
||||
"--accept-dns=true"
|
||||
"--advertise-exit-node"
|
||||
"--advertise-routes=172.16.239.0/24,172.16.238.0/24"
|
||||
];
|
||||
};
|
||||
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
../default.nix
|
||||
../mounts.nix
|
||||
../myvnc.nix
|
||||
];
|
||||
}
|
||||
|
||||
44
rossby/manage/hardware-configuration.nix
Normal file
44
rossby/manage/hardware-configuration.nix
Normal file
@@ -0,0 +1,44 @@
|
||||
# Do not modify this file! It was generated by ‘nixos-generate-config’
|
||||
# and may be overwritten by future invocations. Please make changes
|
||||
# to /etc/nixos/configuration.nix instead.
|
||||
{ config, lib, pkgs, modulesPath, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[ (modulesPath + "/installer/scan/not-detected.nix")
|
||||
];
|
||||
|
||||
boot.initrd.availableKernelModules = [ "xhci_pci" "ahci" "usbhid" "sd_mod" ];
|
||||
boot.initrd.kernelModules = [ ];
|
||||
boot.kernelModules = [ "kvm-amd" ];
|
||||
boot.extraModulePackages = [ ];
|
||||
|
||||
fileSystems."/" =
|
||||
{ device = "/dev/disk/by-uuid/08147fd3-5fee-4650-bfbf-43f4e5c8659d";
|
||||
fsType = "ext4";
|
||||
};
|
||||
|
||||
fileSystems."/boot" =
|
||||
{ device = "/dev/disk/by-uuid/392A-D2D9";
|
||||
fsType = "vfat";
|
||||
options = [ "fmask=0077" "dmask=0077" ];
|
||||
};
|
||||
|
||||
swapDevices = [ ];
|
||||
|
||||
# Enables DHCP on each ethernet and wireless interface. In case of scripted networking
|
||||
# (the default) this is the recommended approach. When using systemd-networkd it's
|
||||
# still possible to use this option, but it's recommended to use it in conjunction
|
||||
# with explicit per-interface declarations with `networking.interfaces.<interface>.useDHCP`.
|
||||
networking.useDHCP = lib.mkDefault true;
|
||||
# networking.interfaces.enp129s0f0.useDHCP = lib.mkDefault true;
|
||||
# networking.interfaces.enp129s0f1.useDHCP = lib.mkDefault true;
|
||||
# networking.interfaces.enp129s0f2.useDHCP = lib.mkDefault true;
|
||||
# networking.interfaces.enp129s0f3.useDHCP = lib.mkDefault true;
|
||||
# networking.interfaces.enp1s0np0.useDHCP = lib.mkDefault true;
|
||||
# networking.interfaces.enp65s0np0.useDHCP = lib.mkDefault true;
|
||||
# networking.interfaces.tailscale0.useDHCP = lib.mkDefault true;
|
||||
|
||||
nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
|
||||
hardware.cpu.amd.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
|
||||
}
|
||||
1
rossby/manage/ssh_host_key.pub
Normal file
1
rossby/manage/ssh_host_key.pub
Normal file
@@ -0,0 +1 @@
|
||||
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCdTAf77QvX1WBN6LAIqPOnSSTMiubIHX+uC7usA6Bvgi9NwLk6aTdslxlA4pko8FOI8D0S06dkoUPqR8CZO+qhgyXi25WCaJ+yvVbC/ySdWsy+I8MA9+9G2FLCXXFQCJmmnDu4W6b91Hul/hv7xIxhXQA+aFpm2UoQ0bRZoMKgyTyue5HWTMsQ/mkug1PmferOWIzLb/N6gQienCeyoU2KGoHY8O30vYCS0M6jvLkq11X769bjYuLZrLS+dM+uGTRdO0nMITUJlT+Rbzb5k1bzuTPpcPM0V9LEPkBMUsgCnJZtvdOta5wf87ef3xrrV3JYmdRvoH6c52+1KoKu6JF29R/6MTJYN7UOS2R7Lksjst5SVnW3gL2AtNWvH0Y+IgaxaYut++Rm4tBCTGV14G0pw0B31Q3018Z5tPbt8dlUFi5D+c9mKx7x1W6GKmll8TVz3FZ6CHi1Dv0AJB+3wCUokwcnMvMymj9hCpLgat44qctGcRoVV7vOMxYFq8toGAmszqktNJR4IGUwUTrJahEHmXe5dU6TxkweF6Np8CJ9VD4f/cEFFaxFLWekfj1aNuq6vvtq0cFdhxkivse3WQAT1HzAS/iHczqkqUp+cseics+LsRPMzrmgjWa1067dzlX5XYPcPfXVxAq7O9JGD4W7BBHusXpju1Zotpjchl2U+Q== root@rossby-manage
|
||||
Reference in New Issue
Block a user