Merge branch 'main' of gitlab.com:oceanbox/clusterfck
This commit is contained in:
@@ -11,7 +11,6 @@ let
|
|||||||
|
|
||||||
compute = {
|
compute = {
|
||||||
deployment.tags = [ "compute" "c0" ];
|
deployment.tags = [ "compute" "c0" ];
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
mkCompute = host:
|
mkCompute = host:
|
||||||
@@ -39,8 +38,8 @@ let
|
|||||||
name = host.name;
|
name = host.name;
|
||||||
address = host.address;
|
address = host.address;
|
||||||
};
|
};
|
||||||
os.externalInterface = "enp33s0f0np0";
|
os.externalInterface = host.iface;
|
||||||
hpc.compute = true;
|
hpc.computeNode = true;
|
||||||
hpc.knem = true;
|
hpc.knem = true;
|
||||||
# k8s = { inherit etcdCluster; };
|
# k8s = { inherit etcdCluster; };
|
||||||
};
|
};
|
||||||
@@ -54,7 +53,7 @@ let
|
|||||||
networking = {
|
networking = {
|
||||||
hostName = host.name;
|
hostName = host.name;
|
||||||
useDHCP = false;
|
useDHCP = false;
|
||||||
interfaces.enp33s0f0np0 = {
|
interfaces."${host.iface}" = {
|
||||||
useDHCP = false;
|
useDHCP = false;
|
||||||
ipv4.addresses = [ {
|
ipv4.addresses = [ {
|
||||||
address = host.address;
|
address = host.address;
|
||||||
|
|||||||
@@ -1,9 +1,10 @@
|
|||||||
with builtins;
|
with builtins;
|
||||||
let
|
let
|
||||||
nodes = genList (n: n + 1) 16; in
|
nodes = genList (n: n + 1) 18; in
|
||||||
map (n: (
|
map (n: (
|
||||||
rec {
|
rec {
|
||||||
idx = 100 + n;
|
idx = 100 + n;
|
||||||
|
iface = if n > 16 then "enp33s0f3np3" else "enp33s0f0np0";
|
||||||
name = "c0-${toString n}";
|
name = "c0-${toString n}";
|
||||||
address = "10.255.241.${toString idx}";
|
address = "10.255.241.${toString idx}";
|
||||||
ipoib = "10.255.243.${toString idx}";
|
ipoib = "10.255.243.${toString idx}";
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
莵q丘
|
|
||||||
@@ -1,88 +0,0 @@
|
|||||||
{ pkgs ? import <nixpkgs> {} }:
|
|
||||||
let
|
|
||||||
# Pin the deployment package-set to a specific version of nixpkgs
|
|
||||||
# pkgs = import (builtins.fetchTarball {
|
|
||||||
# url = "https://github.com/NixOS/nixpkgs/archive/e6377ff35544226392b49fa2cf05590f9f0c4b43.tar.gz";
|
|
||||||
# sha256 = "1fra9wwy5gvj5ibayqkzqpwdf715bggc0qbmrfch4fghwvl5m70l";
|
|
||||||
# }) {};
|
|
||||||
# pkgs = import <nixpkgs> {};
|
|
||||||
|
|
||||||
nodes = import ./nodes.nix;
|
|
||||||
|
|
||||||
compute = {
|
|
||||||
deployment.tags = [ "compute" "c0" ];
|
|
||||||
};
|
|
||||||
|
|
||||||
mkCompute = host:
|
|
||||||
let
|
|
||||||
hw = ./hardware-configuration.d + "/${host.name}.nix";
|
|
||||||
in {
|
|
||||||
"${host.name}" = {
|
|
||||||
cluster = {
|
|
||||||
compute = true;
|
|
||||||
k8sNode = true;
|
|
||||||
mounts = {
|
|
||||||
rdma.enable = true;
|
|
||||||
automount.enable = true;
|
|
||||||
users = true;
|
|
||||||
opt = true;
|
|
||||||
work = true;
|
|
||||||
data = false;
|
|
||||||
backup = false;
|
|
||||||
ceph = false;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
features = {
|
|
||||||
host = {
|
|
||||||
name = host.name;
|
|
||||||
address = host.address;
|
|
||||||
};
|
|
||||||
os.externalInterface = "enp33s0f0np0";
|
|
||||||
hpc.compute = true;
|
|
||||||
hpc.knem = true;
|
|
||||||
# k8s = { inherit etcdCluster; };
|
|
||||||
};
|
|
||||||
|
|
||||||
deployment.targetHost = host.address;
|
|
||||||
|
|
||||||
# services.udev.extraRules = ''
|
|
||||||
# KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
|
||||||
# '';
|
|
||||||
|
|
||||||
networking = {
|
|
||||||
hostName = host.name;
|
|
||||||
useDHCP = false;
|
|
||||||
interfaces.enp33s0f3np3 = {
|
|
||||||
useDHCP = false;
|
|
||||||
ipv4.addresses = [ {
|
|
||||||
address = host.address;
|
|
||||||
prefixLength = 24;
|
|
||||||
} ];
|
|
||||||
ipv4.routes = [ {
|
|
||||||
address = "10.255.242.0";
|
|
||||||
prefixLength = 24;
|
|
||||||
via = "10.255.241.100";
|
|
||||||
} ];
|
|
||||||
|
|
||||||
};
|
|
||||||
# interfaces."ibp1s0.7666" = {
|
|
||||||
interfaces."ibp1s0" = {
|
|
||||||
useDHCP = false;
|
|
||||||
ipv4.addresses = [ {
|
|
||||||
address = host.ipoib;
|
|
||||||
prefixLength = 24;
|
|
||||||
} ];
|
|
||||||
};
|
|
||||||
};
|
|
||||||
imports = [
|
|
||||||
hw
|
|
||||||
../default.nix
|
|
||||||
../mounts.nix
|
|
||||||
# ./kernel.nix
|
|
||||||
];
|
|
||||||
}
|
|
||||||
// compute;
|
|
||||||
};
|
|
||||||
in builtins.foldl' (a: n: a // mkCompute n) {} nodes
|
|
||||||
|
|
||||||
@@ -1,48 +0,0 @@
|
|||||||
{pkgs, lib, ...}:
|
|
||||||
let
|
|
||||||
kernel = pkgs.linuxPackages.kernel;
|
|
||||||
i40e =
|
|
||||||
pkgs.stdenv.mkDerivation rec {
|
|
||||||
name = "i40e-${version}-${kernel.version}";
|
|
||||||
version = "2.13.10";
|
|
||||||
|
|
||||||
src = pkgs.fetchFromGitHub {
|
|
||||||
owner = "dmarion";
|
|
||||||
repo = "i40e";
|
|
||||||
rev = "7228a7c3b362c3170baa2f9a9c6870a900e78dbd";
|
|
||||||
sha256 = "087kvq9wrc1iw6vig8cqcx7cb6346wx8qxzb85c3n8638vq1vrxr";
|
|
||||||
};
|
|
||||||
|
|
||||||
hardeningDisable = [ "pic" ];
|
|
||||||
|
|
||||||
configurePhase = ''
|
|
||||||
cd src
|
|
||||||
kernel_version=${kernel.modDirVersion}
|
|
||||||
sed -i -e 's|/lib/modules|${kernel.dev}/lib/modules|' Makefile
|
|
||||||
sed -i -e 's|/lib/modules|${kernel.dev}/lib/modules|' common.mk
|
|
||||||
export makeFlags="BUILD_KERNEL=$kernel_version"
|
|
||||||
'';
|
|
||||||
|
|
||||||
installPhase = ''
|
|
||||||
install -v -D -m 644 i40e.ko "$out/lib/modules/$kernel_version/kernel/drivers/net/i40e/i40e2.ko"
|
|
||||||
'';
|
|
||||||
|
|
||||||
dontStrip = true;
|
|
||||||
|
|
||||||
enableParallelBuilding = true;
|
|
||||||
|
|
||||||
meta = {
|
|
||||||
description = "Linux kernel drivers for Intel Ethernet adapters and LOMs (LAN On Motherboard)";
|
|
||||||
homepage = https://github.com/dmarion/i40e;
|
|
||||||
license = lib.licenses.gpl2;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
in
|
|
||||||
{
|
|
||||||
# i40e2 = i40e;
|
|
||||||
boot.kernelPackages = pkgs.linuxKernel.packages.linux_5_10;
|
|
||||||
# overlay = self: super: {
|
|
||||||
# linuxPackages_5_4 = super.linuxPackages_5_4 // { inherit i40e; };
|
|
||||||
# };
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
with builtins;
|
|
||||||
let
|
|
||||||
nodes = genList (n: n + 17) 2; in
|
|
||||||
map (n: (
|
|
||||||
rec {
|
|
||||||
idx = 100 + n;
|
|
||||||
name = "c0-${toString n}";
|
|
||||||
address = "10.255.241.${toString idx}";
|
|
||||||
ipoib = "10.255.243.${toString idx}";
|
|
||||||
pubkey = ./. + "/ssh_host_key.d/c0-${toString n}.pub";
|
|
||||||
})) nodes
|
|
||||||
@@ -39,7 +39,7 @@ let
|
|||||||
address = host.address;
|
address = host.address;
|
||||||
};
|
};
|
||||||
os.externalInterface = "eno33np0";
|
os.externalInterface = "eno33np0";
|
||||||
hpc.compute = true;
|
hpc.computeNode = true;
|
||||||
# k8s = { inherit etcdCluster; };
|
# k8s = { inherit etcdCluster; };
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -100,6 +100,7 @@ let
|
|||||||
};
|
};
|
||||||
|
|
||||||
networking = {
|
networking = {
|
||||||
|
useDHCP = false;
|
||||||
domain = mkDefault "cluster.local";
|
domain = mkDefault "cluster.local";
|
||||||
defaultGateway = mkDefault "10.255.241.1";
|
defaultGateway = mkDefault "10.255.241.1";
|
||||||
nameservers = mkDefault [ "8.8.8.8" ];
|
nameservers = mkDefault [ "8.8.8.8" ];
|
||||||
@@ -144,12 +145,13 @@ let
|
|||||||
features.hpc.slurm = {
|
features.hpc.slurm = {
|
||||||
enable = true;
|
enable = true;
|
||||||
client = true;
|
client = true;
|
||||||
# clusterName = "ekman";
|
clusterName = "ekman";
|
||||||
mungeKey = ./munge.key;
|
|
||||||
# jwtKey = ./jwt_hs256.key;
|
|
||||||
mungeUid = mkDefault 996; # hack
|
|
||||||
# pkey = "0x7666";
|
|
||||||
controlMachine = "ekman-manage";
|
controlMachine = "ekman-manage";
|
||||||
|
dbdHost = "10.255.241.15";
|
||||||
|
mungeKey = ./munge.key;
|
||||||
|
jwtKey = ./jwt_hs256.key;
|
||||||
|
slurmKey = ./slurm.key;
|
||||||
|
# pkey = "0x7666";
|
||||||
mailDomain = "oceanbox.io";
|
mailDomain = "oceanbox.io";
|
||||||
nodeName = [
|
nodeName = [
|
||||||
"c0-[1-18] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"
|
"c0-[1-18] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"
|
||||||
|
|||||||
@@ -69,7 +69,6 @@ in {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
features.hpc.slurm.mungeUid = 991;
|
|
||||||
features.mft.enable = lib.mkForce true;
|
features.mft.enable = lib.mkForce true;
|
||||||
|
|
||||||
features = {
|
features = {
|
||||||
|
|||||||
@@ -46,8 +46,6 @@ in {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
features.hpc.slurm.mungeUid = 994;
|
|
||||||
|
|
||||||
features = {
|
features = {
|
||||||
host = {
|
host = {
|
||||||
inherit address;
|
inherit address;
|
||||||
|
|||||||
@@ -17,9 +17,8 @@ let
|
|||||||
|
|
||||||
login = import ./login { inherit pkgs; };
|
login = import ./login { inherit pkgs; };
|
||||||
c0 = import ./c0 { inherit pkgs; };
|
c0 = import ./c0 { inherit pkgs; };
|
||||||
c0x = import ./c0x { inherit pkgs; };
|
|
||||||
c1 = import ./c1 { inherit pkgs; };
|
c1 = import ./c1 { inherit pkgs; };
|
||||||
fs-work = import ./fs-work { inherit pkgs; };
|
fs-work = import ./fs-work { inherit pkgs; };
|
||||||
fs-backup = import ./fs-backup { inherit pkgs; };
|
fs-backup = import ./fs-backup { inherit pkgs; };
|
||||||
in
|
in
|
||||||
{ inherit ekman-manage; frontend = ekman-manage; } // login // c0 // c0x // c1 // fs-work // fs-backup
|
{ inherit ekman-manage; frontend = ekman-manage; } // login // c0 // c1 // fs-work // fs-backup
|
||||||
|
|||||||
@@ -51,8 +51,8 @@ in
|
|||||||
hpc = {
|
hpc = {
|
||||||
slurm.server = false;
|
slurm.server = false;
|
||||||
slurm.slurmrestd = false;
|
slurm.slurmrestd = false;
|
||||||
manage = false;
|
manageNode = false;
|
||||||
login = true;
|
loginNode = true;
|
||||||
knem = false;
|
knem = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -86,9 +86,9 @@ in {
|
|||||||
|
|
||||||
hpc = {
|
hpc = {
|
||||||
slurm.server = true;
|
slurm.server = true;
|
||||||
slurm.slurmrestd = false;
|
slurm.slurmrestd = true;
|
||||||
slurm.mungeUid = 996;
|
slurm.dbdServer = false;
|
||||||
manage = true;
|
manageNode = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
k8s = {
|
k8s = {
|
||||||
@@ -283,6 +283,18 @@ in {
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
services.dnsmasq.enable = true;
|
||||||
|
services.dnsmasq.settings = {
|
||||||
|
domain = [ "cluster.local" ];
|
||||||
|
server = [
|
||||||
|
"/obx.hs/100.100.100.100" # headscale dns
|
||||||
|
];
|
||||||
|
address = [
|
||||||
|
"/slurmctld.cluster.local/127.0.0.1"
|
||||||
|
];
|
||||||
|
srv-host = "_slurmctld._tcp.cluster.local,slurmctld.cluster.local,6817,0,5";
|
||||||
|
};
|
||||||
|
|
||||||
# ssh-rsa is deprecated, but putty/winscp users use it
|
# ssh-rsa is deprecated, but putty/winscp users use it
|
||||||
services.openssh.extraConfig = ''
|
services.openssh.extraConfig = ''
|
||||||
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
|
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
|
||||||
@@ -323,11 +335,25 @@ in {
|
|||||||
useRoutingFeatures = "both"; # for exit-node usage
|
useRoutingFeatures = "both"; # for exit-node usage
|
||||||
extraUpFlags = [
|
extraUpFlags = [
|
||||||
"--login-server=https://headscale.svc.oceanbox.io"
|
"--login-server=https://headscale.svc.oceanbox.io"
|
||||||
"--accept-dns=false"
|
"--accept-dns=true" # see dnsmasq
|
||||||
|
"--accept-routes=true"
|
||||||
"--advertise-exit-node"
|
"--advertise-exit-node"
|
||||||
"--advertise-routes=10.255.241.0/24"
|
"--advertise-routes=10.255.241.0/24"
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
services.networkd-dispatcher = {
|
||||||
|
enable = true;
|
||||||
|
rules = {
|
||||||
|
"tailscale-router" = {
|
||||||
|
onState = [ "routable" ];
|
||||||
|
script = ''
|
||||||
|
#!${pkgs.runtimeShell}
|
||||||
|
${pkgs.ethtool}/bin/ethtool -K enp65s0np0 rx-udp-gro-forwarding on rx-gro-list off
|
||||||
|
exit 0
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
imports = [
|
imports = [
|
||||||
./hardware-configuration.nix
|
./hardware-configuration.nix
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ let
|
|||||||
};
|
};
|
||||||
cachix.enable = false;
|
cachix.enable = false;
|
||||||
monitoring.nodeExporter.enable = false;
|
monitoring.nodeExporter.enable = false;
|
||||||
mft.enable = true; # Mellanox MFT
|
mft.enable = false; # Mellanox MFT
|
||||||
};
|
};
|
||||||
|
|
||||||
networking = {
|
networking = {
|
||||||
@@ -289,6 +289,18 @@ let
|
|||||||
permissions = "u+rs,g+rx,o+rx";
|
permissions = "u+rs,g+rx,o+rx";
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Use nvd to get package diff before apply
|
||||||
|
system.activationScripts.system-diff = {
|
||||||
|
supportsDryActivation = true; # safe: only outputs to stdout
|
||||||
|
text = ''
|
||||||
|
export PATH="${pkgs.lib.makeBinPath [ pkgs.nixVersions.latest ]}:$PATH"
|
||||||
|
if [ -e /run/current-system ]; then
|
||||||
|
${pkgs.lib.getExe pkgs.nvd} diff '/run/current-system' "$systemConfig" || true
|
||||||
|
fi
|
||||||
|
'';
|
||||||
|
};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
openssh-shosts = pkgs.openssh.overrideAttrs (attrs: {
|
openssh-shosts = pkgs.openssh.overrideAttrs (attrs: {
|
||||||
|
|||||||
@@ -205,25 +205,6 @@ in {
|
|||||||
# secret-key-files = /etc/nix/ekman.key
|
# secret-key-files = /etc/nix/ekman.key
|
||||||
'';
|
'';
|
||||||
|
|
||||||
programs.msmtp = {
|
|
||||||
enable = true;
|
|
||||||
accounts = {
|
|
||||||
default = {
|
|
||||||
auth = false;
|
|
||||||
tls = false;
|
|
||||||
tls_starttls = false;
|
|
||||||
port = 24;
|
|
||||||
from = "rossby@oceanbox.io";
|
|
||||||
host = "smtpgw.itpartner.no";
|
|
||||||
# user = "utvikling";
|
|
||||||
# password = "S0m3rp0m@de#21!";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
defaults = {
|
|
||||||
aliases = "/etc/aliases";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
services.prometheus.alertmanager.configuration.global = {
|
services.prometheus.alertmanager.configuration.global = {
|
||||||
smtp_smarthost = "smtpgw.itpartner.no";
|
smtp_smarthost = "smtpgw.itpartner.no";
|
||||||
# smtp_auth_username = "utvikling";
|
# smtp_auth_username = "utvikling";
|
||||||
@@ -278,17 +259,6 @@ in {
|
|||||||
srv-host = "_slurmctld._tcp.cluster.local,slurmctld.cluster.local,6817,0,5";
|
srv-host = "_slurmctld._tcp.cluster.local,slurmctld.cluster.local,6817,0,5";
|
||||||
};
|
};
|
||||||
|
|
||||||
# Use nvd to get package diff before apply
|
|
||||||
system.activationScripts.system-diff = {
|
|
||||||
supportsDryActivation = true; # safe: only outputs to stdout
|
|
||||||
text = ''
|
|
||||||
export PATH="${pkgs.lib.makeBinPath [ pkgs.nixVersions.latest ]}:$PATH"
|
|
||||||
if [ -e /run/current-system ]; then
|
|
||||||
${pkgs.lib.getExe pkgs.nvd} diff '/run/current-system' "$systemConfig" || true
|
|
||||||
fi
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
# ssh-rsa is deprecated, but putty/winscp users use it
|
# ssh-rsa is deprecated, but putty/winscp users use it
|
||||||
services.openssh.extraConfig = ''
|
services.openssh.extraConfig = ''
|
||||||
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
|
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
|
||||||
|
|||||||
Reference in New Issue
Block a user