diff --git a/ekman/c0/default.nix b/ekman/c0/default.nix index 92deaab..f1a3b48 100644 --- a/ekman/c0/default.nix +++ b/ekman/c0/default.nix @@ -11,7 +11,6 @@ let compute = { deployment.tags = [ "compute" "c0" ]; - }; mkCompute = host: @@ -39,8 +38,8 @@ let name = host.name; address = host.address; }; - os.externalInterface = "enp33s0f0np0"; - hpc.compute = true; + os.externalInterface = host.iface; + hpc.computeNode = true; hpc.knem = true; # k8s = { inherit etcdCluster; }; }; @@ -54,7 +53,7 @@ let networking = { hostName = host.name; useDHCP = false; - interfaces.enp33s0f0np0 = { + interfaces."${host.iface}" = { useDHCP = false; ipv4.addresses = [ { address = host.address; diff --git a/ekman/c0x/hardware-configuration.d/c0-17.nix b/ekman/c0/hardware-configuration.d/c0-17.nix similarity index 100% rename from ekman/c0x/hardware-configuration.d/c0-17.nix rename to ekman/c0/hardware-configuration.d/c0-17.nix diff --git a/ekman/c0x/hardware-configuration.d/c0-18.nix b/ekman/c0/hardware-configuration.d/c0-18.nix similarity index 100% rename from ekman/c0x/hardware-configuration.d/c0-18.nix rename to ekman/c0/hardware-configuration.d/c0-18.nix diff --git a/ekman/c0/nodes.nix b/ekman/c0/nodes.nix index 44cc1bc..2d6817f 100644 --- a/ekman/c0/nodes.nix +++ b/ekman/c0/nodes.nix @@ -1,9 +1,10 @@ with builtins; let - nodes = genList (n: n + 1) 16; in + nodes = genList (n: n + 1) 18; in map (n: ( rec { idx = 100 + n; + iface = if n > 16 then "enp33s0f3np3" else "enp33s0f0np0"; name = "c0-${toString n}"; address = "10.255.241.${toString idx}"; ipoib = "10.255.243.${toString idx}"; diff --git a/ekman/c0x/ssh_host_key.d/c0-17.pub b/ekman/c0/ssh_host_key.d/c0-17.pub similarity index 100% rename from ekman/c0x/ssh_host_key.d/c0-17.pub rename to ekman/c0/ssh_host_key.d/c0-17.pub diff --git a/ekman/c0x/ssh_host_key.d/c0-18.pub b/ekman/c0/ssh_host_key.d/c0-18.pub similarity index 100% rename from ekman/c0x/ssh_host_key.d/c0-18.pub rename to ekman/c0/ssh_host_key.d/c0-18.pub diff --git a/ekman/c0x/connauthfile b/ekman/c0x/connauthfile deleted file mode 100644 index 37cd965..0000000 --- a/ekman/c0x/connauthfile +++ /dev/null @@ -1 +0,0 @@ -ä·q‹u \ No newline at end of file diff --git a/ekman/c0x/default.nix b/ekman/c0x/default.nix deleted file mode 100644 index fb345ac..0000000 --- a/ekman/c0x/default.nix +++ /dev/null @@ -1,88 +0,0 @@ -{ pkgs ? import {} }: -let - # Pin the deployment package-set to a specific version of nixpkgs - # pkgs = import (builtins.fetchTarball { - # url = "https://github.com/NixOS/nixpkgs/archive/e6377ff35544226392b49fa2cf05590f9f0c4b43.tar.gz"; - # sha256 = "1fra9wwy5gvj5ibayqkzqpwdf715bggc0qbmrfch4fghwvl5m70l"; - # }) {}; - # pkgs = import {}; - - nodes = import ./nodes.nix; - - compute = { - deployment.tags = [ "compute" "c0" ]; - }; - - mkCompute = host: - let - hw = ./hardware-configuration.d + "/${host.name}.nix"; - in { - "${host.name}" = { - cluster = { - compute = true; - k8sNode = true; - mounts = { - rdma.enable = true; - automount.enable = true; - users = true; - opt = true; - work = true; - data = false; - backup = false; - ceph = false; - }; - }; - - features = { - host = { - name = host.name; - address = host.address; - }; - os.externalInterface = "enp33s0f0np0"; - hpc.compute = true; - hpc.knem = true; - # k8s = { inherit etcdCluster; }; - }; - - deployment.targetHost = host.address; - - # services.udev.extraRules = '' - # KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666" - # ''; - - networking = { - hostName = host.name; - useDHCP = false; - interfaces.enp33s0f3np3 = { - useDHCP = false; - ipv4.addresses = [ { - address = host.address; - prefixLength = 24; - } ]; - ipv4.routes = [ { - address = "10.255.242.0"; - prefixLength = 24; - via = "10.255.241.100"; - } ]; - - }; - # interfaces."ibp1s0.7666" = { - interfaces."ibp1s0" = { - useDHCP = false; - ipv4.addresses = [ { - address = host.ipoib; - prefixLength = 24; - } ]; - }; - }; - imports = [ - hw - ../default.nix - ../mounts.nix - # ./kernel.nix - ]; - } - // compute; -}; -in builtins.foldl' (a: n: a // mkCompute n) {} nodes - diff --git a/ekman/c0x/kernel.nix b/ekman/c0x/kernel.nix deleted file mode 100644 index 17b55e4..0000000 --- a/ekman/c0x/kernel.nix +++ /dev/null @@ -1,48 +0,0 @@ -{pkgs, lib, ...}: -let - kernel = pkgs.linuxPackages.kernel; - i40e = - pkgs.stdenv.mkDerivation rec { - name = "i40e-${version}-${kernel.version}"; - version = "2.13.10"; - - src = pkgs.fetchFromGitHub { - owner = "dmarion"; - repo = "i40e"; - rev = "7228a7c3b362c3170baa2f9a9c6870a900e78dbd"; - sha256 = "087kvq9wrc1iw6vig8cqcx7cb6346wx8qxzb85c3n8638vq1vrxr"; - }; - - hardeningDisable = [ "pic" ]; - - configurePhase = '' - cd src - kernel_version=${kernel.modDirVersion} - sed -i -e 's|/lib/modules|${kernel.dev}/lib/modules|' Makefile - sed -i -e 's|/lib/modules|${kernel.dev}/lib/modules|' common.mk - export makeFlags="BUILD_KERNEL=$kernel_version" - ''; - - installPhase = '' - install -v -D -m 644 i40e.ko "$out/lib/modules/$kernel_version/kernel/drivers/net/i40e/i40e2.ko" - ''; - - dontStrip = true; - - enableParallelBuilding = true; - - meta = { - description = "Linux kernel drivers for Intel Ethernet adapters and LOMs (LAN On Motherboard)"; - homepage = https://github.com/dmarion/i40e; - license = lib.licenses.gpl2; - }; - }; -in -{ - # i40e2 = i40e; - boot.kernelPackages = pkgs.linuxKernel.packages.linux_5_10; - # overlay = self: super: { - # linuxPackages_5_4 = super.linuxPackages_5_4 // { inherit i40e; }; - # }; -} - diff --git a/ekman/c0x/nodes.nix b/ekman/c0x/nodes.nix deleted file mode 100644 index fc6f251..0000000 --- a/ekman/c0x/nodes.nix +++ /dev/null @@ -1,11 +0,0 @@ -with builtins; -let - nodes = genList (n: n + 17) 2; in -map (n: ( - rec { - idx = 100 + n; - name = "c0-${toString n}"; - address = "10.255.241.${toString idx}"; - ipoib = "10.255.243.${toString idx}"; - pubkey = ./. + "/ssh_host_key.d/c0-${toString n}.pub"; - })) nodes diff --git a/ekman/c1/default.nix b/ekman/c1/default.nix index 051eaa2..e1a7239 100644 --- a/ekman/c1/default.nix +++ b/ekman/c1/default.nix @@ -39,7 +39,7 @@ let address = host.address; }; os.externalInterface = "eno33np0"; - hpc.compute = true; + hpc.computeNode = true; # k8s = { inherit etcdCluster; }; }; diff --git a/ekman/default.nix b/ekman/default.nix index 9666deb..4408a40 100644 --- a/ekman/default.nix +++ b/ekman/default.nix @@ -100,6 +100,7 @@ let }; networking = { + useDHCP = false; domain = mkDefault "cluster.local"; defaultGateway = mkDefault "10.255.241.1"; nameservers = mkDefault [ "8.8.8.8" ]; @@ -144,12 +145,13 @@ let features.hpc.slurm = { enable = true; client = true; - # clusterName = "ekman"; - mungeKey = ./munge.key; - # jwtKey = ./jwt_hs256.key; - mungeUid = mkDefault 996; # hack - # pkey = "0x7666"; + clusterName = "ekman"; controlMachine = "ekman-manage"; + dbdHost = "10.255.241.15"; + mungeKey = ./munge.key; + jwtKey = ./jwt_hs256.key; + slurmKey = ./slurm.key; + # pkey = "0x7666"; mailDomain = "oceanbox.io"; nodeName = [ "c0-[1-18] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN" diff --git a/ekman/fs-backup/default.nix b/ekman/fs-backup/default.nix index e5a2428..f4f9686 100644 --- a/ekman/fs-backup/default.nix +++ b/ekman/fs-backup/default.nix @@ -69,7 +69,6 @@ in { }; }; - features.hpc.slurm.mungeUid = 991; features.mft.enable = lib.mkForce true; features = { diff --git a/ekman/fs-work/default.nix b/ekman/fs-work/default.nix index 147b6ae..168ddbb 100644 --- a/ekman/fs-work/default.nix +++ b/ekman/fs-work/default.nix @@ -46,8 +46,6 @@ in { }; }; - features.hpc.slurm.mungeUid = 994; - features = { host = { inherit address; diff --git a/ekman/hive.nix b/ekman/hive.nix index d53db0c..0d6b225 100644 --- a/ekman/hive.nix +++ b/ekman/hive.nix @@ -17,9 +17,8 @@ let login = import ./login { inherit pkgs; }; c0 = import ./c0 { inherit pkgs; }; - c0x = import ./c0x { inherit pkgs; }; c1 = import ./c1 { inherit pkgs; }; fs-work = import ./fs-work { inherit pkgs; }; fs-backup = import ./fs-backup { inherit pkgs; }; in - { inherit ekman-manage; frontend = ekman-manage; } // login // c0 // c0x // c1 // fs-work // fs-backup + { inherit ekman-manage; frontend = ekman-manage; } // login // c0 // c1 // fs-work // fs-backup diff --git a/ekman/login/default.nix b/ekman/login/default.nix index 084e836..a157046 100644 --- a/ekman/login/default.nix +++ b/ekman/login/default.nix @@ -51,8 +51,8 @@ in hpc = { slurm.server = false; slurm.slurmrestd = false; - manage = false; - login = true; + manageNode = false; + loginNode = true; knem = false; }; diff --git a/ekman/manage/default.nix b/ekman/manage/default.nix index 886b1ce..cea20d4 100644 --- a/ekman/manage/default.nix +++ b/ekman/manage/default.nix @@ -86,9 +86,9 @@ in { hpc = { slurm.server = true; - slurm.slurmrestd = false; - slurm.mungeUid = 996; - manage = true; + slurm.slurmrestd = true; + slurm.dbdServer = false; + manageNode = true; }; k8s = { @@ -283,6 +283,18 @@ in { ''; }; + services.dnsmasq.enable = true; + services.dnsmasq.settings = { + domain = [ "cluster.local" ]; + server = [ + "/obx.hs/100.100.100.100" # headscale dns + ]; + address = [ + "/slurmctld.cluster.local/127.0.0.1" + ]; + srv-host = "_slurmctld._tcp.cluster.local,slurmctld.cluster.local,6817,0,5"; + }; + # ssh-rsa is deprecated, but putty/winscp users use it services.openssh.extraConfig = '' # pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256 @@ -323,11 +335,25 @@ in { useRoutingFeatures = "both"; # for exit-node usage extraUpFlags = [ "--login-server=https://headscale.svc.oceanbox.io" - "--accept-dns=false" + "--accept-dns=true" # see dnsmasq + "--accept-routes=true" "--advertise-exit-node" "--advertise-routes=10.255.241.0/24" ]; }; + services.networkd-dispatcher = { + enable = true; + rules = { + "tailscale-router" = { + onState = [ "routable" ]; + script = '' + #!${pkgs.runtimeShell} + ${pkgs.ethtool}/bin/ethtool -K enp65s0np0 rx-udp-gro-forwarding on rx-gro-list off + exit 0 + ''; + }; + }; + }; imports = [ ./hardware-configuration.nix diff --git a/rossby/default.nix b/rossby/default.nix index 43083cb..c423359 100644 --- a/rossby/default.nix +++ b/rossby/default.nix @@ -94,7 +94,7 @@ let }; cachix.enable = false; monitoring.nodeExporter.enable = false; - mft.enable = true; # Mellanox MFT + mft.enable = false; # Mellanox MFT }; networking = { @@ -289,6 +289,18 @@ let permissions = "u+rs,g+rx,o+rx"; }; }; + + # Use nvd to get package diff before apply + system.activationScripts.system-diff = { + supportsDryActivation = true; # safe: only outputs to stdout + text = '' + export PATH="${pkgs.lib.makeBinPath [ pkgs.nixVersions.latest ]}:$PATH" + if [ -e /run/current-system ]; then + ${pkgs.lib.getExe pkgs.nvd} diff '/run/current-system' "$systemConfig" || true + fi + ''; + }; + }; openssh-shosts = pkgs.openssh.overrideAttrs (attrs: { diff --git a/rossby/manage/default.nix b/rossby/manage/default.nix index c515620..4409321 100644 --- a/rossby/manage/default.nix +++ b/rossby/manage/default.nix @@ -205,25 +205,6 @@ in { # secret-key-files = /etc/nix/ekman.key ''; - programs.msmtp = { - enable = true; - accounts = { - default = { - auth = false; - tls = false; - tls_starttls = false; - port = 24; - from = "rossby@oceanbox.io"; - host = "smtpgw.itpartner.no"; - # user = "utvikling"; - # password = "S0m3rp0m@de#21!"; - }; - }; - defaults = { - aliases = "/etc/aliases"; - }; - }; - services.prometheus.alertmanager.configuration.global = { smtp_smarthost = "smtpgw.itpartner.no"; # smtp_auth_username = "utvikling"; @@ -278,17 +259,6 @@ in { srv-host = "_slurmctld._tcp.cluster.local,slurmctld.cluster.local,6817,0,5"; }; - # Use nvd to get package diff before apply - system.activationScripts.system-diff = { - supportsDryActivation = true; # safe: only outputs to stdout - text = '' - export PATH="${pkgs.lib.makeBinPath [ pkgs.nixVersions.latest ]}:$PATH" - if [ -e /run/current-system ]; then - ${pkgs.lib.getExe pkgs.nvd} diff '/run/current-system' "$systemConfig" || true - fi - ''; - }; - # ssh-rsa is deprecated, but putty/winscp users use it services.openssh.extraConfig = '' # pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256