Compare commits
227 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
320c15488a
|
|||
|
6e57520557
|
|||
|
f19d7c2881
|
|||
|
|
454fe6e713 | ||
|
|
410fabe78b | ||
|
|
a4ec5acb75 | ||
|
|
53cf811713 | ||
|
|
591bfbfe15 | ||
| 7b23c53032 | |||
| bd8ab1b6b8 | |||
| ec1c516d1b | |||
| dfe73d6d71 | |||
|
|
2866de1ce9 | ||
|
|
2cc5b08a51 | ||
|
|
84d677d264 | ||
|
|
695f4407cb | ||
|
|
027c20d9a6 | ||
|
042cace4f0
|
|||
|
24b586a4a0
|
|||
| e9c0ce52b2 | |||
| fdff8f3e48 | |||
| f8a0269913 | |||
| ecf934e979 | |||
| d068384a44 | |||
| cd9d02a9d5 | |||
| 2216d589f0 | |||
| baf0547d7f | |||
| b4f6cd9b53 | |||
| 50dbe5183f | |||
| 05767f1976 | |||
| b515338a54 | |||
| c7410e3978 | |||
| a90bf694ad | |||
| cf9ca8be08 | |||
| 5c0e4e0388 | |||
| cb41d50f12 | |||
| e21ae0780c | |||
| 351810aeaa | |||
| 82a9b86531 | |||
| a6617c9cce | |||
|
|
710153d859 | ||
|
|
dddd0fa88c | ||
|
|
422ac77c87 | ||
|
|
cebc4e773d | ||
|
|
b7c66e99fa | ||
|
|
6c05a71290 | ||
|
|
0c25e3c660 | ||
|
|
84e730b580 | ||
|
|
644b9acf24 | ||
|
|
65c5ce955f | ||
|
|
665707969b | ||
|
|
da994766e1 | ||
|
|
131ffda9e8 | ||
|
|
2634f392ca | ||
|
|
26f2b16bf9 | ||
|
|
a71473a548 | ||
|
|
6c405e5b91 | ||
|
|
7c4d108026 | ||
| ce121e7d2b | |||
| cafb434781 | |||
| 8405ce2d87 | |||
| 81103a09c4 | |||
| 1a4318c075 | |||
|
|
d36366fdb1 | ||
|
|
78fe8616e6 | ||
|
|
7c3bc4ba47 | ||
|
|
059fa13f3b | ||
|
|
4a33496824 | ||
|
|
2264ec5108 | ||
|
|
0a3180fd2c | ||
|
|
0f7f991cad | ||
|
|
92826cd6d2 | ||
| e1869e5c89 | |||
| ac652d366a | |||
| 60b6b0e0cd | |||
| b38c934312 | |||
|
addee4268f
|
|||
| 3aede769fd | |||
| 2b95b82a6f | |||
|
d8dbf1122b
|
|||
| 02c407a579 | |||
| da3fa53fe9 | |||
| 78d3ce294d | |||
|
|
0c7f01a650 | ||
| 8a91ef2209 | |||
| c3920d8015 | |||
|
|
6af717c807 | ||
| e64ecd2993 | |||
| 8d2b840bdf | |||
| 4b988488fb | |||
| 9af8c6e137 | |||
|
|
c6b720d1ac | ||
|
3b1f689273
|
|||
|
a845b237f9
|
|||
|
63e04f57db
|
|||
|
5e69d7adaf
|
|||
|
c9aa712715
|
|||
|
|
85a315b1ec | ||
|
|
73907361b6 | ||
| 5328e5b645 | |||
| d94d4ab94c | |||
| d7d45b0911 | |||
| 1671ea3ca1 | |||
| aba775befe | |||
|
|
752fe275f0 | ||
|
|
efb43acad1 | ||
|
|
245435a508 | ||
|
|
31cea944f3 | ||
|
|
50880b458b | ||
| cc4d50b87f | |||
| 537e4ba371 | |||
|
|
34307093bb | ||
| 241454a2a1 | |||
| 530ff0aaf9 | |||
| 3ea15031a7 | |||
| 65ccb89fb9 | |||
|
|
8b42df3e73 | ||
|
|
a59452ff7a | ||
|
|
cd5e4e5c25 | ||
| d307989e83 | |||
|
|
5ff6f6d89b | ||
|
|
8e3272a4c1 | ||
|
|
657e289c78 | ||
|
|
7d5a65e344 | ||
|
|
4b80478ec8 | ||
| ec3950de70 | |||
| 2b3dc17bf8 | |||
| 21c6ca2fea | |||
|
|
2ecac6cf3c | ||
| 384d58ff84 | |||
| fc5a0923e1 | |||
| ee80f1ff9a | |||
|
|
4b9a0f69c6 | ||
|
|
79bc969cfa | ||
| cb25301636 | |||
| e985a8221b | |||
| 7d82e3c44a | |||
|
|
35c09d6c94 | ||
|
|
e5bcac154b | ||
|
|
4dd0b495ab | ||
| 05e1d24048 | |||
|
|
389ad47745 | ||
| e101653b7f | |||
|
|
29e65c1598 | ||
|
|
6611267c3b | ||
|
|
75848674f2 | ||
|
|
543eb35351 | ||
| 8d1d892f25 | |||
| bb56b8f3a6 | |||
| b398364cc7 | |||
| c314409d3b | |||
| 86f1e0d974 | |||
| 1402192618 | |||
| 1cfd24de44 | |||
| 7cc6cd0eba | |||
| 670c75953d | |||
| 88a72088f1 | |||
| bb959dae3a | |||
|
|
455ad2076d | ||
|
|
5c0e8cbb4c | ||
|
|
3b78b4038e | ||
|
|
63c852ef37 | ||
| b76f5e309a | |||
| f51f30004e | |||
|
|
26cfbd7130 | ||
|
|
4fa73f5a36 | ||
|
|
b1e713f113 | ||
|
5271a33b7c
|
|||
|
|
1ab635279a | ||
|
|
9e1deae147 | ||
| e1b15035cc | |||
| 981eda082c | |||
| 2bf922fb6d | |||
|
|
f6db232ca7 | ||
|
|
8c5ca68530 | ||
|
|
d481f6789c | ||
| 43547f45de | |||
| 7be10b4457 | |||
|
|
be13a10c8f | ||
|
|
799cb6cae1 | ||
| eabb600641 | |||
| bc1ce00610 | |||
|
|
6d3d18bbe0 | ||
|
|
30d0180b59 | ||
|
|
680330d569 | ||
| e6cf1f6232 | |||
| caab89f642 | |||
| a981f5e7ba | |||
| 34c28e18bf | |||
|
|
5dfc0743eb | ||
| 398af17797 | |||
| 998d551943 | |||
|
|
b2bf32dc73 | ||
|
|
312b3906ab | ||
| c9624213ed | |||
| bcff2e6c2f | |||
| 3c0a7f91f5 | |||
| 46cf9da93f | |||
| 2e919182d4 | |||
| ff3f897859 | |||
| 9b798444d1 | |||
| d2e27a7e87 | |||
|
|
d5cfcd2bf9 | ||
|
|
cf4ae97e1c | ||
|
|
96f8215c52 | ||
|
|
46473c88dd | ||
| fac7bdd62e | |||
|
|
e38b0a2317 | ||
| 82a5328d7f | |||
| 8894339216 | |||
| f5679d39f9 | |||
| 59db74b265 | |||
| 65aba0f69d | |||
| db794e6eea | |||
| 4057a00143 | |||
| 14b5f07cc6 | |||
| 33a14d1509 | |||
| 3af5ba3fbd | |||
| 6767eb21e6 | |||
| eb7b1f8130 | |||
| fcd136ed4e | |||
|
|
c8814ec8d9 | ||
| f7f6eabb0f | |||
|
|
69e47e60d0 | ||
|
|
5c72112457 | ||
|
|
ba5f1b8add | ||
| e0846164a7 |
12
.envrc
Normal file
12
.envrc
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
# The shebang above is not honored; it is kept only as a hint for editors.

# Re-evaluate this file whenever the pinned sources change.
watch_file nix/sources.json

# Pull in variables from a local .env file when one is present.
dotenv_if_exists

# Point npins at the directory that holds the pins.
export NPINS_DIRECTORY=nix

# Enter the Nix development shell.
use nix
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,6 +1,10 @@
|
||||
*.pem
|
||||
*.csr
|
||||
*.key
|
||||
result
|
||||
result-*
|
||||
gcroots/
|
||||
ca
|
||||
configuration.nix
|
||||
system
|
||||
.direnv
|
||||
|
||||
28
bin/reuid-slurm.sh
Executable file
28
bin/reuid-slurm.sh
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/bin/sh
# Renumber the slurm and munge accounts to fixed IDs (slurm 401:401,
# munge 402:402) by editing /etc/passwd and /etc/group in place, then restart
# the daemons after re-owning their configuration and state directories.

# pwunconv runs first so the rewrite sees the "!" password field that the
# patterns below match on; pwconv is run again afterwards.
pwunconv
# Each pattern is anchored with "^" so that only the exact "slurm" and "munge"
# accounts are rewritten, not an account whose name merely ends in that text.
sed -i 's/^slurm:!:[0-9]\+:[0-9]\+:/slurm:!:401:401:/;
s/^munge:!:[0-9]\+:[0-9]\+:/munge:!:402:402:/' /etc/passwd
pwconv

# Same procedure for the groups; the patterns match on an "x" password field.
grpunconv
sed -i 's/^slurm:x:[0-9]\+:/slurm:x:401:/;
s/^munge:x:[0-9]\+:/munge:x:402:/' /etc/group
grpconv

# munge: drop the runtime directory and re-own key and state directories
# while the daemon is stopped.
systemctl stop munged
rm -rf /run/munge
chown -R munge:munge /etc/munge /var/lib/munge
systemctl start munged

# slurmd: same treatment for its runtime directory and configuration.
systemctl stop slurmd
rm -rf /run/slurm
chown -R slurm:slurm /etc/slurm
systemctl start slurmd

# Only hosts that have a slurmctld spool directory get slurmctld restarted.
if [ -d /var/spool/slurmctld ]; then
    systemctl stop slurmctld
    rm -rf /run/slurm /run/slurmctld
    systemctl start slurmctld
fi
|
||||
|
||||
3
bin/slurm-gen-jwt_hs256.sh
Executable file
3
bin/slurm-gen-jwt_hs256.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Write 32 random bytes to ./jwt_hs256.key and make the file read-only for
# its owner.

# Create the file owner-only from the start, so the key is never readable by
# other users in the window between dd and chmod.
umask 077
# iflag=fullblock makes dd keep reading until the whole 32-byte block is
# filled instead of accepting a short read from the random device.
dd if=/dev/random of=jwt_hs256.key bs=32 count=1 iflag=fullblock
chmod 400 jwt_hs256.key
|
||||
2
bin/slurm-gen-mungekey.sh
Executable file
2
bin/slurm-gen-mungekey.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/sh
# Create a new 2048-bit MUNGE key in ./munge.key.
mungekey --create --bits 2048 --keyfile munge.key
|
||||
@@ -1,108 +0,0 @@
|
||||
{ pkgs ? import <nixpkgs> {} }:
|
||||
let
|
||||
# Pin the deployment package-set to a specific version of nixpkgs
|
||||
# pkgs = import (builtins.fetchTarball {
|
||||
# url = "https://github.com/NixOS/nixpkgs/archive/e6377ff35544226392b49fa2cf05590f9f0c4b43.tar.gz";
|
||||
# sha256 = "1fra9wwy5gvj5ibayqkzqpwdf715bggc0qbmrfch4fghwvl5m70l";
|
||||
# }) {};
|
||||
# pkgs = import <nixpkgs> {};
|
||||
|
||||
nodes = import ./nodes.nix;
|
||||
|
||||
compute = {
|
||||
deployment.tags = [ "compute" "c0" ];
|
||||
|
||||
fileSystems = {
|
||||
"/frontend" = {
|
||||
device = "10.255.241.100:/home";
|
||||
fsType = "nfs4";
|
||||
options = [
|
||||
"soft"
|
||||
"defaults"
|
||||
"noauto"
|
||||
"x-systemd.automount"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
systemd.automounts = [
|
||||
{
|
||||
where = "/frontend";
|
||||
wantedBy = [ "default.target" ];
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
# Build the deployment entry for a single compute node.
#
# `host` is one element of the list imported from ./nodes.nix; this function
# reads `host.name`, `host.address` and `host.ipoib`.  The result is a
# one-attribute set `{ "<host.name>" = <node config>; }`, which the fold at
# the end of the file merges into the full set of nodes.
mkCompute = host:
let
  # Per-host hardware configuration, looked up by node name.
  hw = ./hw + "/${host.name}.nix";
in {
  # The shared `compute` settings are merged recursively.  A plain `//` is
  # shallow: `compute.deployment` (which only carries `tags`) would replace
  # the whole `deployment` attribute and silently drop
  # `deployment.targetHost`.
  "${host.name}" = pkgs.lib.recursiveUpdate {
    cluster = {
      compute = true;
      k8sNode = true;
      mounts = {
        rdma.enable = true;
        automount.enable = true;
        home = false;
        opt = true;
        work = true;
        data = false;
        backup = false;
        ceph = false;
      };
    };

    features = {
      host = {
        name = host.name;
        address = host.address;
      };
      os.externalInterface = "enp33s0f0np0";
      hpc.compute = true;
      hpc.knem = true;
      # k8s = { inherit etcdCluster; };
    };

    deployment.targetHost = host.address;

    # services.udev.extraRules = ''
    #   KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
    # '';

    networking = {
      hostName = host.name;
      useDHCP = false;
      interfaces.enp33s0f0np0 = {
        useDHCP = false;
        ipv4.addresses = [ {
          address = host.address;
          prefixLength = 24;
        } ];
        # Reach 10.255.242.0/24 through 10.255.241.100.
        ipv4.routes = [ {
          address = "10.255.242.0";
          prefixLength = 24;
          via = "10.255.241.100";
        } ];

      };
      # interfaces."ibp1s0.7666" = {
      interfaces."ibp1s0" = {
        useDHCP = false;
        ipv4.addresses = [ {
          address = host.ipoib;
          prefixLength = 24;
        } ];
      };
    };
    imports = [
      hw
      ../cluster.nix
      ../mounts.nix
      #./kernel.nix
    ];
  } compute;
};
|
||||
in builtins.foldl' (a: n: a // mkCompute n) {} nodes
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
# List of compute nodes c0-1 .. c0-16.
#
# Each entry carries the node number offset by 100 (`idx`), the node name,
# its two addresses (both ending in `idx`) and the path of its public key.
let
  mkNode = n:
    let
      idx = 100 + n;
      name = "c0-${toString n}";
      lastOctet = toString idx;
    in {
      inherit idx name;
      address = "10.255.241.${lastOctet}";
      ipoib = "10.255.243.${lastOctet}";
      pubkey = ./. + "/pubkeys/${name}.pub";
    };
in
  builtins.map mkNode (builtins.genList (i: i + 1) 16)
|
||||
@@ -1,108 +0,0 @@
|
||||
{ pkgs ? import <nixpkgs> {} }:
|
||||
let
|
||||
# Pin the deployment package-set to a specific version of nixpkgs
|
||||
# pkgs = import (builtins.fetchTarball {
|
||||
# url = "https://github.com/NixOS/nixpkgs/archive/e6377ff35544226392b49fa2cf05590f9f0c4b43.tar.gz";
|
||||
# sha256 = "1fra9wwy5gvj5ibayqkzqpwdf715bggc0qbmrfch4fghwvl5m70l";
|
||||
# }) {};
|
||||
# pkgs = import <nixpkgs> {};
|
||||
|
||||
nodes = import ./nodes.nix;
|
||||
|
||||
compute = {
|
||||
deployment.tags = [ "compute" "c0" ];
|
||||
|
||||
fileSystems = {
|
||||
"/frontend" = {
|
||||
device = "10.255.241.100:/home";
|
||||
fsType = "nfs4";
|
||||
options = [
|
||||
"soft"
|
||||
"defaults"
|
||||
"noauto"
|
||||
"x-systemd.automount"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
systemd.automounts = [
|
||||
{
|
||||
where = "/frontend";
|
||||
wantedBy = [ "default.target" ];
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
# Build the deployment entry for a single compute node.
#
# `host` is one element of the list imported from ./nodes.nix; this function
# reads `host.name`, `host.address` and `host.ipoib`.  The result is a
# one-attribute set `{ "<host.name>" = <node config>; }`, which the fold at
# the end of the file merges into the full set of nodes.
mkCompute = host:
let
  # Per-host hardware configuration, looked up by node name.
  hw = ./hw + "/${host.name}.nix";
in {
  # The shared `compute` settings are merged recursively.  A plain `//` is
  # shallow: `compute.deployment` (which only carries `tags`) would replace
  # the whole `deployment` attribute and silently drop
  # `deployment.targetHost`.
  "${host.name}" = pkgs.lib.recursiveUpdate {
    cluster = {
      compute = true;
      k8sNode = true;
      mounts = {
        rdma.enable = true;
        automount.enable = true;
        home = false;
        opt = true;
        work = true;
        data = true;
        backup = true;
        ceph = false;
      };
    };

    features = {
      host = {
        name = host.name;
        address = host.address;
      };
      # NOTE(review): differs from the interface configured under
      # `networking` below (enp33s0f3np3) — confirm this is intended.
      os.externalInterface = "enp33s0f0np0";
      hpc.compute = true;
      hpc.knem = true;
      # k8s = { inherit etcdCluster; };
    };

    deployment.targetHost = host.address;

    # services.udev.extraRules = ''
    #   KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
    # '';

    networking = {
      hostName = host.name;
      useDHCP = false;
      interfaces.enp33s0f3np3 = {
        useDHCP = false;
        ipv4.addresses = [ {
          address = host.address;
          prefixLength = 24;
        } ];
        # Reach 10.255.242.0/24 through 10.255.241.100.
        ipv4.routes = [ {
          address = "10.255.242.0";
          prefixLength = 24;
          via = "10.255.241.100";
        } ];

      };
      # interfaces."ibp1s0.7666" = {
      interfaces."ibp1s0" = {
        useDHCP = false;
        ipv4.addresses = [ {
          address = host.ipoib;
          prefixLength = 24;
        } ];
      };
    };
    imports = [
      hw
      ../cluster.nix
      ../mounts.nix
      # ./kernel.nix
    ];
  } compute;
};
|
||||
in builtins.foldl' (a: n: a // mkCompute n) {} nodes
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
# NixOS module that selects the Linux 5.10 kernel package set.
#
# It also defines a derivation for an out-of-tree i40e driver, installed under
# the name i40e2.ko.  That derivation is currently unused: the only references
# to it in the module body are commented out.
{ pkgs, lib, ... }:
let
  # NOTE(review): this is the kernel of `pkgs.linuxPackages`, not the 5.10
  # kernel selected below — confirm they match before wiring `i40e` back in.
  kernel = pkgs.linuxPackages.kernel;
  i40e =
    pkgs.stdenv.mkDerivation rec {
      name = "i40e-${version}-${kernel.version}";
      version = "2.13.10";

      src = pkgs.fetchFromGitHub {
        owner = "dmarion";
        repo = "i40e";
        rev = "7228a7c3b362c3170baa2f9a9c6870a900e78dbd";
        sha256 = "087kvq9wrc1iw6vig8cqcx7cb6346wx8qxzb85c3n8638vq1vrxr";
      };

      hardeningDisable = [ "pic" ];

      # Point the driver's makefiles at the kernel development tree in the Nix
      # store instead of /lib/modules.  `kernel_version` is a shell variable
      # that installPhase reads again.
      configurePhase = ''
        cd src
        kernel_version=${kernel.modDirVersion}
        sed -i -e 's|/lib/modules|${kernel.dev}/lib/modules|' Makefile
        sed -i -e 's|/lib/modules|${kernel.dev}/lib/modules|' common.mk
        export makeFlags="BUILD_KERNEL=$kernel_version"
      '';

      # The module is installed as i40e2.ko rather than i40e.ko.
      installPhase = ''
        install -v -D -m 644 i40e.ko "$out/lib/modules/$kernel_version/kernel/drivers/net/i40e/i40e2.ko"
      '';

      dontStrip = true;

      enableParallelBuilding = true;

      meta = {
        description = "Linux kernel drivers for Intel Ethernet adapters and LOMs (LAN On Motherboard)";
        # Quoted string: bare URL literals are deprecated Nix syntax.
        homepage = "https://github.com/dmarion/i40e";
        license = lib.licenses.gpl2;
      };
    };
in
{
  # i40e2 = i40e;
  boot.kernelPackages = pkgs.linuxKernel.packages.linux_5_10;
  # overlay = self: super: {
  #   linuxPackages_5_4 = super.linuxPackages_5_4 // { inherit i40e; };
  # };
}
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
# List of compute nodes c0-17 and c0-18.
#
# Each entry carries the node number offset by 100 (`idx`), the node name,
# its two addresses (both ending in `idx`) and the path of its public key.
let
  mkNode = n:
    let
      idx = 100 + n;
      name = "c0-${toString n}";
      lastOctet = toString idx;
    in {
      inherit idx name;
      address = "10.255.241.${lastOctet}";
      ipoib = "10.255.243.${lastOctet}";
      pubkey = ./. + "/pubkeys/${name}.pub";
    };
in
  builtins.map mkNode (builtins.genList (i: i + 17) 2)
|
||||
@@ -1,119 +0,0 @@
|
||||
{ pkgs ? import <nixpkgs> {} }:
|
||||
let
|
||||
# Pin the deployment package-set to a specific version of nixpkgs
|
||||
# pkgs = import (builtins.fetchTarball {
|
||||
# url = "https://github.com/NixOS/nixpkgs/archive/e6377ff35544226392b49fa2cf05590f9f0c4b43.tar.gz";
|
||||
# sha256 = "1fra9wwy5gvj5ibayqkzqpwdf715bggc0qbmrfch4fghwvl5m70l";
|
||||
# }) {};
|
||||
# pkgs = import <nixpkgs> {};
|
||||
|
||||
nodes = import ./nodes.nix;
|
||||
compute = {
|
||||
deployment.tags = [ "compute" "c1" ];
|
||||
|
||||
systemd.automounts = [
|
||||
{
|
||||
where = "/frontend";
|
||||
wantedBy = [ "default.target" ];
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
mkCompute = host:
|
||||
let
|
||||
hw = ./hw + "/${host.name}.nix";
|
||||
in {
|
||||
"${host.name}" = {
|
||||
cluster = {
|
||||
compute = true;
|
||||
k8sNode = true;
|
||||
mounts = {
|
||||
rdma.enable = false;
|
||||
gbe100.enable = true;
|
||||
automount.enable = true;
|
||||
home = true;
|
||||
opt = true;
|
||||
work = true;
|
||||
data = false;
|
||||
backup = false;
|
||||
ceph = true;
|
||||
};
|
||||
};
|
||||
|
||||
features = {
|
||||
host = {
|
||||
name = host.name;
|
||||
address = host.address;
|
||||
};
|
||||
os.externalInterface = "eno33np0";
|
||||
hpc.compute = true;
|
||||
# k8s = { inherit etcdCluster; };
|
||||
};
|
||||
|
||||
deployment.targetHost = host.target;
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
# boot.kernel.sysctl = {
|
||||
# "net.ipv4.tcp_timestamps" = 0;
|
||||
# "net.ipv4.tcp_sack" = 1;
|
||||
# "net.core.netdev_max_backlog" = 250000;
|
||||
# "net.core.rmem_max" = 4194304;
|
||||
# "net.core.wmem_max" = 4194304;
|
||||
# "net.core.rmem_default" = 4194304;
|
||||
# "net.core.wmem_default" = 4194304;
|
||||
# "net.core.optmem_max" = 4194304;
|
||||
# "net.ipv4.tcp_rmem" = "4096 87380 4194304";
|
||||
# "net.ipv4.tcp_wmem" = "4096 65536 4194304";
|
||||
# "net.ipv4.tcp_low_latency" = 1;
|
||||
# "net.ipv4.tcp_adv_win_scale" = 1;
|
||||
# };
|
||||
|
||||
networking = {
|
||||
hostName = host.name;
|
||||
useDHCP = false;
|
||||
interfaces.eno33np0 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = host.address;
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
ipv4.routes = [ {
|
||||
address = "10.255.242.0";
|
||||
prefixLength = 24;
|
||||
via = "10.255.241.100";
|
||||
} ];
|
||||
|
||||
};
|
||||
# interfaces.ibp65s0 = {
|
||||
# useDHCP = false;
|
||||
# ipv4.addresses = [ {
|
||||
# address = host.ipoib;
|
||||
# prefixLength = 24;
|
||||
# } ];
|
||||
# };
|
||||
interfaces.enp65s0np0 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = host.gbe100;
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
# firewall.extraCommands =
|
||||
# if host.name == "c1-1" then ''
|
||||
# iptables -t nat -A POSTROUTING -d 10.255.244.0/24 -j MASQUERADE
|
||||
# ''
|
||||
# else "";
|
||||
};
|
||||
imports = [
|
||||
hw
|
||||
../cluster.nix
|
||||
../mounts.nix
|
||||
];
|
||||
}
|
||||
// compute;
|
||||
};
|
||||
in builtins.foldl' (a: n: a // mkCompute n) {} nodes
|
||||
|
||||
@@ -1,358 +0,0 @@
|
||||
{ pkgs ? import <nixpkgs> {} }:
|
||||
let
|
||||
name = "ekman";
|
||||
address = "10.255.241.100";
|
||||
in
|
||||
{
|
||||
ekman = { config, pkgs, ... }: with pkgs; {
|
||||
deployment.tags = [ "ekman" "login" ];
|
||||
deployment.targetHost = address;
|
||||
system.autoUpgrade.enable = lib.mkForce false;
|
||||
|
||||
systemd.targets = {
|
||||
sleep.enable = false;
|
||||
suspend.enable = false;
|
||||
hibernate.enable = false;
|
||||
hybrid-sleep.enable = false;
|
||||
};
|
||||
|
||||
cluster = {
|
||||
compute = true;
|
||||
k8sNode = true;
|
||||
mounts = {
|
||||
rdma.enable = true;
|
||||
automount.enable = true;
|
||||
home = false;
|
||||
opt = false;
|
||||
work = true;
|
||||
data = true;
|
||||
backup = true;
|
||||
ceph = true;
|
||||
};
|
||||
};
|
||||
|
||||
features = {
|
||||
host = {
|
||||
name = "ekman";
|
||||
address = "10.255.241.100";
|
||||
};
|
||||
|
||||
myvnc.enable = false;
|
||||
|
||||
os = {
|
||||
externalInterface = "enp33s0f0np0";
|
||||
nfs.enable = true;
|
||||
nfs.exports = ''
|
||||
/exports 10.255.241.0/24(insecure,rw,sync,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.243.0/24(insecure,rw,sync,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
'';
|
||||
};
|
||||
|
||||
hpc = {
|
||||
slurm.server = false;
|
||||
slurm.slurmrestd = false;
|
||||
frontend = false;
|
||||
login = true;
|
||||
knem = false;
|
||||
};
|
||||
|
||||
k8s = {
|
||||
master.enable = false;
|
||||
node.enable = true;
|
||||
};
|
||||
|
||||
desktop.enable = false;
|
||||
# server.enable = true;
|
||||
monitoring = {
|
||||
# server = {
|
||||
# enable = false;
|
||||
# scrapeHosts = [ "frontend" "nfs0" "nfs1" ] ++ (builtins.map (x: x.name) computeNodes);
|
||||
# defaultAlertReceiver = {
|
||||
# email_configs = [
|
||||
# { to = "jonas.juselius@oceanbox.io"; }
|
||||
# ];
|
||||
# };
|
||||
# pageAlertReceiver = {
|
||||
# webhook_configs = [
|
||||
# {
|
||||
# url = "https://prometheus-msteams.k2.itpartner.no/ekman";
|
||||
# http_config = {
|
||||
# tls_config = { insecure_skip_verify = true; };
|
||||
# };
|
||||
# }
|
||||
# ];
|
||||
# };
|
||||
# };
|
||||
# webUI.enable = false;
|
||||
# webUI.acmeEmail = "innovasjon@itpartner.no";
|
||||
# webUI.allow = [
|
||||
# "10.1.2.0/24"
|
||||
# "172.19.254.0/24"
|
||||
# "172.19.255.0/24"
|
||||
# ];
|
||||
infiniband-exporter = {
|
||||
enable = true;
|
||||
nameMap = ''
|
||||
0x248a07030029d2fc "frontend"
|
||||
0x248a07030029d104 "ekman"
|
||||
0x5aa2e1fffe1edfca "fs-work"
|
||||
0x1c34da0300787798 "fs-backup"
|
||||
0xe8ebd3030024981e "c0-1"
|
||||
0xe8ebd3030024a21a "c0-2"
|
||||
0xe8ebd30300249a3a "c0-3"
|
||||
0xe8ebd30300248b9e "c0-4"
|
||||
0xe8ebd30300248b86 "c0-5"
|
||||
0xe8ebd3030024998a "c0-6"
|
||||
0xe8ebd30300248b8e "c0-7"
|
||||
0xe8ebd3030024999e "c0-8"
|
||||
0xe8ebd30300248fca "c0-9"
|
||||
0xe8ebd3030024a216 "c0-10"
|
||||
0xe8ebd30300248b96 "c0-11"
|
||||
0xe8ebd30300248b9a "c0-12"
|
||||
0xe8ebd303002495d2 "c0-13"
|
||||
0xe8ebd303002495e2 "c0-14"
|
||||
0xe8ebd30300248f42 "c0-15"
|
||||
0xe8ebd303002495e6 "c0-16"
|
||||
0xe8ebd3030024a2a2 "c0-17"
|
||||
0xe8ebd3030024a2ae "c0-18"
|
||||
'';
|
||||
};
|
||||
slurm-exporter = {
|
||||
enable = true;
|
||||
port = 6080;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
# boot.kernelPackages = pkgs.linuxKernel.packages.linux_6_6;
|
||||
services.flannel.iface = "enp33s0f3np3";
|
||||
|
||||
networking = {
|
||||
useDHCP = false;
|
||||
hostName = "ekman";
|
||||
interfaces.enp33s0f3np3 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = "10.255.241.100";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
# ipv4.routes = [
|
||||
# {
|
||||
# address = "10.255.244.0";
|
||||
# prefixLength = 24;
|
||||
# via = "10.255.241.99";
|
||||
# }
|
||||
# ];
|
||||
};
|
||||
interfaces."ibp65s0f0" = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = "10.255.243.100";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
interfaces."enp65s0f1np1" = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = "10.255.244.100";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
interfaces.enp33s0f0np0 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = "10.255.242.2";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
ipv4.routes = [
|
||||
# {
|
||||
# address = "10.1.8.0";
|
||||
# prefixLength = 24;
|
||||
# via = "10.255.242.1";
|
||||
# }
|
||||
# {
|
||||
# address = "10.1.30.0";
|
||||
# prefixLength = 24;
|
||||
# via = "10.255.242.1";
|
||||
# }
|
||||
];
|
||||
};
|
||||
defaultGateway = "10.255.242.1";
|
||||
firewall = {
|
||||
allowedTCPPorts = [ 4443 ];
|
||||
extraCommands = ''
|
||||
# needed for nodeport access on k1 and k2
|
||||
# iptables -t nat -A POSTROUTING -s 10.255.241.0/24 ! -d 10.255.0.0/16 -j SNAT --to-source 10.255.242.2
|
||||
iptables -t nat -A POSTROUTING -s 10.255.243.0/24 -j MASQUERADE
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
fileSystems = {
|
||||
"/exports/home" = {
|
||||
device = "/home";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/opt/bin" = {
|
||||
device = "/opt/bin";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/opt/sif" = {
|
||||
device = "/opt/sif";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/opt/singularity" = {
|
||||
device = "/opt/singularity";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/nfs-provisioner" = {
|
||||
device = "/vol/nfs-provisioner";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/frontend" = {
|
||||
device = "/home";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/vol/local-storage/vol1" = {
|
||||
device = "/vol/vol1";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/vol/local-storage/vol2" = {
|
||||
device = "/vol/vol2";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
};
|
||||
|
||||
nix.extraOptions = ''
|
||||
secret-key-files = /etc/nix/ekman.key
|
||||
'';
|
||||
|
||||
# services.xserver = {
|
||||
# enable = false;
|
||||
# enableCtrlAltBackspace = true;
|
||||
# layout = "us";
|
||||
# xkbVariant = "altgr-intl";
|
||||
# xkbOptions = "eurosign:e";
|
||||
# displayManager = {
|
||||
# gdm.enable = false;
|
||||
# job.logToFile = true;
|
||||
# };
|
||||
# # desktopManager.xfce.enable = true;
|
||||
# };
|
||||
|
||||
# Global SMTP settings of the Alertmanager configuration.
services.prometheus.alertmanager.configuration.global = {
  smtp_smarthost = "smtpgw.itpartner.no";
  # SMTP authentication is disabled.
  # smtp_auth_username = "utvikling";
  # smtp_auth_password: redacted.  If authentication is re-enabled, supply
  # the password from a secret file outside the repository; never commit it,
  # not even commented out.
  # NOTE(review): a plaintext password was previously committed on this line;
  # treat it as compromised and rotate it.
  smtp_hello = "ekman.oceanbox.io";
  smtp_from = "noreply@ekman.oceanbox.io";
};
|
||||
|
||||
# services.nginx = {
|
||||
# virtualHosts = {
|
||||
# "ds.matnoc.regnekraft.io" = {
|
||||
# forceSSL = true;
|
||||
# enableACME = true;
|
||||
# serverAliases = [];
|
||||
# locations."/" = {
|
||||
# proxyPass = "http://localhost:9088";
|
||||
# proxyWebsockets = false;
|
||||
# extraConfig = ''
|
||||
# allow 10.1.2.0/24;
|
||||
# allow 172.19.254.0/24;
|
||||
# allow 172.19.255.0/24;
|
||||
# deny all;
|
||||
# '';
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
|
||||
# services.gitlab-runner = {
|
||||
# enable = true;
|
||||
# extraPackages = with pkgs; [
|
||||
# singularity
|
||||
# ];
|
||||
# concurrent = 4;
|
||||
# services = {
|
||||
# sif = {
|
||||
# registrationConfigFile = "/var/lib/secrets/gitlab-runner-registration";
|
||||
# executor = "shell";
|
||||
# tagList = [ "ekman" "sif" ];
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
|
||||
# security.sudo.extraConfig = ''
|
||||
# gitlab-runner ALL=(ALL) NOPASSWD: /run/current-system/sw/bin/singularity
|
||||
# '';
|
||||
|
||||
security.pam = {
|
||||
services.sshd.googleAuthenticator.enable = true;
|
||||
loginLimits = [
|
||||
{
|
||||
domain = "@users";
|
||||
item = "rss";
|
||||
type = "hard";
|
||||
value = 16000000;
|
||||
}
|
||||
{
|
||||
domain = "@users";
|
||||
item = "cpu";
|
||||
type = "hard";
|
||||
value = 180;
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
system.activationScripts = {
|
||||
home-permissions.text = ''
|
||||
chmod 755 /home/olean
|
||||
chmod 755 /home/frankgaa
|
||||
chmod 755 /home/jonas
|
||||
chmod 755 /home/mrtz
|
||||
chmod 755 /home/avle
|
||||
chmod 755 /home/stig
|
||||
chmod 755 /home/bast
|
||||
chmod 755 /home/simenlk
|
||||
chmod 755 /work/kraken
|
||||
'';
|
||||
};
|
||||
|
||||
|
||||
# ssh-rsa is deprecated, but putty/winscp users use it
|
||||
services.openssh.extraConfig = ''
|
||||
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
|
||||
PubkeyAuthOptions verify-required
|
||||
'';
|
||||
|
||||
environment.systemPackages = [];
|
||||
|
||||
virtualisation.docker.enable = pkgs.lib.mkForce true;
|
||||
|
||||
# Tailscale client, registered against the headscale login server below.
services.tailscale = {
  enable = true;
  authKeyFile = "/var/lib/secrets/tailscale.key";
  useRoutingFeatures = "server"; # for exit-node usage
  extraUpFlags = [
    "--login-server=https://headscale.svc.oceanbox.io"
    "--accept-dns"
    "--advertise-exit-node"
    # Must be a valid CIDR: this is the /24 that contains this host's own
    # 10.255.241.100 address.  The previous value had five octets.
    "--advertise-routes=10.255.241.0/24"
    "--advertise-tags=tag:ekman"
  ];
};
|
||||
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
../cluster.nix
|
||||
../mounts.nix
|
||||
../myvnc.nix
|
||||
];
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,180 +0,0 @@
|
||||
{ pkgs ? import <nixpkgs> {} }:
|
||||
let
|
||||
name = "fs-backup";
|
||||
address = "10.255.241.80";
|
||||
etcdCluster = import ../etcdCluster.nix;
|
||||
in {
|
||||
fs-backup = { config, pkgs, ... }: with pkgs; {
|
||||
deployment.tags = [ "fs" "fs-backup" ];
|
||||
deployment.targetHost = address;
|
||||
system.autoUpgrade.enable = lib.mkForce false;
|
||||
|
||||
systemd.targets = {
|
||||
sleep.enable = false;
|
||||
suspend.enable = false;
|
||||
hibernate.enable = false;
|
||||
hybrid-sleep.enable = false;
|
||||
};
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
rdma-core
|
||||
hwloc
|
||||
xfsprogs
|
||||
];
|
||||
|
||||
boot.swraid = {
|
||||
enable = true;
|
||||
mdadmConf = ''
|
||||
DEVICE partitions
|
||||
ARRAY /dev/md/0 metadata=1.2 UUID=b743fdd4:5b339cc7:7c43f50f:3b81243e name=fs2:0
|
||||
'';
|
||||
};
|
||||
|
||||
systemd.services.restart-md0 = {
|
||||
description = "restart /dev/md0";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "sys-devices-virtual-block-md0.device" "-.mount" ];
|
||||
before = [ "backup.mount" ];
|
||||
path = [ "/run/current-system/sw/" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
};
|
||||
script = ''
|
||||
restart=0
|
||||
${util-linux}/bin/lsblk -o MAJ:MIN -n /dev/md0 | grep -q "254:" || restart=1
|
||||
if [ $restart = 1 ]; then
|
||||
${mdadm}/bin/mdadm --stop /dev/md0
|
||||
${mdadm}/bin/mdadm --assemble /dev/md0
|
||||
sleep 1
|
||||
fi
|
||||
'';
|
||||
};
|
||||
|
||||
cluster = {
|
||||
k8sNode = true;
|
||||
slurm = false;
|
||||
mounts = {
|
||||
rdma.enable = false;
|
||||
automount.enable = true;
|
||||
home = false;
|
||||
opt = false;
|
||||
work = false;
|
||||
data = false;
|
||||
backup = false;
|
||||
ceph = false;
|
||||
};
|
||||
};
|
||||
|
||||
features.hpc.slurm.mungeUid = 996;
|
||||
|
||||
features = {
|
||||
host = {
|
||||
inherit address;
|
||||
inherit name;
|
||||
};
|
||||
|
||||
os = {
|
||||
externalInterface = "eno1";
|
||||
nfs.enable = true;
|
||||
nfs.exports = ''
|
||||
/exports 10.255.241.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.243.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.244.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
'';
|
||||
};
|
||||
|
||||
k8s = {
|
||||
enable = true;
|
||||
node.enable = true;
|
||||
master.enable = false;
|
||||
inherit etcdCluster;
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.rc-local = {
|
||||
description = "rc.local script";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
path = [ "/run/current-system/sw/" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
};
|
||||
script = ''
|
||||
# if [ -e /sys/block/md126 ]; then
|
||||
# echo "deadline" > /sys/block/md126/queue/scheduler
|
||||
# # echo "4096" > /sys/block/md126/queue/nr_requests
|
||||
# echo "4096" > /sys/block/md126/queue/read_ahead_kb
|
||||
# echo "always" > /sys/kernel/mm/transparent_hugepage/enabled
|
||||
# echo "always" > /sys/kernel/mm/transparent_hugepage/defrag
|
||||
# fi
|
||||
grep -q rdma /proc/fs/nfsd/portlist || echo "rdma 20049" > /proc/fs/nfsd/portlist
|
||||
grep -q tcp /proc/fs/nfsd/portlist || echo "tcp 2049" > /proc/fs/nfsd/portlist
|
||||
'';
|
||||
};
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"vm.dirty_background_ratio" = 5;
|
||||
"vm.dirty_ratio" = 10;
|
||||
"vm.vfs_cache_pressure" = 50;
|
||||
"vm.min_free_kbytes" = 262144;
|
||||
};
|
||||
|
||||
networking = {
|
||||
hostName = name;
|
||||
interfaces.eno1 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = address;
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
ipv4.routes = [
|
||||
{
|
||||
address = "10.255.244.0";
|
||||
prefixLength = 24;
|
||||
via = "10.255.241.99";
|
||||
}
|
||||
];
|
||||
};
|
||||
interfaces.ibp59s0 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = "10.255.243.80";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
firewall = {
|
||||
allowedTCPPorts = [];
|
||||
allowedUDPPorts = [];
|
||||
extraCommands = ''
|
||||
iptables -t nat -A POSTROUTING -s 10.255.243.0/24 -j MASQUERADE
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
services.rpcbind.enable = true;
|
||||
|
||||
fileSystems = {
|
||||
"/exports/backup" = {
|
||||
device = "/backup";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/ekman" = {
|
||||
device = "/backup/ekman-nfs";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
};
|
||||
|
||||
programs.singularity.enable = true;
|
||||
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
../cluster.nix
|
||||
../mounts.nix
|
||||
];
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,172 +0,0 @@
|
||||
{ pkgs ? import <nixpkgs> {} }:
|
||||
let
|
||||
# Pin the deployment package-set to a specific version of nixpkgs
|
||||
# pkgs = import (builtins.fetchTarball {
|
||||
# url = "https://github.com/NixOS/nixpkgs/archive/e9148dc1c30e02aae80cc52f68ceb37b772066f3.tar.gz";
|
||||
# sha256 = "1ckzhh24mgz6jd1xhfgx0i9mijk6xjqxwsshnvq789xsavrmsc36";
|
||||
# }) {};
|
||||
# pkgs = import <nixpkgs> {};
|
||||
etcdCluster = import ../etcdCluster.nix;
|
||||
name = "fs-work";
|
||||
address = "10.255.241.90";
|
||||
in {
|
||||
fs-work = { config, pkgs, ... }: with pkgs; {
|
||||
deployment.tags = [ "fs" "fs-work" ];
|
||||
deployment.targetHost = address;
|
||||
system.autoUpgrade.enable = lib.mkForce false;
|
||||
|
||||
systemd.targets = {
|
||||
sleep.enable = false;
|
||||
suspend.enable = false;
|
||||
hibernate.enable = false;
|
||||
hybrid-sleep.enable = false;
|
||||
};
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
rdma-core
|
||||
hwloc
|
||||
];
|
||||
|
||||
cluster = {
|
||||
k8sNode = true;
|
||||
slurm = false;
|
||||
mounts = {
|
||||
rdma.enable = true;
|
||||
automount.enable = true;
|
||||
home = true;
|
||||
opt = false;
|
||||
work = false;
|
||||
data = false;
|
||||
backup = false;
|
||||
ceph = false;
|
||||
};
|
||||
};
|
||||
|
||||
features.hpc.slurm.mungeUid = 994;
|
||||
|
||||
features = {
|
||||
host = {
|
||||
inherit address;
|
||||
inherit name;
|
||||
};
|
||||
|
||||
os = {
|
||||
externalInterface = "enp33s0f3np3";
|
||||
nfs.enable = true;
|
||||
nfs.exports = ''
|
||||
/exports 10.255.241.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.243.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.244.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
'';
|
||||
};
|
||||
|
||||
k8s = {
|
||||
enable = true;
|
||||
node.enable = true;
|
||||
master.enable = false;
|
||||
inherit etcdCluster;
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.rc-local = {
|
||||
description = "rc.local script";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
path = [ "/run/current-system/sw/" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
};
|
||||
script = ''
|
||||
# if [ -e /sys/block/md126 ]; then
|
||||
# echo "deadline" > /sys/block/md126/queue/scheduler
|
||||
# # echo "4096" > /sys/block/md126/queue/nr_requests
|
||||
# echo "4096" > /sys/block/md126/queue/read_ahead_kb
|
||||
# echo "always" > /sys/kernel/mm/transparent_hugepage/enabled
|
||||
# echo "always" > /sys/kernel/mm/transparent_hugepage/defrag
|
||||
# fi
|
||||
grep -q rdma /proc/fs/nfsd/portlist || echo "rdma 20049" > /proc/fs/nfsd/portlist
|
||||
grep -q tcp /proc/fs/nfsd/portlist || echo "tcp 2049" > /proc/fs/nfsd/portlist
|
||||
'';
|
||||
};
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"vm.dirty_background_ratio" = 5;
|
||||
"vm.dirty_ratio" = 10;
|
||||
"vm.vfs_cache_pressure" = 50;
|
||||
"vm.min_free_kbytes" = 262144;
|
||||
};
|
||||
|
||||
networking = {
|
||||
hostName = name;
|
||||
interfaces.enp65s0f0np0 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [
|
||||
{
|
||||
address = address;
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
ipv4.routes = [
|
||||
{
|
||||
address = "10.255.242.0";
|
||||
prefixLength = 24;
|
||||
via = "10.255.241.100";
|
||||
}
|
||||
];
|
||||
};
|
||||
interfaces.enp1s0f1np1 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [
|
||||
{
|
||||
address = "10.255.244.90";
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
};
|
||||
# interfaces."ibp65s0.7666" = {
|
||||
# useDHCP = false;
|
||||
# };
|
||||
interfaces.ibp1s0f0 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [
|
||||
{
|
||||
address = "10.255.243.90";
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
};
|
||||
firewall = {
|
||||
allowedTCPPorts = [];
|
||||
allowedUDPPorts = [];
|
||||
extraCommands = ''
|
||||
# iptables -t nat -A POSTROUTING -s 10.255.243.0/24 -j MASQUERADE
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
services.rpcbind.enable = true;
|
||||
|
||||
fileSystems = {
|
||||
"/exports/work" = {
|
||||
device = "/work";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/opt" = {
|
||||
device = "/opt";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
};
|
||||
|
||||
programs.singularity.enable = true;
|
||||
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
../cluster.nix
|
||||
../mounts.nix
|
||||
];
|
||||
};
|
||||
}
|
||||
@@ -1,83 +0,0 @@
|
||||
''
|
||||
10.255.242.2 ekman-gw ekman-gw.compute.local ekman-gw.cluster.local
|
||||
10.255.242.3 front-gw front-gw.compute.local front-gw.cluster.local
|
||||
|
||||
10.255.241.90 fs-work fs-work.compute.local
|
||||
10.255.241.90 nfs1 nfs1.compute.local
|
||||
10.255.241.90 fs1 fs1.compute.local
|
||||
10.255.241.80 fs-backup fs-backup.compute.local
|
||||
10.255.241.80 fs2 fs2.compute.local
|
||||
10.255.241.100 ekman ekman.compute.local ekman.cluster.local
|
||||
10.255.241.100 etcd0 etcd0.compute.local
|
||||
10.255.241.80 etcd1 etcd1.compute.local
|
||||
10.255.241.90 etcd2 etcd2.compute.local
|
||||
10.255.241.99 frontend frontend.compute.local frontend.cluster.local
|
||||
10.255.243.99 ibfrontend ibfrontend.compute.local ibfrontend.cluster.local
|
||||
|
||||
10.255.241.101 c0-1 c0-1.compute.local
|
||||
10.255.241.102 c0-2 c0-2.compute.local
|
||||
10.255.241.103 c0-3 c0-3.compute.local
|
||||
10.255.241.104 c0-4 c0-4.compute.local
|
||||
10.255.241.105 c0-5 c0-5.compute.local
|
||||
10.255.241.106 c0-6 c0-6.compute.local
|
||||
10.255.241.107 c0-7 c0-7.compute.local
|
||||
10.255.241.108 c0-8 c0-8.compute.local
|
||||
10.255.241.109 c0-9 c0-9.compute.local
|
||||
10.255.241.110 c0-10 c0-10.compute.local
|
||||
10.255.241.111 c0-11 c0-11.compute.local
|
||||
10.255.241.112 c0-12 c0-12.compute.local
|
||||
10.255.241.113 c0-13 c0-13.compute.local
|
||||
10.255.241.114 c0-14 c0-14.compute.local
|
||||
10.255.241.115 c0-15 c0-15.compute.local
|
||||
10.255.241.116 c0-16 c0-16.compute.local
|
||||
10.255.241.117 c0-17 c0-17.compute.local
|
||||
10.255.241.118 c0-18 c0-18.compute.local
|
||||
|
||||
10.255.241.121 c1-1 c1-1.compute.local
|
||||
10.255.241.122 c1-2 c1-2.compute.local
|
||||
10.255.241.123 c1-3 c1-3.compute.local
|
||||
10.255.241.124 c1-4 c1-4.compute.local
|
||||
10.255.241.125 c1-5 c1-5.compute.local
|
||||
10.255.241.126 c1-6 c1-6.compute.local
|
||||
10.255.241.127 c1-7 c1-7.compute.local
|
||||
10.255.241.128 c1-8 c1-8.compute.local
|
||||
|
||||
10.255.243.90 ibfs-work ibfs-work.compute.local
|
||||
10.255.243.90 ibnfs1 ibnfs1.compute.local
|
||||
10.255.243.90 ibfs1 ibfs1.compute.local
|
||||
10.255.243.80 ibfs-backup ibfs-backup.compute.local
|
||||
10.255.243.80 ibfs2 ibfs2.compute.local
|
||||
10.255.243.100 ibekman ibekman.compute.local
|
||||
10.255.243.100 ibetcd0 ibetcd0.compute.local
|
||||
10.255.243.80 ibetcd1 ibetcd1.compute.local
|
||||
10.255.243.90 ibetcd2 ibetcd2.compute.local
|
||||
|
||||
10.255.243.101 ib0-1 ib0-1.compute.local
|
||||
10.255.243.102 ib0-2 ib0-2.compute.local
|
||||
10.255.243.103 ib0-3 ib0-3.compute.local
|
||||
10.255.243.104 ib0-4 ib0-4.compute.local
|
||||
10.255.243.105 ib0-5 ib0-5.compute.local
|
||||
10.255.243.106 ib0-6 ib0-6.compute.local
|
||||
10.255.243.107 ib0-7 ib0-7.compute.local
|
||||
10.255.243.108 ib0-8 ib0-8.compute.local
|
||||
10.255.243.109 ib0-9 ib0-9.compute.local
|
||||
10.255.243.110 ib0-10 ib0-10.compute.local
|
||||
10.255.243.111 ib0-11 ib0-11.compute.local
|
||||
10.255.243.112 ib0-12 ib0-12.compute.local
|
||||
10.255.243.113 ib0-13 ib0-13.compute.local
|
||||
10.255.243.114 ib0-14 ib0-14.compute.local
|
||||
10.255.243.115 ib0-15 ib0-15.compute.local
|
||||
10.255.243.116 ib0-16 ib0-16.compute.local
|
||||
10.255.243.117 ib0-17 ib0-17.compute.local
|
||||
10.255.243.118 ib0-18 ib0-18.compute.local
|
||||
10.255.243.118 ib0-18 ib0-19.compute.local
|
||||
|
||||
10.255.243.121 ib1-1 ib1-1.compute.local
|
||||
10.255.243.122 ib1-2 ib1-2.compute.local
|
||||
10.255.243.123 ib1-3 ib1-3.compute.local
|
||||
10.255.243.124 ib1-4 ib1-4.compute.local
|
||||
10.255.243.125 ib1-5 ib1-5.compute.local
|
||||
10.255.243.126 ib1-6 ib1-6.compute.local
|
||||
10.255.243.127 ib1-7 ib1-7.compute.local
|
||||
10.255.243.128 ib1-8 ib1-8.compute.local
|
||||
''
|
||||
@@ -1,2 +0,0 @@
|
||||
ç£/ik±/¨÷|ñR¯E¥R®$ÃQfj5·<35>rd<0E>С¶7“{¢–99âTÂîÛ›Ãi‹ÄŒ‰–,ÐŒÍhçïÙ8töv:%‘T”
|
||||
|ÈÚÈ´þΕ§VŒ00w<30>|ŸÏ®÷íà|È_ŸY{3L_!F1TdÔ&F7õ™B°R
|
||||
@@ -1,349 +0,0 @@
|
||||
{ pkgs, ...}:
|
||||
let
|
||||
computeNodes =
|
||||
import ./cluster/c0/nodes.nix ++
|
||||
import ./cluster/c1/nodes.nix ++
|
||||
[
|
||||
rec {
|
||||
idx = 100;
|
||||
name = "ekman";
|
||||
address = "10.255.241.${toString idx}";
|
||||
ipoib = "10.255.243.${toString idx}";
|
||||
pubkey = ./cluster/ekman/ekman.pub;
|
||||
}
|
||||
rec {
|
||||
idx = 90;
|
||||
name = "fs-work";
|
||||
address = "10.255.241.${toString idx}";
|
||||
ipoib = "10.255.243.${toString idx}";
|
||||
pubkey = ./cluster/fs-work/fs-work.pub;
|
||||
}
|
||||
rec {
|
||||
idx = 81;
|
||||
name = "fs-backup";
|
||||
address = "10.255.241.${toString idx}";
|
||||
ipoib = "10.255.243.${toString idx}";
|
||||
pubkey = ./cluster/fs-backup/fs-backup.pub;
|
||||
}
|
||||
];
|
||||
etcdCluster = import ./cluster/etcdCluster.nix;
|
||||
name = "frontend";
|
||||
address = "10.255.241.99";
|
||||
ipoib = "10.255.243.99";
|
||||
in {
|
||||
systemd.targets = {
|
||||
sleep.enable = false;
|
||||
suspend.enable = false;
|
||||
hibernate.enable = false;
|
||||
hybrid-sleep.enable = false;
|
||||
};
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
rdma-core
|
||||
hwloc
|
||||
headscale
|
||||
];
|
||||
|
||||
cluster = {
|
||||
k8sNode = true;
|
||||
compute = false;
|
||||
slurm = true;
|
||||
mounts = {
|
||||
rdma.enable = true;
|
||||
automount.enable = true;
|
||||
home = true;
|
||||
opt = true;
|
||||
work = true;
|
||||
data = true;
|
||||
backup = true;
|
||||
ceph = true;
|
||||
};
|
||||
};
|
||||
|
||||
features = {
|
||||
desktop.enable = false;
|
||||
cachix.enable = false;
|
||||
|
||||
host = {
|
||||
inherit address;
|
||||
inherit name;
|
||||
};
|
||||
|
||||
myvnc.enable = false;
|
||||
|
||||
os = {
|
||||
externalInterface = "eno1";
|
||||
nfs.enable = false;
|
||||
nfs.exports = ''
|
||||
/exports 10.255.241.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.243.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
'';
|
||||
};
|
||||
|
||||
hpc = {
|
||||
slurm.server = true;
|
||||
slurm.slurmrestd = false;
|
||||
slurm.mungeUid = 996;
|
||||
frontend = true;
|
||||
};
|
||||
|
||||
k8s = {
|
||||
master.enable = true;
|
||||
node.enable = true;
|
||||
nodes = computeNodes;
|
||||
inherit etcdCluster;
|
||||
};
|
||||
|
||||
monitoring = {
|
||||
server = {
|
||||
enable = false;
|
||||
scrapeHosts = [
|
||||
"frontend"
|
||||
"ekman"
|
||||
"fs-work"
|
||||
"fs-backup"
|
||||
] ++ (builtins.map (x: x.name) computeNodes);
|
||||
defaultAlertReceiver = {
|
||||
email_configs = [
|
||||
{ to = "jonas.juselius@oceanbox.io"; }
|
||||
];
|
||||
};
|
||||
pageAlertReceiver = {
|
||||
webhook_configs = [
|
||||
{
|
||||
url = "https://prometheus-msteams.k2.itpartner.no/ekman";
|
||||
http_config = {
|
||||
tls_config = { insecure_skip_verify = true; };
|
||||
};
|
||||
}
|
||||
];
|
||||
};
|
||||
};
|
||||
webUI.enable = false;
|
||||
webUI.acmeEmail = "innovasjon@itpartner.no";
|
||||
webUI.allow = [
|
||||
"10.1.2.0/24"
|
||||
"172.19.254.0/24"
|
||||
"172.19.255.0/24"
|
||||
];
|
||||
infiniband-exporter = {
|
||||
enable = true;
|
||||
nameMap = ''
|
||||
0xe8ebd3030024a2c6 "ekman"
|
||||
0x0c42a10300ddc4bc "frontend"
|
||||
0xe8ebd3030024a2ae "fs-work"
|
||||
0x1c34da0300787798 "fs-backup"
|
||||
0xe8ebd3030024981e "c0-1"
|
||||
0xe8ebd3030024a21a "c0-2"
|
||||
0xe8ebd30300249a3a "c0-3"
|
||||
0xe8ebd30300248b9e "c0-4"
|
||||
0xe8ebd30300248b86 "c0-5"
|
||||
0xe8ebd3030024998a "c0-6"
|
||||
0xe8ebd30300248b8e "c0-7"
|
||||
0xe8ebd3030024999e "c0-8"
|
||||
0xe8ebd30300248fca "c0-9"
|
||||
0xe8ebd3030024a216 "c0-10"
|
||||
0xe8ebd30300248b96 "c0-11"
|
||||
0xe8ebd30300248b9a "c0-12"
|
||||
0xe8ebd303002495d2 "c0-13"
|
||||
0xe8ebd303002495e2 "c0-14"
|
||||
0xe8ebd30300248f42 "c0-15"
|
||||
0xe8ebd303002495e6 "c0-16"
|
||||
0x0c42a10300dbe7f4 "c1-1"
|
||||
0x0c42a10300dbe7d8 "c1-2"
|
||||
0x0c42a10300dbe800 "c1-3"
|
||||
0x0c42a10300dbec80 "c1-4"
|
||||
0x0c42a10300dbea50 "c1-5"
|
||||
0x0c42a10300dbeb2c "c1-6"
|
||||
0x0c42a10300dbe7fc "c1-7"
|
||||
0x0c42a10300dbe5a0 "c1-8"
|
||||
'';
|
||||
};
|
||||
slurm-exporter = {
|
||||
enable = true;
|
||||
port = 6080;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
programs.singularity.enable = true;
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
services.kubernetes.apiserver.extraOpts = ''--oidc-client-id=9b6daef0-02fa-4574-8949-f7c1b5fccd15 --oidc-groups-claim=roles --oidc-issuer-url=https://login.microsoftonline.com/3f737008-e9a0-4485-9d27-40329d288089/v2.0'';
|
||||
|
||||
services.flannel.iface = "eno2";
|
||||
|
||||
networking = {
|
||||
useDHCP = false;
|
||||
hostName = name;
|
||||
interfaces.eno1 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [ {
|
||||
address = "10.255.242.3";
|
||||
prefixLength = 24;
|
||||
} ];
|
||||
};
|
||||
interfaces.eno2 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [
|
||||
{
|
||||
inherit address;
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
};
|
||||
interfaces.ens2f1np1 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [
|
||||
{
|
||||
address = "10.255.244.99";
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
};
|
||||
interfaces.ibs2f0 = {
|
||||
useDHCP = false;
|
||||
ipv4.addresses = [
|
||||
{
|
||||
address = ipoib;
|
||||
prefixLength = 24;
|
||||
}
|
||||
];
|
||||
};
|
||||
defaultGateway = "10.255.242.1";
|
||||
firewall = {
|
||||
allowedTCPPorts = [ 4443 4725 ];
|
||||
extraCommands = ''
|
||||
# needed for nodeport access on k1 and k2
|
||||
# iptables -t nat -A POSTROUTING -s 10.255.241.0/24 ! -d 10.255.0.0/16 -j SNAT --to-source 10.255.242.3
|
||||
iptables -t nat -A POSTROUTING -s 10.255.243.0/24 -j MASQUERADE
|
||||
# iptables -t nat -A POSTROUTING -s 100.64.0.0/24 -j MASQUERADE
|
||||
# iptables -t nat -A POSTROUTING -d 10.255.244.0/24 -j MASQUERADE
|
||||
# iptables -t nat -A POSTROUTING -s 10.255.244.0/24 -d 10.255.241.0/16 -j SNAT --to-source 10.255.241.99
|
||||
# iptables -t nat -A POSTROUTING -s 10.255.244.0/24 -j SNAT --to-source 10.255.242.3
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
fileSystems = {
|
||||
"/exports/public" = {
|
||||
device = "/srv/public";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
};
|
||||
|
||||
nix.extraOptions = ''
|
||||
# secret-key-files = /etc/nix/ekman.key
|
||||
'';
|
||||
|
||||
services.prometheus.alertmanager.configuration.global = {
|
||||
smtp_smarthost = "smtpgw.itpartner.no";
|
||||
# smtp_auth_username = "utvikling";
|
||||
# smtp_auth_password = "<redacted: do not commit credentials; load from a secrets file>";
|
||||
smtp_hello = "ekman.oceanbox.io";
|
||||
smtp_from = "noreply@ekman.oceanbox.io";
|
||||
};
|
||||
|
||||
security.pam = {
|
||||
services.sshd.googleAuthenticator.enable = true;
|
||||
loginLimits = [
|
||||
{
|
||||
domain = "@users";
|
||||
item = "rss";
|
||||
type = "hard";
|
||||
value = 16000000;
|
||||
}
|
||||
{
|
||||
domain = "@users";
|
||||
item = "cpu";
|
||||
type = "hard";
|
||||
value = 180;
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
system.activationScripts = {
|
||||
home-permissions.text = ''
|
||||
chmod 755 /home/olean
|
||||
chmod 755 /home/frankgaa
|
||||
chmod 755 /home/jonas
|
||||
chmod 755 /home/stig
|
||||
chmod 755 /home/bast
|
||||
chmod 755 /home/mrtz
|
||||
chmod 755 /home/avle
|
||||
chmod 755 /home/simenlk
|
||||
chmod 755 /home/ole
|
||||
'';
|
||||
};
|
||||
|
||||
# Use nvd to get package diff before apply
|
||||
system.activationScripts.system-diff = {
|
||||
supportsDryActivation = true; # safe: only outputs to stdout
|
||||
text = ''
|
||||
export PATH="${pkgs.lib.makeBinPath [ pkgs.nixVersions.latest ]}:$PATH"
|
||||
if [ -e /run/current-system ]; then
|
||||
${pkgs.lib.getExe pkgs.nvd} diff '/run/current-system' "$systemConfig" || true
|
||||
fi
|
||||
'';
|
||||
};
|
||||
|
||||
# ssh-rsa is deprecated, but putty/winscp users use it
|
||||
services.openssh.extraConfig = ''
|
||||
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
|
||||
PubkeyAuthOptions verify-required
|
||||
'';
|
||||
|
||||
# boot.kernelPackages = pkgs.linuxKernel.packages.linux_6_1;
|
||||
|
||||
virtualisation.docker.enable = pkgs.lib.mkForce true;
|
||||
|
||||
# Configuration for the coordination server for a tailscale network run using headscale.
|
||||
#
|
||||
# We can set it up to provide several exit nodes through which traffic can be routed.
|
||||
#
|
||||
# Servers can join using this command:
|
||||
# `tailscale up --login-server net.b0.itpartner.no --accept-dns=false --advertise-exit-node`
|
||||
#
|
||||
# with the following config:
|
||||
#
|
||||
# services.tailscale = {
|
||||
# enable = true;
|
||||
# useRoutingFeatures = "server"; # for exit-node usage
|
||||
# };
|
||||
#
|
||||
# Clients can join using this command:
|
||||
# `tailscale up --login-server net.b0.itpartner.no --accept-dns=false`
|
||||
#
|
||||
# services.headscale = {
|
||||
# enable = true;
|
||||
# address = "0.0.0.0";
|
||||
# port = 4725; # hscl
|
||||
# settings = import ./headscale/settings.nix;
|
||||
# };
|
||||
|
||||
services.tailscale = {
|
||||
enable = true;
|
||||
authKeyFile = "/var/lib/secrets/tailscale.key";
|
||||
useRoutingFeatures = "both"; # for exit-node usage
|
||||
extraUpFlags = [
|
||||
"--login-server=https://headscale.svc.oceanbox.io"
|
||||
"--accept-dns=false"
|
||||
"--advertise-exit-node"
|
||||
];
|
||||
};
|
||||
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
./cluster/cluster.nix
|
||||
./cluster/mounts.nix
|
||||
./cluster/myvnc.nix
|
||||
];
|
||||
}
|
||||
|
||||
27
dns.nix
Normal file
27
dns.nix
Normal file
@@ -0,0 +1,27 @@
|
||||
# CoreDNS module: serves the `obx.` zone from ./obx.zone and forwards all
# other queries to the upstream resolvers listed in the Corefile below.
{ pkgs, ... }:
let
  # Contents of the zone file as they exist at evaluation time.
  zoneText = builtins.readFile ./obx.zone;

  # Store path holding a copy of the zone; interpolated into the Corefile.
  zonefile = builtins.toFile "obx.zone" zoneText;
in
{
  services.coredns.enable = true;

  # Corefile for the single catch-all server block.
  services.coredns.config = ''
    . {
    errors
    log . {
    class error
    }
    file ${zonefile} obx.
    cache 30 {
    disable success cluster.local
    disable denial cluster.local
    }
    header {
    response set ra
    }
    forward . 8.8.8.8 8.8.4.4
    loop
    reload
    }
  '';
}
|
||||
29
ekman/bin/adduser.sh
Executable file
29
ekman/bin/adduser.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash
#
# Insert a new user and its matching group into stokes/users.nix
# (path is relative to the current working directory).
#
# usage: adduser.sh {uid} {user} {full name}
#
# The group entry is inserted before the line matching "# @grp@" and the
# user entry before the line matching "# @usr@".

if [ "$#" -ne 3 ] || [ -z "$2" ]; then
  echo "usage: adduser.sh {uid} {user} {full name}" >&2
  exit 1
fi

# The id is written unquoted into the Nix file as both uid and gid, so it
# has to be a plain number.
case $1 in
  '' | *[!0-9]*)
    echo "adduser.sh: uid must be numeric, got '$1'" >&2
    exit 1
    ;;
esac

id=$1
user=$2
name="$3"

# The leading "\ " is part of the text handed to sed's `i` command.
grp="\ $user = { gid = $id; };"

# NOTE: `read` is deliberately used WITHOUT -r. The doubled backslashes below
# are unescaped once by the here-document and once more by `read`, leaving a
# literal "\n" sequence in $usr for sed to expand into line breaks.
# `read -d ''` returns non-zero at end of input; that is expected here.
read -d '' usr << EOF
\\\ $user = {\\\n\
description = "$name";\\\n\
home = "/home/$user";\\\n\
group = "$user";\\\n\
extraGroups = [\\\n\
"users"\\\n\
"docker"\\\n\
];\\\n\
uid = $id;\\\n\
isNormalUser = true;\\\n\
createHome = true;\\\n\
openssh.authorizedKeys.keys = [];\\\n\
};\\\n\

EOF

sed -i "
/# @grp@/i $grp
/# @usr@/i $usr
" stokes/users.nix
|
||||
18
ekman/bin/initca.sh
Executable file
18
ekman/bin/initca.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
#
# Build the CA certificate for one cluster directory and add a root for the
# result via `nix-store --add-root`.
#
# usage: initca.sh {cluster}
#   {cluster} is a directory name relative to the parent of this script's
#   directory.

TOP="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)/.."

if [ -z "${1:-}" ]; then
  echo "usage: initca.sh {cluster}"
  exit 1
fi

ca="$TOP/modules/initca.nix"

# Stop here if the cluster directory does not exist; otherwise the build
# output and the root would be created in whatever directory we started in.
cd "$TOP/$1" || exit 1

echo "--- Preparing CA certificate"
nix-build -o ca "$ca"

echo "--- Safeguarding CA certificate"
# The inner command substitution is intentionally left unquoted so that
# several printed paths are passed on as separate arguments.
nix-store --add-root "$(pwd)/ca" --indirect -r $(nix-instantiate --add-root "$ca")
|
||||
136
ekman/c0/default.nix
Normal file
136
ekman/c0/default.nix
Normal file
@@ -0,0 +1,136 @@
|
||||
# Deployment definitions for the c0 compute nodes.
#
# Evaluates to an attribute set with one entry per record in ./nodes.nix,
# keyed by the record's `name`. Each record is expected to provide
# `name`, `address`, `iface`, `ipoib` and (for 100GbE hosts) `gbe100`.
{
  pkgs ? import <nixpkgs> { },
}:
let
  # Pin the deployment package-set to a specific version of nixpkgs
  # pkgs = import (builtins.fetchTarball {
  #   url = "https://github.com/NixOS/nixpkgs/archive/e6377ff35544226392b49fa2cf05590f9f0c4b43.tar.gz";
  #   sha256 = "1fra9wwy5gvj5ibayqkzqpwdf715bggc0qbmrfch4fghwvl5m70l";
  # }) {};
  # pkgs = import <nixpkgs> {};

  nodes = import ./nodes.nix;

  # Hosts that get the extra 100GbE network and the data/ceph mounts.
  has100GbE = [ "c0-18" ];

  # Build the single-attribute set { <host.name> = <machine config>; }.
  mkCompute =
    host:
    let
      hw = ./hardware-configuration.d + "/${host.name}.nix";
      is100GbE = builtins.elem host.name has100GbE;
    in
    {
      "${host.name}" = {
        deployment.tags = [
          "compute"
          "c0"
          "cluster"
        ];
        deployment.targetHost = host.address;

        cluster = {
          compute = true;
          k8sNode = true;
          mounts = {
            rdma.enable = true;
            automount.enable = true;
            users = true;
            opt = true;
            work = true;
            # data and ceph are enabled on 100GbE hosts only.
            data = is100GbE;
            ceph = is100GbE;
            backup = false;
          };
        };

        features = {
          host = {
            name = host.name;
            address = host.address;
          };
          os.networkmanager.enable = false;
          os.externalInterface = host.iface;
          hpc.computeNode = true;
          hpc.knem = true;
        };

        # services.udev.extraRules = ''
        #   KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
        # '';

        networking = {
          useNetworkd = true;
          hostName = host.name;
          useDHCP = false;
        };

        # systemd.services.systemd-networkd-wait-online.enable = false;

        systemd.network = {
          wait-online.ignoredInterfaces = [ "ibp1s0" ];
          # wait-online.enable = false;
          networks = {
            "40-${host.iface}" = {
              matchConfig.Name = host.iface;
              address = [ "${host.address}/24" ];
              # networkConfig = {
              #   DNSDefaultRoute = true;
              # };
              routes = [
                { Gateway = "10.255.241.1"; }
                {
                  Destination = "172.16.239.0/24";
                  Gateway = "10.255.241.210";
                }
                {
                  Destination = "10.255.242.0/24";
                  Gateway = "10.255.241.100";
                }
              ];
            };
            "45-ibp1s0" = {
              matchConfig.Name = "ibp1s0";
              address = [ "${host.ipoib}/24" ];
            };
          }
          // (
            if is100GbE then
              {
                "42-enp65s0f1np1" = {
                  DHCP = "no";
                  # Interface name must not carry surrounding whitespace.
                  matchConfig.Name = "enp65s0f1np1";
                  address = [ "${host.gbe100}/24" ];
                };
              }
            else
              { }
          );
        };

        # Kernel console on both the VGA console and the first serial port,
        # with a login getty kept running on the serial port.
        boot.kernelParams = [
          "console=tty0"
          "console=ttyS0,115200"
        ];
        systemd.services."serial-getty@ttyS0" = {
          enable = true;
          wantedBy = [ "getty.target" ];
          serviceConfig.Restart = "always";
        };

        imports = [
          hw
          ../default.nix
          ../mounts.nix
          ./kernel.nix
        ];
      };
    };
in
# Merge the per-host attribute sets into one.
builtins.foldl' (a: n: a // mkCompute n) { } nodes
|
||||
12
ekman/c0/hardware-configuration.d/fix.sh
Normal file
12
ekman/c0/hardware-configuration.d/fix.sh
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
# Permute the names of the per-node .nix files in this directory.
# Every file is first moved to its new name with a ".tmp" suffix so that no
# file is overwritten while the names are being swapped; the suffix is then
# stripped in a second pass.
#
# Abort on the first failed move instead of carrying on with a partial swap.
set -eu

mv c0-8.nix c0-6.nix.tmp
mv c0-6.nix c0-7.nix.tmp
mv c0-7.nix c0-8.nix.tmp
mv c0-15.nix c0-10.nix.tmp
mv c0-14.nix c0-12.nix.tmp
mv c0-12.nix c0-14.nix.tmp
mv c0-10.nix c0-15.nix.tmp

for staged in *.tmp; do
  # With no match the glob stays literal; skip it rather than fail in mv.
  [ -e "$staged" ] || continue
  mv -- "$staged" "${staged%.tmp}"
done
|
||||
|
||||
58
ekman/c0/kernel.nix
Normal file
58
ekman/c0/kernel.nix
Normal file
@@ -0,0 +1,58 @@
|
||||
# Kernel selection for the c0 nodes.
#
# Also carries a derivation for an out-of-tree i40e driver; nothing in this
# module references it at the moment (the only uses are commented out).
{ pkgs, lib, ... }:
let
  kernel = pkgs.linuxPackages.kernel;

  # Exact 5.10.x release the kernel package is overridden to.
  pinnedVersion = "5.10.239";

  i40e = pkgs.stdenv.mkDerivation rec {
    version = "2.13.10";
    name = "i40e-${version}-${kernel.version}";

    src = pkgs.fetchFromGitHub {
      owner = "dmarion";
      repo = "i40e";
      rev = "7228a7c3b362c3170baa2f9a9c6870a900e78dbd";
      sha256 = "087kvq9wrc1iw6vig8cqcx7cb6346wx8qxzb85c3n8638vq1vrxr";
    };

    hardeningDisable = [ "pic" ];
    dontStrip = true;
    enableParallelBuilding = true;

    # Point the driver's makefiles at the kernel development tree in the store.
    configurePhase = ''
      cd src
      kernel_version=${kernel.modDirVersion}
      sed -i -e 's|/lib/modules|${kernel.dev}/lib/modules|' Makefile
      sed -i -e 's|/lib/modules|${kernel.dev}/lib/modules|' common.mk
      export makeFlags="BUILD_KERNEL=$kernel_version"
    '';

    # The module is installed under the name i40e2.ko.
    installPhase = ''
      install -v -D -m 644 i40e.ko "$out/lib/modules/$kernel_version/kernel/drivers/net/i40e/i40e2.ko"
    '';

    meta = {
      description = "Linux kernel drivers for Intel Ethernet adapters and LOMs (LAN On Motherboard)";
      homepage = "https://github.com/dmarion/i40e";
      license = lib.licenses.gpl2;
    };
  };
in
{
  # i40e2 = i40e;
  boot.kernelPackages = pkgs.linuxPackagesFor (
    pkgs.linux_5_10.override {
      argsOverride = {
        version = pinnedVersion;
        modDirVersion = pinnedVersion;
        src = pkgs.fetchurl {
          url = "mirror://kernel/linux/kernel/v5.x/linux-${pinnedVersion}.tar.xz";
          sha256 = "1nzhl1y6avfl77fyqwjwy3qc6679gp92k0d3aarscrdydcml5yid";
        };
      };
    }
  );
  # boot.kernelPackages = pkgs.linuxKernel.packages.linux_5_10;
  # overlay = self: super: {
  #   linuxPackages_5_4 = super.linuxPackages_5_4 // { inherit i40e; };
  # };
}
|
||||
13
ekman/c0/nodes.nix
Normal file
13
ekman/c0/nodes.nix
Normal file
@@ -0,0 +1,13 @@
|
||||
# Inventory of the c0 compute nodes: a list of 18 records, c0-1 through c0-18.
let
  # Record for node number `n` (1-based).
  mkNode =
    n:
    rec {
      # Last octet shared by all of the node's addresses.
      idx = 100 + n;
      # Nodes 17 and 18 use a different interface name than nodes 1-16.
      iface = if n > 16 then "enp33s0f3np3" else "enp33s0f0np0";
      name = "c0-${toString n}";
      address = "10.255.241.${toString idx}";
      ipoib = "10.255.243.${toString idx}";
      gbe100 = "10.255.244.${toString idx}";
      pubkey = ./. + "/ssh_host_key.d/${name}.pub";
    };
in
builtins.genList (i: mkNode (i + 1)) 18
|
||||
12
ekman/c0/ssh_host_key.d/fix.sh
Normal file
12
ekman/c0/ssh_host_key.d/fix.sh
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env bash
# Permute the names of the per-node .pub files in this directory.
# Every file is first moved to its new name with a ".tmp" suffix so that no
# file is overwritten while the names are being swapped; the suffix is then
# stripped in a second pass.
#
# Abort on the first failed move instead of carrying on with a partial swap.
set -eu

mv c0-8.pub c0-6.pub.tmp
mv c0-6.pub c0-7.pub.tmp
mv c0-7.pub c0-8.pub.tmp
mv c0-15.pub c0-10.pub.tmp
mv c0-14.pub c0-12.pub.tmp
mv c0-12.pub c0-14.pub.tmp
mv c0-10.pub c0-15.pub.tmp

for staged in *.tmp; do
  # With no match the glob stays literal; skip it rather than fail in mv.
  [ -e "$staged" ] || continue
  mv -- "$staged" "${staged%.tmp}"
done
|
||||
|
||||
129
ekman/c1/default.nix
Normal file
129
ekman/c1/default.nix
Normal file
@@ -0,0 +1,129 @@
|
||||
# Deployment definitions for the c1 compute nodes.
#
# Evaluates to an attribute set with one entry per record in ./nodes.nix,
# keyed by the record's `name`. Each record is expected to provide
# `name`, `address`, `iface` and `gbe100`.
{
  pkgs ? import <nixpkgs> { },
}:
let
  # Pin the deployment package-set to a specific version of nixpkgs
  # pkgs = import (builtins.fetchTarball {
  #   url = "https://github.com/NixOS/nixpkgs/archive/e6377ff35544226392b49fa2cf05590f9f0c4b43.tar.gz";
  #   sha256 = "1fra9wwy5gvj5ibayqkzqpwdf715bggc0qbmrfch4fghwvl5m70l";
  # }) {};
  # pkgs = import <nixpkgs> {};

  nodes = import ./nodes.nix;

  # Build the single-attribute set { <host.name> = <machine config>; }.
  mkCompute =
    host:
    let
      hw = ./hardware-configuration.d + "/${host.name}.nix";
    in
    {
      "${host.name}" = {
        deployment.tags = [
          "compute"
          "c1"
          "cluster"
        ];
        deployment.targetHost = host.address;

        cluster = {
          compute = true;
          k8sNode = true;
          mounts = {
            rdma.enable = false;
            gbe100.enable = true;
            automount.enable = true;
            users = true;
            opt = true;
            work = true;
            data = true;
            ceph = true;
            backup = false;
          };
        };

        features = {
          host = {
            name = host.name;
            address = host.address;
          };
          os.networkmanager.enable = false;
          # Same interface the "40-…" network below matches on.
          os.externalInterface = host.iface;
          hpc.computeNode = true;
        };

        # services.udev.extraRules = ''
        #   KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
        # '';

        networking = {
          useNetworkd = true;
          hostName = host.name;
          useDHCP = false;
        };

        # systemd.services.systemd-networkd-wait-online.enable = false;
        # systemd.network.wait-online.ignoredInterfaces = [ "ibp1s0" ];
        systemd.network = {
          # wait-online.enable = false;
          networks = {
            "40-${host.iface}" = {
              DHCP = "no";
              matchConfig.Name = host.iface;
              address = [ "${host.address}/24" ];
              networkConfig = {
                DNSDefaultRoute = true;
              };
              routes = [
                { Gateway = "10.255.241.1"; }
                {
                  Destination = "172.16.239.0/24";
                  Gateway = "10.255.241.210";
                }
                {
                  Destination = "10.255.242.0/24";
                  Gateway = "10.255.241.100";
                }
              ];
            };
            "42-enp65s0np0" = {
              DHCP = "no";
              # Interface name must not carry surrounding whitespace.
              matchConfig.Name = "enp65s0np0";
              address = [ "${host.gbe100}/24" ];
            };
          };
        };

        # boot.kernel.sysctl = {
        #   "net.ipv4.tcp_timestamps" = 0;
        #   "net.ipv4.tcp_sack" = 1;
        #   "net.core.netdev_max_backlog" = 250000;
        #   "net.core.rmem_max" = 4194304;
        #   "net.core.wmem_max" = 4194304;
        #   "net.core.rmem_default" = 4194304;
        #   "net.core.wmem_default" = 4194304;
        #   "net.core.optmem_max" = 4194304;
        #   "net.ipv4.tcp_rmem" = "4096 87380 4194304";
        #   "net.ipv4.tcp_wmem" = "4096 65536 4194304";
        #   "net.ipv4.tcp_low_latency" = 1;
        #   "net.ipv4.tcp_adv_win_scale" = 1;
        # };

        # Kernel console on both the VGA console and the first serial port,
        # with a login getty kept running on the serial port.
        boot.kernelParams = [
          "console=tty0"
          "console=ttyS0,115200"
        ];
        systemd.services."serial-getty@ttyS0" = {
          enable = true;
          wantedBy = [ "getty.target" ];
          serviceConfig.Restart = "always";
        };

        imports = [
          hw
          ../default.nix
          ../mounts.nix
        ];
      };
    };
in
# Merge the per-host attribute sets into one.
builtins.foldl' (a: n: a // mkCompute n) { } nodes
|
||||
@@ -3,10 +3,11 @@ let nodes = genList (n: n + 1) 8; in
|
||||
map (n: (
|
||||
rec {
|
||||
idx = 120 + n;
|
||||
iface = "eno33np0";
|
||||
name = "c1-${toString n}";
|
||||
target = "10.255.241.${toString (idx + 100)}";
|
||||
# target = "10.255.241.${toString (idx + 100)}";
|
||||
address = "10.255.241.${toString idx}";
|
||||
ipoib = "10.255.243.${toString idx}";
|
||||
gbe100 = "10.255.244.${toString idx}";
|
||||
pubkey = ./. + "/pubkeys/c1-${toString n}.pub";
|
||||
pubkey = ./. + "/ssh_host_key.d/c1-${toString n}.pub";
|
||||
})) nodes
|
||||
@@ -1,11 +1,14 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
{
|
||||
pkgs,
|
||||
lib,
|
||||
config,
|
||||
...
|
||||
}:
|
||||
with lib;
|
||||
let
|
||||
cfg = config.features.host;
|
||||
|
||||
computeNodes =
|
||||
import ./c0/nodes.nix ++
|
||||
import ./c1/nodes.nix;
|
||||
computeNodes = import ./c0/nodes.nix ++ import ./c1/nodes.nix;
|
||||
|
||||
mkSANs = host: [
|
||||
host.name
|
||||
@@ -25,13 +28,32 @@ let
|
||||
loader.systemd-boot.enable = true;
|
||||
loader.efi.canTouchEfiVariables = true;
|
||||
# kernelPackages = pkgs.linuxKernel.packages.linux_6_9;
|
||||
kernelModules = [ "ib_umad" "ib_ipoib" "ceph" ];
|
||||
kernelModules = [
|
||||
"ib_umad"
|
||||
"ib_ipoib"
|
||||
"ceph"
|
||||
];
|
||||
# kernelParams = [
|
||||
# "console=ttyS0,115200"
|
||||
# "console=tty0"
|
||||
# ];
|
||||
};
|
||||
|
||||
# systemd-resolved: DNSSEC validation off, 1.1.1.1 / 1.0.0.1 as fallback
# resolvers, and the local stub listener, mDNS and LLMNR all switched off.
services.resolved = {
|
||||
enable = true;
|
||||
dnssec = "false";
|
||||
fallbackDns = [
|
||||
"1.1.1.1"
|
||||
"1.0.0.1"
|
||||
];
|
||||
# domains = [ "ekman.tos.obx" "~." ];
|
||||
extraConfig = ''
|
||||
# The stub listener conflicts with dnsmasq and kubernetes dns.
# (Comment kept on its own line: systemd config files have no trailing-comment
# syntax, so text after the value would be parsed as part of the value.)
DNSStubListener=no
|
||||
MulticastDNS=no
|
||||
LLMNR=no
|
||||
'';
|
||||
};
|
||||
|
||||
console = {
|
||||
font = "Lat2-Terminus16";
|
||||
keyMap = "us";
|
||||
@@ -40,38 +62,19 @@ let
|
||||
i18n = {
|
||||
defaultLocale = "en_US.UTF-8";
|
||||
extraLocaleSettings = {
|
||||
LC_CTYPE="en_DK.UTF-8";
|
||||
LC_TIME="en_DK.UTF-8";
|
||||
LC_PAPER="en_DK.UTF-8";
|
||||
LC_NAME="en_DK.UTF-8";
|
||||
LC_ADDRESS="en_DK.UTF-8";
|
||||
LC_TELEPHONE="en_DK.UTF-8";
|
||||
LC_MEASUREMENT="en_DK.UTF-8";
|
||||
LC_IDENTIFICATION="en_DK.UTF-8";
|
||||
LC_CTYPE = "en_DK.UTF-8";
|
||||
LC_TIME = "en_DK.UTF-8";
|
||||
LC_PAPER = "en_DK.UTF-8";
|
||||
LC_NAME = "en_DK.UTF-8";
|
||||
LC_ADDRESS = "en_DK.UTF-8";
|
||||
LC_TELEPHONE = "en_DK.UTF-8";
|
||||
LC_MEASUREMENT = "en_DK.UTF-8";
|
||||
LC_IDENTIFICATION = "en_DK.UTF-8";
|
||||
};
|
||||
};
|
||||
|
||||
time.timeZone = "Europe/Oslo";
|
||||
|
||||
programs.msmtp = {
|
||||
enable = true;
|
||||
accounts = {
|
||||
default = {
|
||||
auth = false;
|
||||
tls = false;
|
||||
tls_starttls = false;
|
||||
port = 24;
|
||||
from = "ekman@oceanbox.io";
|
||||
host = "smtpgw.itpartner.no";
|
||||
# user = "utvikling";
|
||||
# password = "<redacted>"; # credential removed from source: rotate it and load it from a secret file
|
||||
};
|
||||
};
|
||||
defaults = {
|
||||
aliases = "/etc/aliases";
|
||||
};
|
||||
};
|
||||
|
||||
environment.etc = {
|
||||
"aliases" = {
|
||||
text = ''
|
||||
@@ -96,25 +99,28 @@ let
|
||||
};
|
||||
cachix.enable = false;
|
||||
monitoring.nodeExporter.enable = false;
|
||||
hpc.mft.enable = false; # Mellanox MFT
|
||||
mft.enable = true; # Mellanox MFT
|
||||
};
|
||||
|
||||
networking = {
|
||||
domain = mkDefault "compute.local";
|
||||
defaultGateway = mkDefault "10.255.241.1";
|
||||
nameservers = mkDefault [ "8.8.8.8" ];
|
||||
search = mkDefault [];
|
||||
extraHosts = import ./hosts.nix;
|
||||
useDHCP = false;
|
||||
domain = "ekman.tos.obx";
|
||||
nameservers = [
|
||||
"10.255.241.210"
|
||||
"10.255.241.99"
|
||||
];
|
||||
search = [ "ekman.tos.obx" ];
|
||||
extraHosts = import ../hosts.nix + import ./hosts.nix;
|
||||
firewall.extraCommands = ''
|
||||
iptables -I INPUT -s 10.255.241.0/24 -j ACCEPT
|
||||
iptables -I INPUT -s 10.255.243.0/24 -j ACCEPT
|
||||
iptables -I INPUT -s 100.64.0.0/24 -j ACCEPT
|
||||
'';
|
||||
};
|
||||
|
||||
environment.variables = {};
|
||||
environment.variables = { };
|
||||
|
||||
# systemd.services."serial-getty@ttyS0".enable = true;
|
||||
|
||||
# environment.etc."beegfs/connauthfile" = {
|
||||
# source = ./connauthfile;
|
||||
# mode = "0400";
|
||||
@@ -133,60 +139,52 @@ let
|
||||
};
|
||||
|
||||
system.activationScripts = {
|
||||
kraken-permissions.text = ''
|
||||
chmod 755 /work/kraken
|
||||
'';
|
||||
kraken-permissions.text = ''
|
||||
chmod 755 /work/kraken
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
slurm = {
|
||||
features.hpc.slurm = {
|
||||
enable = true;
|
||||
client = true;
|
||||
mungeKey = ./munge.key;
|
||||
mungeUid = mkDefault 996; # hack
|
||||
# pkey = "0x7666";
|
||||
controlMachine = "frontend";
|
||||
mailDomain = "oceanbox.io";
|
||||
nodeName = [
|
||||
"c0-[1-18] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"
|
||||
"c1-[1-8] Sockets=1 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=100000 State=UNKNOWN"
|
||||
"ekman Sockets=2 CoresPerSocket=64 ThreadsPerCore=2 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"
|
||||
"frontend Sockets=2 CoresPerSocket=16 ThreadsPerCore=2 RealMemory=92000 TmpDisk=200000 State=UNKNOWN"
|
||||
];
|
||||
partitionName = [
|
||||
"batch Nodes=c0-[1-18] Default=YES MaxTime=INFINITE State=UP"
|
||||
"ekman Nodes=ekman MaxTime=1:00:00 State=UP"
|
||||
"short Nodes=c1-[1-8] MaxTime=INFINITE State=UP"
|
||||
"long Nodes=c1-[3-8] MaxTime=INFINITE State=UP"
|
||||
"stats Nodes=c1-[7-8] MaxTime=INFINITE State=UP"
|
||||
];
|
||||
enable = true;
|
||||
client = true;
|
||||
clusterName = "ekman";
|
||||
slurmctldHosts = [
|
||||
"ekman-manage(10.255.241.99)"
|
||||
];
|
||||
dbdHost = "slurm-accounting";
|
||||
mungeKey = ../munge.key;
|
||||
jwtKey = ../jwt_hs256.key;
|
||||
# slurmKey = ../slurm.key;
|
||||
# pkey = "0x7666";
|
||||
mailDomain = "oceanbox.io";
|
||||
nodeName = [
|
||||
"c0-[1-18] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"
|
||||
"c1-[1-8] Sockets=1 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=100000 State=UNKNOWN"
|
||||
"ekman Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN"
|
||||
"ekman-manage Sockets=2 CoresPerSocket=16 ThreadsPerCore=2 RealMemory=92000 TmpDisk=200000 State=UNKNOWN"
|
||||
"fs-backup Sockets=2 CoresPerSocket=20 ThreadsPerCore=1 RealMemory=92000 TmpDisk=300000 State=UNKNOWN"
|
||||
];
|
||||
partitionName = [
|
||||
"batch Nodes=c0-[1-17] Default=YES MaxTime=INFINITE State=UP"
|
||||
"ekman Nodes=ekman MaxTime=1:00:00 State=UP"
|
||||
"short Nodes=c1-[1-8],c0-18 MaxTime=INFINITE State=UP"
|
||||
"long Nodes=c1-[2-8],c0-18 MaxTime=INFINITE State=UP"
|
||||
"stats Nodes=c1-[7-8] MaxTime=INFINITE State=UP"
|
||||
"test Nodes=fs-backup MaxTime=INFINITE State=UP"
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
compute = {
|
||||
system.activationScripts = {
|
||||
mkWorkDir.text = "mkdir -p /work";
|
||||
mkWorkDir.text = "mkdir -p /work";
|
||||
};
|
||||
|
||||
cluster.slurm = true;
|
||||
|
||||
features = {
|
||||
hpc = {
|
||||
enable = true;
|
||||
beegfs = {
|
||||
enable = false;
|
||||
# beegfs = {
|
||||
# work = {
|
||||
# mgmtdHost = "ibbeegfs0";
|
||||
# connAuthFile = "/etc/beegfs/connauthfile";
|
||||
# client = {
|
||||
# enable = false;
|
||||
# mountPoint = "/work";
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
@@ -200,32 +198,31 @@ let
|
||||
initca = ./ca;
|
||||
cidr = "10.100.0.0/16";
|
||||
master = {
|
||||
name = "frontend";
|
||||
name = "ekman-manage";
|
||||
address = "10.255.241.99";
|
||||
# extraSANs = [
|
||||
# "frontend.oceanbox.io"
|
||||
# ];
|
||||
};
|
||||
ingressNodes = [
|
||||
"ekman-manage.oceanbox.io"
|
||||
"ekman.oceanbox.io"
|
||||
];
|
||||
fileserver = "fs-work";
|
||||
charts = {
|
||||
acme_email = "acme@oceanbox.io";
|
||||
# grafana_smtp_user = "utvikling";
|
||||
# grafana_smtp_password = "<redacted>"; # credential removed from source: rotate it and load it from a secret file
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
system.activationScripts = {
|
||||
copyCaKey.text = "cp ${./ca}/ca-key.pem /var/lib/kubernetes/secrets";
|
||||
copyCaKey.text = "cp ${./ca}/ca-key.pem /var/lib/kubernetes/secrets";
|
||||
};
|
||||
|
||||
# services.kubernetes.kubelet.extraSANs = mkSANs {
|
||||
# name = cfg.name;
|
||||
# address = cfg.address;
|
||||
# };
|
||||
services.kubernetes.kubelet.extraSANs = mkSANs {
|
||||
name = cfg.name;
|
||||
address = cfg.address;
|
||||
};
|
||||
};
|
||||
|
||||
shosts = {
|
||||
@@ -236,63 +233,96 @@ let
|
||||
text = ''
|
||||
10.255.241.80
|
||||
10.255.241.90
|
||||
'' + builtins.foldl' (a: x: a + "${x.address}\n") "" computeNodes;
|
||||
''
|
||||
+ builtins.foldl' (a: x: a + "${x.address}\n") "" computeNodes;
|
||||
};
|
||||
|
||||
programs.ssh.knownHosts = {
|
||||
frontend = {
|
||||
ekman-manage = {
|
||||
hostNames = [
|
||||
"frontend" "frontend.compute.local" "frontend.oceanbox.io" "10.255.241.99" "10.255.243.99"
|
||||
"ekman-manage"
|
||||
"ekman-manage.ekman.tos.obx"
|
||||
"frontend.oceanbox.io"
|
||||
"10.255.241.99"
|
||||
"10.255.243.99"
|
||||
];
|
||||
publicKeyFile = ../frontend.pub;
|
||||
publicKeyFile = ./manage/ssh_host_key.pub;
|
||||
};
|
||||
ekman = {
|
||||
hostNames = [
|
||||
"ekman" "ekman.compute.local" "ekman.oceanbox.io" "10.255.241.100" "10.255.243.100"
|
||||
"ekman"
|
||||
"ekman.ekman.tos.obx"
|
||||
"ekman.oceanbox.io"
|
||||
"10.255.241.100"
|
||||
"10.255.243.100"
|
||||
];
|
||||
publicKeyFile = ./ekman/ekman.pub;
|
||||
publicKeyFile = ./login/ssh_host_key.pub;
|
||||
};
|
||||
fs-work = {
|
||||
hostNames = [
|
||||
"fs-work" "fs-work.compute.local" "10.255.241.90" "10.255.243.90"
|
||||
"fs-work"
|
||||
"fs-work.ekman.tos.obx"
|
||||
"10.255.241.90"
|
||||
"10.255.243.90"
|
||||
];
|
||||
publicKeyFile = ./fs-work/fs-work.pub;
|
||||
publicKeyFile = ./fs-work/ssh_host_key.pub;
|
||||
};
|
||||
fs-backup = {
|
||||
hostNames = [
|
||||
"fs-backup" "fs-backup.compute.local" "10.255.241.80" "10.255.243.80"
|
||||
"fs-backup"
|
||||
"fs-backup.ekman.tos.obx"
|
||||
"10.255.241.80"
|
||||
"10.255.243.80"
|
||||
];
|
||||
publicKeyFile = ./fs-backup/fs-backup.pub;
|
||||
publicKeyFile = ./fs-backup/ssh_host_key.pub;
|
||||
};
|
||||
} // builtins.foldl' (a: x:
|
||||
let n = toString x.idx;
|
||||
in a // {
|
||||
"${x.name}" = {
|
||||
hostNames = [
|
||||
"${x.name}"
|
||||
"${x.name}.compute.local"
|
||||
"10.255.241.${n}"
|
||||
"10.255.243.${n}"
|
||||
];
|
||||
publicKeyFile = x.pubkey;
|
||||
};
|
||||
}) {} computeNodes;
|
||||
}
|
||||
// builtins.foldl' (
|
||||
a: x:
|
||||
let
|
||||
n = toString x.idx;
|
||||
in
|
||||
a
|
||||
// {
|
||||
"${x.name}" = {
|
||||
hostNames = [
|
||||
"${x.name}"
|
||||
"${x.name}.ekman.tos.obx"
|
||||
"10.255.241.${n}"
|
||||
"10.255.243.${n}"
|
||||
];
|
||||
publicKeyFile = x.pubkey;
|
||||
};
|
||||
}
|
||||
) { } computeNodes;
|
||||
|
||||
environment.systemPackages = [
|
||||
openssh-shosts
|
||||
pkgs.inotify-tools
|
||||
pkgs.ceph
|
||||
pkgs.ceph-client
|
||||
openssh-shosts
|
||||
pkgs.inotify-tools
|
||||
pkgs.ceph
|
||||
pkgs.ceph-client
|
||||
];
|
||||
|
||||
security.wrappers = {
|
||||
ssh-keysign = {
|
||||
source = "${openssh-shosts}/libexec/ssh-keysign";
|
||||
owner = "root";
|
||||
group = "root";
|
||||
permissions = "u+rs,g+rx,o+rx";
|
||||
};
|
||||
ssh-keysign = {
|
||||
source = "${openssh-shosts}/libexec/ssh-keysign";
|
||||
owner = "root";
|
||||
group = "root";
|
||||
permissions = "u+rs,g+rx,o+rx";
|
||||
};
|
||||
};
|
||||
|
||||
# Use nvd to get package diff before apply
|
||||
system.activationScripts.system-diff = {
|
||||
supportsDryActivation = true; # safe: only outputs to stdout
|
||||
text = ''
|
||||
export PATH="${pkgs.lib.makeBinPath [ pkgs.nixVersions.latest ]}:$PATH"
|
||||
if [ -e /run/current-system ]; then
|
||||
${pkgs.lib.getExe pkgs.nvd} diff '/run/current-system' "$systemConfig" || true
|
||||
fi
|
||||
'';
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
openssh-shosts = pkgs.openssh.overrideAttrs (attrs: {
|
||||
@@ -300,7 +330,8 @@ let
|
||||
doCheck = false; # the tests take hours
|
||||
});
|
||||
|
||||
in {
|
||||
in
|
||||
{
|
||||
options.cluster = {
|
||||
compute = mkEnableOption "Enable compute node configs";
|
||||
};
|
||||
@@ -324,7 +355,6 @@ in {
|
||||
imports = [
|
||||
../modules
|
||||
../nixos
|
||||
./users.nix
|
||||
../users.nix
|
||||
];
|
||||
}
|
||||
|
||||
235
ekman/fs-backup/default.nix
Normal file
235
ekman/fs-backup/default.nix
Normal file
@@ -0,0 +1,235 @@
|
||||
{
|
||||
pkgs ? import <nixpkgs> { },
|
||||
}:
|
||||
let
|
||||
name = "fs-backup";
|
||||
address = "10.255.241.80";
|
||||
etcdCluster = import ../etcdCluster.nix;
|
||||
in
|
||||
{
|
||||
fs-backup =
|
||||
{ config, pkgs, ... }:
|
||||
with pkgs;
|
||||
{
|
||||
deployment.tags = [
|
||||
"fs"
|
||||
"fs-backup"
|
||||
];
|
||||
deployment.targetHost = address;
|
||||
system.autoUpgrade.enable = lib.mkForce false;
|
||||
|
||||
systemd.targets = {
|
||||
sleep.enable = false;
|
||||
suspend.enable = false;
|
||||
hibernate.enable = false;
|
||||
hybrid-sleep.enable = false;
|
||||
};
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
environment.systemPackages = with pkgs; [
|
||||
rdma-core
|
||||
hwloc
|
||||
xfsprogs
|
||||
];
|
||||
|
||||
cluster = {
|
||||
k8sNode = true;
|
||||
slurm = true;
|
||||
mounts = {
|
||||
rdma.enable = false;
|
||||
automount.enable = true;
|
||||
users = true;
|
||||
opt = true;
|
||||
work = true;
|
||||
data = true;
|
||||
ceph = true;
|
||||
backup = false;
|
||||
};
|
||||
};
|
||||
|
||||
features = {
|
||||
host = {
|
||||
inherit address;
|
||||
inherit name;
|
||||
};
|
||||
|
||||
os = {
|
||||
networkmanager.enable = false;
|
||||
externalInterface = "eno1";
|
||||
nfs.enable = true;
|
||||
nfs.exports = ''
|
||||
/exports 10.255.241.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.244.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
'';
|
||||
};
|
||||
|
||||
k8s = {
|
||||
enable = true;
|
||||
node.enable = true;
|
||||
master.enable = false;
|
||||
inherit etcdCluster;
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.rc-local = {
|
||||
description = "rc.local script";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
path = [ "/run/current-system/sw/" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
};
|
||||
script = ''
|
||||
# if [ -e /sys/block/md126 ]; then
|
||||
# echo "deadline" > /sys/block/md126/queue/scheduler
|
||||
# # echo "4096" > /sys/block/md126/queue/nr_requests
|
||||
# echo "4096" > /sys/block/md126/queue/read_ahead_kb
|
||||
# echo "always" > /sys/kernel/mm/transparent_hugepage/enabled
|
||||
# echo "always" > /sys/kernel/mm/transparent_hugepage/defrag
|
||||
# fi
|
||||
grep -q rdma /proc/fs/nfsd/portlist || echo "rdma 20049" > /proc/fs/nfsd/portlist
|
||||
grep -q tcp /proc/fs/nfsd/portlist || echo "tcp 2049" > /proc/fs/nfsd/portlist
|
||||
'';
|
||||
};
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"vm.dirty_background_ratio" = 5;
|
||||
"vm.dirty_ratio" = 10;
|
||||
"vm.vfs_cache_pressure" = 50;
|
||||
"vm.min_free_kbytes" = 262144;
|
||||
};
|
||||
|
||||
networking = {
|
||||
useNetworkd = true;
|
||||
useDHCP = false;
|
||||
hostName = name;
|
||||
firewall = {
|
||||
allowedTCPPorts = [ ];
|
||||
allowedUDPPorts = [ ];
|
||||
extraCommands = ''
|
||||
# iptables -t nat -A POSTROUTING -s 10.255.243.0/24 -j MASQUERADE
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
systemd.network = {
|
||||
networks."40-eno1" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "eno1";
|
||||
address = [ "${address}/24" ];
|
||||
networkConfig = {
|
||||
DNSDefaultRoute = true;
|
||||
};
|
||||
routes = [
|
||||
{ Gateway = "10.255.241.1"; }
|
||||
{
|
||||
Destination = "10.255.242.0/24";
|
||||
Gateway = "10.255.241.100";
|
||||
}
|
||||
{
|
||||
Destination = "172.16.239.0/24";
|
||||
Gateway = "10.255.241.210";
|
||||
}
|
||||
];
|
||||
};
|
||||
networks."40-enp59s0np0" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "enp59s0np0";
|
||||
address = [ "10.255.244.80/24" ];
|
||||
};
|
||||
};
|
||||
|
||||
services.rpcbind.enable = true;
|
||||
|
||||
fileSystems = {
|
||||
"/exports/backup" = {
|
||||
device = "/backup";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/ekman" = {
|
||||
device = "/backup/ekman-nfs";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
};
|
||||
|
||||
programs.singularity.enable = true;
|
||||
|
||||
boot.swraid = {
|
||||
enable = true;
|
||||
mdadmConf = ''
|
||||
DEVICE partitions
|
||||
ARRAY /dev/md/0 metadata=1.2 UUID=b743fdd4:5b339cc7:7c43f50f:3b81243e name=fs2:0
|
||||
'';
|
||||
};
|
||||
|
||||
systemd.services.restart-md0 = {
|
||||
description = "restart /dev/md0";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [
|
||||
"sys-devices-virtual-block-md0.device"
|
||||
"-.mount"
|
||||
];
|
||||
before = [ "backup.mount" ];
|
||||
path = [ "/run/current-system/sw/" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
};
|
||||
script = ''
|
||||
restart=0
|
||||
${util-linux}/bin/lsblk -o MAJ:MIN -n /dev/md0 | grep -q "254:" || restart=1
|
||||
if [ $restart = 1 ]; then
|
||||
${mdadm}/bin/mdadm --stop /dev/md0
|
||||
${mdadm}/bin/mdadm --assemble /dev/md0
|
||||
sleep 1
|
||||
fi
|
||||
'';
|
||||
};
|
||||
|
||||
#services.tailscale = {
|
||||
# enable = true;
|
||||
# authKeyFile = "/var/lib/secrets/tailscale.key";
|
||||
# useRoutingFeatures = "both";
|
||||
# extraUpFlags = [
|
||||
# "--login-server=https://headscale.svc.oceanbox.io"
|
||||
# "--accept-dns=true"
|
||||
# "--accept-routes=true"
|
||||
# "--snat-subnet-routes=true"
|
||||
# "--advertise-routes=10.255.241.0/24"
|
||||
# ];
|
||||
#};
|
||||
|
||||
#services.networkd-dispatcher = {
|
||||
# enable = true;
|
||||
# rules = {
|
||||
# "tailscale-router" = {
|
||||
# onState = [ "routable" ];
|
||||
# script = ''
|
||||
# #!${pkgs.runtimeShell}
|
||||
# ${pkgs.ethtool}/bin/ethtool -K eno1 rx-udp-gro-forwarding on
|
||||
# ${pkgs.ethtool}/bin/ethtool -K eno1 rx-gro-list off
|
||||
# ${pkgs.ethtool}/bin/ethtool -K eno1 tx-udp-segmentation on
|
||||
# exit 0
|
||||
# '';
|
||||
# };
|
||||
# };
|
||||
#};
|
||||
|
||||
boot.kernelParams = [
|
||||
"console=tty0"
|
||||
"console=ttyS0,115200"
|
||||
];
|
||||
systemd.services."serial-getty@ttyS0" = {
|
||||
enable = true;
|
||||
wantedBy = [ "getty.target" ];
|
||||
serviceConfig.Restart = "always";
|
||||
};
|
||||
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
../default.nix
|
||||
../mounts.nix
|
||||
];
|
||||
};
|
||||
}
|
||||
@@ -4,25 +4,32 @@
|
||||
{ config, lib, pkgs, modulesPath, ... }:
|
||||
|
||||
{
|
||||
imports =
|
||||
[ (modulesPath + "/installer/scan/not-detected.nix")
|
||||
];
|
||||
imports = [ (modulesPath + "/installer/scan/not-detected.nix") ];
|
||||
|
||||
boot.initrd.availableKernelModules = [ "ahci" "xhci_pci" "megaraid_sas" "mpt3sas" "usbhid" "usb_storage" "sd_mod" "sr_mod" ];
|
||||
boot.initrd.availableKernelModules = [
|
||||
"ahci"
|
||||
"xhci_pci"
|
||||
"megaraid_sas"
|
||||
"mpt3sas"
|
||||
"usbhid"
|
||||
"usb_storage"
|
||||
"sd_mod"
|
||||
"sr_mod"
|
||||
];
|
||||
boot.initrd.kernelModules = [ "dm-snapshot" ];
|
||||
boot.kernelModules = [ ];
|
||||
boot.extraModulePackages = [ ];
|
||||
|
||||
fileSystems."/" =
|
||||
{ device = "/dev/disk/by-uuid/19b7e607-b138-442a-9026-3ae1092046c9";
|
||||
fsType = "ext4";
|
||||
};
|
||||
fileSystems."/" = {
|
||||
device = "/dev/disk/by-uuid/19b7e607-b138-442a-9026-3ae1092046c9";
|
||||
fsType = "ext4";
|
||||
};
|
||||
|
||||
fileSystems."/backup" =
|
||||
{ device = "/dev/vg1/data";
|
||||
fsType = "xfs";
|
||||
options = [ "ro" "noauto" ];
|
||||
};
|
||||
# fileSystems."/backup" =
|
||||
# { device = "/dev/vg1/data";
|
||||
# fsType = "xfs";
|
||||
# options = [ "ro" "noauto" ];
|
||||
# };
|
||||
|
||||
swapDevices = [ ];
|
||||
|
||||
@@ -37,5 +44,6 @@
|
||||
# networking.interfaces.eno4.useDHCP = lib.mkDefault true;
|
||||
|
||||
nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
|
||||
hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
|
||||
hardware.cpu.intel.updateMicrocode =
|
||||
lib.mkDefault config.hardware.enableRedistributableFirmware;
|
||||
}
|
||||
186
ekman/fs-work/default.nix
Normal file
186
ekman/fs-work/default.nix
Normal file
@@ -0,0 +1,186 @@
|
||||
{
|
||||
pkgs ? import <nixpkgs> { },
|
||||
}:
|
||||
let
|
||||
name = "fs-work";
|
||||
address = "10.255.241.90";
|
||||
etcdCluster = import ../etcdCluster.nix;
|
||||
in
|
||||
{
|
||||
fs-work =
|
||||
{ config, pkgs, ... }:
|
||||
with pkgs;
|
||||
{
|
||||
# Deployment tags for this node: the shared "fs" group plus a tag of its own.
# The second tag was "fs-backup" (copied from the fs-backup node), which made
# selecting by that tag also match fs-work.
deployment.tags = [
|
||||
"fs"
|
||||
"fs-work"
|
||||
];
|
||||
deployment.targetHost = address;
|
||||
system.autoUpgrade.enable = lib.mkForce false;
|
||||
|
||||
systemd.targets = {
|
||||
sleep.enable = false;
|
||||
suspend.enable = false;
|
||||
hibernate.enable = false;
|
||||
hybrid-sleep.enable = false;
|
||||
};
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
environment.systemPackages = with pkgs; [
|
||||
rdma-core
|
||||
hwloc
|
||||
xfsprogs
|
||||
];
|
||||
|
||||
cluster = {
|
||||
k8sNode = true;
|
||||
slurm = false;
|
||||
mounts = {
|
||||
rdma.enable = true;
|
||||
automount.enable = true;
|
||||
users = true;
|
||||
opt = true;
|
||||
work = false;
|
||||
data = true;
|
||||
ceph = true;
|
||||
backup = false;
|
||||
};
|
||||
};
|
||||
|
||||
features = {
|
||||
host = {
|
||||
inherit address;
|
||||
inherit name;
|
||||
};
|
||||
|
||||
os = {
|
||||
networkmanager.enable = false;
|
||||
externalInterface = "enp33s0f3np3";
|
||||
nfs.enable = true;
|
||||
nfs.exports = ''
|
||||
/exports 10.255.241.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.243.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.244.0/24(insecure,rw,async,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
'';
|
||||
};
|
||||
|
||||
k8s = {
|
||||
enable = true;
|
||||
node.enable = true;
|
||||
master.enable = false;
|
||||
inherit etcdCluster;
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.rc-local = {
|
||||
description = "rc.local script";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
path = [ "/run/current-system/sw/" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
};
|
||||
script = ''
|
||||
# if [ -e /sys/block/md126 ]; then
|
||||
# echo "deadline" > /sys/block/md126/queue/scheduler
|
||||
# # echo "4096" > /sys/block/md126/queue/nr_requests
|
||||
# echo "4096" > /sys/block/md126/queue/read_ahead_kb
|
||||
# echo "always" > /sys/kernel/mm/transparent_hugepage/enabled
|
||||
# echo "always" > /sys/kernel/mm/transparent_hugepage/defrag
|
||||
# fi
|
||||
grep -q rdma /proc/fs/nfsd/portlist || echo "rdma 20049" > /proc/fs/nfsd/portlist
|
||||
grep -q tcp /proc/fs/nfsd/portlist || echo "tcp 2049" > /proc/fs/nfsd/portlist
|
||||
'';
|
||||
};
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"vm.dirty_background_ratio" = 5;
|
||||
"vm.dirty_ratio" = 10;
|
||||
"vm.vfs_cache_pressure" = 50;
|
||||
"vm.min_free_kbytes" = 262144;
|
||||
};
|
||||
|
||||
networking = {
|
||||
useNetworkd = true;
|
||||
useDHCP = false;
|
||||
hostName = name;
|
||||
firewall = {
|
||||
allowedTCPPorts = [ ];
|
||||
allowedUDPPorts = [ ];
|
||||
extraCommands = ''
|
||||
# iptables -t nat -A POSTROUTING -s 10.255.243.0/24 -j MASQUERADE
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
systemd.network = {
|
||||
networks."40-enp65s0f0np0" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "enp65s0f0np0";
|
||||
address = [ "${address}/24" ];
|
||||
networkConfig = {
|
||||
DNSDefaultRoute = true;
|
||||
};
|
||||
routes = [
|
||||
{ Gateway = "10.255.241.1"; }
|
||||
{
|
||||
Destination = "10.255.242.0/24";
|
||||
Gateway = "10.255.241.100";
|
||||
}
|
||||
{
|
||||
Destination = "172.16.239.0/24";
|
||||
Gateway = "10.255.241.210";
|
||||
}
|
||||
];
|
||||
};
|
||||
networks."40-enp1s0f1np1" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "enp1s0f1np1";
|
||||
address = [ "10.255.244.90/24" ];
|
||||
};
|
||||
# Static address for interface ibp1s0f0 on 10.255.243.0/24, no DHCP.
# The match name previously carried a stray trailing space ("ibp1s0f0 ");
# it is written exactly here, like the other matchConfig.Name entries.
networks."42-ibp1s0f0" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "ibp1s0f0";
|
||||
address = [ "10.255.243.90/24" ];
|
||||
};
|
||||
};
|
||||
|
||||
services.rpcbind.enable = true;
|
||||
|
||||
fileSystems = {
|
||||
"/exports/work" = {
|
||||
device = "/work";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/opt" = {
|
||||
device = "/opt";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
};
|
||||
|
||||
programs.singularity.enable = true;
|
||||
|
||||
security.sudo.extraConfig = ''
|
||||
%sif ALL=(ALL) NOPASSWD: /run/current-system/sw/bin/singularity
|
||||
%admin ALL=(admin) NOPASSWD: ALL
|
||||
'';
|
||||
|
||||
boot.kernelParams = [
|
||||
"console=tty0"
|
||||
"console=ttyS0,115200"
|
||||
];
|
||||
systemd.services."serial-getty@ttyS0" = {
|
||||
enable = true;
|
||||
wantedBy = [ "getty.target" ];
|
||||
serviceConfig.Restart = "always";
|
||||
};
|
||||
|
||||
imports = [
|
||||
./hardware-configuration.nix
|
||||
../default.nix
|
||||
../mounts.nix
|
||||
];
|
||||
};
|
||||
}
|
||||
24
ekman/hive.nix
Normal file
24
ekman/hive.nix
Normal file
@@ -0,0 +1,24 @@
|
||||
# Top-level node set for the ekman cluster: builds one attribute set that maps
# node names to their configurations.
# NOTE(review): the deployment.* options suggest this is consumed by a
# Colmena-style deployment tool — confirm.
let
|
||||
# Pin the deployment package-set to a specific version of nixpkgs
|
||||
# pkgs = import (builtins.fetchTarball {
|
||||
# url = "https://github.com/NixOS/nixpkgs/archive/e6377ff35544226392b49fa2cf05590f9f0c4b43.tar.gz";
|
||||
# sha256 = "1fra9wwy5gvj5ibayqkzqpwdf715bggc0qbmrfch4fghwvl5m70l";
|
||||
# }) {};
|
||||
# The pin above is disabled; <nixpkgs> is resolved from the evaluating
# machine's Nix search path, so the package set is not fixed by this file.
pkgs = import <nixpkgs> {};
|
||||
|
||||
# Management node, defined inline; its configuration comes from ./manage.
ekman-manage = {
|
||||
deployment = {
|
||||
tags = [ "manage" "ekman" ];
|
||||
allowLocalDeployment = true;
|
||||
# NOTE(review): presumably a null targetHost restricts this node to local
# deployment — confirm against the deployment tool's documentation.
targetHost = null;
|
||||
};
|
||||
imports = [ ./manage ];
|
||||
};
|
||||
|
||||
# Each import below is called with the shared pkgs and yields an attribute
# set of node definitions.
login = import ./login { inherit pkgs; };
|
||||
c0 = import ./c0 { inherit pkgs; };
|
||||
c1 = import ./c1 { inherit pkgs; };
|
||||
fs-work = import ./fs-work { inherit pkgs; };
|
||||
fs-backup = import ./fs-backup { inherit pkgs; };
|
||||
in
|
||||
# "frontend" is a second name for the same ekman-manage definition; the
# per-group node sets are merged in with //, later sets winning on name clashes.
{ inherit ekman-manage; frontend = ekman-manage; } // login // c0 // c1 // fs-work // fs-backup
|
||||
32
ekman/hosts.nix
Normal file
32
ekman/hosts.nix
Normal file
@@ -0,0 +1,32 @@
|
||||
''
|
||||
10.255.241.99 ekman-manage
|
||||
10.255.241.100 ekman-login
|
||||
10.255.241.100 ekman
|
||||
10.255.241.101 c0-1
|
||||
10.255.241.102 c0-2
|
||||
10.255.241.103 c0-3
|
||||
10.255.241.104 c0-4
|
||||
10.255.241.105 c0-5
|
||||
10.255.241.106 c0-6
|
||||
10.255.241.107 c0-7
|
||||
10.255.241.108 c0-8
|
||||
10.255.241.109 c0-9
|
||||
10.255.241.110 c0-10
|
||||
10.255.241.111 c0-11
|
||||
10.255.241.112 c0-12
|
||||
10.255.241.113 c0-13
|
||||
10.255.241.114 c0-14
|
||||
10.255.241.115 c0-15
|
||||
10.255.241.116 c0-16
|
||||
10.255.241.117 c0-17
|
||||
10.255.241.118 c0-18
|
||||
|
||||
10.255.241.121 c1-1
|
||||
10.255.241.122 c1-2
|
||||
10.255.241.123 c1-3
|
||||
10.255.241.124 c1-4
|
||||
10.255.241.125 c1-5
|
||||
10.255.241.126 c1-6
|
||||
10.255.241.127 c1-7
|
||||
10.255.241.128 c1-8
|
||||
''
|
||||
337
ekman/login/default.nix
Normal file
337
ekman/login/default.nix
Normal file
@@ -0,0 +1,337 @@
|
||||
{
|
||||
pkgs ? import <nixpkgs> { },
|
||||
}:
|
||||
let
|
||||
name = "ekman";
|
||||
address = "10.255.241.100";
|
||||
in
|
||||
{
|
||||
ekman-login =
|
||||
{ config, pkgs, ... }:
|
||||
with pkgs;
|
||||
{
|
||||
deployment.tags = [
|
||||
"login"
|
||||
"cluster"
|
||||
];
|
||||
deployment.targetHost = address;
|
||||
system.autoUpgrade.enable = lib.mkForce false;
|
||||
|
||||
systemd.targets = {
|
||||
sleep.enable = false;
|
||||
suspend.enable = false;
|
||||
hibernate.enable = false;
|
||||
hybrid-sleep.enable = false;
|
||||
};
|
||||
|
||||
cluster = {
|
||||
compute = true;
|
||||
k8sNode = true;
|
||||
mounts = {
|
||||
rdma.enable = true;
|
||||
automount.enable = true;
|
||||
users = false;
|
||||
opt = false;
|
||||
work = true;
|
||||
data = true;
|
||||
ceph = true;
|
||||
backup = false;
|
||||
};
|
||||
};
|
||||
|
||||
features = {
|
||||
host = {
|
||||
inherit name;
|
||||
inherit address;
|
||||
};
|
||||
|
||||
myvnc.enable = false;
|
||||
|
||||
os = {
|
||||
networkmanager.enable = false;
|
||||
externalInterface = "enp33s0f0np0";
|
||||
nfs.enable = true;
|
||||
nfs.exports = ''
|
||||
/exports 10.255.241.0/24(insecure,rw,sync,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
/exports 10.255.243.0/24(insecure,rw,sync,no_subtree_check,crossmnt,fsid=0,no_root_squash)
|
||||
'';
|
||||
};
|
||||
|
||||
hpc = {
|
||||
slurm.server = false;
|
||||
slurm.slurmrestd = false;
|
||||
manageNode = false;
|
||||
loginNode = true;
|
||||
knem = false;
|
||||
};
|
||||
|
||||
k8s = {
|
||||
master.enable = false;
|
||||
node.enable = true;
|
||||
};
|
||||
|
||||
desktop.enable = false;
|
||||
# server.enable = true;
|
||||
monitoring = {
|
||||
# server = {
|
||||
# enable = false;
|
||||
# scrapeHosts = [ "frontend" "nfs0" "nfs1" ] ++ (builtins.map (x: x.name) computeNodes);
|
||||
# defaultAlertReceiver = {
|
||||
# email_configs = [
|
||||
# { to = "jonas.juselius@oceanbox.io"; }
|
||||
# ];
|
||||
# };
|
||||
# pageAlertReceiver = {
|
||||
# webhook_configs = [
|
||||
# {
|
||||
# url = "https://prometheus-msteams.k2.itpartner.no/ekman";
|
||||
# http_config = {
|
||||
# tls_config = { insecure_skip_verify = true; };
|
||||
# };
|
||||
# }
|
||||
# ];
|
||||
# };
|
||||
# };
|
||||
# webUI.enable = false;
|
||||
# webUI.acmeEmail = "innovasjon@itpartner.no";
|
||||
# webUI.allow = [
|
||||
# "10.1.2.0/24"
|
||||
# "172.19.254.0/24"
|
||||
# "172.19.255.0/24"
|
||||
# ];
|
||||
};
|
||||
};
|
||||
|
||||
# services.udev.extraRules = ''
|
||||
# KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x7666"
|
||||
# '';
|
||||
|
||||
# boot.kernelPackages = pkgs.linuxKernel.packages.linux_6_6;
|
||||
services.flannel.iface = "enp33s0f3np3";
|
||||
|
||||
networking = {
|
||||
useNetworkd = true;
|
||||
useDHCP = false;
|
||||
hostName = name;
|
||||
firewall = {
|
||||
allowedTCPPorts = [ 6443 ];
|
||||
extraCommands = ''
|
||||
# needed for nodeport access on k1 and k2
|
||||
# iptables -t nat -A POSTROUTING -s 10.255.241.0/24 ! -d 10.255.0.0/16 -j SNAT --to-source 10.255.242.2
|
||||
iptables -t nat -A POSTROUTING -s 10.255.243.0/24 -j MASQUERADE
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
systemd.network = {
|
||||
networks = {
|
||||
"40-enp33s0f0np0" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "enp33s0f0np0";
|
||||
address = [ "10.255.242.2/24" ];
|
||||
routes = [
|
||||
{ Gateway = "10.255.242.1"; }
|
||||
];
|
||||
};
|
||||
"40-enp33s0f3np3" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "enp33s0f3np3";
|
||||
address = [ "${address}/24" ];
|
||||
networkConfig = {
|
||||
DNSDefaultRoute = true;
|
||||
};
|
||||
routes = [
|
||||
{
|
||||
Destination = "172.16.239.0/24";
|
||||
Gateway = "10.255.241.210";
|
||||
}
|
||||
];
|
||||
};
|
||||
"41-enp65s0f1np1" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "enp65s0f1np1";
|
||||
address = [ "10.255.244.100/24" ];
|
||||
};
|
||||
"45-ibp65s0f0" = {
|
||||
DHCP = "no";
|
||||
matchConfig.Name = "ibp65s0f0";
|
||||
address = [ "10.255.243.100/24" ];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
services.resolved = {
|
||||
# DNS=[::1]:53
|
||||
extraConfig = ''
|
||||
DNSStubListener=no
|
||||
'';
|
||||
};
|
||||
|
||||
fileSystems = {
|
||||
"/exports/users" = {
|
||||
device = "/home";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/opt/bin" = {
|
||||
device = "/opt/bin";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/opt/sif" = {
|
||||
device = "/opt/sif";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/exports/nfs-provisioner" = {
|
||||
device = "/vol/nfs-provisioner";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/users" = {
|
||||
device = "/home";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/vol/local-storage/vol1" = {
|
||||
device = "/vol/vol1";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
"/vol/local-storage/vol2" = {
|
||||
device = "/vol/vol2";
|
||||
options = [ "bind" ];
|
||||
};
|
||||
};
|
||||
|
||||
nix.extraOptions = ''
|
||||
secret-key-files = /etc/nix/ekman.key
|
||||
'';
|
||||
|
||||
# services.xserver = {
|
||||
# enable = false;
|
||||
# enableCtrlAltBackspace = true;
|
||||
# layout = "us";
|
||||
# xkbVariant = "altgr-intl";
|
||||
# xkbOptions = "eurosign:e";
|
||||
# displayManager = {
|
||||
# gdm.enable = false;
|
||||
# job.logToFile = true;
|
||||
# };
|
||||
# # desktopManager.xfce.enable = true;
|
||||
# };
|
||||
|
||||
# Global SMTP settings for Alertmanager e-mail notifications; authentication
# is currently disabled (both auth lines are commented out).
services.prometheus.alertmanager.configuration.global = {
|
||||
# NOTE(review): Alertmanager documents smtp_smarthost as "host:port"; no port
# is given here — confirm the intended port and whether this value validates.
smtp_smarthost = "smtpgw.itpartner.no";
|
||||
# smtp_auth_username = "utvikling";
|
||||
# smtp_auth_password = "<redacted>"; # credential removed from source: rotate it and load it from a secret file
|
||||
smtp_hello = "ekman.oceanbox.io";
|
||||
smtp_from = "noreply@ekman.oceanbox.io";
|
||||
};
|
||||
|
||||
# services.nginx = {
|
||||
# virtualHosts = {
|
||||
# "ds.matnoc.regnekraft.io" = {
|
||||
# forceSSL = true;
|
||||
# enableACME = true;
|
||||
# serverAliases = [];
|
||||
# locations."/" = {
|
||||
# proxyPass = "http://localhost:9088";
|
||||
# proxyWebsockets = false;
|
||||
# extraConfig = ''
|
||||
# allow 10.1.2.0/24;
|
||||
# allow 172.19.254.0/24;
|
||||
# allow 172.19.255.0/24;
|
||||
# deny all;
|
||||
# '';
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
|
||||
# services.gitlab-runner = {
|
||||
# enable = true;
|
||||
# extraPackages = with pkgs; [
|
||||
# singularity
|
||||
# ];
|
||||
# concurrent = 4;
|
||||
# services = {
|
||||
# sif = {
|
||||
# registrationConfigFile = "/var/lib/secrets/gitlab-runner-registration";
|
||||
# executor = "shell";
|
||||
# tagList = [ "ekman" "sif" ];
|
||||
# };
|
||||
# };
|
||||
# };
|
||||
|
||||
security.sudo.extraConfig = ''
|
||||
%sif ALL=(ALL) NOPASSWD: /run/current-system/sw/bin/singularity
|
||||
%admin ALL=(admin) NOPASSWD: ALL
|
||||
# gitlab-runner ALL=(ALL) NOPASSWD: /run/current-system/sw/bin/singularity
|
||||
'';
|
||||
|
||||
# PAM settings: Google Authenticator for sshd plus hard resource limits for
# members of the "users" group (entries follow limits.conf(5) semantics).
security.pam = {
|
||||
# Enable the Google Authenticator PAM module for SSH logins.
services.sshd.googleAuthenticator.enable = true;
|
||||
loginLimits = [
|
||||
# Hard cap on resident set size; limits.conf(5) gives the unit as KB.
# NOTE(review): limits.conf(5) documents "rss" as ignored on Linux 2.4.30
# and later, so this entry likely has no effect — confirm whether "as" or a
# cgroup memory limit was intended.
{
|
||||
domain = "@users";
|
||||
item = "rss";
|
||||
type = "hard";
|
||||
value = 16000000;
|
||||
}
|
||||
# Hard cap on CPU time per process; limits.conf(5) gives the unit as minutes.
{
|
||||
domain = "@users";
|
||||
item = "cpu";
|
||||
type = "hard";
|
||||
value = 180;
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
# Activation-time shell snippet that sets mode 755 on the listed home
# directories and on /work/kraken.
# NOTE(review): the paths are hard-coded; chmod will report an error for any
# directory that does not exist on this host — confirm all of them do.
system.activationScripts = {
|
||||
home-permissions.text = ''
|
||||
chmod 755 /home/olean
|
||||
chmod 755 /home/frankgaa
|
||||
chmod 755 /home/jonas
|
||||
chmod 755 /home/mrtz
|
||||
chmod 755 /home/avle
|
||||
chmod 755 /home/stig
|
||||
chmod 755 /home/bast
|
||||
chmod 755 /home/simenlk
|
||||
chmod 755 /work/kraken
|
||||
'';
|
||||
};
|
||||
|
||||
# Extra sshd_config text.
# ssh-rsa is deprecated, but putty/winscp users use it; the
# pubkeyacceptedalgorithms override that would re-allow it is currently
# commented out inside the string, so the only active directive is
# "PubkeyAuthOptions verify-required".
# NOTE(review): per sshd_config(5), verify-required concerns FIDO (sk-*)
# keys — confirm this is the intended effect.
services.openssh.extraConfig = ''
|
||||
# pubkeyacceptedalgorithms ssh-rsa,ssh-ed25519-cert-v01@openssh.com,ecdsa-sha2-nistp256-cert-v01@openssh.com,ecdsa-sha2-nistp384-cert-v01@openssh.com,ecdsa-sha2-nistp521-cert-v01@openssh.com,sk-ssh-ed25519-cert-v01@openssh.com,sk-ecdsa-sha2-nistp256-cert-v01@openssh.com,rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-ed25519,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,ecdsa-sha2-nistp521,sk-ssh-ed25519@openssh.com,sk-ecdsa-sha2-nistp256@openssh.com,rsa-sha2-512,rsa-sha2-256
|
||||
PubkeyAuthOptions verify-required
|
||||
'';
|
||||
|
||||
# This file adds no system-wide packages of its own (empty list).
environment.systemPackages = [ ];
|
||||
|
||||
# Force Docker on; mkForce gives this definition priority over other
# definitions of the same option.
# NOTE(review): presumably overrides a default set in an imported module — confirm.
virtualisation.docker.enable = pkgs.lib.mkForce true;
|
||||
|
||||
# Tailscale client configuration, pointed at a self-hosted login server.
services.tailscale = {
|
||||
enable = true;
|
||||
# Auth key is read from a file on the host rather than embedded here.
authKeyFile = "/var/lib/secrets/tailscale.key";
|
||||
useRoutingFeatures = "client";
|
||||
# Extra flags for "tailscale up": custom login server, accept DNS settings
# from the control server, and advertise the tag "tag:hpc" for this node.
extraUpFlags = [
|
||||
"--login-server=https://headscale.svc.oceanbox.io"
|
||||
"--accept-dns=true"
|
||||
"--advertise-tags=tag:hpc"
|
||||
];
|
||||
};
|
||||
|
||||
# Kernel command line: register both the virtual terminal (tty0) and the
# first serial port (ttyS0 at 115200 baud) as consoles.
boot.kernelParams = [
|
||||
"console=tty0"
|
||||
"console=ttyS0,115200"
|
||||
];
|
||||
# Login prompt on the serial port ttyS0: started with getty.target and
# always restarted when it exits.
systemd.services."serial-getty@ttyS0" = {
|
||||
enable = true;
|
||||
wantedBy = [ "getty.target" ];
|
||||
serviceConfig.Restart = "always";
|
||||
};
|
||||
|
||||
# Modules imported by this one; paths are relative to this file's directory.
imports = [
|
||||
./hardware-configuration.nix
|
||||
../default.nix
|
||||
../mounts.nix
|
||||
../myvnc.nix
|
||||
];
|
||||
};
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user