fix: add k8s and hpc modules to main repo
This commit is contained in:
24
modules/default.nix
Normal file
24
modules/default.nix
Normal file
@@ -0,0 +1,24 @@
|
||||
# Top-level feature-module aggregator: declares shared host identity
# options and pulls in the per-feature submodules.
{ lib, ... }:

with lib;

{
  options.features.host = {
    # Short hostname of this machine.
    name = mkOption {
      # FIX: was `types.str` with `default = null` — null is not a valid
      # str, so merely reading the unset option aborted evaluation.
      type = types.nullOr types.str;
      default = null;
    };

    # Network address of this machine.
    address = mkOption {
      # Same fix as `name` above: allow the null default.
      type = types.nullOr types.str;
      default = null;
    };
  };

  imports = [
    ./k8s
    ./hpc
    ./fs
    ./pki/certs.nix
    ../nixos
  ];
}
|
||||
84
modules/fs/default.nix
Normal file
84
modules/fs/default.nix
Normal file
@@ -0,0 +1,84 @@
|
||||
# Fileserver feature module: shared storage-node tuning plus optional
# NFS and GlusterFS services, controlled by `features.fs.*`.
{ pkgs, lib, config, ... }:

with lib;

let
  cfg = config.features.fs;

  # FIX: the certificate option is declared as features.fs.glusterfs.cert
  # below; the original read the non-existent `cfg.cert`, which made the
  # glusterfs branch fail to evaluate.
  cert = cfg.glusterfs.cert;

  pki = import ./pki.nix { inherit pkgs; ca = cfg.initca; };

  # Baseline configuration for any fileserver role.
  common = {
    # Device-mapper modules required for LVM snapshots/mirrors/thin pools.
    boot.kernelModules = [
      "dm_snapshot"
      "dm_mirror"
      "dm_thin_pool"
    ];

    boot.kernel.sysctl = {
      # Transparent hugepages cause latency spikes in storage daemons.
      "kernel.mm.transparent_hugepage.enabled" = "never";
      "net.core.somaxconn" = "512";
    };

    networking = {
      # Gluster brick ports plus portmapper (111), NFS (2049) and the
      # glusterd management ports (24007/24008).
      firewall.allowedTCPPortRanges = [ { from = 5000; to = 50000; } ];
      firewall.allowedTCPPorts = [ 111 2049 ];
      firewall.allowedUDPPorts = [ 111 2049 24007 24008 ];
    };

    environment.systemPackages = with pkgs; [
      nfs-utils
      lvm2
    ];
  };

  # GlusterFS server with TLS between peers/clients.
  glusterfs = {
    services.glusterfs = {
      enable = true;
      tlsSettings = {
        caCert = pki.ca.cert;
        tlsKeyPath = cert.key;
        tlsPem = cert.cert;
      };
    };
  };

  # Plain kernel NFS server exporting cfg.nfs.exports.
  nfs = {
    services.nfs.server = {
      enable = true;
      exports = cfg.nfs.exports;
    };
  };
in {
  options.features.fs = {
    # FIX: description was a copy-paste of the nfs sub-option.
    enable = mkEnableOption "Enable fileserver features";

    nfs = {
      enable = mkEnableOption "Enable nfs fileserver";
      # Contents of /etc/exports for the NFS server.
      exports = mkOption {
        type = types.str;
        default = "";
      };
    };

    # Path to the CA used to initialize the local PKI.
    initca = mkOption {
      # FIX: nullOr — `null` is not a valid `path` value.
      type = types.nullOr types.path;
      default = null;
    };

    glusterfs = {
      enable = mkEnableOption "Enable glusterfs fileserver";
      # Attrset with `key` and `cert` paths for glusterd TLS.
      cert = mkOption {
        type = types.attrs;
        default = {};
      };
    };
  };

  config = mkIf cfg.enable (
    mkMerge [
      common
      (mkIf cfg.nfs.enable nfs)
      (mkIf cfg.glusterfs.enable glusterfs)
    ]
  );
}
|
||||
96
modules/hpc/alert-rules.nix
Normal file
96
modules/hpc/alert-rules.nix
Normal file
@@ -0,0 +1,96 @@
|
||||
# Prometheus alert rules for the HPC cluster, written as an attrset and
# transformed into the list-of-rules form Prometheus expects.
# Per alert: condition (expr), optional time (for, default "2m"),
# optional page (default true => severity = "page"), optional labels
# (explicit override), summary and description annotations.
{ lib }:

with lib;

let
  # Exclude pseudo/virtual filesystems from disk-space alerts.
  deviceFilter = ''fstype!="ramfs",device!="rpc_pipefs",device!="lxcfs",device!="nsfs",device!="borgfs"'';
in mapAttrsToList (name: opts: {
  alert = name;
  expr = opts.condition;
  for = opts.time or "2m";
  # FIX: honour an explicit per-alert `labels` attribute. Previously it
  # was ignored, so the watchdog's `severity = "none"` below was silently
  # replaced with `severity = "page"`.
  labels = opts.labels or (if (opts.page or true) then { severity = "page"; } else {});
  annotations = {
    summary = opts.summary;
    description = opts.description;
  };
}) {
  watchdog = {
    condition = "vector(1)";
    summary = "An alert that should always be firing to certify that Alertmanager is working properly.";
    description = ''
      This is an alert meant to ensure that the entire alerting pipeline is functional.
      This alert is always firing, therefore it should always be firing in Alertmanager
      and always fire against a receiver. There are integrations with various notification
      mechanisms that send a notification when this alert is not firing. For example the
      "DeadMansSnitch" integration in PagerDuty.
    '';
    time = "12h";
    labels = { severity = "none"; };
  };
  node_down = {
    condition = ''up{job="node"} == 0'';
    summary = "{{$labels.alias}}: Node is down.";
    time = "10m";
    description = "{{$labels.alias}} has been down for more than 10 minutes.";
  };
  node_collector_failed = {
    condition = ''node_scrape_collector_success{job="node"} == 0'';
    summary = "{{$labels.alias}}: Node collector {{$labels.collector}} failed.";
    description = "{{$labels.alias}}: The collector {{$labels.collector}} of node exporter instance {{$labels.instance}} failed.";
  };
  node_systemd_service_failed = {
    condition = ''node_systemd_unit_state{state="failed"} == 1'';
    summary = "{{$labels.alias}}: Service {{$labels.name}} failed to start.";
    description = "{{$labels.alias}} failed to (re)start service {{$labels.name}}.";
  };
  node_filesystem_full_90percent = {
    condition = ''sort(node_filesystem_free_bytes{${deviceFilter}} < node_filesystem_size_bytes{${deviceFilter}} * 0.1) / 1024^3'';
    time = "10m";
    page = false;
    summary = "{{$labels.alias}}: Filesystem is running out of space soon.";
    description = "{{$labels.alias}} device {{$labels.device}} on {{$labels.mountpoint}} got less than 10% space left on its filesystem.";
  };
  node_load15 = {
    # Compute nodes (c<rack>-<node>) are expected to run under load.
    condition = ''node_load15 / on(alias) count(node_cpu_seconds_total{mode="system"}) by (alias) >= 1.0 AND node_load15{alias !~ "c[0-9]-[0-9]"}'';
    time = "10m";
    page = false;
    summary = "{{$labels.alias}}: Running on high load: {{$value}}";
    description = "{{$labels.alias}} is running with load15 > 1 for at least 10 minutes: {{$value}}";
  };
  node_ram_using_90percent = {
    condition = ''node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes < node_memory_MemTotal_bytes * 0.1 AND node_memory_MemFree_bytes{alias !~ "c[0-9]-[0-9]"}'';
    time = "1h";
    page = false;
    summary = "{{$labels.alias}}: Using lots of RAM.";
    description = "{{$labels.alias}} is using at least 90% of its RAM for at least 1 hour.";
  };
  node_hwmon_temp = {
    condition = "node_hwmon_temp_crit_alarm_celsius == 1";
    summary = "{{$labels.alias}}: Sensor {{$labels.sensor}}/{{$labels.chip}} temp is high: {{$value}} ";
    description = "{{$labels.alias}} reports hwmon sensor {{$labels.sensor}}/{{$labels.chip}} temperature value is nearly critical: {{$value}}";
  };
  node_reboot = {
    condition = "time() - node_boot_time_seconds < 300";
    summary = "{{$labels.alias}}: Reboot";
    description = "{{$labels.alias}} just rebooted.";
  };
  node_uptime = {
    # 2592000 s = 30 days.
    condition = "time() - node_boot_time_seconds > 2592000";
    page = false;
    summary = "{{$labels.alias}}: Uptime monster";
    description = "{{$labels.alias}} has been up for more than 30 days.";
  };
  slurm_nodes_offline = {
    condition = "slurm_node_down > 0 OR slurm_node_drain > 0 OR slurm_node_err > 0 OR slurm_node_fail > 0";
    summary = "Slurm nodes offline: {{$value}}";
    description = "Slurm node(s) have been offline for more than 5m.";
  };
  node_filesystem_full_in_7d = {
    condition = ''node_filesystem_free_bytes{${deviceFilter}} ''
      + ''and predict_linear(node_filesystem_free_bytes{${deviceFilter}}[2d], 7*24*3600) <= 0'';
    page = false;
    time = "1h";
    summary = "{{$labels.alias}}: Filesystem is running out of space in 7 days.";
    description = "{{$labels.alias}} device {{$labels.device}} on {{$labels.mountpoint}} is running out of space in approx. 7 days";
  };
}
|
||||
|
||||
107
modules/hpc/beegfs/beegfs.nix
Normal file
107
modules/hpc/beegfs/beegfs.nix
Normal file
@@ -0,0 +1,107 @@
|
||||
# BeeGFS userspace tools and daemons, built from the upstream source
# archive. The kernel argument is only used to locate KDIR for the
# (client-module related) build targets.
{ pkgs, kernel ? pkgs.linux, ... } :

with pkgs;

let
  version = "7.4.0";
in stdenvNoCC.mkDerivation {
  pname = "beegfs";
  inherit version;

  src = fetchurl {
    name = "beegfs-archive-${version}.tar.bz2";
    # url = "https://git.beegfs.com/pub/v7/repository/archive.tar.bz2?ref=${version}";
    url = "https://git.beegfs.io/pub/v7/-/archive/${version}/v7-${version}.tar.bz2";
    sha256 = "sha256-VwD3z3lZIs5aOIBbwUvEkOxkFggTCv8OWuJMCga2ooo=";
  };

  nativeBuildInputs = [ which unzip pkg-config cppunit perl makeWrapper ];

  # FIX: rdma-core was listed twice.
  buildInputs = [
    gcc12
    libuuid
    attr
    xfsprogs
    zlib
    openssl
    sqlite
    rdma-core
    openssh
    gfortran
    influxdb
    curl
    pahole
  ];

  hardeningDisable = [ "format" ]; # required for building beeond

  # Upstream build scripts hard-code /bin/bash and /bin/true.
  postPatch = ''
    patchShebangs ./
    find -type f -name Makefile -exec sed -i "s:/bin/bash:${stdenv.shell}:" \{} \;
    find -type f -name Makefile -exec sed -i "s:/bin/true:true:" \{} \;
    find -type f -name "*.mk" -exec sed -i "s:/bin/true:true:" \{} \;
  '';

  buildPhase = ''
    make BEEGFS_OPENTK_IBVERBS=1 \
      KDIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build \
      ''${enableParallelBuilding:+-j''${NIX_BUILD_CORES} \
      -l''${NIX_BUILD_CORES}}
  '';

  enableParallelBuilding = true;

  # Upstream has no `make install`; copy the built artefacts by hand.
  installPhase = ''
    binDir=$out/bin
    docDir=$out/share/doc/beegfs
    includeDir=$out/include/beegfs
    libDir=$out/lib
    libDirPkg=$out/lib/beegfs
    mkdir -p $binDir $libDir $libDirPkg $docDir $includeDir
    cp common/build/libbeegfs_ib.so $libDir
    cp ctl/build/beegfs-ctl $binDir
    cp fsck/build/beegfs-fsck $binDir
    cp utils/scripts/beegfs-check-servers $binDir
    cp utils/scripts/beegfs-df $binDir
    cp utils/scripts/beegfs-net $binDir
    cp helperd/build/beegfs-helperd $binDir
    cp helperd/build/dist/etc/beegfs-helperd.conf $docDir
    cp client_module/build/dist/sbin/beegfs-setup-client $binDir
    cp client_module/build/dist/etc/beegfs-client.conf $docDir
    cp meta/build/beegfs-meta $binDir
    cp meta/build/dist/sbin/beegfs-setup-meta $binDir
    cp meta/build/dist/etc/beegfs-meta.conf $docDir
    cp mgmtd/build/beegfs-mgmtd $binDir
    cp mgmtd/build/dist/sbin/beegfs-setup-mgmtd $binDir
    cp mgmtd/build/dist/etc/beegfs-mgmtd.conf $docDir
    cp storage/build/beegfs-storage $binDir
    cp storage/build/dist/sbin/beegfs-setup-storage $binDir
    cp storage/build/dist/etc/beegfs-storage.conf $docDir
    cp client_devel/build/dist/usr/share/doc/beegfs-client-devel/examples/* $docDir
    cp -r client_devel/include/* $includeDir
  '';

  # postFixup = ''
  #   for i in $(find $out/bin -type f -executable); do
  #     wrapProgram "$i" --prefix LD_LIBRARY_PATH : $out/lib
  #   done
  # '';

  doCheck = true;

  # checkPhase = ''
  #   LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/opentk_lib/build/ \
  #     common/build/test-runner --text
  # '';

  meta = with lib; {
    description = "High performance distributed filesystem with RDMA support";
    homepage = "https://www.beegfs.io";
    platforms = [ "i686-linux" "x86_64-linux" ];
    license = {
      fullName = "BeeGFS_EULA";
      url = "https://www.beegfs.io/docs/BeeGFS_EULA.txt";
      free = true;
    };
    maintainers = with maintainers; [ "juselius" ];
  };
}
|
||||
340
modules/hpc/beegfs/default.nix
Normal file
340
modules/hpc/beegfs/default.nix
Normal file
@@ -0,0 +1,340 @@
|
||||
# NixOS module for BeeGFS: per-name configurations under
# features.hpc.beegfs.beegfs.<name> with mgmtd/meta/storage/helperd
# daemons, client kernel module, fstab entries and wrapped CLI tools.
{ config, lib, pkgs, ...} :

with lib;

let
  cfg = config.features.hpc.beegfs.beegfs;

  # kernel = pkgs.linuxPackages_5_4.kernel;
  kernel = config.boot.kernelPackages.kernel;

  beegfs = pkgs.callPackage ./beegfs.nix {
    inherit kernel;
  };
  beegfs-module = pkgs.callPackage ./kernel-module.nix {
    inherit kernel;
  };

  # functions for the generation of config files

  configMgmtd = name: cfg: pkgs.writeText "mgmt-${name}.conf" ''
    storeMgmtdDirectory = ${cfg.mgmtd.storeDir}
    storeAllowFirstRunInit = false
    connAuthFile = ${cfg.connAuthFile}
    connPortShift = ${toString cfg.connPortShift}
    ${cfg.mgmtd.extraConfig}
  '';

  configMeta = name: cfg: pkgs.writeText "meta-${name}.conf" ''
    storeMetaDirectory = ${cfg.meta.storeDir}
    sysMgmtdHost = ${cfg.mgmtdHost}
    connAuthFile = ${cfg.connAuthFile}
    connPortShift = ${toString cfg.connPortShift}
    storeAllowFirstRunInit = false
    ${cfg.meta.extraConfig}
  '';

  configStorage = name: cfg: pkgs.writeText "storage-${name}.conf" ''
    storeStorageDirectory = ${cfg.storage.storeDir}
    sysMgmtdHost = ${cfg.mgmtdHost}
    connAuthFile = ${cfg.connAuthFile}
    connPortShift = ${toString cfg.connPortShift}
    storeAllowFirstRunInit = false
    ${cfg.storage.extraConfig}
  '';

  configHelperd = name: cfg: pkgs.writeText "helperd-${name}.conf" ''
    connAuthFile = ${cfg.connAuthFile}
    ${cfg.helperd.extraConfig}
  '';

  configClientFilename = name : "/etc/beegfs/client-${name}.conf";

  configClient = name: cfg: ''
    sysMgmtdHost = ${cfg.mgmtdHost}
    connAuthFile = ${cfg.connAuthFile}
    connPortShift = ${toString cfg.connPortShift}
    ${cfg.client.extraConfig}
  '';

  serviceList = [
    { service = "meta"; cfgFile = configMeta; }
    { service = "mgmtd"; cfgFile = configMgmtd; }
    { service = "storage"; cfgFile = configStorage; }
  ];

  # functions to generate systemd.service entries

  systemdEntry = service: cfgFile: (mapAttrs' ( name: cfg:
    (nameValuePair "beegfs-${service}-${name}" (mkIf cfg.${service}.enable {
      wantedBy = [ "multi-user.target" ];
      requires = [ "network-online.target" ];
      after = [ "network-online.target" ];
      serviceConfig = rec {
        ExecStart = ''
          ${beegfs}/bin/beegfs-${service} \
            cfgFile=${cfgFile name cfg} \
            pidFile=${PIDFile}
        '';
        PIDFile = "/run/beegfs-${service}-${name}.pid";
        TimeoutStopSec = "300";
      };
    }))) cfg);

  systemdHelperd = mapAttrs' ( name: cfg:
    (nameValuePair "beegfs-helperd-${name}" (mkIf cfg.client.enable {
      wantedBy = [ "multi-user.target" ];
      requires = [ "network-online.target" ];
      after = [ "network-online.target" ];
      environment = {
        LD_LIBRARY_PATH = "${beegfs}/lib";
      };
      serviceConfig = rec {
        ExecStart = ''
          ${beegfs}/bin/beegfs-helperd \
            cfgFile=${configHelperd name cfg} \
            pidFile=${PIDFile}
        '';
        PIDFile = "/run/beegfs-helperd-${name}.pid";
        TimeoutStopSec = "300";
      };
    }))) cfg;

  # wrappers to beegfs tools. Avoid typing path of config files
  utilWrappers = mapAttrsToList ( name: cfg:
    ( pkgs.runCommand "beegfs-utils-${name}" {
        nativeBuildInputs = [ pkgs.makeWrapper ];
        preferLocalBuild = true;
      } ''
        mkdir -p $out/bin
        makeWrapper ${beegfs}/bin/beegfs-check-servers \
          $out/bin/beegfs-check-servers-${name} \
          --add-flags "-c ${configClientFilename name}" \
          --prefix PATH : ${lib.makeBinPath [ beegfs ]}
        makeWrapper ${beegfs}/bin/beegfs-ctl \
          $out/bin/beegfs-ctl-${name} \
          --add-flags "--cfgFile=${configClientFilename name}"
        makeWrapper ${beegfs}/bin/beegfs-ctl \
          $out/bin/beegfs-df-${name} \
          --add-flags "--cfgFile=${configClientFilename name}" \
          --add-flags --listtargets \
          --add-flags --hidenodeid \
          --add-flags --pools \
          --add-flags --spaceinfo
        makeWrapper ${beegfs}/bin/beegfs-fsck \
          $out/bin/beegfs-fsck-${name} \
          --add-flags "--cfgFile=${configClientFilename name}"
      ''
    )) cfg;

  beegfsOptions = {
    options = {
      mgmtdHost = mkOption {
        # FIX: nullOr — the null default is not a valid `str`.
        type = types.nullOr types.str;
        default = null;
        example = "master";
        description = ''Hostname of management host.'';
      };

      connAuthFile = mkOption {
        type = types.str;
        default = "";
        example = "/etc/my.key";
        description = "File containing shared secret authentication.";
      };

      connPortShift = mkOption {
        type = types.int;
        default = 0;
        example = 5;
        description = ''
          For each additional beegfs configuration shift all
          service TCP/UDP ports by at least 5.
        '';
      };

      client = {
        enable = mkEnableOption "BeeGFS client";

        mount = mkOption {
          type = types.bool;
          default = true;
          description = "Create fstab entry automatically";
        };

        mountPoint = mkOption {
          type = types.str;
          default = "/run/beegfs";
          description = ''
            Mount point under which the beegfs filesystem should be mounted.
            If mounted manually the mount option specifying the config file is needed:
            cfgFile=/etc/beegfs/beegfs-client-<name>.conf
          '';
        };

        extraConfig = mkOption {
          type = types.lines;
          default = "";
          description = ''
            Additional lines for beegfs-client.conf.
            See documentation for further details.
          '';
        };
      };

      helperd = {
        enable = mkOption {
          type = types.bool;
          default = true;
          description = ''
            Enable the BeeGFS helperd.
            The helper daemon is needed for logging purposes on the client.
            Disabling <literal>helperd</literal> allows for running the client
            with <literal>allowUnfree = false</literal>.
          '';
        };

        extraConfig = mkOption {
          type = types.lines;
          default = "";
          description = ''
            Additional lines for beegfs-helperd.conf. See documentation
            for further details.
          '';
        };
      };

      mgmtd = {
        enable = mkEnableOption "BeeGFS mgmtd daemon";

        storeDir = mkOption {
          # FIX: nullOr — the null default is not a valid `path`.
          type = types.nullOr types.path;
          default = null;
          example = "/data/beegfs-mgmtd";
          description = ''
            Data directory for mgmtd.
            Must not be shared with other beegfs daemons.
            This directory must exist and it must be initialized
            with beegfs-setup-mgmtd, e.g. "beegfs-setup-mgmtd -C -p <storeDir>"
          '';
        };

        extraConfig = mkOption {
          type = types.lines;
          default = "";
          description = ''
            Additional lines for beegfs-mgmtd.conf. See documentation
            for further details.
          '';
        };
      };

      meta = {
        enable = mkEnableOption "BeeGFS meta data daemon";

        storeDir = mkOption {
          type = types.nullOr types.path;
          default = null;
          example = "/data/beegfs-meta";
          description = ''
            Data directory for meta data service.
            Must not be shared with other beegfs daemons.
            The underlying filesystem must be mounted with xattr turned on.
            This directory must exist and it must be initialized
            with beegfs-setup-meta, e.g.
            "beegfs-setup-meta -C -s <serviceID> -p <storeDir>"
          '';
        };

        extraConfig = mkOption {
          # FIX: types.lines for consistency with the other daemons.
          type = types.lines;
          default = "";
          description = ''
            Additional lines for beegfs-meta.conf. See documentation
            for further details.
          '';
        };
      };

      storage = {
        enable = mkEnableOption "BeeGFS storage daemon";

        storeDir = mkOption {
          type = types.nullOr types.path;
          default = null;
          example = "/data/beegfs-storage";
          description = ''
            Data directories for storage service.
            Must not be shared with other beegfs daemons.
            The underlying filesystem must be mounted with xattr turned on.
            This directory must exist and it must be initialized
            with beegfs-setup-storage, e.g.
            "beegfs-setup-storage -C -s <serviceID> -i <storageTargetID> -p <storeDir>"
          '';
        };

        extraConfig = mkOption {
          # FIX: types.lines for consistency with the other daemons.
          type = types.lines;
          default = "";
          description = ''
            Additional lines for beegfs-storage.conf. See documentation
            for further details.
          '';
        };
      };
    };
  };
in
{
  ###### interface

  options.features.hpc.beegfs = {
    enable = mkEnableOption "BeeGFS";

    beegfs = mkOption {
      type = with types; attrsOf (submodule ({ ... } : beegfsOptions ));
      default = {};
      description = ''
        BeeGFS configurations. Every mount point requires a separate configuration.
      '';
    };
  };

  ###### implementation

  config = mkIf config.features.hpc.beegfs.enable {

    environment.systemPackages = utilWrappers;

    # Put the client.conf files in /etc since they are needed
    # by the commandline tools
    environment.etc = mapAttrs' ( name: cfg:
      (nameValuePair "beegfs/client-${name}.conf" (mkIf (cfg.client.enable)
      {
        enable = true;
        text = configClient name cfg;
      }))) cfg;

    # Kernel module, we need it only once per host.
    boot = mkIf (
      foldr (a: b: a || b) false
        (map (x: x.client.enable) (collect (x: x ? client) cfg)))
      {
        kernelModules = [ "beegfs" ];
        extraModulePackages = [ beegfs-module ];
      };

    # generate fstab entries
    fileSystems = mapAttrs' (name: cfg:
      (nameValuePair cfg.client.mountPoint (optionalAttrs cfg.client.mount (mkIf cfg.client.enable {
        device = "beegfs_nodev";
        fsType = "beegfs";
        mountPoint = cfg.client.mountPoint;
        options = [ "cfgFile=${configClientFilename name}" "_netdev" ];
      })))) cfg;

    # generate systemd services
    systemd.services = systemdHelperd //
      foldr (a: b: a // b) {}
        (map (x: systemdEntry x.service x.cfgFile) serviceList);
  };
}
|
||||
50
modules/hpc/beegfs/kernel-module.nix
Normal file
50
modules/hpc/beegfs/kernel-module.nix
Normal file
@@ -0,0 +1,50 @@
|
||||
# Out-of-tree BeeGFS client kernel module, built against the supplied
# kernel package (defaults to pkgs.linux).
{ pkgs, kernel ? pkgs.linux, ... } :

with pkgs;

let
  version = "7.4.0";
  # Userspace BeeGFS build; supplies the absolute libbeegfs_ib.so path
  # that is patched into RDMASocket.cpp below.
  beegfs = pkgs.callPackage ./beegfs.nix { inherit kernel; };
in stdenvNoCC.mkDerivation {
  name = "beegfs-module-${version}-${kernel.version}";

  src = fetchurl {
    name = "beegfs-archive-${version}.tar.bz2";
    # url = "https://git.beegfs.com/pub/v7/repository/archive.tar.bz2?ref=${version}";
    url = "https://git.beegfs.io/pub/v7/-/archive/${version}/v7-${version}.tar.bz2";
    sha256 = "sha256-VwD3z3lZIs5aOIBbwUvEkOxkFggTCv8OWuJMCga2ooo=";
  };

  # Kernel modules are incompatible with these hardening flags.
  hardeningDisable = [ "fortify" "pic" "stackprotector" ];

  nativeBuildInputs = [ gcc12 which kmod pahole ];

  buildInputs = kernel.moduleBuildDependencies;

  makeFlags = [ "KDIR=${kernel.dev}/lib/modules/${kernel.modDirVersion}/build/" ];

  # Upstream scripts hard-code /bin/bash, /bin/true and /bin, /usr/bin
  # paths that do not exist in the sandbox.
  postPatch = ''
    patchShebangs ./
    find -type f -name Makefile -exec sed -i "s:/bin/bash:${stdenv.shell}:" \{} \;
    find -type f -name Makefile -exec sed -i "s:/bin/true:true:" \{} \;
    find -type f -name "*.mk" -exec sed -i "s:/bin/true:true:" \{} \;
    find -type f -name "configure" -exec sed -i "s:/bin/:/usr/bin/env :" \{} \;
    find -type f -name "configure" -exec sed -i "s:/usr/bin/:/usr/bin/env :" \{} \;
    sed -i 's,libbeegfs_ib.so,${beegfs}/lib/&,' common/source/common/net/sock/RDMASocket.cpp
  '';

  # Only the client module subtree is built here.
  preBuild = "cd client_module/build";

  installPhase = ''
    instdir=$out/lib/modules/${kernel.modDirVersion}/extras/fs/beegfs
    mkdir -p $instdir
    cp beegfs.ko $instdir
  '';

  meta = with lib; {
    description = "High performance distributed filesystem with RDMA support";
    homepage = "https://www.beegfs.io";
    platforms = [ "i686-linux" "x86_64-linux" ];
    license = licenses.gpl2;
    maintainers = with maintainers; [ "juselius" ];
    # broken = stdenv.lib.versionAtLeast kernel.version "4.18";
  };
}
|
||||
9
modules/hpc/default.nix
Normal file
9
modules/hpc/default.nix
Normal file
@@ -0,0 +1,9 @@
|
||||
{
  # Aggregate all HPC feature submodules.
  imports = [
    ./beegfs
    ./hpc.nix
    ./slurm.nix
    ./monitoring.nix
    ./mft
  ];
}
|
||||
154
modules/hpc/hpc.nix
Normal file
154
modules/hpc/hpc.nix
Normal file
@@ -0,0 +1,154 @@
|
||||
# HPC cluster feature module: common node configuration plus optional
# frontend / login / compute / knem roles, selected via features.hpc.*.
{ pkgs, lib, config, ... } :

with lib;

let
  cfg = config.features.hpc;

  # Unlimited stack for user processes (MPI jobs need large stacks).
  stackLimit = {
    domain = "@users";
    type = "hard";
    item = "stack";
    value = "unlimited";
  };

  # Unlimited locked memory for user processes (RDMA registration).
  memlockLimit = {
    domain = "@users";
    type = "hard";
    item = "memlock";
    value = "unlimited";
  };

  # Configuration applied to every HPC node.
  configuration = {
    programs.singularity.enable = true;

    features.hpc.slurm.enable = mkDefault true;

    environment.systemPackages = with pkgs; [
      git
      cmakeCurses
      nco
      neovim
      python3
      gfortran
      # intel-mpi
      # openmpi
      rdma-core
      mstflint
      squashfsTools
      linuxPackages.cpupower
      linuxPackages.turbostat
      hwloc
    ];

    # Host-based auth so jobs can ssh between cluster nodes without keys.
    services.openssh.extraConfig = ''
      HostbasedAuthentication yes
    '';

    programs.ssh.extraConfig = ''
      HostbasedAuthentication yes
      EnableSSHKeysign yes
    '';

    powerManagement = {
      enable = true;
      cpuFreqGovernor = "performance";
      # NOTE(review): core range 0-63 is hard-coded — verify it matches
      # the actual core count of all node types.
      powerUpCommands = ''
        ${pkgs.linuxPackages.cpupower}/bin/cpupower -c 0-63 idle-set -d 2
      '';
    };

    boot = {
      # extraModulePackages = [ knem ];
      # kernelModules = [ "knem" ];
      kernel.sysctl = {
        # ASLR off: some HPC tools rely on stable address layouts.
        "kernel.randomize_va_space" = 0;
      };
    };

    # services.udev.extraRules = ''
    #   KERNEL=="knem", NAME="knem", GROUP="users", MODE="0660"
    # '';

    security.pam.services = {
      sshd.limits = [ stackLimit memlockLimit ];
      sudo.limits = [ stackLimit memlockLimit ];
    };

    programs.bash.shellInit = ''
      ulimit -l unlimited
      ulimit -s unlimited
    '';

    programs.fish.shellInit = ''
      ulimit -l unlimited
      ulimit -s unlimited
    '';
  };

  # Cluster frontend: metrics database plus NFS-server monitoring.
  frontend = {
    services.influxdb.enable = true;
    features.monitoring.nodeExporter.extraCollectors = [ "nfsd" ];
  };

  # Interactive login nodes.
  login = {
    environment.systemPackages = with pkgs; [
      # tigervnc
      # tightvnc
      turbovnc
      emacs
    ];
    security.sudo.extraConfig = ''
      %sif ALL=(ALL) NOPASSWD: /run/current-system/sw/bin/singularity
    '';
  };

  # Batch compute nodes.
  compute = {
    boot.kernelParams = [ "mitigations=off" ]; # spectre/meltdown
    features.monitoring.nodeExporter.extraCollectors = [ "nfs" ];
  };

  # intel-mpi = pkgs.callPackage ./intel-mpi.nix {};

  # knem kernel module for fast intra-node MPI transfers.
  knem =
    let
      kernel = config.boot.kernelPackages.kernel;
      knem = pkgs.callPackage ./knem.nix { inherit kernel; };
      # xpmem = pkgs.callPackage ./xpmem.nix { inherit kernel; };
    in {
      boot = {
        kernelPackages = pkgs.linuxKernel.packages.linux_5_10;
        extraModulePackages = [ knem ];
        kernelModules = [ "knem" ];
      };

      services.udev.extraRules = ''
        KERNEL=="knem", NAME="knem", GROUP="users", MODE="0660"
      '';
    };
in
{
  options.features.hpc = {
    enable = mkEnableOption "Enable HPC features";

    frontend = mkEnableOption "Enable frontend features";

    login = mkEnableOption "Enable login node features";

    compute = mkEnableOption "Enable compute features";

    knem = mkEnableOption "Enable knem for openmpi";
  };

  config = mkIf cfg.enable (mkMerge [
    configuration
    (mkIf cfg.frontend frontend)
    (mkIf cfg.login login)
    (mkIf cfg.compute compute)
    (mkIf cfg.knem knem)
  ]);
}
|
||||
|
||||
58
modules/hpc/infiniband-exporter.nix
Normal file
58
modules/hpc/infiniband-exporter.nix
Normal file
@@ -0,0 +1,58 @@
|
||||
# Prometheus exporter for InfiniBand fabric counters, run from the
# upstream infiniband-exporter script with a pinned revision.
{ pkgs, config, lib, ... }:

with lib;

let
  cfg = config.features.monitoring.infiniband-exporter;

  # Python interpreter with the single runtime dependency of the script.
  python-env = pkgs.python3.withPackages (ps: with ps; [
    prometheus_client
  ]
  );

  # Pinned checkout of the exporter script.
  exporter = pkgs.fetchFromGitHub {
    owner = "guilbaults";
    repo = "infiniband-exporter";
    rev = "12e7b2de049fc3c33c44e164f426dd723c8479c0";
    hash = "sha256-+n09beiJEgOgX+3po7fjiwZrziug+5N4JHi7ivTYa9U=";
  };

  # Optional node-name-map file handed to the exporter.
  nameMap = pkgs.writeTextFile {
    name = "infiniband-node-name-map.txt";
    text = cfg.nameMap;
  };

  infiniband-exporter-service = {
    systemd.services.infiniband-exporter = {
      enable = true;
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
      description = "Prometheus InfiniBand exporter";
      # The script shells out to ibquery tools from rdma-core.
      path = [ pkgs.rdma-core ];
      script = "${python-env}/bin/python3 ${exporter}/infiniband-exporter.py"
        + " --port ${builtins.toString cfg.port} --can-reset-counter"
        + optionalString (cfg.nameMap != null) " --node-name-map=${nameMap}";
      serviceConfig = {
        RestartSec = "15s";
        Restart = "on-failure";
      };
    };
  };
in {
  options.features.monitoring.infiniband-exporter = {
    enable = mkEnableOption "Enable InfiniBand prometheus exporter";

    port = mkOption {
      type = types.ints.unsigned;
      default = 9683;
      description = "Collector http port";
    };

    nameMap = mkOption {
      type = types.nullOr types.str;
      default = null;
      description = "Node name map";
    };
  };

  config = mkIf cfg.enable infiniband-exporter-service;
}
|
||||
69
modules/hpc/intel-mpi.nix
Normal file
69
modules/hpc/intel-mpi.nix
Normal file
@@ -0,0 +1,69 @@
|
||||
# Repackage the Intel oneAPI MPI binary distribution: unpack the vendor
# tarball, patch ELF interpreters/rpaths, rewrite embedded install paths
# and wrap mpiexec.hydra with the fabric environment.
{ stdenv, lib, glibc, gcc, file , patchelf , makeWrapper }:

let

  # Path the vendor installer would have used; occurrences of it inside
  # scripts are rewritten to $out below.
  preinstDir = "opt/intel/oneapi/mpi/${version}";
  version = "2021.1.1";

  self = stdenv.mkDerivation rec {
    inherit version;
    name = "intelmpi-${version}";
    src = ./intel-mpi.tgz;

    nativeBuildInputs= [ file patchelf makeWrapper ];

    # Prebuilt vendor binaries: leave ELF sections and symbols alone.
    dontPatchELF = true;
    dontStrip = true;

    installPhase = ''
      mpi=$out/opt/intel/oneapi/mpi
      mkdir -p $mpi
      cp -r * $mpi
      cp -rs $mpi/${version}/bin $out
    '';

    postFixup = ''
      echo "Patching rpath and interpreter..."
      for f in $(find $out -type f -executable); do
        type="$(file -b --mime-type $f)"
        case "$type" in
          "application/executable"|"application/x-executable")
            echo "Patching executable: $f"
            patchelf --set-interpreter $(echo ${glibc}/lib/ld-linux*.so.2) --set-rpath ${glibc}/lib:\$ORIGIN:\$ORIGIN/../lib $f || true
            ;;
          "application/x-sharedlib"|"application/x-pie-executable")
            echo "Patching library: $f"
            patchelf --set-rpath ${glibc}/lib:\$ORIGIN:\$ORIGIN/../lib:\$ORIGIN/../../libfabric/lib $f || true
            ;;
          *)
            echo "$f ($type) not patched"
            ;;
        esac
      done
      echo "Fixing path into scripts..."
      for file in `grep -l -r "/${preinstDir}" $out`; do
        sed -e "s,/${preinstDir},$out,g" -i $file
      done
      for file in `grep -l -r "I_MPI_SUBSTITUTE_INSTALLDIR" $out`; do
        sed -e "s,I_MPI_SUBSTITUTE_INSTALLDIR,$out,g" -i $file
      done

      wrapProgram $out/${preinstDir}/bin/mpiexec.hydra \
        --set UCX_TLS ud,sm,self \
        --set I_MPI_FABRICS shm:ofi \
        --set FI_PROVIDER_PATH $out/${preinstDir}/libfabric/lib/prov \
        --set FI_PROVIDER mlx
    '';

    passthru = {
      isIntel = true;
    };

    meta = {
      description = "Intel MPI ${version} library";
      maintainers = [ lib.maintainers.dguibert ];
      platforms = lib.platforms.linux;
    };
  };
in self
|
||||
|
||||
46
modules/hpc/knem.nix
Normal file
46
modules/hpc/knem.nix
Normal file
@@ -0,0 +1,46 @@
|
||||
{ pkgs, kernel ? pkgs.linux, ... } :
|
||||
with pkgs;
|
||||
let
|
||||
version = "master";
|
||||
kdir="${kernel.dev}/lib/modules/${kernel.modDirVersion}";
|
||||
in stdenv.mkDerivation {
|
||||
inherit version;
|
||||
name = "knem-${version}-${kernel.version}";
|
||||
|
||||
# src = fetchurl {
|
||||
# name = "knem-${version}.tar.bz2";
|
||||
# url = "https://gitlab.inria.fr/knem/knem/uploads/4a43e3eb860cda2bbd5bf5c7c04a24b6/knem-1.1.4.tar.gz";
|
||||
# sha256 = "0dq9a41s08alrgggabmlyagmwl95sczmhi36gph5axmfg42kc3lz";
|
||||
# };
|
||||
|
||||
src = fetchgit {
|
||||
name = "knem-${version}";
|
||||
url = "https://gitlab.inria.fr/knem/knem.git";
|
||||
sha256 = "sha256-ptjALI2q2AF0tvdxXm4xH+8rXO8qnRwPfWMPITjrKVI=";
|
||||
};
|
||||
|
||||
hardeningDisable = [ "fortify" "pic" "stackprotector" ];
|
||||
|
||||
nativeBuildInputs = [ which kmod pkgconf ];
|
||||
|
||||
buildInputs = kernel.moduleBuildDependencies ++ [
|
||||
libtool autoconf pkgconf automake hwloc
|
||||
] ;
|
||||
|
||||
preConfigurePhases = "preConfigure";
|
||||
preConfigure = ''
|
||||
autoupdate
|
||||
./autogen.sh
|
||||
'';
|
||||
|
||||
configureFlags = [
|
||||
"--with-linux-release=${kernel.modDirVersion}"
|
||||
"--with-linux=${kdir}/source"
|
||||
"--with-linux-build=${kdir}/build"
|
||||
];
|
||||
|
||||
installPhase = ''
|
||||
make install
|
||||
rm -rf $out/etc $out/sbin
|
||||
'';
|
||||
}
|
||||
29
modules/hpc/mft/default.nix
Normal file
29
modules/hpc/mft/default.nix
Normal file
@@ -0,0 +1,29 @@
|
||||
{config, lib, pkgs,...}:
|
||||
with lib;
|
||||
let
|
||||
kernel = config.boot.kernelPackages.kernel;
|
||||
|
||||
mft = pkgs.callPackage ./mft.nix { inherit kernel; };
|
||||
in
|
||||
{
|
||||
###### interface
|
||||
|
||||
options.features.hpc.mft = {
|
||||
enable = mkEnableOption "Mellanox MFT";
|
||||
};
|
||||
|
||||
###### implementation
|
||||
|
||||
config = mkIf config.features.hpc.mft.enable {
|
||||
environment.etc."mft/mft.conf".source = "${mft.mft}/etc/mft/mft.conf";
|
||||
environment.etc."mft/mst.conf".source = "${mft.mft}/etc/mft/mst.conf";
|
||||
environment.etc."mft/ca-bundle.crt".source = "${mft.mft}/etc/mft/ca-bundle.crt";
|
||||
|
||||
environment.systemPackages = [ pkgs.pciutils mft.mft ];
|
||||
|
||||
# boot = {
|
||||
# kernelModules = [ "mst_pci" "mst_pciconf" ];
|
||||
# extraModulePackages = [ mft.mft-kernel-module ];
|
||||
# };
|
||||
};
|
||||
}
|
||||
123
modules/hpc/mft/mft.nix
Normal file
123
modules/hpc/mft/mft.nix
Normal file
@@ -0,0 +1,123 @@
|
||||
{pkgs, lib, stdenv, kernel ? pkgs.linux, ...}:
|
||||
let
|
||||
version = "4.27.0";
|
||||
ver = "${version}-83";
|
||||
arch = "amd64";
|
||||
|
||||
rpath = lib.strings.concatStringsSep ":" [
|
||||
"${pkgs.libxcrypt}/lib"
|
||||
"${pkgs.glibc}/lib"
|
||||
"${stdenv.cc.cc.lib.outPath}/lib"
|
||||
];
|
||||
|
||||
src = pkgs.fetchurl {
|
||||
url = "https://www.mellanox.com/downloads/MFT/mft-${ver}-x86_64-deb.tgz";
|
||||
hash = "sha256-Mx2dyHSFkZ+vsorAd7yVe2vU8nhksoGieE+LPcA5fZA=";
|
||||
};
|
||||
|
||||
unpackPhase = ''
|
||||
PATH=${pkgs.dpkg}/bin:$PATH
|
||||
tar vfxz $src
|
||||
mv mft-${ver}-x86_64-deb deb
|
||||
'';
|
||||
|
||||
preFixup = ''
|
||||
for i in $out/usr/bin/*; do
|
||||
if $(file $i | grep -q 'ELF.*dynamic'); then
|
||||
patchelf \
|
||||
--set-interpreter "$(cat $NIX_CC/nix-support/dynamic-linker)" \
|
||||
--set-rpath "${rpath}" $i
|
||||
elif $(file $i | grep -q shell); then
|
||||
patchShebangs --build $i
|
||||
fi
|
||||
done
|
||||
'';
|
||||
|
||||
in
|
||||
rec {
|
||||
mft = stdenv.mkDerivation {
|
||||
name = "mft-${ver}";
|
||||
inherit src unpackPhase preFixup;
|
||||
|
||||
installPhase = ''
|
||||
PATH=/bin:$PATH
|
||||
dpkg -x deb/DEBS/mft_${ver}_${arch}.deb $out
|
||||
rm $out/usr/bin/mst
|
||||
mv $out/etc/init.d/mst $out/usr/bin/mst
|
||||
rmdir $out/etc/init.d
|
||||
sed -i "15i export PATH=/run/current-system/sw/bin:${pkgs.kmod}/bin
|
||||
s,/usr/mst,$out&,;
|
||||
s,/sbin/modprobe,modprobe,;
|
||||
s,/sbin/lsmod,lsmod,;
|
||||
s,lsmod,${pkgs.kmod}/bin/lsmod,;
|
||||
s,modprobe \+-r,${pkgs.kmod}/bin/rmmod,;
|
||||
s,=lspci,=${pkgs.pciutils}/bin/lspci,;
|
||||
s,mbindir=,&$out,;
|
||||
s,mlibdir=,&$out,;
|
||||
s,MST_PCI_MOD=.*,MST_PCI_MOD="${mft-kernel-module}/lib/modules/${kernel.version}/extras/mft/mst_pci.ko,";
|
||||
s,MST_PCICONF_MOD=.*,MST_PCICONF_MOD="${mft-kernel-module}/lib/modules/${kernel.version}/extras/mft/mst_pciconf.ko,";
|
||||
s,^PATH=.*,PATH=\$\{PATH\}:\$\{mbindir\},;" $out/usr/bin/mst
|
||||
sed -i "s,mft_prefix_location=.*,mft_prefix_location=$out/usr," $out/etc/mft/mft.conf
|
||||
mkdir $out/bin
|
||||
cp -s $out/usr/bin/* $out/bin
|
||||
'';
|
||||
};
|
||||
|
||||
oem = stdenv.mkDerivation {
|
||||
name = "mft-oem-${ver}";
|
||||
inherit src unpackPhase preFixup;
|
||||
|
||||
installPhase = ''
|
||||
PATH=/bin:$PATH
|
||||
dpkg -x deb/DEBS/mft-oem_${ver}_${arch}.deb $out
|
||||
'';
|
||||
};
|
||||
|
||||
pcap = stdenv.mkDerivation {
|
||||
name = "mft-pcap${ver}";
|
||||
inherit src unpackPhase preFixup;
|
||||
|
||||
installPhase = ''
|
||||
PATH=/bin:$PATH
|
||||
dpkg -x deb/DEBS/mft-pcap_${ver}_${arch}.deb $out
|
||||
'';
|
||||
};
|
||||
|
||||
mft-kernel-module = stdenv.mkDerivation {
|
||||
name = "mft-kernel-module";
|
||||
pname = "mft-kernel-module";
|
||||
inherit src;
|
||||
inherit unpackPhase;
|
||||
|
||||
prePatch = ''
|
||||
PATH=/bin:$PATH
|
||||
dpkg -x deb/SDEBS/kernel-mft-dkms_${ver}_all.deb source
|
||||
'';
|
||||
|
||||
preConfigure = ''
|
||||
export KSRC="${kernel.dev}/lib/modules/${kernel.modDirVersion}/build"
|
||||
export sourceRoot="/build/source/usr/src/kernel-mft-dkms-${version}"
|
||||
buildRoot () { echo $KSRC; }
|
||||
'';
|
||||
|
||||
nativeBuildInputs = kernel.moduleBuildDependencies;
|
||||
|
||||
buildPhase = ''
|
||||
cd $sourceRoot/mst_backward_compatibility/mst_pci
|
||||
make ${lib.strings.concatStringsSep " " kernel.makeFlags} -C "${kernel.dev}/lib/modules/${kernel.modDirVersion}/build" M=$(pwd) modules
|
||||
cd $sourceRoot/mst_backward_compatibility/mst_pciconf
|
||||
make ${lib.strings.concatStringsSep " " kernel.makeFlags} -C "${kernel.dev}/lib/modules/${kernel.modDirVersion}/build" M=$(pwd) modules
|
||||
'';
|
||||
|
||||
installPhase = ''
|
||||
instdir=$out/lib/modules/${kernel.modDirVersion}/extras/mft
|
||||
mkdir -p $instdir
|
||||
cp $sourceRoot/mst_backward_compatibility/mst_pci/mst_pci.ko $instdir
|
||||
cp $sourceRoot/mst_backward_compatibility/mst_pciconf/mst_pciconf.ko $instdir
|
||||
'';
|
||||
|
||||
meta = {
|
||||
description = "Mellanox MFT kernel module";
|
||||
};
|
||||
};
|
||||
}
|
||||
277
modules/hpc/monitoring.nix
Normal file
277
modules/hpc/monitoring.nix
Normal file
@@ -0,0 +1,277 @@
|
||||
{ config, lib, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
cfg = config.features.monitoring;
|
||||
|
||||
mkScrapeConfigs = configs: flip mapAttrsToList configs (k: v:
|
||||
let
|
||||
static_configs = flip map v.hostNames (name: {
|
||||
targets = [ "${name}:${toString v.port}" ];
|
||||
labels.alias = name;
|
||||
});
|
||||
in
|
||||
(mkIf (static_configs != []) ({
|
||||
inherit static_configs;
|
||||
job_name = k;
|
||||
scrape_interval = "15s";
|
||||
} // (removeAttrs v [ "hostNames" "port" ]))));
|
||||
|
||||
prometheus = {
|
||||
systemd.services.prometheus.serviceConfig.LimitNOFILE = 1024000;
|
||||
|
||||
services.prometheus = {
|
||||
enable = true;
|
||||
|
||||
ruleFiles = singleton (pkgs.writeText "prometheus-rules.yml" (builtins.toJSON {
|
||||
groups = singleton {
|
||||
name = "alerting-rules";
|
||||
rules = import ./alert-rules.nix { inherit lib; };
|
||||
};
|
||||
}));
|
||||
|
||||
scrapeConfigs = (mkScrapeConfigs ({
|
||||
node = {
|
||||
hostNames = cfg.server.scrapeHosts;
|
||||
port = 9100;
|
||||
};
|
||||
infiniband = {
|
||||
hostNames = [ "stokes" ];
|
||||
port = 9683;
|
||||
};
|
||||
slurm = {
|
||||
hostNames = [ "stokes" ];
|
||||
port = 6080;
|
||||
};
|
||||
}));
|
||||
};
|
||||
};
|
||||
|
||||
nodeExporter = {
|
||||
services.prometheus.exporters = {
|
||||
node = {
|
||||
enable = true;
|
||||
openFirewall = true;
|
||||
extraFlags = [ "--collector.disable-defaults" ];
|
||||
enabledCollectors = [
|
||||
"netstat"
|
||||
"stat"
|
||||
"systemd"
|
||||
"textfile"
|
||||
"textfile.directory /run/prometheus-node-exporter"
|
||||
"thermal_zone"
|
||||
"time"
|
||||
"udp_queues"
|
||||
"uname"
|
||||
"vmstat"
|
||||
"cpu"
|
||||
"cpufreq"
|
||||
"diskstats"
|
||||
"edac"
|
||||
"filesystem"
|
||||
"hwmon"
|
||||
"interrupts"
|
||||
"ksmd"
|
||||
"loadavg"
|
||||
"meminfo"
|
||||
"pressure"
|
||||
"timex"
|
||||
# "nfsd"
|
||||
# "nfs"
|
||||
# "rapl"
|
||||
] ++ cfg.nodeExporter.extraCollectors;
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = [ 9093 9100 ];
|
||||
};
|
||||
|
||||
webUI = let net = config.networking; in {
|
||||
services.grafana = {
|
||||
enable = true;
|
||||
domain = "grafana.${net.domain}";
|
||||
port = 2342;
|
||||
addr = "127.0.0.1";
|
||||
};
|
||||
|
||||
security.acme = {
|
||||
acceptTerms = true;
|
||||
email = cfg.webUI.acmeEmail;
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = [ 80 443 ];
|
||||
|
||||
services.nginx = {
|
||||
enable = true;
|
||||
|
||||
statusPage = true;
|
||||
|
||||
virtualHosts = {
|
||||
# "acme.${net.domain}" = {
|
||||
# serverAliases = [ "*.svc.${net.domain}" ];
|
||||
# # /var/lib/acme/.challenges must be writable by the ACME user
|
||||
# # and readable by the Nginx user.
|
||||
# locations."/.well-known/acme-challenge" = {
|
||||
# root = "/var/lib/acme/acme-challenge";
|
||||
# };
|
||||
# locations."/" = {
|
||||
# return = "301 https://$host$request_uri";
|
||||
# };
|
||||
# };
|
||||
|
||||
${config.services.grafana.domain} = {
|
||||
forceSSL = true;
|
||||
enableACME = true;
|
||||
serverAliases = [];
|
||||
locations."/" = {
|
||||
proxyPass = "http://127.0.0.1:${toString config.services.grafana.port}";
|
||||
proxyWebsockets = true;
|
||||
extraConfig = webUIExtraConfig;
|
||||
};
|
||||
};
|
||||
|
||||
"prometheus.${net.domain}" = {
|
||||
forceSSL = true;
|
||||
enableACME = true;
|
||||
serverAliases = [];
|
||||
locations."/" = {
|
||||
proxyPass = "http://127.0.0.1:${toString config.services.prometheus.port}";
|
||||
proxyWebsockets = true;
|
||||
extraConfig = webUIExtraConfig;
|
||||
};
|
||||
};
|
||||
|
||||
"alertmanager.${net.domain}" = {
|
||||
forceSSL = true;
|
||||
enableACME = true;
|
||||
serverAliases = [];
|
||||
locations."/" = {
|
||||
proxyPass = "http://127.0.0.1:${toString config.services.prometheus.alertmanager.port}";
|
||||
proxyWebsockets = true;
|
||||
extraConfig = webUIExtraConfig;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
webUIExtraConfig =
|
||||
(builtins.foldl' (a: x:
|
||||
a + "\nallow ${x};" ) cfg.webUI.extraConfig cfg.webUI.allow)
|
||||
+ "\ndeny all;";
|
||||
|
||||
alertmanager = {
|
||||
systemd.services.alertmanager.serviceConfig.LimitNOFILE = 1024000;
|
||||
|
||||
services.prometheus.alertmanager = {
|
||||
enable = true;
|
||||
configuration = {
|
||||
route = {
|
||||
receiver = "default";
|
||||
routes = [
|
||||
{
|
||||
group_by = [ "alertname" "alias" ];
|
||||
group_wait = "5s";
|
||||
group_interval = "3m";
|
||||
repeat_interval = "3h";
|
||||
match = { severity = "page"; };
|
||||
receiver = "page";
|
||||
}
|
||||
{
|
||||
group_by = [ "alertname" "alias" ];
|
||||
group_wait = "30s";
|
||||
group_interval = "5m";
|
||||
repeat_interval = "6h";
|
||||
receiver = "default";
|
||||
}
|
||||
];
|
||||
};
|
||||
receivers = [
|
||||
({ name = "default"; } // cfg.server.defaultAlertReceiver)
|
||||
({ name = "page"; } // cfg.server.pageAlertReceiver)
|
||||
];
|
||||
inhibit_rules = [
|
||||
# {
|
||||
# target_match = {
|
||||
# alertname = "node_collector_failed";
|
||||
# };
|
||||
# target_match_re = {
|
||||
# alias = "c[0-9]-[0-9]";
|
||||
# collector = "nfsd";
|
||||
# };
|
||||
# }
|
||||
];
|
||||
};
|
||||
};
|
||||
|
||||
services.prometheus = {
|
||||
alertmanagers = singleton {
|
||||
static_configs = singleton {
|
||||
targets = [ "localhost:9093" ];
|
||||
# targets = flip map cfg.server.scrapeHosts (n: "${n}:9093");
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
in {
|
||||
options.features.monitoring = {
|
||||
server = {
|
||||
enable = mkEnableOption "HPC cluster monitoring server with prometheus";
|
||||
|
||||
scrapeHosts = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
};
|
||||
|
||||
defaultAlertReceiver = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
};
|
||||
|
||||
pageAlertReceiver = mkOption {
|
||||
type = types.attrs;
|
||||
default = {};
|
||||
};
|
||||
};
|
||||
|
||||
nodeExporter.enable = mkEnableOption "Enable node exporter";
|
||||
|
||||
nodeExporter.extraCollectors = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
};
|
||||
|
||||
webUI = {
|
||||
enable = mkEnableOption "Enable web UI for monitoring";
|
||||
|
||||
acmeEmail = mkOption {
|
||||
type = types.str;
|
||||
default = null;
|
||||
};
|
||||
|
||||
allow = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
};
|
||||
|
||||
extraConfig = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = mkMerge [
|
||||
(mkIf cfg.server.enable (mkMerge [
|
||||
prometheus
|
||||
alertmanager
|
||||
]))
|
||||
|
||||
(mkIf cfg.nodeExporter.enable nodeExporter)
|
||||
|
||||
(mkIf cfg.webUI.enable webUI)
|
||||
];
|
||||
|
||||
imports = [ ./infiniband-exporter.nix ./slurm-exporter.nix ];
|
||||
}
|
||||
67
modules/hpc/slurm-exporter.nix
Normal file
67
modules/hpc/slurm-exporter.nix
Normal file
@@ -0,0 +1,67 @@
|
||||
{pkgs, config, lib, ...}:
|
||||
with lib;
|
||||
let
|
||||
cfg = config.features.monitoring.slurm-exporter;
|
||||
|
||||
slurm-exporter-service = {
|
||||
systemd.services.slurm-exporter = {
|
||||
enable = true;
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "network.target" ];
|
||||
description = "Prometheus SLURM exporter";
|
||||
script = ''
|
||||
PATH=$(${pkgs.nix}/bin/nix-store -q --referrers ${pkgs.slurm} | ${pkgs.gnugrep}/bin/grep wrappedSlurm)/bin
|
||||
${slurm-exporter}/bin/prometheus-slurm-exporter -listen-address :${builtins.toString cfg.port}
|
||||
'';
|
||||
serviceConfig = {
|
||||
RestartSec = "15s";
|
||||
Restart = "on-failure";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
slurm-exporter = pkgs.buildGoModule rec {
|
||||
pname = "prometheus-slurm-exporter";
|
||||
version = "master";
|
||||
|
||||
src = pkgs.fetchFromGitHub {
|
||||
owner = "vpenso";
|
||||
repo = "prometheus-slurm-exporter";
|
||||
rev = "master";
|
||||
sha256 = "sha256-KS9LoDuLQFq3KoKpHd8vg1jw20YCNRJNJrnBnu5vxvs=";
|
||||
};
|
||||
|
||||
vendorHash = "sha256-A1dd9T9SIEHDCiVT2UwV6T02BSLh9ej6LC/2l54hgwI=";
|
||||
|
||||
nativeBuildInputs = [ pkgs.slurm ];
|
||||
|
||||
# subPackages = [ "." ];
|
||||
|
||||
# deleteVendor = true;
|
||||
|
||||
# runVend = true;
|
||||
|
||||
doCheck = false;
|
||||
|
||||
meta = with lib; {
|
||||
description = "Simple command-line snippet manager, written in Go";
|
||||
homepage = "https://github.com/knqyf263/pet";
|
||||
license = licenses.mit;
|
||||
maintainers = with maintainers; [ kalbasit ];
|
||||
platforms = platforms.linux ++ platforms.darwin;
|
||||
};
|
||||
};
|
||||
|
||||
in {
|
||||
options.features.monitoring.slurm-exporter = {
|
||||
enable = mkEnableOption "Enable SLURM prometheus exporter";
|
||||
|
||||
port = mkOption {
|
||||
type = types.ints.unsigned;
|
||||
default = 8080;
|
||||
description = "Collector http port";
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable slurm-exporter-service;
|
||||
}
|
||||
234
modules/hpc/slurm.nix
Normal file
234
modules/hpc/slurm.nix
Normal file
@@ -0,0 +1,234 @@
|
||||
{ pkgs, lib, config, ... }:
|
||||
with lib;
|
||||
let
|
||||
cfg = config.features.hpc.slurm;
|
||||
|
||||
configuration = {
|
||||
services.munge.enable = true;
|
||||
environment.etc."munge/munge.key" = {
|
||||
source = cfg.mungeKey;
|
||||
mode = "0400";
|
||||
uid = cfg.mungeUid;
|
||||
gid = 0;
|
||||
};
|
||||
|
||||
services.slurm = {
|
||||
controlMachine = cfg.controlMachine;
|
||||
nodeName = cfg.nodeName;
|
||||
partitionName = cfg.partitionName;
|
||||
extraConfig = ''
|
||||
# AccountingStorageType=accounting_storage/none
|
||||
AccountingStorageType=accounting_storage/slurmdbd
|
||||
JobAcctGatherType=jobacct_gather/linux
|
||||
MailDomain=${cfg.mailDomain}
|
||||
MailProg=/run/wrappers/bin/sendmail
|
||||
SelectType=select/cons_tres
|
||||
SelectTypeParameters=CR_Core
|
||||
# AuthAltTypes=auth/jwt
|
||||
# AuthAltParameters=jwt_key=/var/spool/slurm/statesave/jwt_hs256.key
|
||||
'';
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = [ 6818 ];
|
||||
|
||||
nixpkgs.overlays = [ slurm-ucx ];
|
||||
};
|
||||
|
||||
|
||||
slurmServer = {
|
||||
services.mysql = {
|
||||
enable = true;
|
||||
package = pkgs.mariadb;
|
||||
ensureUsers = [
|
||||
{
|
||||
name = "slurm";
|
||||
ensurePermissions = {
|
||||
"slurm_acct_db.*" = "ALL PRIVILEGES";
|
||||
};
|
||||
}
|
||||
];
|
||||
initialDatabases = [
|
||||
{ name = "slurm_acct_db"; }
|
||||
];
|
||||
};
|
||||
|
||||
services.slurm = {
|
||||
server.enable = true;
|
||||
# extraConfig = ''
|
||||
# MailDomain=itpartner.no
|
||||
# MailProg=${pkgs.ssmtp}/bin/ssmtp
|
||||
# '';
|
||||
dbdserver = {
|
||||
enable = true;
|
||||
# dbdHost = cfg.controlMachine;
|
||||
# storagePass = cfg.storagePass;
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall.allowedTCPPorts = [ 6817 ];
|
||||
};
|
||||
|
||||
slurmClient = {
|
||||
services.slurm.client.enable = true;
|
||||
systemd.services.slurmd.serviceConfig = {
|
||||
Restart = "on-failure";
|
||||
};
|
||||
};
|
||||
|
||||
slurm-ucx = self: super: with super.pkgs; {
|
||||
slurm = super.slurm.overrideAttrs (attrs: {
|
||||
buildInputs = attrs.buildInputs ++ [ ucx http-parser pkg-config ];
|
||||
|
||||
nativeBuildInputs = attrs.nativeBuildInputs ++ [ makeWrapper ];
|
||||
|
||||
configureFlags =
|
||||
attrs.configureFlags ++ [
|
||||
"--with-ucx=${ucx.dev}"
|
||||
"--with-http-parser=${http-parser}"
|
||||
"--enable-slurmrestd"
|
||||
];
|
||||
|
||||
postFixup = ''
|
||||
wrapProgram $out/bin/slurmstepd --set LD_LIBRARY_PATH ${ucx}/lib
|
||||
wrapProgram $out/bin/srun --set SLURM_MPI_TYPE "pmix"
|
||||
'';
|
||||
# --set PSM3_PKEY "${cfg.pkey}" \
|
||||
# --set PMIX_MCA_gds "^ds12" \
|
||||
});
|
||||
};
|
||||
|
||||
hipster = {
|
||||
users.groups.hipster.gid = 2001;
|
||||
users.users.hipster = {
|
||||
description = "Job runner";
|
||||
home = "/work/hipster";
|
||||
group = "hipster";
|
||||
extraGroups = [
|
||||
"users"
|
||||
];
|
||||
uid = 2001;
|
||||
isNormalUser = true;
|
||||
createHome = false;
|
||||
useDefaultShell = true;
|
||||
};
|
||||
};
|
||||
|
||||
slurmrestd = {
|
||||
systemd.tmpfiles.rules = [ "d /run/slurmrestd 0750 hipster hipster -" ];
|
||||
|
||||
systemd.services.slurmrestd = {
|
||||
description = "Slurm REST API service";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
after = [ "slurmd.service" ];
|
||||
serviceConfig = {
|
||||
Type = "simple";
|
||||
User = "hipster";
|
||||
Group = "hipster";
|
||||
};
|
||||
environment = {
|
||||
# SLURM_JWT = "daemon";
|
||||
};
|
||||
script = ''
|
||||
rm -f /run/slurmrestd/hipster.socket
|
||||
/run/current-system/sw/bin/slurmrestd -v -a rest_auth/local unix:/run/slurmrestd/hipster.socket
|
||||
'';
|
||||
serviceConfig = {
|
||||
RuntimeDirectory = "slurmrestd";
|
||||
};
|
||||
};
|
||||
|
||||
systemd.sockets.slurm-http-proxy = {
|
||||
enable = true;
|
||||
description = "Proxy slurmrestd unix socket to port 6822";
|
||||
listenStreams = [ "0.0.0.0:6822" ];
|
||||
wantedBy = [ "sockets.target" ];
|
||||
# Allow multiple instances of corresponding service.
|
||||
socketConfig.Accept = true;
|
||||
};
|
||||
|
||||
systemd.services."slurm-http-proxy@" = {
|
||||
enable = true;
|
||||
description = "Proxy slurmrestd unix socket to port 6822";
|
||||
serviceConfig = {
|
||||
ExecStart = "-${pkgs.socat}/bin/socat STDIO UNIX-CONNECT:/run/slurmrestd/hipster.socket";
|
||||
StandardInput="socket";
|
||||
User = "hipster";
|
||||
Group = "hipster";
|
||||
};
|
||||
};
|
||||
};
|
||||
in
|
||||
{
|
||||
options.features.hpc.slurm = {
|
||||
enable = mkEnableOption "Enable SLURM batch system";
|
||||
|
||||
mungeKey = mkOption {
|
||||
type = types.path;
|
||||
default = null;
|
||||
};
|
||||
|
||||
mungeUid = mkOption {
|
||||
type = types.int;
|
||||
default = 997;
|
||||
};
|
||||
|
||||
pkey = mkOption {
|
||||
type = types.str;
|
||||
default = "0x7fff";
|
||||
};
|
||||
|
||||
controlMachine = mkOption {
|
||||
type = types.str;
|
||||
default = null;
|
||||
};
|
||||
server = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
};
|
||||
|
||||
client = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
};
|
||||
|
||||
hipster = mkOption {
|
||||
type = types.bool;
|
||||
default = true;
|
||||
};
|
||||
|
||||
slurmrestd = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
};
|
||||
|
||||
nodeName = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
};
|
||||
|
||||
partitionName = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
};
|
||||
|
||||
storagePass = mkOption {
|
||||
type = types.str;
|
||||
default = null;
|
||||
};
|
||||
|
||||
mailDomain = mkOption {
|
||||
type = types.str;
|
||||
default = null;
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable (
|
||||
mkMerge [
|
||||
configuration
|
||||
(mkIf cfg.server slurmServer)
|
||||
(mkIf cfg.client slurmClient)
|
||||
(mkIf (cfg.hipster) hipster)
|
||||
(mkIf (cfg.server && cfg.hipster) slurmrestd)
|
||||
(mkIf (cfg.slurmrestd && cfg.hipster) slurmrestd)
|
||||
]);
|
||||
}
|
||||
38
modules/hpc/xpmem.nix
Normal file
38
modules/hpc/xpmem.nix
Normal file
@@ -0,0 +1,38 @@
|
||||
{ pkgs, kernel ? pkgs.linux, ... } :
|
||||
with pkgs;
|
||||
let
|
||||
version = "master";
|
||||
kdir="${kernel.dev}/lib/modules/${kernel.modDirVersion}";
|
||||
in stdenv.mkDerivation {
|
||||
inherit version;
|
||||
name = "xpmem-${version}-${kernel.version}";
|
||||
|
||||
src = fetchgit {
|
||||
name = "xpmem-${version}";
|
||||
url = "https://github.com/hpc/xpmem.git";
|
||||
sha256 = "sha256-lB4adWBLhaj32Ll11jWPuCJSuKF8EVBG9L1Pk8HIbzY=";
|
||||
};
|
||||
|
||||
hardeningDisable = [ "fortify" "pic" "stackprotector" ];
|
||||
|
||||
nativeBuildInputs = [ which kmod ];
|
||||
|
||||
buildInputs = kernel.moduleBuildDependencies ++ [
|
||||
libtool autoconf pkgconf automake hwloc
|
||||
] ;
|
||||
|
||||
preConfigurePhases = "preConfigure";
|
||||
preConfigure = ''
|
||||
autoupdate
|
||||
./autogen.sh
|
||||
'';
|
||||
|
||||
configureFlags = [
|
||||
"--with-kerneldir=${kdir}/source"
|
||||
];
|
||||
|
||||
installPhase = ''
|
||||
make install
|
||||
rm -rf $out/etc $out/sbin
|
||||
'';
|
||||
}
|
||||
476
modules/k8s/default.nix
Normal file
476
modules/k8s/default.nix
Normal file
@@ -0,0 +1,476 @@
|
||||
{ pkgs, lib, config, ...}:
|
||||
with lib;
|
||||
let
|
||||
cfg = config.features.k8s;
|
||||
host = config.features.host;
|
||||
|
||||
pki = import ./pki.nix { inherit pkgs; ca = cfg.initca; };
|
||||
|
||||
secret = name: "${config.services.kubernetes.secretsPath}/${name}.pem";
|
||||
|
||||
mkEasyCert = { name, CN, hosts ? [], fields ? {}, action ? "",
|
||||
privateKeyOwner ? "kubernetes" }: rec {
|
||||
inherit name CN hosts fields action;
|
||||
caCert = secret "ca";
|
||||
cert = secret name;
|
||||
key = secret "${name}-key";
|
||||
privateKeyOptions = {
|
||||
owner = privateKeyOwner;
|
||||
group = "nogroup";
|
||||
mode = "0600";
|
||||
path = key;
|
||||
};
|
||||
};
|
||||
|
||||
hostName = config.networking.hostName;
|
||||
domain = config.networking.domain;
|
||||
|
||||
apiserverAddress = "https://${cfg.master.address}:4443";
|
||||
|
||||
cfssl-apitoken =
|
||||
let
|
||||
version = "1.0";
|
||||
apitoken = pkgs.stdenv.mkDerivation {
|
||||
name = "apitoken-${version}";
|
||||
inherit version;
|
||||
buildCommand = ''
|
||||
head -c ${toString (32 / 2)} /dev/urandom | \
|
||||
od -An -t x | tr -d ' ' > $out
|
||||
chmod 400 $out
|
||||
'';
|
||||
};
|
||||
in
|
||||
# make ca derivation sha depend on initca cfssl output
|
||||
pkgs.stdenv.mkDerivation {
|
||||
name = "cfssl-apitoken";
|
||||
src = apitoken;
|
||||
buildCommand = ''
|
||||
cp $src $out
|
||||
'';
|
||||
};
|
||||
|
||||
cluster-scripts =
|
||||
with builtins;
|
||||
let
|
||||
first = head cfg.ingressNodes;
|
||||
rest = tail cfg.ingressNodes;
|
||||
ingressNodes = foldl' (a: x: a + ",${x}") first rest;
|
||||
nodeNames = foldl' (a: x: a + " " + x.name) cfg.master.name cfg.nodes;
|
||||
ingressReplicaCount =
|
||||
toString (length cfg.ingressNodes);
|
||||
etcdNodes =
|
||||
let
|
||||
etcdaddrs = attrValues cfg.etcdCluster.nodes;
|
||||
first = head etcdaddrs;
|
||||
rest = tail etcdaddrs;
|
||||
in
|
||||
if cfg.etcdCluster.enable && length etcdaddrs > 0
|
||||
then foldl' (x: a: a + ",${x}") first rest
|
||||
else "${cfg.master.address}";
|
||||
show-kubernetes-charts-config = ''
|
||||
#!/usr/bin/env bash
|
||||
cat << EOF
|
||||
# Generated by show-kubernetes-charts-config
|
||||
# $(date)
|
||||
# Charts in git@gitlab.com:serit/k8s/k8s-charts
|
||||
top="\$(cd "\$(dirname "\$BASH_SOURCE[0]")" >/dev/null 2>&1 && pwd)"
|
||||
|
||||
vars=(
|
||||
initca="${pki.initca}"
|
||||
apiserver="${cfg.master.name}"
|
||||
apiserverip="${cfg.master.address}"
|
||||
etcd_nodes="${etcdNodes}"
|
||||
cluster="${cfg.clusterName}"
|
||||
ingress_nodes="${ingressNodes}"
|
||||
ingress_replica_count="${ingressReplicaCount}"
|
||||
fileserver="${cfg.fileserver}"
|
||||
acme_email="${cfg.charts.acme_email}"
|
||||
grafana_smtp_user="$(echo -n ${cfg.charts.grafana_smtp_user} | base64 -w0)"
|
||||
grafana_smtp_password="$(echo -n ${cfg.charts.grafana_smtp_password} | base64 -w0)"
|
||||
)
|
||||
|
||||
nodenames=( ${nodeNames} )
|
||||
nodes=(${builtins.foldl' (a: x: a + " " + x.address) cfg.master.address cfg.nodes})
|
||||
|
||||
. \$top/functions.sh
|
||||
EOF
|
||||
'';
|
||||
in
|
||||
pkgs.stdenv.mkDerivation {
|
||||
name = "cluster-scripts";
|
||||
src = ./scripts;
|
||||
buildCommand = ''
|
||||
mkdir -p $out/bin
|
||||
cp $src/* $out/bin
|
||||
echo '${show-kubernetes-charts-config}' > $out/bin/show-kubernetes-charts-config
|
||||
chmod a+x $out/bin/show-kubernetes-charts-config
|
||||
|
||||
rm $out/bin/restart-flannel.sh $out/bin/restart-kubernetes.sh
|
||||
sed 's/@master@/${cfg.master.name}/; s/@nodes@/${nodeNames}/' \
|
||||
$src/restart-flannel.sh > $out/bin/restart-flannel.sh
|
||||
chmod a+x $out/bin/restart-flannel.sh
|
||||
|
||||
sed 's/@master@/${cfg.master.name}/; s/@nodes@/${nodeNames}/' \
|
||||
$src/restart-kubernetes.sh > $out/bin/restart-kubernetes.sh
|
||||
chmod a+x $out/bin/restart-kubernetes.sh
|
||||
'';
|
||||
};
|
||||
|
||||
etcd-cluster-scripts =
|
||||
let
|
||||
etcd-join-cluster = ''
|
||||
#!/usr/bin/env bash
|
||||
export ETCD_ADVERTISE_CLIENT_URLS=https://${host.address}:2379
|
||||
export ETCD_CERT_FILE=/var/lib/kubernetes/secrets/etcd.pem
|
||||
export ETCD_CLIENT_CERT_AUTH=1
|
||||
export ETCD_DATA_DIR=/var/lib/etcd
|
||||
export ETCD_INITIAL_ADVERTISE_PEER_URLS=https://${host.address}:2380
|
||||
export ETCD_INITIAL_CLUSTER=${host.name}=https://${host.address}:2380
|
||||
export ETCD_INITIAL_CLUSTER_STATE=existing
|
||||
export ETCD_INITIAL_CLUSTER_TOKEN=etcd-cluster
|
||||
export ETCD_KEY_FILE=/var/lib/kubernetes/secrets/etcd-key.pem
|
||||
export ETCD_LISTEN_CLIENT_URLS=https://${host.address}:2379
|
||||
export ETCD_LISTEN_PEER_URLS=https://${host.address}:2380
|
||||
export ETCD_NAME=${host.name}
|
||||
export ETCD_PEER_CERT_FILE=/var/lib/kubernetes/secrets/etcd.pem
|
||||
export ETCD_PEER_KEY_FILE=/var/lib/kubernetes/secrets/etcd-key.pem
|
||||
export ETCD_PEER_TRUSTED_CA_FILE=/var/lib/kubernetes/secrets/ca.pem
|
||||
export ETCD_TRUSTED_CA_FILE=/var/lib/kubernetes/secrets/ca.pem
|
||||
|
||||
for i in $*; do
|
||||
ETCD_INITIAL_CLUSTER=$ETCD_INITIAL_CLUSTER,$i
|
||||
done
|
||||
|
||||
if [ "x${builtins.toString cfg.master.enable}" = x1 ]; then
|
||||
echo "Refusing to run on master node! Exiting."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "WARNING! WARNING! WARNING!"
|
||||
echo "This script destroys the existing etcd database on the current host!"
|
||||
echo "Sleeping 10 seconds before proceeding... ctrl-c is your friend"
|
||||
sleep 11 # give one second extra just in case
|
||||
|
||||
systemctl stop etcd.service
|
||||
rm -rf /var/lib/etcd/*
|
||||
sudo -E -u etcd etcd
|
||||
'';
|
||||
in
|
||||
pkgs.stdenv.mkDerivation {
|
||||
name = "etcd-cluster-scripts";
|
||||
buildCommand = ''
|
||||
mkdir -p $out/bin
|
||||
echo '${etcd-join-cluster}' > $out/bin/etcd-join-cluster
|
||||
chmod a+x $out/bin/etcd-join-cluster
|
||||
'';
|
||||
};
|
||||
|
||||
install-apitoken = ''
|
||||
#!${pkgs.bash}/bin/bash
|
||||
set -e
|
||||
if [ "x${builtins.toString cfg.master.enable}" = x1 -a -d /var/lib/cfssl ]; then
|
||||
cp ${cfssl-apitoken} /var/lib/cfssl/apitoken.secret
|
||||
chown cfssl /var/lib/cfssl/apitoken.secret
|
||||
chmod 640 /var/lib/cfssl/apitoken.secret
|
||||
else
|
||||
mkdir -p /var/lib/kubernetes/secrets
|
||||
cp ${cfssl-apitoken} /var/lib/kubernetes/secrets/apitoken.secret
|
||||
chown root /var/lib/kubernetes/secrets/apitoken.secret
|
||||
chmod 600 /var/lib/kubernetes/secrets/apitoken.secret
|
||||
fi
|
||||
'';
|
||||
|
||||
common = {
|
||||
security.pki.certificateFiles = [ "${pki.initca}/ca.pem" ];
|
||||
environment.systemPackages = [
|
||||
pkgs.nfs-utils
|
||||
etcd-cluster-scripts
|
||||
];
|
||||
environment.variables = {
|
||||
ETCDCTL_API = "3";
|
||||
};
|
||||
networking = {
|
||||
firewall.allowedTCPPortRanges = [ { from = 5000; to = 50000; } ];
|
||||
firewall.allowedTCPPorts = [ 80 443 111 ];
|
||||
firewall.allowedUDPPorts = [ 111 24007 24008 ];
|
||||
};
|
||||
boot.kernel.sysctl = {
|
||||
"fs.inotify.max_user_instances" = 1024;
|
||||
"fs.inotify.max_user_watches" = 65536;
|
||||
};
|
||||
};
|
||||
|
||||
kubeMaster = {
|
||||
services.cfssl.ca = pki.ca.cert;
|
||||
services.cfssl.caKey = pki.ca.key;
|
||||
services.kubernetes = {
|
||||
roles = [ "master" ];
|
||||
inherit apiserverAddress;
|
||||
masterAddress = "${cfg.master.name}.${domain}";
|
||||
clusterCidr = cfg.cidr;
|
||||
pki.genCfsslCACert = false;
|
||||
pki.genCfsslAPIToken = false;
|
||||
pki.caCertPathPrefix = "${pki.initca}/ca";
|
||||
|
||||
kubelet = {
|
||||
# clusterDomain = "${cfg.clusterName}.local";
|
||||
};
|
||||
|
||||
apiserver = {
|
||||
advertiseAddress = cfg.master.address;
|
||||
authorizationMode = [ "Node" "RBAC" ];
|
||||
allowPrivileged = true;
|
||||
securePort = 4443;
|
||||
serviceClusterIpRange = "10.0.0.0/22";
|
||||
extraOpts = "--requestheader-client-ca-file ${pki.ca.cert}";
|
||||
extraSANs = cfg.master.extraSANs;
|
||||
verbosity = 2;
|
||||
etcd.servers =
|
||||
with builtins;
|
||||
let clusterNodes = attrValues cfg.etcdCluster.nodes; in
|
||||
if cfg.etcdCluster.enable && length clusterNodes > 0 then
|
||||
mkForce (map (x: "https://${x}:2379") clusterNodes)
|
||||
else [];
|
||||
};
|
||||
|
||||
controllerManager = {
|
||||
bindAddress = cfg.master.address;
|
||||
extraOpts = "--authorization-always-allow-paths=/healthz,/metrics";
|
||||
};
|
||||
|
||||
scheduler.address = cfg.master.address;
|
||||
scheduler.extraOpts = "--authorization-always-allow-paths=/healthz,/metrics";
|
||||
|
||||
addonManager.enable = true;
|
||||
addons = {
|
||||
dns = {
|
||||
enable = true;
|
||||
# clusterDomain = "${cfg.clusterName}.local";
|
||||
reconcileMode = "EnsureExists";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall = {
|
||||
allowedTCPPorts = [ 53 5000 8080 4443 4001 2379 2380 10250 10251 10252 ];
|
||||
allowedUDPPorts = [ 53 4053 ];
|
||||
};
|
||||
|
||||
environment.systemPackages = [
|
||||
pkgs.kubernetes-helm
|
||||
pkgs.kubectl
|
||||
cluster-scripts
|
||||
];
|
||||
|
||||
systemd.services.kube-certmgr-apitoken-bootstrap = {
|
||||
description = "Kubernetes certmgr bootstrapper";
|
||||
wantedBy = [ "cfssl.service" ];
|
||||
before = [ "cfssl.target" ];
|
||||
script = install-apitoken;
|
||||
serviceConfig = {
|
||||
RestartSec = "10s";
|
||||
Restart = "on-failure";
|
||||
};
|
||||
};
|
||||
|
||||
systemd.services.cfssl-restart = {
|
||||
enable = true;
|
||||
startAt = "00/4:00";
|
||||
description = "Restrart cfssl which regularly locks up";
|
||||
script = "systemctl restart cfssl.service";
|
||||
};
|
||||
|
||||
systemd.services.kube-socat-https-proxy = {
|
||||
enable = cfg.master.socat443;
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
after = [ "kubelet.target" ];
|
||||
description = "Proxy TCP port 443 to ingress NodePort at 30443";
|
||||
script = "${pkgs.socat}/bin/socat TCP-LISTEN:443,fork,reuseaddr TCP:127.0.0.1:30443";
|
||||
serviceConfig = {
|
||||
RestartSec = "10s";
|
||||
Restart = "on-failure";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
etcdClusterNode = {
|
||||
services.etcd = {
|
||||
enable = true;
|
||||
clientCertAuth = true;
|
||||
peerClientCertAuth = true;
|
||||
listenClientUrls = mkForce ["https://${host.address}:2379"];
|
||||
listenPeerUrls = mkForce ["https://${host.address}:2380"];
|
||||
advertiseClientUrls = mkForce ["https://${host.address}:2379"];
|
||||
initialAdvertisePeerUrls = mkForce ["https://${host.address}:2380"];
|
||||
name = "${host.name}";
|
||||
certFile = secret "etcd";
|
||||
keyFile = secret "etcd-key";
|
||||
trustedCaFile = secret "ca";
|
||||
extraConf =
|
||||
if cfg.etcdCluster.existing
|
||||
then { "INITIAL_CLUSTER_STATE" = "existing"; }
|
||||
else {};
|
||||
initialCluster = with builtins;
|
||||
if length (attrValues cfg.etcdCluster.nodes) == 0
|
||||
then mkForce [ "${host.name}=https://${host.address}:2380" ]
|
||||
else mkForce (attrValues
|
||||
(mapAttrs (k: v: "${k}=https://${v}:2380") cfg.etcdCluster.nodes));
|
||||
};
|
||||
|
||||
services.kubernetes.pki.certs =
|
||||
{
|
||||
etcd = mkEasyCert {
|
||||
name = "etcd";
|
||||
CN = host.name;
|
||||
hosts = [
|
||||
"etcd.local"
|
||||
"etcd.cluster.local"
|
||||
"etcd.${domain}"
|
||||
host.name
|
||||
host.address
|
||||
];
|
||||
privateKeyOwner = "etcd";
|
||||
action = "systemctl restart etcd.service";
|
||||
};
|
||||
};
|
||||
|
||||
networking.firewall = {
|
||||
allowedTCPPorts = [ 2379 2380 ];
|
||||
};
|
||||
};
|
||||
|
||||
kubeNode = {
|
||||
services.kubernetes = rec {
|
||||
roles = [ "node" ];
|
||||
inherit apiserverAddress;
|
||||
# masterAddress = cfg.master.name;
|
||||
masterAddress = "${cfg.master.name}.${domain}";
|
||||
clusterCidr = cfg.cidr;
|
||||
# kubelet.clusterDomain = "${cfg.clusterName}.local";
|
||||
kubelet.hostname = "${hostName}";
|
||||
proxy.hostname = "${hostName}";
|
||||
proxy.extraOpts = "--metrics-bind-address 0.0.0.0:10249";
|
||||
};
|
||||
|
||||
networking = {
|
||||
firewall = {
|
||||
enable = true;
|
||||
allowedTCPPorts = [ 4194 10250 ];
|
||||
allowedUDPPorts = [ 53 ];
|
||||
};
|
||||
};
|
||||
virtualisation.docker.enable = false; # conflicts with containerd!
|
||||
virtualisation.docker.autoPrune.enable = pkgs.lib.mkForce false; # conflicts with linkerd2
|
||||
systemd.services.kube-certmgr-apitoken-bootstrap = {
|
||||
description = "Kubernetes certmgr bootstrapper";
|
||||
wantedBy = [ "certmgr.service" ];
|
||||
before = [ "certmgr.service" ];
|
||||
script = install-apitoken;
|
||||
serviceConfig = {
|
||||
RestartSec = "10s";
|
||||
Restart = "on-failure";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
in {
|
||||
options.features.k8s = {
|
||||
enable = mkEnableOption "Enable kubernetes";
|
||||
|
||||
clusterName = mkOption {
|
||||
type = types.str;
|
||||
default = null;
|
||||
};
|
||||
|
||||
nodes = mkOption {
|
||||
type = types.listOf types.attrs;
|
||||
default = [];
|
||||
};
|
||||
|
||||
fileserver = mkOption {
|
||||
type = types.str;
|
||||
default = null;
|
||||
};
|
||||
|
||||
cidr = mkOption {
|
||||
type = types.str;
|
||||
default = "10.0.0.0/16";
|
||||
};
|
||||
|
||||
ingressNodes = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = null;
|
||||
};
|
||||
|
||||
initca = mkOption {
|
||||
type = types.path;
|
||||
default = null;
|
||||
};
|
||||
|
||||
master = {
|
||||
enable = mkEnableOption "Enable kubernetes master node";
|
||||
|
||||
address = mkOption {
|
||||
type = types.str;
|
||||
default = null;
|
||||
};
|
||||
|
||||
name = mkOption {
|
||||
type = types.str;
|
||||
default = null;
|
||||
};
|
||||
|
||||
extraSANs = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [];
|
||||
};
|
||||
|
||||
socat443 = mkEnableOption "Enable socat on port 443 -> 30443";
|
||||
};
|
||||
|
||||
node = {
|
||||
enable = mkEnableOption "Enable kubernetes";
|
||||
};
|
||||
|
||||
etcdCluster = {
|
||||
enable = mkEnableOption "Enable kubernetes";
|
||||
existing = mkEnableOption "Existing cluster";
|
||||
nodes = mkOption {
|
||||
type = types.attrs;
|
||||
default = { "${host.name}" = "${host.address}"; };
|
||||
};
|
||||
};
|
||||
|
||||
charts = {
|
||||
acme_email = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
};
|
||||
|
||||
grafana_smtp_user = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
};
|
||||
|
||||
grafana_smtp_password = mkOption {
|
||||
type = types.str;
|
||||
default = "";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
config = mkIf cfg.enable (
|
||||
mkMerge [
|
||||
common
|
||||
(mkIf cfg.master.enable kubeMaster)
|
||||
(mkIf cfg.node.enable kubeNode)
|
||||
(mkIf cfg.etcdCluster.enable etcdClusterNode)
|
||||
]
|
||||
);
|
||||
|
||||
imports = [
|
||||
../overrides/kubernetes_default.nix
|
||||
../overrides/kubelet.nix
|
||||
];
|
||||
}
|
||||
|
||||
42
modules/k8s/initca.nix
Normal file
42
modules/k8s/initca.nix
Normal file
@@ -0,0 +1,42 @@
|
||||
{
  pkgs ? import <nixpkgs> {},
  ca ? null,
  name ? "ca",
  algo ? "rsa",
  hosts ? [],
  ...}:
with pkgs;
let
  # CSR specification for a self-signed root CA named `name`.
  caCsr = pkgs.writeText "${name}-csr.json" (builtins.toJSON {
    inherit hosts;
    CN = name;
    key = {
      inherit algo;
      # ecdsa uses curve size 256; rsa uses 2048-bit keys.
      size = if algo == "ecdsa" then 256 else 2048;
    };
    names = [
      {
        CN = name;
        O = "NixOS";
        OU = "${name}.pki.caSpec";
        L = "generated";
      }
    ];
  });
  # Freshly generated CA; only used when no external `ca` path is supplied.
  generatedCa =
    pkgs.runCommand "initca" {
      buildInputs = [ pkgs.cfssl ];
    } '' cfssl genkey -initca ${caCsr} | cfssljson -bare ca;
       mkdir -p $out; cp *.pem $out '';
  initca = if ca != null then ca else generatedCa;
in
# make ca derivation sha depend on initca cfssl output
pkgs.stdenv.mkDerivation {
  inherit name;
  src = initca;
  buildCommand = ''
    mkdir -p $out;
    cp -r $src/* $out
  '';
}
|
||||
82
modules/k8s/pki.nix
Normal file
82
modules/k8s/pki.nix
Normal file
@@ -0,0 +1,82 @@
|
||||
{ pkgs, ca ? "", algo ? "rsa" }:
let
  # Root CA derivation (generated or imported by ./initca.nix).
  initca = import ./initca.nix { inherit pkgs ca; };

  ca' = {
    key = "${initca}/ca-key.pem";
    cert = "${initca}/ca.pem";
  };

  # cfssl signing policy: one year expiry, certs usable for both server
  # and client auth.
  ca-config = pkgs.writeText "ca-config.json" ''
    {
      "signing": {
        "default": {
          "expiry": "8760h"
        },
        "profiles": {
          "default": {
            "usages": [
              "signing",
              "key encipherment",
              "server auth",
              "client auth"
            ],
            "expiry": "8760h"
          }
        }
      }
    }
  '';

  # Build a cfssl CSR file for CN `args.cn`, organisation `args.o` and
  # SAN list `args.hosts`.
  gencsr = args:
    let
      csr = {
        CN = "${args.cn}";
        key = {
          inherit algo;
          size = if algo == "ecdsa" then 256 else 2048;
        };
        names = [
          {
            CN = "${args.cn}";
            O = "${args.o}";
            OU = "${args.cn}.${args.o}.pki.caSpec";
            L = "generated";
          }
        ];
        hosts = args.hosts;
      };
    in
      pkgs.writeText "${args.cn}-csr.json" (builtins.toJSON csr);
in
# Example usage (fixed: the original example had an unterminated quote and
# was missing the required hosts attribute):
#
#   gencert { cn = "test"; ca = ca; o = "test"; hosts = []; };
#
rec {
  inherit initca;
  ca = ca';
  gencert = attrs:
    let
      # NOTE(review): attrs.ca is accepted and stored here but never used
      # below — every certificate is signed by the module-level `ca` above.
      # Confirm before passing a different CA to gencert.
      conf = {
        cn = attrs.cn;
        ca = attrs.ca;
        csr = gencsr { cn = attrs.cn; o = attrs.o; hosts = attrs.hosts; };
      };
      # Shell fragment that signs the CSR with the module CA and copies
      # the resulting PEMs into $out.
      cfssl = conf:
        ''
          cfssl gencert -ca ${ca.cert} -ca-key ${ca.key} \
            -config=${ca-config} -profile=default ${conf.csr} | \
          cfssljson -bare cert; \
          mkdir -p $out; cp *.pem $out
        '';
      crt =
        pkgs.runCommand "${attrs.cn}" {
          buildInputs = [ pkgs.cfssl ];
        } (cfssl conf);
    in
    {
      key = "${crt}/cert-key.pem";
      cert = "${crt}/cert.pem";
    };
}
|
||||
1
modules/k8s/scripts/docker-prune-stopped.fish
Executable file
1
modules/k8s/scripts/docker-prune-stopped.fish
Executable file
@@ -0,0 +1 @@
|
||||
# Prune all unused docker data on cluster nodes k0-2 .. k0-5.
# Bug fix: the ssh target was the literal "k0-" — the node index $i was
# never appended, so the same (nonexistent) host was hit every iteration.
# NOTE(review): host naming k0-$i inferred from `seq 2 5` — confirm.
for i in (seq 2 5); ssh k0-$i docker system prune -a; end
|
||||
7
modules/k8s/scripts/etcd-snapshot.sh
Normal file
7
modules/k8s/scripts/etcd-snapshot.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash

# Take an etcd snapshot (written to ./snapshot.db) using the apiserver's
# etcd client certificate.

export ETCDCTL_API=3

secrets=/var/lib/kubernetes/secrets

etcdctl --endpoints https://etcd.local:2379 \
    --cacert="$secrets/ca.pem" \
    --cert="$secrets/kube-apiserver-etcd-client.pem" \
    --key="$secrets/kube-apiserver-etcd-client-key.pem" \
    snapshot save snapshot.db
|
||||
5
modules/k8s/scripts/get-admin-token.sh
Executable file
5
modules/k8s/scripts/get-admin-token.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash

# Print the bearer token of the cluster-admin service account, decoded.

secret=$(kubectl get secret -n kube-system | grep cluster-admin-token | cut -d' ' -f1)

kubectl get secret -n kube-system "$secret" -o yaml \
    | grep ' token:' | cut -d' ' -f4 | base64 -d
|
||||
49
modules/k8s/scripts/gitlab-prune-registry.sh
Executable file
49
modules/k8s/scripts/gitlab-prune-registry.sh
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env bash

# Prune old image tags from the GitLab container registry and then run the
# in-cluster registry garbage collector.
#
# SECURITY FIX: the private API token used to be committed here in clear
# text; it must now be supplied via the environment. The previously
# committed token should be revoked in GitLab.
token=${GITLAB_TOKEN:?GITLAB_TOKEN must be set}
api=https://gitlab.com/api/v4

# Delete all but the 10 newest numeric tags of every registry repository
# belonging to project $1 (numeric id or URL-encoded path).
prune () {
    id=$1
    reg=$(curl -s --header "PRIVATE-TOKEN: $token" \
        "$api/projects/$id/registry/repositories" \
        | json_pp | sed -n 's/^ *"id" *: *\([0-9]\+\).*/\1/p')
    for i in $reg; do
        curl -s --request DELETE --data 'keep_n=10' \
            --data 'name_regex=.*[0-9].*' \
            --header "PRIVATE-TOKEN: $token" \
            "$api/projects/$id/registry/repositories/$i/tags"
    done
}

# Run the registry garbage collector inside the gitlab registry pod to
# reclaim the blobs freed by prune().
gc () {
    pod=$(kubectl get pod -n gitlab -lapp=registry | tail -1 | cut -d' ' -f1)
    kubectl exec -n gitlab $pod -- \
        registry garbage-collect /etc/docker/registry/config.yml -m
}

# Prune every project of every group visible to the token.
all () {
    groups=$(curl -s --header "PRIVATE-TOKEN: $token" "$api/groups" \
        | json_pp | sed -n 's/^ *"id" *: *\([0-9]\+\).*/\1/p')
    for g in $groups; do
        proj=$(curl -s --header "PRIVATE-TOKEN: $token" \
            "$api/groups/$g/projects?simple=true&include_subgroups=true" \
            | json_pp | sed -n 's/^ \{6\}"id" *: *\([0-9]\+\).*/\1/p')
        for p in $proj; do
            prune $p
        done
    done
}

# Prune the explicitly named projects; "group/name" paths are URL-encoded.
projects () {
    for i in $@; do
        prune $(echo $i | sed 's,/,%2F,g')
    done
}

case $1 in
    --all) all ;;
    *) projects $@
esac

gc
|
||||
24
modules/k8s/scripts/inject-linkerd.sh
Executable file
24
modules/k8s/scripts/inject-linkerd.sh
Executable file
@@ -0,0 +1,24 @@
|
||||
#!/usr/bin/env bash

# Enable linkerd sidecar injection for the given namespaces (or all
# non-system namespaces) and roll all workloads so the proxy is picked up.

TOP="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

# Bug fix: this used to probe `which kubectl`, so the guard passed even
# when the linkerd CLI was missing and the script failed later.
linkerd=$(which linkerd 2> /dev/null)

if [ -z "$linkerd" ]; then
    echo "linkerd cli is not available"
    exit 1
fi

# Annotate namespace $i for injection and restart all workload kinds.
inject () {
    for i in $@; do
        kubectl get ns $i -o yaml | linkerd inject - | kubectl apply -f-
        kubectl rollout restart daemonsets -n $i
        kubectl rollout restart statefulsets -n $i
        kubectl rollout restart deployments -n $i
    done
}

# Bug fix: `[ $# > 0 ]` is a shell redirection inside test — it created a
# file named "0" and was always true. Use an arithmetic comparison.
if [ $# -gt 0 ]; then
    inject $@
else
    inject $(kubectl get ns | sed "1d; /kube-system/d; s/ .*//")
fi
|
||||
29
modules/k8s/scripts/inject-sa-pull-secrets.sh
Executable file
29
modules/k8s/scripts/inject-sa-pull-secrets.sh
Executable file
@@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash

# Patch the docker/gitlab image pull secrets into one service account of a
# namespace, or into all of them.

TOP="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

if [ $# != 2 ]; then
    echo "usage: inject-sa-pull-secrets.sh {namespace} {all|serviceaccount}"
    exit 1
fi

namespace=$1
sa=$2

# Attach both pull secrets to service account $1 in $namespace.
inject () {
    kubectl patch serviceaccount "$1" \
        -n "$namespace" \
        -p "{\"imagePullSecrets\": [ \
            {\"name\": \"docker-pull-secret\"}, \
            {\"name\": \"gitlab-pull-secret\"} \
        ]}"
}

# Robustness: quote expansions so empty or unusual names cannot break the
# test or splice extra kubectl arguments.
if [ "$sa" = all ]; then
    for i in $(kubectl get sa -n "$namespace" | sed '1d;s/\([^ ]\+\).*/\1/'); do
        inject "$i"
    done
else
    inject "$sa"
fi
|
||||
|
||||
76
modules/k8s/scripts/install-namespace.sh
Executable file
76
modules/k8s/scripts/install-namespace.sh
Executable file
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env bash

# Create/refresh a namespace with linkerd injection enabled and install
# the docker and gitlab image pull secrets into it.

# NOTE(review): `set +e` keeps errors non-fatal on purpose — the
# `kubectl get secret` probe below is expected to fail when the secret
# does not exist yet.
set +e

TOP="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

if [ x$1 = x ]; then
    # Bug fix: this was `ehco`, which crashed with "command not found"
    # instead of printing the usage message.
    echo "usage: install-namespace.sh {namespace|all}"
    exit 1
fi

namespace=$1

# Apply the namespace manifest with the linkerd injection annotation.
setup_namespace () {
    local namespace
    namespace=$1
    cat << EOF | kubectl apply -f -
apiVersion: v1
kind: Namespace
metadata:
  annotations:
    linkerd.io/inject: enabled
  labels:
    name: $namespace
  name: $namespace
EOF
}

# (Re)create the docker hub pull secret in the namespace.
create_docker_secret () {
    local namespace
    namespace=$1
    kubectl get secret docker-pull-secret -n $namespace >/dev/null 2>&1
    [ $? = 0 ] && kubectl delete secret docker-pull-secret -n $namespace

    # SECURITY FIX: the docker hub password used to be committed here in
    # clear text; it must now come from the environment, and the leaked
    # credential should be rotated.
    kubectl create secret docker-registry docker-pull-secret \
        -n $namespace \
        --docker-username=juselius \
        --docker-password="${DOCKER_PULL_PASSWORD:?DOCKER_PULL_PASSWORD must be set}" \
        --docker-email=jonas.juselius@gmail.com
}

# (Re)create the gitlab registry pull secret.
# SECURITY NOTE(review): the base64 payload below embeds a gitlab deploy
# token; consider rotating it and sourcing it from the environment too.
create_gitlab_secret () {
    local namespace
    namespace=$1
    cat << EOF | kubectl apply -f -
apiVersion: v1
metadata:
  name: gitlab-pull-secret
  namespace: $namespace
kind: Secret
type: kubernetes.io/dockerconfigjson
data:
  .dockerconfigjson: ewoJImF1dGhzIjogewoJCSJyZWdpc3RyeS5naXRsYWIuY29tIjogewoJCQkiYXV0aCI6ICJaMmwwYkdGaUsyUmxjR3h2ZVMxMGIydGxiaTB4T1Rnd01qQTZPRmxqU0VoMFZIaENSVUZUTFZKUWRsSnJXbGM9IgoJCX0KCX0sCgkiSHR0cEhlYWRlcnMiOiB7CgkJIlVzZXItQWdlbnQiOiAiRG9ja2VyLUNsaWVudC8xOS4wMy4xMiAobGludXgpIgoJfQp9Cg==
EOF
}

# Inject the pull secrets into every service account of the namespace.
inject_pull_secrets () {
    local namespace
    namespace=$1
    $TOP/inject-sa-pull-secrets.sh $namespace all
}

configure_namespace () {
    setup_namespace $1
    create_docker_secret $1
    create_gitlab_secret $1
    inject_pull_secrets $1
}

if [ "x$namespace" = "xall" ]; then
    for i in $(kubectl get ns | sed '1d;/^kube-system/d;s/\([^ ]\+\).*/\1/'); do
        configure_namespace $i
    done
else
    configure_namespace $namespace
fi
|
||||
13
modules/k8s/scripts/k8s-all
Executable file
13
modules/k8s/scripts/k8s-all
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env bash

# Fetch every listable namespaced resource kind from a namespace; the
# output may include some clutter.

if [ $# -ne 1 ]; then
    echo "Usage: k8s-all [namespace]"
    exit 1
fi

for resource in $(kubectl api-resources --verbs=list --namespaced -o name); do
    echo "=== Resource: $resource ==="; echo \
        && kubectl get "$resource" -n "$1" --ignore-not-found \
        && echo
done
|
||||
16
modules/k8s/scripts/lost-sock.sh
Executable file
16
modules/k8s/scripts/lost-sock.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env bash

# Report linkerd-proxied pods with sockets stuck in CLOSE_WAIT (state 08)
# or FIN_WAIT_2 (state 05), as read from /proc/net/tcp inside the proxy.

pods=$(kubectl get po -A -l linkerd.io/control-plane-ns -ojsonpath="{range .items[*]}{.metadata.name} {.metadata.namespace}{'\n'}{end}")

IFS=" "

while read name namespace; do
    tcp=$(kubectl exec -n $namespace $name -c linkerd-proxy -- cat /proc/net/tcp)
    # Bug fix: $tcp must be quoted — unquoted, the newlines collapse to
    # spaces and awk only ever sees a single record, so the counts were
    # computed from the first line alone. Column 4 is the socket state.
    close_wait=$(echo "$tcp" | awk 'BEGIN {cnt=0} $4=="08" {cnt++} END {print cnt}')
    fin_wait_2=$(echo "$tcp" | awk 'BEGIN {cnt=0} $4=="05" {cnt++} END {print cnt}')
    if [ "$close_wait" -gt "0" -o "$fin_wait_2" -gt "0" ]; then
        echo "$name.$namespace has $close_wait sockets in CLOSE_WAIT and $fin_wait_2 sockets in FIN_WAIT_2"
    else
        echo "$name.$namespace is okay"
    fi
done <<< "$pods"
|
||||
3
modules/k8s/scripts/reset-sa-tokens.sh
Executable file
3
modules/k8s/scripts/reset-sa-tokens.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash

# Delete every service-account token secret in the cluster; kubernetes
# recreates them on demand, effectively rotating all SA tokens.
kubectl delete secrets --field-selector='type=kubernetes.io/service-account-token' --all-namespaces
|
||||
21
modules/k8s/scripts/restart-flannel.sh
Executable file
21
modules/k8s/scripts/restart-flannel.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash

# Restart flannel on every worker node in parallel (and, when configured,
# a master-side unit). @nodes@ / @master@ are substituted at build time.

# master="etcd.service"
master=""
node="flannel.service"

nodes="@nodes@"
master_node="@master@"
# nodes=$(kubectl get nodes --no-headers | cut -d' ' -f1)
# master_node=$(echo $nodes | cut -d' ' -f1)

# Bug fix: $master defaults to empty, and `systemctl restart` with no unit
# argument fails — only restart a master unit when one is configured.
if [ -n "$master" ]; then
    echo "$master_node: systemctl restart $master"
    sudo systemctl restart $master
fi

for n in $nodes; do
    echo "$n: systemctl restart $node"
    ssh root@$n systemctl restart $node &
done

echo "Waiting..."
wait
|
||||
20
modules/k8s/scripts/restart-kubernetes.sh
Executable file
20
modules/k8s/scripts/restart-kubernetes.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash

# Restart the control-plane units on this (master) host and the node-side
# units on every worker, in parallel. @nodes@ / @master@ are substituted
# at build time.

master_units="kube-apiserver kube-scheduler kube-controller-manager"
node_units="kube-proxy kubelet kube-certmgr-apitoken-bootstrap"

nodes="@nodes@"
master_node="@master@"
# nodes=$(kubectl get nodes --no-headers | cut -d' ' -f1)
# master_node=$(echo $nodes | cut -d' ' -f1)

echo "$master_node: systemctl restart $master_units"
sudo systemctl restart $master_units

for host in $nodes; do
    echo "$host: systemctl restart $node_units"
    ssh root@$host systemctl restart $node_units &
done

echo "Waiting..."
wait
|
||||
3
modules/k8s/scripts/taint-node-no-schedule.sh
Executable file
3
modules/k8s/scripts/taint-node-no-schedule.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh

# Taint node $1 so that only pods tolerating ClusterService=true get
# scheduled onto it.
kubectl taint node $1 ClusterService="true":NoSchedule
|
||||
20
modules/k8s/scripts/update-helm-repos.sh
Executable file
20
modules/k8s/scripts/update-helm-repos.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash

# Register the standard set of helm chart repositories and refresh the
# local index.

repos=(
    "stable=https://charts.helm.sh/stable"
    "ingress-nginx=https://kubernetes.github.io/ingress-nginx"
    "prometheus-community=https://prometheus-community.github.io/helm-charts"
    "hashicorp=https://helm.releases.hashicorp.com"
    "bitnami=https://charts.bitnami.com/bitnami"
    "minio=https://helm.min.io/"
    "anchore=https://charts.anchore.io"
    "linkerd=https://helm.linkerd.io/stable"
)

# Robustness fix: the original set IFS="=" inside the loop and never
# restored it, and clobbered the positional parameters with `set`.
# Parameter expansion splits each "name=url" pair without side effects.
for entry in "${repos[@]}"; do
    name=${entry%%=*}
    url=${entry#*=}
    helm repo add "$name" "$url"
done

helm repo update
|
||||
5
modules/k8s/scripts/verify-valid-certificates.sh
Executable file
5
modules/k8s/scripts/verify-valid-certificates.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
# Print every cfssl / kubernetes PEM together with its expiry date, sorted.
for pem in /var/lib/cfssl/*.pem /var/lib/kubernetes/secrets/*.pem; do
    expiry=$(openssl x509 -enddate -noout -in "$pem" | cut -d= -f 2)
    printf 'exp: %s: %s\n' "$(date --date="$expiry" --iso-8601)" "$pem"
done | sort
|
||||
12
modules/k8s/scripts/ws-curl.sh
Executable file
12
modules/k8s/scripts/ws-curl.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/bin/sh

# Probe a websocket endpoint with curl: the first argument becomes the
# Host header, the remaining arguments are passed through (usually the URL).

host=$1; shift

curl -i -N \
    -H "Connection: upgrade" \
    -H "Upgrade: websocket" \
    -H "Sec-WebSocket-Key: SGVsbG8sIHdvcmxkIQ==" \
    -H "Sec-WebSocket-Version: 13" \
    -H "Origin: http://foo.com/" \
    -H "Host: $host" $@
|
||||
|
||||
3
modules/k8s/scripts/zap-crashing-pods.sh
Executable file
3
modules/k8s/scripts/zap-crashing-pods.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash

# Force-delete every pod currently in CrashLoopBackOff, in all namespaces.
# Bug fix: the original body was fish syntax (`for i in (...); ...; end`)
# under a bash shebang and failed with a syntax error in bash.
kubectl get pods -A | grep CrashLoop \
    | sed 's/^\([^ ]\+\) \+\([^ ]\+\) .*/kubectl delete pod -n \1 \2 --force=true/' \
    | while read -r cmd; do eval "$cmd"; done
|
||||
3
modules/k8s/scripts/zap-evicted-pods.sh
Executable file
3
modules/k8s/scripts/zap-evicted-pods.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash

# Remove evicted/failed pods cluster-wide (status.phase == Failed).
kubectl delete pods -A --field-selector 'status.phase==Failed'
|
||||
5
modules/k8s/scripts/zap-node-exporters.sh
Executable file
5
modules/k8s/scripts/zap-node-exporters.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash

# Kill the node_exporter process on every k* node reported by kubectl.
for node in $(kubectl get nodes | sed -nr 's/^(k[^ ]+) .*/\1/p'); do
    ssh root@$node pkill node_exporter
done
|
||||
33
modules/overrides/certmgr.nix
Normal file
33
modules/overrides/certmgr.nix
Normal file
@@ -0,0 +1,33 @@
|
||||
# Overlay packaging cloudflare's certmgr with a local patch.
self: super:
let
  inherit (super) lib buildGoModule fetchFromGitHub;
in {
  certmgr = buildGoModule rec {
    pname = "certmgr";
    version = "3.0.3";

    src = fetchFromGitHub {
      owner = "cloudflare";
      repo = "certmgr";
      rev = "v${version}";
      hash = "sha256-MgNPU06bv31tdfUnigcmct8UTVztNLXcmTg3H/J7mic=";
    };

    # Accept kubernetes system certs (CN only, no SAN entries); see
    # ./certmgr.patch.
    patches = [ ./certmgr.patch ];

    vendorHash = null;

    ldflags = [ "-s" "-w" ];

    meta = with lib; {
      homepage = "https://cfssl.org/";
      description = "Cloudflare's automated certificate management using a CFSSL CA";
      mainProgram = "certmgr";
      platforms = platforms.linux;
      license = licenses.bsd2;
      maintainers = with maintainers; [ johanot srhb ];
    };
  };
}
|
||||
14
modules/overrides/certmgr.patch
Normal file
14
modules/overrides/certmgr.patch
Normal file
@@ -0,0 +1,14 @@
|
||||
diff --git a/cert/verification.go b/cert/verification.go
|
||||
index 39f255c..97fa613 100644
|
||||
--- a/cert/verification.go
|
||||
+++ b/cert/verification.go
|
||||
@@ -10,6 +10,9 @@ import (
|
||||
|
||||
// CertificateMatchesHostname checks if the Certificates hosts are the same as the given hosts
|
||||
func CertificateMatchesHostname(hosts []string, cert *x509.Certificate) bool {
|
||||
+ // kubernetes system certs (i.e. CN=system:kube-proxy)
|
||||
+ if len(hosts) == 1 && len(cert.DNSNames)+len(cert.IPAddresses) == 0 { return true }
|
||||
+
|
||||
a := make([]string, len(hosts))
|
||||
for idx := range hosts {
|
||||
// normalize the IPs.
|
||||
456
modules/overrides/kubelet.nix
Normal file
456
modules/overrides/kubelet.nix
Normal file
@@ -0,0 +1,456 @@
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
options,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
top = config.services.kubernetes;
|
||||
otop = options.services.kubernetes;
|
||||
cfg = top.kubelet;
|
||||
|
||||
cniConfig =
|
||||
if cfg.cni.config != [ ] && cfg.cni.configDir != null then
|
||||
throw "Verbatim CNI-config and CNI configDir cannot both be set."
|
||||
else if cfg.cni.configDir != null then
|
||||
cfg.cni.configDir
|
||||
else
|
||||
(pkgs.buildEnv {
|
||||
name = "kubernetes-cni-config";
|
||||
paths = imap (
|
||||
i: entry: pkgs.writeTextDir "${toString (10 + i)}-${entry.type}.conf" (builtins.toJSON entry)
|
||||
) cfg.cni.config;
|
||||
});
|
||||
|
||||
infraContainer = pkgs.dockerTools.buildImage {
|
||||
name = "pause";
|
||||
tag = "latest";
|
||||
copyToRoot = pkgs.buildEnv {
|
||||
name = "image-root";
|
||||
pathsToLink = [ "/bin" ];
|
||||
paths = [ top.package.pause ];
|
||||
};
|
||||
config.Cmd = [ "/bin/pause" ];
|
||||
};
|
||||
|
||||
kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
|
||||
|
||||
# Flag based settings are deprecated, use the `--config` flag with a
|
||||
# `KubeletConfiguration` struct.
|
||||
# https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/
|
||||
#
|
||||
# NOTE: registerWithTaints requires a []core/v1.Taint, therefore requires
|
||||
# additional work to be put in config format.
|
||||
#
|
||||
kubeletConfig = pkgs.writeText "kubelet-config" (
|
||||
builtins.toJSON (
|
||||
{
|
||||
apiVersion = "kubelet.config.k8s.io/v1beta1";
|
||||
kind = "KubeletConfiguration";
|
||||
address = cfg.address;
|
||||
port = cfg.port;
|
||||
authentication = {
|
||||
x509 = lib.optionalAttrs (cfg.clientCaFile != null) { clientCAFile = cfg.clientCaFile; };
|
||||
webhook = {
|
||||
enabled = true;
|
||||
cacheTTL = "10s";
|
||||
};
|
||||
};
|
||||
authorization = {
|
||||
mode = "Webhook";
|
||||
};
|
||||
cgroupDriver = "systemd";
|
||||
hairpinMode = "hairpin-veth";
|
||||
registerNode = cfg.registerNode;
|
||||
containerRuntimeEndpoint = cfg.containerRuntimeEndpoint;
|
||||
healthzPort = cfg.healthz.port;
|
||||
healthzBindAddress = cfg.healthz.bind;
|
||||
}
|
||||
// lib.optionalAttrs (cfg.tlsCertFile != null) { tlsCertFile = cfg.tlsCertFile; }
|
||||
// lib.optionalAttrs (cfg.tlsKeyFile != null) { tlsPrivateKeyFile = cfg.tlsKeyFile; }
|
||||
// lib.optionalAttrs (cfg.clusterDomain != "") { clusterDomain = cfg.clusterDomain; }
|
||||
// lib.optionalAttrs (cfg.clusterDns != [ ]) { clusterDNS = cfg.clusterDns; }
|
||||
// lib.optionalAttrs (cfg.featureGates != { }) { featureGates = cfg.featureGates; }
|
||||
// lib.optionalAttrs (cfg.extraConfig != { }) cfg.extraConfig
|
||||
)
|
||||
);
|
||||
|
||||
manifestPath = "kubernetes/manifests";
|
||||
|
||||
taintOptions =
|
||||
with lib.types;
|
||||
{ name, ... }:
|
||||
{
|
||||
options = {
|
||||
key = mkOption {
|
||||
description = "Key of taint.";
|
||||
default = name;
|
||||
defaultText = literalMD "Name of this submodule.";
|
||||
type = str;
|
||||
};
|
||||
value = mkOption {
|
||||
description = "Value of taint.";
|
||||
type = str;
|
||||
};
|
||||
effect = mkOption {
|
||||
description = "Effect of taint.";
|
||||
example = "NoSchedule";
|
||||
type = enum [
|
||||
"NoSchedule"
|
||||
"PreferNoSchedule"
|
||||
"NoExecute"
|
||||
];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (
|
||||
mapAttrsToList (n: v: v) cfg.taints
|
||||
);
|
||||
in
|
||||
{
|
||||
disabledModules = [ "services/cluster/kubernetes/kubelet.nix" ];
|
||||
|
||||
imports = [
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "applyManifests" ] "")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "cadvisorPort" ] "")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "allowPrivileged" ] "")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "networkPlugin" ] "")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "containerRuntime" ] "")
|
||||
];
|
||||
|
||||
###### interface
|
||||
options.services.kubernetes.kubelet = with lib.types; {
|
||||
|
||||
address = mkOption {
|
||||
description = "Kubernetes kubelet info server listening address.";
|
||||
default = "0.0.0.0";
|
||||
type = str;
|
||||
};
|
||||
|
||||
clusterDns = mkOption {
|
||||
description = "Use alternative DNS.";
|
||||
default = [ "10.1.0.1" ];
|
||||
type = listOf str;
|
||||
};
|
||||
|
||||
clusterDomain = mkOption {
|
||||
description = "Use alternative domain.";
|
||||
default = config.services.kubernetes.addons.dns.clusterDomain;
|
||||
defaultText = literalExpression "config.${options.services.kubernetes.addons.dns.clusterDomain}";
|
||||
type = str;
|
||||
};
|
||||
|
||||
extraSANs = mkOption {
|
||||
description = "Extra x509 Subject Alternative Names to be added to the kubelet tls cert.";
|
||||
default = [];
|
||||
type = listOf str;
|
||||
};
|
||||
|
||||
clientCaFile = mkOption {
|
||||
description = "Kubernetes apiserver CA file for client authentication.";
|
||||
default = top.caFile;
|
||||
defaultText = literalExpression "config.${otop.caFile}";
|
||||
type = nullOr path;
|
||||
};
|
||||
|
||||
cni = {
|
||||
packages = mkOption {
|
||||
description = "List of network plugin packages to install.";
|
||||
type = listOf package;
|
||||
default = [ ];
|
||||
};
|
||||
|
||||
config = mkOption {
|
||||
description = "Kubernetes CNI configuration.";
|
||||
type = listOf attrs;
|
||||
default = [ ];
|
||||
example = literalExpression ''
|
||||
[{
|
||||
"cniVersion": "0.3.1",
|
||||
"name": "mynet",
|
||||
"type": "bridge",
|
||||
"bridge": "cni0",
|
||||
"isGateway": true,
|
||||
"ipMasq": true,
|
||||
"ipam": {
|
||||
"type": "host-local",
|
||||
"subnet": "10.22.0.0/16",
|
||||
"routes": [
|
||||
{ "dst": "0.0.0.0/0" }
|
||||
]
|
||||
}
|
||||
} {
|
||||
"cniVersion": "0.3.1",
|
||||
"type": "loopback"
|
||||
}]
|
||||
'';
|
||||
};
|
||||
|
||||
configDir = mkOption {
|
||||
description = "Path to Kubernetes CNI configuration directory.";
|
||||
type = nullOr path;
|
||||
default = null;
|
||||
};
|
||||
};
|
||||
|
||||
containerRuntimeEndpoint = mkOption {
|
||||
description = "Endpoint at which to find the container runtime api interface/socket";
|
||||
type = str;
|
||||
default = "unix:///run/containerd/containerd.sock";
|
||||
};
|
||||
|
||||
enable = mkEnableOption "Kubernetes kubelet";
|
||||
|
||||
extraOpts = mkOption {
|
||||
description = "Kubernetes kubelet extra command line options.";
|
||||
default = "";
|
||||
type = separatedString " ";
|
||||
};
|
||||
|
||||
extraConfig = mkOption {
|
||||
description = ''
|
||||
Kubernetes kubelet extra configuration file entries.
|
||||
|
||||
See also [Set Kubelet Parameters Via A Configuration File](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/)
|
||||
and [Kubelet Configuration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/).
|
||||
'';
|
||||
default = { };
|
||||
type = attrsOf ((pkgs.formats.json { }).type);
|
||||
};
|
||||
|
||||
featureGates = mkOption {
|
||||
description = "Attribute set of feature gate";
|
||||
default = top.featureGates;
|
||||
defaultText = literalExpression "config.${otop.featureGates}";
|
||||
type = attrsOf bool;
|
||||
};
|
||||
|
||||
healthz = {
|
||||
bind = mkOption {
|
||||
description = "Kubernetes kubelet healthz listening address.";
|
||||
default = "127.0.0.1";
|
||||
type = str;
|
||||
};
|
||||
|
||||
port = mkOption {
|
||||
description = "Kubernetes kubelet healthz port.";
|
||||
default = 10248;
|
||||
type = port;
|
||||
};
|
||||
};
|
||||
|
||||
hostname = mkOption {
|
||||
description = "Kubernetes kubelet hostname override.";
|
||||
defaultText = literalExpression "config.networking.fqdnOrHostName";
|
||||
type = str;
|
||||
};
|
||||
|
||||
kubeconfig = top.lib.mkKubeConfigOptions "Kubelet";
|
||||
|
||||
manifests = mkOption {
|
||||
description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)";
|
||||
type = attrsOf attrs;
|
||||
default = { };
|
||||
};
|
||||
|
||||
nodeIp = mkOption {
|
||||
description = "IP address of the node. If set, kubelet will use this IP address for the node.";
|
||||
default = null;
|
||||
type = nullOr str;
|
||||
};
|
||||
|
||||
registerNode = mkOption {
|
||||
description = "Whether to auto register kubelet with API server.";
|
||||
default = true;
|
||||
type = bool;
|
||||
};
|
||||
|
||||
port = mkOption {
|
||||
description = "Kubernetes kubelet info server listening port.";
|
||||
default = 10250;
|
||||
type = port;
|
||||
};
|
||||
|
||||
seedDockerImages = mkOption {
|
||||
description = "List of docker images to preload on system";
|
||||
default = [ ];
|
||||
type = listOf package;
|
||||
};
|
||||
|
||||
taints = mkOption {
|
||||
description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/).";
|
||||
default = { };
|
||||
type = attrsOf (submodule [ taintOptions ]);
|
||||
};
|
||||
|
||||
tlsCertFile = mkOption {
|
||||
description = "File containing x509 Certificate for HTTPS.";
|
||||
default = null;
|
||||
type = nullOr path;
|
||||
};
|
||||
|
||||
tlsKeyFile = mkOption {
|
||||
description = "File containing x509 private key matching tlsCertFile.";
|
||||
default = null;
|
||||
type = nullOr path;
|
||||
};
|
||||
|
||||
unschedulable = mkOption {
|
||||
description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role.";
|
||||
default = false;
|
||||
type = bool;
|
||||
};
|
||||
|
||||
verbosity = mkOption {
|
||||
description = ''
|
||||
Optional glog verbosity level for logging statements. See
|
||||
<https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md>
|
||||
'';
|
||||
default = null;
|
||||
type = nullOr int;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
###### implementation
|
||||
config = mkMerge [
|
||||
(mkIf cfg.enable {
|
||||
|
||||
environment.etc."cni/net.d".source = cniConfig;
|
||||
|
||||
services.kubernetes.kubelet.seedDockerImages = [ infraContainer ];
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"net.bridge.bridge-nf-call-iptables" = 1;
|
||||
"net.ipv4.ip_forward" = 1;
|
||||
"net.bridge.bridge-nf-call-ip6tables" = 1;
|
||||
};
|
||||
|
||||
systemd.services.kubelet = {
|
||||
description = "Kubernetes Kubelet Service";
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
after = [
|
||||
"containerd.service"
|
||||
"network.target"
|
||||
"kube-apiserver.service"
|
||||
];
|
||||
path =
|
||||
with pkgs;
|
||||
[
|
||||
gitMinimal
|
||||
openssh
|
||||
# TODO (#409339): remove this patch. We had to add it to avoid a mass rebuild
|
||||
# for the 25.05 release. Once the staging cycle referenced in the above PR completes,
|
||||
# switch back to plain util-linux.
|
||||
util-linux.withPatches
|
||||
iproute2
|
||||
ethtool
|
||||
thin-provisioning-tools
|
||||
iptables
|
||||
socat
|
||||
]
|
||||
++ lib.optional config.boot.zfs.enabled config.boot.zfs.package
|
||||
++ top.path;
|
||||
preStart = ''
|
||||
${concatMapStrings (img: ''
|
||||
echo "Seeding container image: ${img}"
|
||||
${
|
||||
if (lib.hasSuffix "gz" img) then
|
||||
''${pkgs.gzip}/bin/zcat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import -''
|
||||
else
|
||||
''${pkgs.coreutils}/bin/cat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import -''
|
||||
}
|
||||
'') cfg.seedDockerImages}
|
||||
|
||||
rm /opt/cni/bin/* || true
|
||||
${concatMapStrings (package: ''
|
||||
echo "Linking cni package: ${package}"
|
||||
ln -fs ${package}/bin/* /opt/cni/bin
|
||||
'') cfg.cni.packages}
|
||||
'';
|
||||
serviceConfig = {
|
||||
Slice = "kubernetes.slice";
|
||||
CPUAccounting = true;
|
||||
MemoryAccounting = true;
|
||||
Restart = "on-failure";
|
||||
RestartSec = "1000ms";
|
||||
ExecStart = ''
|
||||
${top.package}/bin/kubelet \
|
||||
--config=${kubeletConfig} \
|
||||
--hostname-override=${cfg.hostname} \
|
||||
--kubeconfig=${kubeconfig} \
|
||||
${optionalString (cfg.nodeIp != null) "--node-ip=${cfg.nodeIp}"} \
|
||||
--pod-infra-container-image=pause \
|
||||
${optionalString (cfg.manifests != { }) "--pod-manifest-path=/etc/${manifestPath}"} \
|
||||
${optionalString (taints != "") "--register-with-taints=${taints}"} \
|
||||
--root-dir=${top.dataDir} \
|
||||
${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
|
||||
${cfg.extraOpts}
|
||||
'';
|
||||
WorkingDirectory = top.dataDir;
|
||||
};
|
||||
unitConfig = {
|
||||
StartLimitIntervalSec = 0;
|
||||
};
|
||||
};
|
||||
|
||||
# Always include cni plugins
|
||||
services.kubernetes.kubelet.cni.packages = [
|
||||
pkgs.cni-plugins
|
||||
pkgs.cni-plugin-flannel
|
||||
];
|
||||
|
||||
boot.kernelModules = [
|
||||
"br_netfilter"
|
||||
"overlay"
|
||||
];
|
||||
|
||||
services.kubernetes.kubelet.hostname = mkDefault (lib.toLower config.networking.fqdnOrHostName);
|
||||
|
||||
services.kubernetes.pki.certs = with top.lib; {
|
||||
kubelet = mkCert {
|
||||
name = "kubelet";
|
||||
CN = top.kubelet.hostname;
|
||||
hosts = top.kubelet.extraSANs;
|
||||
action = "systemctl restart kubelet.service";
|
||||
|
||||
};
|
||||
kubeletClient = mkCert {
|
||||
name = "kubelet-client";
|
||||
CN = "system:node:${top.kubelet.hostname}";
|
||||
fields = {
|
||||
O = "system:nodes";
|
||||
};
|
||||
action = "systemctl restart kubelet.service";
|
||||
};
|
||||
};
|
||||
|
||||
services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress;
|
||||
})
|
||||
|
||||
(mkIf (cfg.enable && cfg.manifests != { }) {
|
||||
environment.etc = mapAttrs' (
|
||||
name: manifest:
|
||||
nameValuePair "${manifestPath}/${name}.json" {
|
||||
text = builtins.toJSON manifest;
|
||||
mode = "0755";
|
||||
}
|
||||
) cfg.manifests;
|
||||
})
|
||||
|
||||
(mkIf (cfg.unschedulable && cfg.enable) {
|
||||
services.kubernetes.kubelet.taints.unschedulable = {
|
||||
value = "true";
|
||||
effect = "NoSchedule";
|
||||
};
|
||||
})
|
||||
|
||||
];
|
||||
|
||||
meta.buildDocsInSandbox = false;
|
||||
}
|
||||
362
modules/overrides/kubernetes_default.nix
Normal file
362
modules/overrides/kubernetes_default.nix
Normal file
@@ -0,0 +1,362 @@
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
options,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
cfg = config.services.kubernetes;
|
||||
opt = options.services.kubernetes;
|
||||
|
||||
defaultContainerdSettings = {
|
||||
version = 2;
|
||||
root = "/var/lib/containerd";
|
||||
state = "/run/containerd";
|
||||
oom_score = 0;
|
||||
|
||||
grpc = {
|
||||
address = "/run/containerd/containerd.sock";
|
||||
};
|
||||
|
||||
plugins."io.containerd.grpc.v1.cri" = {
|
||||
sandbox_image = "pause:latest";
|
||||
|
||||
cni = {
|
||||
bin_dir = "/opt/cni/bin";
|
||||
max_conf_num = 0;
|
||||
};
|
||||
|
||||
containerd.runtimes.runc = {
|
||||
runtime_type = "io.containerd.runc.v2";
|
||||
options.SystemdCgroup = true;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
mkKubeConfig =
|
||||
name: conf:
|
||||
pkgs.writeText "${name}-kubeconfig" (
|
||||
builtins.toJSON {
|
||||
apiVersion = "v1";
|
||||
kind = "Config";
|
||||
clusters = [
|
||||
{
|
||||
name = "local";
|
||||
cluster.certificate-authority = conf.caFile or cfg.caFile;
|
||||
cluster.server = conf.server;
|
||||
}
|
||||
];
|
||||
users = [
|
||||
{
|
||||
inherit name;
|
||||
user = {
|
||||
client-certificate = conf.certFile;
|
||||
client-key = conf.keyFile;
|
||||
};
|
||||
}
|
||||
];
|
||||
contexts = [
|
||||
{
|
||||
context = {
|
||||
cluster = "local";
|
||||
user = name;
|
||||
};
|
||||
name = "local";
|
||||
}
|
||||
];
|
||||
current-context = "local";
|
||||
}
|
||||
);
|
||||
|
||||
caCert = secret "ca";
|
||||
|
||||
etcdEndpoints = [ "https://${cfg.masterAddress}:2379" ];
|
||||
|
||||
mkCert =
|
||||
{
|
||||
name,
|
||||
CN,
|
||||
hosts ? [ ],
|
||||
fields ? { },
|
||||
action ? "",
|
||||
privateKeyOwner ? "kubernetes",
|
||||
privateKeyGroup ? "kubernetes",
|
||||
}:
|
||||
rec {
|
||||
inherit
|
||||
name
|
||||
caCert
|
||||
CN
|
||||
hosts
|
||||
fields
|
||||
action
|
||||
;
|
||||
cert = secret name;
|
||||
key = secret "${name}-key";
|
||||
privateKeyOptions = {
|
||||
owner = privateKeyOwner;
|
||||
group = privateKeyGroup;
|
||||
mode = "0600";
|
||||
path = key;
|
||||
};
|
||||
};
|
||||
|
||||
secret = name: "${cfg.secretsPath}/${name}.pem";
|
||||
|
||||
mkKubeConfigOptions = prefix: {
|
||||
server = mkOption {
|
||||
description = "${prefix} kube-apiserver server address.";
|
||||
type = types.str;
|
||||
};
|
||||
|
||||
caFile = mkOption {
|
||||
description = "${prefix} certificate authority file used to connect to kube-apiserver.";
|
||||
type = types.nullOr types.path;
|
||||
default = cfg.caFile;
|
||||
defaultText = literalExpression "config.${opt.caFile}";
|
||||
};
|
||||
|
||||
certFile = mkOption {
|
||||
description = "${prefix} client certificate file used to connect to kube-apiserver.";
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
};
|
||||
|
||||
keyFile = mkOption {
|
||||
description = "${prefix} client key file used to connect to kube-apiserver.";
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
};
|
||||
};
|
||||
in {
|
||||
|
||||
disabledModules = [ "services/cluster/kubernetes/default.nix" ];
|
||||
|
||||
imports = [
|
||||
(mkRemovedOptionModule [
|
||||
"services"
|
||||
"kubernetes"
|
||||
"addons"
|
||||
"dashboard"
|
||||
] "Removed due to it being an outdated version")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "verbose" ] "")
|
||||
];
|
||||
|
||||
###### interface
|
||||
|
||||
options.services.kubernetes = {
|
||||
roles = mkOption {
|
||||
description = ''
|
||||
Kubernetes role that this machine should take.
|
||||
|
||||
Master role will enable etcd, apiserver, scheduler, controller manager
|
||||
addon manager, flannel and proxy services.
|
||||
Node role will enable flannel, docker, kubelet and proxy services.
|
||||
'';
|
||||
default = [ ];
|
||||
type = types.listOf (
|
||||
types.enum [
|
||||
"master"
|
||||
"node"
|
||||
]
|
||||
);
|
||||
};
|
||||
|
||||
package = mkPackageOption pkgs "kubernetes" { };
|
||||
|
||||
kubeconfig = mkKubeConfigOptions "Default kubeconfig";
|
||||
|
||||
apiserverAddress = mkOption {
|
||||
description = ''
|
||||
Clusterwide accessible address for the kubernetes apiserver,
|
||||
including protocol and optional port.
|
||||
'';
|
||||
example = "https://kubernetes-apiserver.example.com:6443";
|
||||
type = types.str;
|
||||
};
|
||||
|
||||
caFile = mkOption {
|
||||
description = "Default kubernetes certificate authority";
|
||||
type = types.nullOr types.path;
|
||||
default = null;
|
||||
};
|
||||
|
||||
dataDir = mkOption {
|
||||
description = "Kubernetes root directory for managing kubelet files.";
|
||||
default = "/var/lib/kubernetes";
|
||||
type = types.path;
|
||||
};
|
||||
|
||||
easyCerts = mkOption {
|
||||
description = "Automatically setup x509 certificates and keys for the entire cluster.";
|
||||
default = false;
|
||||
type = types.bool;
|
||||
};
|
||||
|
||||
featureGates = mkOption {
|
||||
description = "List set of feature gates.";
|
||||
default = { };
|
||||
type = types.attrsOf types.bool;
|
||||
};
|
||||
|
||||
masterAddress = mkOption {
|
||||
description = "Clusterwide available network address or hostname for the kubernetes master server.";
|
||||
example = "master.example.com";
|
||||
type = types.str;
|
||||
};
|
||||
|
||||
path = mkOption {
|
||||
description = "Packages added to the services' PATH environment variable. Both the bin and sbin subdirectories of each package are added.";
|
||||
type = types.listOf types.package;
|
||||
default = [ ];
|
||||
};
|
||||
|
||||
clusterCidr = mkOption {
|
||||
description = "Kubernetes controller manager and proxy CIDR Range for Pods in cluster.";
|
||||
default = "10.1.0.0/16";
|
||||
type = types.nullOr types.str;
|
||||
};
|
||||
|
||||
lib = mkOption {
|
||||
description = "Common functions for the kubernetes modules.";
|
||||
default = {
|
||||
inherit mkCert;
|
||||
inherit mkKubeConfig;
|
||||
inherit mkKubeConfigOptions;
|
||||
};
|
||||
type = types.attrs;
|
||||
};
|
||||
|
||||
secretsPath = mkOption {
|
||||
description = "Default location for kubernetes secrets. Not a store location.";
|
||||
type = types.path;
|
||||
default = cfg.dataDir + "/secrets";
|
||||
defaultText = literalExpression ''
|
||||
config.${opt.dataDir} + "/secrets"
|
||||
'';
|
||||
};
|
||||
};
|
||||
|
||||
###### implementation
|
||||
|
||||
config = mkMerge [
|
||||
|
||||
(mkIf cfg.easyCerts {
|
||||
services.kubernetes.pki.enable = mkDefault true;
|
||||
services.kubernetes.caFile = caCert;
|
||||
})
|
||||
|
||||
(mkIf (elem "master" cfg.roles) {
|
||||
services.kubernetes.apiserver.enable = mkDefault true;
|
||||
services.kubernetes.scheduler.enable = mkDefault true;
|
||||
services.kubernetes.controllerManager.enable = mkDefault true;
|
||||
services.kubernetes.addonManager.enable = mkDefault true;
|
||||
services.kubernetes.proxy.enable = mkDefault true;
|
||||
services.etcd.enable = true; # Cannot mkDefault because of flannel default options
|
||||
services.kubernetes.kubelet = {
|
||||
enable = mkDefault true;
|
||||
taints = mkIf (!(elem "node" cfg.roles)) {
|
||||
master = {
|
||||
key = "node-role.kubernetes.io/master";
|
||||
value = "true";
|
||||
effect = "NoSchedule";
|
||||
};
|
||||
};
|
||||
};
|
||||
})
|
||||
|
||||
(mkIf (all (el: el == "master") cfg.roles) {
|
||||
# if this node is only a master make it unschedulable by default
|
||||
services.kubernetes.kubelet.unschedulable = mkDefault true;
|
||||
})
|
||||
|
||||
(mkIf (elem "node" cfg.roles) {
|
||||
services.kubernetes.kubelet.enable = mkDefault true;
|
||||
services.kubernetes.proxy.enable = mkDefault true;
|
||||
})
|
||||
|
||||
# Using "services.kubernetes.roles" will automatically enable easyCerts and flannel
|
||||
(mkIf (cfg.roles != [ ]) {
|
||||
services.kubernetes.flannel.enable = mkDefault true;
|
||||
services.flannel.etcd.endpoints = mkDefault etcdEndpoints;
|
||||
services.kubernetes.easyCerts = mkDefault true;
|
||||
})
|
||||
|
||||
(mkIf cfg.apiserver.enable {
|
||||
services.kubernetes.pki.etcClusterAdminKubeconfig = mkDefault "kubernetes/cluster-admin.kubeconfig";
|
||||
services.kubernetes.apiserver.etcd.servers = mkDefault etcdEndpoints;
|
||||
})
|
||||
|
||||
(mkIf cfg.kubelet.enable {
|
||||
virtualisation.containerd = {
|
||||
enable = mkDefault true;
|
||||
settings = mapAttrsRecursive (name: mkDefault) defaultContainerdSettings;
|
||||
};
|
||||
})
|
||||
|
||||
(mkIf (cfg.apiserver.enable || cfg.controllerManager.enable) {
|
||||
services.kubernetes.pki.certs = {
|
||||
serviceAccount = mkCert {
|
||||
name = "service-account";
|
||||
CN = "system:service-account-signer";
|
||||
action = ''
|
||||
systemctl restart \
|
||||
kube-apiserver.service \
|
||||
kube-controller-manager.service
|
||||
'';
|
||||
};
|
||||
};
|
||||
})
|
||||
|
||||
(mkIf
|
||||
(
|
||||
cfg.apiserver.enable
|
||||
|| cfg.scheduler.enable
|
||||
|| cfg.controllerManager.enable
|
||||
|| cfg.kubelet.enable
|
||||
|| cfg.proxy.enable
|
||||
|| cfg.addonManager.enable
|
||||
)
|
||||
{
|
||||
systemd.targets.kubernetes = {
|
||||
description = "Kubernetes";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
};
|
||||
|
||||
systemd.tmpfiles.rules = [
|
||||
"d /opt/cni/bin 0755 root root -"
|
||||
"d /run/kubernetes 0755 kubernetes kubernetes -"
|
||||
"d ${cfg.dataDir} 0755 kubernetes kubernetes -"
|
||||
];
|
||||
|
||||
users.users.kubernetes = {
|
||||
uid = config.ids.uids.kubernetes;
|
||||
description = "Kubernetes user";
|
||||
group = "kubernetes";
|
||||
home = cfg.dataDir;
|
||||
createHome = true;
|
||||
homeMode = "755";
|
||||
};
|
||||
users.groups.kubernetes.gid = config.ids.gids.kubernetes;
|
||||
|
||||
# dns addon is enabled by default
|
||||
services.kubernetes.addons.dns.enable = mkDefault true;
|
||||
|
||||
services.kubernetes.apiserverAddress = mkDefault (
|
||||
"https://${
|
||||
if cfg.apiserver.advertiseAddress != null then
|
||||
cfg.apiserver.advertiseAddress
|
||||
else
|
||||
"${cfg.masterAddress}:${toString cfg.apiserver.securePort}"
|
||||
}"
|
||||
);
|
||||
}
|
||||
)
|
||||
];
|
||||
|
||||
meta.buildDocsInSandbox = false;
|
||||
}
|
||||
409
modules/overrides/prev/kubelet.nix
Normal file
409
modules/overrides/prev/kubelet.nix
Normal file
@@ -0,0 +1,409 @@
|
||||
{ config, lib, options, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
top = config.services.kubernetes;
|
||||
otop = options.services.kubernetes;
|
||||
cfg = top.kubelet;
|
||||
|
||||
cniConfig =
|
||||
if cfg.cni.config != [] && cfg.cni.configDir != null then
|
||||
throw "Verbatim CNI-config and CNI configDir cannot both be set."
|
||||
else if cfg.cni.configDir != null then
|
||||
cfg.cni.configDir
|
||||
else
|
||||
(pkgs.buildEnv {
|
||||
name = "kubernetes-cni-config";
|
||||
paths = imap (i: entry:
|
||||
pkgs.writeTextDir "${toString (10+i)}-${entry.type}.conf" (builtins.toJSON entry)
|
||||
) cfg.cni.config;
|
||||
});
|
||||
|
||||
infraContainer = pkgs.dockerTools.buildImage {
|
||||
name = "pause";
|
||||
tag = "latest";
|
||||
copyToRoot = pkgs.buildEnv {
|
||||
name = "image-root";
|
||||
pathsToLink = [ "/bin" ];
|
||||
paths = [ top.package.pause ];
|
||||
};
|
||||
config.Cmd = ["/bin/pause"];
|
||||
};
|
||||
|
||||
kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
|
||||
|
||||
# Flag based settings are deprecated, use the `--config` flag with a
|
||||
# `KubeletConfiguration` struct.
|
||||
# https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/
|
||||
#
|
||||
# NOTE: registerWithTaints requires a []core/v1.Taint, therefore requires
|
||||
# additional work to be put in config format.
|
||||
#
|
||||
kubeletConfig = pkgs.writeText "kubelet-config" (builtins.toJSON ({
|
||||
apiVersion = "kubelet.config.k8s.io/v1beta1";
|
||||
kind = "KubeletConfiguration";
|
||||
address = cfg.address;
|
||||
port = cfg.port;
|
||||
authentication = {
|
||||
x509 = lib.optionalAttrs (cfg.clientCaFile != null) { clientCAFile = cfg.clientCaFile; };
|
||||
webhook = {
|
||||
enabled = true;
|
||||
cacheTTL = "10s";
|
||||
};
|
||||
};
|
||||
authorization = {
|
||||
mode = "Webhook";
|
||||
};
|
||||
cgroupDriver = "systemd";
|
||||
hairpinMode = "hairpin-veth";
|
||||
registerNode = cfg.registerNode;
|
||||
containerRuntimeEndpoint = cfg.containerRuntimeEndpoint;
|
||||
healthzPort = cfg.healthz.port;
|
||||
healthzBindAddress = cfg.healthz.bind;
|
||||
} // lib.optionalAttrs (cfg.tlsCertFile != null) { tlsCertFile = cfg.tlsCertFile; }
|
||||
// lib.optionalAttrs (cfg.tlsKeyFile != null) { tlsPrivateKeyFile = cfg.tlsKeyFile; }
|
||||
// lib.optionalAttrs (cfg.clusterDomain != "") { clusterDomain = cfg.clusterDomain; }
|
||||
// lib.optionalAttrs (cfg.clusterDns != "") { clusterDNS = [ cfg.clusterDns ] ; }
|
||||
// lib.optionalAttrs (cfg.featureGates != []) { featureGates = cfg.featureGates; }
|
||||
));
|
||||
|
||||
manifestPath = "kubernetes/manifests";
|
||||
|
||||
taintOptions = with lib.types; { name, ... }: {
|
||||
options = {
|
||||
key = mkOption {
|
||||
description = "Key of taint.";
|
||||
default = name;
|
||||
defaultText = literalMD "Name of this submodule.";
|
||||
type = str;
|
||||
};
|
||||
value = mkOption {
|
||||
description = "Value of taint.";
|
||||
type = str;
|
||||
};
|
||||
effect = mkOption {
|
||||
description = "Effect of taint.";
|
||||
example = "NoSchedule";
|
||||
type = enum ["NoSchedule" "PreferNoSchedule" "NoExecute"];
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (mapAttrsToList (n: v: v) cfg.taints);
|
||||
in
|
||||
{
|
||||
disabledModules = [ "services/cluster/kubernetes/kubelet.nix" ];
|
||||
|
||||
imports = [
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "applyManifests" ] "")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "cadvisorPort" ] "")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "allowPrivileged" ] "")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "networkPlugin" ] "")
|
||||
(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "containerRuntime" ] "")
|
||||
];
|
||||
|
||||
###### interface
|
||||
options.services.kubernetes.kubelet = with lib.types; {
|
||||
|
||||
address = mkOption {
|
||||
description = "Kubernetes kubelet info server listening address.";
|
||||
default = "0.0.0.0";
|
||||
type = str;
|
||||
};
|
||||
|
||||
clusterDns = mkOption {
|
||||
description = "Use alternative DNS.";
|
||||
default = "10.1.0.1";
|
||||
type = str;
|
||||
};
|
||||
|
||||
clusterDomain = mkOption {
|
||||
description = "Use alternative domain.";
|
||||
default = config.services.kubernetes.addons.dns.clusterDomain;
|
||||
defaultText = literalExpression "config.${options.services.kubernetes.addons.dns.clusterDomain}";
|
||||
type = str;
|
||||
};
|
||||
|
||||
extraSANs = mkOption {
|
||||
description = "Extra x509 Subject Alternative Names to be added to the kubelet tls cert.";
|
||||
default = [];
|
||||
type = listOf str;
|
||||
};
|
||||
|
||||
clientCaFile = mkOption {
|
||||
description = "Kubernetes apiserver CA file for client authentication.";
|
||||
default = top.caFile;
|
||||
defaultText = literalExpression "config.${otop.caFile}";
|
||||
type = nullOr path;
|
||||
};
|
||||
|
||||
cni = {
|
||||
packages = mkOption {
|
||||
description = "List of network plugin packages to install.";
|
||||
type = listOf package;
|
||||
default = [];
|
||||
};
|
||||
|
||||
config = mkOption {
|
||||
description = "Kubernetes CNI configuration.";
|
||||
type = listOf attrs;
|
||||
default = [];
|
||||
example = literalExpression ''
|
||||
[{
|
||||
"cniVersion": "0.3.1",
|
||||
"name": "mynet",
|
||||
"type": "bridge",
|
||||
"bridge": "cni0",
|
||||
"isGateway": true,
|
||||
"ipMasq": true,
|
||||
"ipam": {
|
||||
"type": "host-local",
|
||||
"subnet": "10.22.0.0/16",
|
||||
"routes": [
|
||||
{ "dst": "0.0.0.0/0" }
|
||||
]
|
||||
}
|
||||
} {
|
||||
"cniVersion": "0.3.1",
|
||||
"type": "loopback"
|
||||
}]
|
||||
'';
|
||||
};
|
||||
|
||||
configDir = mkOption {
|
||||
description = "Path to Kubernetes CNI configuration directory.";
|
||||
type = nullOr path;
|
||||
default = null;
|
||||
};
|
||||
};
|
||||
|
||||
containerRuntimeEndpoint = mkOption {
|
||||
description = "Endpoint at which to find the container runtime api interface/socket";
|
||||
type = str;
|
||||
default = "unix:///run/containerd/containerd.sock";
|
||||
};
|
||||
|
||||
enable = mkEnableOption "Kubernetes kubelet";
|
||||
|
||||
extraOpts = mkOption {
|
||||
description = "Kubernetes kubelet extra command line options.";
|
||||
default = "";
|
||||
type = separatedString " ";
|
||||
};
|
||||
|
||||
featureGates = mkOption {
|
||||
description = "List set of feature gates";
|
||||
default = top.featureGates;
|
||||
defaultText = literalExpression "config.${otop.featureGates}";
|
||||
type = listOf str;
|
||||
};
|
||||
|
||||
healthz = {
|
||||
bind = mkOption {
|
||||
description = "Kubernetes kubelet healthz listening address.";
|
||||
default = "127.0.0.1";
|
||||
type = str;
|
||||
};
|
||||
|
||||
port = mkOption {
|
||||
description = "Kubernetes kubelet healthz port.";
|
||||
default = 10248;
|
||||
type = port;
|
||||
};
|
||||
};
|
||||
|
||||
hostname = mkOption {
|
||||
description = "Kubernetes kubelet hostname override.";
|
||||
defaultText = literalExpression "config.networking.fqdnOrHostName";
|
||||
type = str;
|
||||
};
|
||||
|
||||
kubeconfig = top.lib.mkKubeConfigOptions "Kubelet";
|
||||
|
||||
manifests = mkOption {
|
||||
description = "List of manifests to bootstrap with kubelet (only pods can be created as manifest entry)";
|
||||
type = attrsOf attrs;
|
||||
default = {};
|
||||
};
|
||||
|
||||
nodeIp = mkOption {
|
||||
description = "IP address of the node. If set, kubelet will use this IP address for the node.";
|
||||
default = null;
|
||||
type = nullOr str;
|
||||
};
|
||||
|
||||
registerNode = mkOption {
|
||||
description = "Whether to auto register kubelet with API server.";
|
||||
default = true;
|
||||
type = bool;
|
||||
};
|
||||
|
||||
port = mkOption {
|
||||
description = "Kubernetes kubelet info server listening port.";
|
||||
default = 10250;
|
||||
type = port;
|
||||
};
|
||||
|
||||
seedDockerImages = mkOption {
|
||||
description = "List of docker images to preload on system";
|
||||
default = [];
|
||||
type = listOf package;
|
||||
};
|
||||
|
||||
taints = mkOption {
|
||||
description = "Node taints (https://kubernetes.io/docs/concepts/configuration/assign-pod-node/).";
|
||||
default = {};
|
||||
type = attrsOf (submodule [ taintOptions ]);
|
||||
};
|
||||
|
||||
tlsCertFile = mkOption {
|
||||
description = "File containing x509 Certificate for HTTPS.";
|
||||
default = null;
|
||||
type = nullOr path;
|
||||
};
|
||||
|
||||
tlsKeyFile = mkOption {
|
||||
description = "File containing x509 private key matching tlsCertFile.";
|
||||
default = null;
|
||||
type = nullOr path;
|
||||
};
|
||||
|
||||
unschedulable = mkOption {
|
||||
description = "Whether to set node taint to unschedulable=true as it is the case of node that has only master role.";
|
||||
default = false;
|
||||
type = bool;
|
||||
};
|
||||
|
||||
verbosity = mkOption {
|
||||
description = ''
|
||||
Optional glog verbosity level for logging statements. See
|
||||
<https://github.com/kubernetes/community/blob/master/contributors/devel/logging.md>
|
||||
'';
|
||||
default = null;
|
||||
type = nullOr int;
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
###### implementation
|
||||
config = mkMerge [
|
||||
(mkIf cfg.enable {
|
||||
|
||||
environment.etc."cni/net.d".source = cniConfig;
|
||||
|
||||
services.kubernetes.kubelet.seedDockerImages = [infraContainer];
|
||||
|
||||
boot.kernel.sysctl = {
|
||||
"net.bridge.bridge-nf-call-iptables" = 1;
|
||||
"net.ipv4.ip_forward" = 1;
|
||||
"net.bridge.bridge-nf-call-ip6tables" = 1;
|
||||
};
|
||||
|
||||
systemd.services.kubelet = {
|
||||
description = "Kubernetes Kubelet Service";
|
||||
wantedBy = [ "kubernetes.target" ];
|
||||
after = [ "containerd.service" "network.target" "kube-apiserver.service" ];
|
||||
path = with pkgs; [
|
||||
gitMinimal
|
||||
openssh
|
||||
util-linux
|
||||
iproute2
|
||||
ethtool
|
||||
thin-provisioning-tools
|
||||
iptables
|
||||
socat
|
||||
] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package ++ top.path;
|
||||
preStart = ''
|
||||
${concatMapStrings (img: ''
|
||||
echo "Seeding container image: ${img}"
|
||||
${if (lib.hasSuffix "gz" img) then
|
||||
''${pkgs.gzip}/bin/zcat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import --all-platforms -''
|
||||
else
|
||||
''${pkgs.coreutils}/bin/cat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import --all-platforms -''
|
||||
}
|
||||
'') cfg.seedDockerImages}
|
||||
|
||||
rm /opt/cni/bin/* || true
|
||||
${concatMapStrings (package: ''
|
||||
echo "Linking cni package: ${package}"
|
||||
ln -fs ${package}/bin/* /opt/cni/bin
|
||||
'') cfg.cni.packages}
|
||||
'';
|
||||
serviceConfig = {
|
||||
Slice = "kubernetes.slice";
|
||||
CPUAccounting = true;
|
||||
MemoryAccounting = true;
|
||||
Restart = "on-failure";
|
||||
RestartSec = "1000ms";
|
||||
ExecStart = ''${top.package}/bin/kubelet \
|
||||
--config=${kubeletConfig} \
|
||||
--hostname-override=${cfg.hostname} \
|
||||
--kubeconfig=${kubeconfig} \
|
||||
${optionalString (cfg.nodeIp != null)
|
||||
"--node-ip=${cfg.nodeIp}"} \
|
||||
--pod-infra-container-image=pause \
|
||||
${optionalString (cfg.manifests != {})
|
||||
"--pod-manifest-path=/etc/${manifestPath}"} \
|
||||
${optionalString (taints != "")
|
||||
"--register-with-taints=${taints}"} \
|
||||
--root-dir=${top.dataDir} \
|
||||
${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
|
||||
${cfg.extraOpts}
|
||||
'';
|
||||
WorkingDirectory = top.dataDir;
|
||||
};
|
||||
unitConfig = {
|
||||
StartLimitIntervalSec = 0;
|
||||
};
|
||||
};
|
||||
|
||||
# Always include cni plugins
|
||||
services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins pkgs.cni-plugin-flannel];
|
||||
|
||||
boot.kernelModules = ["br_netfilter" "overlay"];
|
||||
|
||||
services.kubernetes.kubelet.hostname =
|
||||
mkDefault config.networking.fqdnOrHostName;
|
||||
|
||||
services.kubernetes.pki.certs = with top.lib; {
|
||||
kubelet = mkCert {
|
||||
name = "kubelet";
|
||||
CN = top.kubelet.hostname;
|
||||
hosts = top.kubelet.extraSANs;
|
||||
action = "systemctl restart kubelet.service";
|
||||
|
||||
};
|
||||
kubeletClient = mkCert {
|
||||
name = "kubelet-client";
|
||||
CN = "system:node:${top.kubelet.hostname}";
|
||||
fields = {
|
||||
O = "system:nodes";
|
||||
};
|
||||
action = "systemctl restart kubelet.service";
|
||||
};
|
||||
};
|
||||
|
||||
services.kubernetes.kubelet.kubeconfig.server = mkDefault top.apiserverAddress;
|
||||
})
|
||||
|
||||
(mkIf (cfg.enable && cfg.manifests != {}) {
|
||||
environment.etc = mapAttrs' (name: manifest:
|
||||
nameValuePair "${manifestPath}/${name}.json" {
|
||||
text = builtins.toJSON manifest;
|
||||
mode = "0755";
|
||||
}
|
||||
) cfg.manifests;
|
||||
})
|
||||
|
||||
(mkIf (cfg.unschedulable && cfg.enable) {
|
||||
services.kubernetes.kubelet.taints.unschedulable = {
|
||||
value = "true";
|
||||
effect = "NoSchedule";
|
||||
};
|
||||
})
|
||||
|
||||
];
|
||||
|
||||
meta.buildDocsInSandbox = false;
|
||||
}
|
||||
313
modules/overrides/prev/kubernetes_default.nix
Normal file
313
modules/overrides/prev/kubernetes_default.nix
Normal file
@@ -0,0 +1,313 @@
|
||||
{ config, lib, options, pkgs, ... }:
|
||||
|
||||
with lib;
|
||||
|
||||
let
|
||||
cfg = config.services.kubernetes;
|
||||
opt = options.services.kubernetes;
|
||||
|
||||
defaultContainerdSettings = {
|
||||
version = 2;
|
||||
root = "/var/lib/containerd";
|
||||
state = "/run/containerd";
|
||||
oom_score = 0;
|
||||
|
||||
grpc = {
|
||||
address = "/run/containerd/containerd.sock";
|
||||
};
|
||||
|
||||
plugins."io.containerd.grpc.v1.cri" = {
|
||||
sandbox_image = "pause:latest";
|
||||
|
||||
cni = {
|
||||
bin_dir = "/opt/cni/bin";
|
||||
max_conf_num = 0;
|
||||
};
|
||||
|
||||
containerd.runtimes.runc = {
|
||||
runtime_type = "io.containerd.runc.v2";
|
||||
options.SystemdCgroup = true;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON {
|
||||
apiVersion = "v1";
|
||||
kind = "Config";
|
||||
clusters = [{
|
||||
name = "local";
|
||||
cluster.certificate-authority = conf.caFile or cfg.caFile;
|
||||
cluster.server = conf.server;
|
||||
}];
|
||||
users = [{
|
||||
inherit name;
|
||||
user = {
|
||||
client-certificate = conf.certFile;
|
||||
client-key = conf.keyFile;
|
||||
};
|
||||
}];
|
||||
contexts = [{
|
||||
context = {
|
||||
cluster = "local";
|
||||
user = name;
|
||||
};
|
||||
name = "local";
|
||||
}];
|
||||
current-context = "local";
|
||||
});
|
||||
|
||||
caCert = secret "ca";
|
||||
|
||||
etcdEndpoints = ["https://${cfg.masterAddress}:2379"];
|
||||
|
||||
mkCert = { name, CN, hosts ? [], fields ? {}, action ? "",
|
||||
privateKeyOwner ? "kubernetes" }: rec {
|
||||
inherit name caCert CN hosts fields action;
|
||||
cert = secret name;
|
||||
key = secret "${name}-key";
|
||||
privateKeyOptions = {
|
||||
owner = privateKeyOwner;
|
||||
group = "nogroup";
|
||||
mode = "0600";
|
||||
path = key;
|
||||
};
|
||||
};
|
||||
|
||||
secret = name: "${cfg.secretsPath}/${name}.pem";
|
||||
|
||||
  # Option set shared by every per-component kubeconfig (`prefix` is the
  # human-readable component name used in the descriptions).
  mkKubeConfigOptions = prefix: {
    server = mkOption {
      description = "${prefix} kube-apiserver server address.";
      type = types.str;
    };

    caFile = mkOption {
      description = "${prefix} certificate authority file used to connect to kube-apiserver.";
      type = types.nullOr types.path;
      # Inherit the cluster-wide CA unless overridden per component.
      default = cfg.caFile;
      defaultText = literalExpression "config.${opt.caFile}";
    };

    certFile = mkOption {
      description = "${prefix} client certificate file used to connect to kube-apiserver.";
      type = types.nullOr types.path;
      default = null;
    };

    keyFile = mkOption {
      description = "${prefix} client key file used to connect to kube-apiserver.";
      type = types.nullOr types.path;
      default = null;
    };
  };
|
||||
in {

  # Shadow the upstream nixpkgs kubernetes module; this file replaces it.
  disabledModules = [ "services/cluster/kubernetes/default.nix" ];

  imports = [
    (mkRemovedOptionModule [ "services" "kubernetes" "addons" "dashboard" ] "Removed due to it being an outdated version")
    (mkRemovedOptionModule [ "services" "kubernetes" "verbose" ] "")
  ];
|
||||
|
||||
  ###### interface

  options.services.kubernetes = {
    # High-level role selection; the config section below expands each role
    # into concrete service enables.
    roles = mkOption {
      description = ''
        Kubernetes role that this machine should take.

        Master role will enable etcd, apiserver, scheduler, controller manager
        addon manager, flannel and proxy services.
        Node role will enable flannel, docker, kubelet and proxy services.
      '';
      default = [];
      type = types.listOf (types.enum ["master" "node"]);
    };

    package = mkPackageOption pkgs "kubernetes" { };

    # Default kubeconfig (server/caFile/certFile/keyFile) inherited by components.
    kubeconfig = mkKubeConfigOptions "Default kubeconfig";

    apiserverAddress = mkOption {
      description = ''
        Clusterwide accessible address for the kubernetes apiserver,
        including protocol and optional port.
      '';
      example = "https://kubernetes-apiserver.example.com:6443";
      type = types.str;
    };

    caFile = mkOption {
      description = "Default kubernetes certificate authority";
      type = types.nullOr types.path;
      default = null;
    };

    dataDir = mkOption {
      description = "Kubernetes root directory for managing kubelet files.";
      default = "/var/lib/kubernetes";
      type = types.path;
    };

    easyCerts = mkOption {
      description = "Automatically setup x509 certificates and keys for the entire cluster.";
      default = false;
      type = types.bool;
    };

    featureGates = mkOption {
      description = "List set of feature gates.";
      default = [];
      type = types.listOf types.str;
    };

    masterAddress = mkOption {
      description = "Clusterwide available network address or hostname for the kubernetes master server.";
      example = "master.example.com";
      type = types.str;
    };

    path = mkOption {
      description = "Packages added to the services' PATH environment variable. Both the bin and sbin subdirectories of each package are added.";
      type = types.listOf types.package;
      default = [];
    };

    clusterCidr = mkOption {
      description = "Kubernetes controller manager and proxy CIDR Range for Pods in cluster.";
      default = "10.1.0.0/16";
      type = types.nullOr types.str;
    };

    # Helper functions exposed so the sibling kubernetes submodules
    # (apiserver, kubelet, pki, ...) can reuse them.
    lib = mkOption {
      description = "Common functions for the kubernetes modules.";
      default = {
        inherit mkCert;
        inherit mkKubeConfig;
        inherit mkKubeConfigOptions;
      };
      type = types.attrs;
    };

    secretsPath = mkOption {
      description = "Default location for kubernetes secrets. Not a store location.";
      type = types.path;
      default = cfg.dataDir + "/secrets";
      defaultText = literalExpression ''
        config.${opt.dataDir} + "/secrets"
      '';
    };
  };
|
||||
|
||||
  ###### implementation

  config = mkMerge [

    # With easyCerts, a local PKI provides all cluster certificates.
    (mkIf cfg.easyCerts {
      services.kubernetes.pki.enable = mkDefault true;
      services.kubernetes.caFile = caCert;
    })

    # Expand the "master" role into the control-plane components.
    (mkIf (elem "master" cfg.roles) {
      services.kubernetes.apiserver.enable = mkDefault true;
      services.kubernetes.scheduler.enable = mkDefault true;
      services.kubernetes.controllerManager.enable = mkDefault true;
      services.kubernetes.addonManager.enable = mkDefault true;
      services.kubernetes.proxy.enable = mkDefault true;
      services.etcd.enable = true; # Cannot mkDefault because of flannel default options
      services.kubernetes.kubelet = {
        enable = mkDefault true;
        # Without the "node" role, taint the master so ordinary pods are
        # not scheduled onto it.
        taints = mkIf (!(elem "node" cfg.roles)) {
          master = {
            key = "node-role.kubernetes.io/master";
            value = "true";
            effect = "NoSchedule";
          };
        };
      };
    })

    (mkIf (all (el: el == "master") cfg.roles) {
      # if this node is only a master make it unschedulable by default
      services.kubernetes.kubelet.unschedulable = mkDefault true;
    })

    # Expand the "node" role.
    (mkIf (elem "node" cfg.roles) {
      services.kubernetes.kubelet.enable = mkDefault true;
      services.kubernetes.proxy.enable = mkDefault true;
    })

    # Using "services.kubernetes.roles" will automatically enable easyCerts and flannel
    (mkIf (cfg.roles != []) {
      services.kubernetes.flannel.enable = mkDefault true;
      services.flannel.etcd.endpoints = mkDefault etcdEndpoints;
      services.kubernetes.easyCerts = mkDefault true;
    })

    (mkIf cfg.apiserver.enable {
      services.kubernetes.pki.etcClusterAdminKubeconfig = mkDefault "kubernetes/cluster-admin.kubeconfig";
      services.kubernetes.apiserver.etcd.servers = mkDefault etcdEndpoints;
    })

    # The kubelet needs a container runtime: enable containerd with the
    # baseline settings; every leaf is mkDefault so users can override.
    (mkIf cfg.kubelet.enable {
      virtualisation.containerd = {
        enable = mkDefault true;
        settings = mapAttrsRecursive (name: mkDefault) defaultContainerdSettings;
      };
    })

    # Service-account signing key pair, shared by the apiserver and the
    # controller manager; both are restarted when it is renewed.
    (mkIf (cfg.apiserver.enable || cfg.controllerManager.enable) {
      services.kubernetes.pki.certs = {
        serviceAccount = mkCert {
          name = "service-account";
          CN = "system:service-account-signer";
          action = ''
            systemctl restart \
            kube-apiserver.service \
            kube-controller-manager.service
          '';
        };
      };
    })

    # Common plumbing whenever any kubernetes component runs on this host.
    (mkIf (
      cfg.apiserver.enable ||
      cfg.scheduler.enable ||
      cfg.controllerManager.enable ||
      cfg.kubelet.enable ||
      cfg.proxy.enable ||
      cfg.addonManager.enable
    ) {
      systemd.targets.kubernetes = {
        description = "Kubernetes";
        wantedBy = [ "multi-user.target" ];
      };

      # Runtime directories for CNI plugins, sockets and state.
      systemd.tmpfiles.rules = [
        "d /opt/cni/bin 0755 root root -"
        "d /run/kubernetes 0755 kubernetes kubernetes -"
        "d ${cfg.dataDir} 0755 kubernetes kubernetes -"
      ];

      users.users.kubernetes = {
        uid = config.ids.uids.kubernetes;
        description = "Kubernetes user";
        group = "kubernetes";
        home = cfg.dataDir;
        createHome = true;
        homeMode = "755";
      };
      users.groups.kubernetes.gid = config.ids.gids.kubernetes;

      # dns addon is enabled by default
      services.kubernetes.addons.dns.enable = mkDefault true;

      # Derive the apiserver URL from the advertise address when set,
      # otherwise from masterAddress and the secure port.
      services.kubernetes.apiserverAddress = mkDefault ("https://${if cfg.apiserver.advertiseAddress != null
                          then cfg.apiserver.advertiseAddress
                          else "${cfg.masterAddress}:${toString cfg.apiserver.securePort}"}");
    })
  ];

  meta.buildDocsInSandbox = false;
}
|
||||
86
modules/pki/certs.nix
Normal file
86
modules/pki/certs.nix
Normal file
@@ -0,0 +1,86 @@
|
||||
{ pkgs, lib, config, ... } :
with lib;
let
  cfg = config.features.certs;

  # Configuration applied when the feature is enabled: a local cfssl CA
  # plus certmgr specs that issue one certificate per entry in cfg.certs.
  configuration = {

    services.cfssl = rec {
      enable = true;
      # cfssl is seeded from the CA bundle directory (ca.pem / ca-key.pem).
      caBundle = cfg.caBundle;
      ca = "${caBundle}/ca.pem";
      caKey = "${caBundle}/ca-key.pem";
    };

    services.certmgr.enable = true;
    services.certmgr.specs =
      with builtins;
      let
        certs = cfg.certs;
        # PEM path under certmgr's secrets directory.
        secret = name: "/var/lib/secrets/${name}.pem";
        # Build one certmgr spec attrset from a cert description
        # of the form { name, SANs, owner, group }.
        genCert = x: {
          "${x.name}" = {
            service = "nginx";
            # "nop": certmgr renews the cert but takes no service action.
            action = "nop";
            authority = {
              profile = "default";
              # The local cfssl instance enabled above.
              remote = "http://localhost:8888";
              root_ca = "/var/lib/secrets/ca.pem";
              file.path = "/var/lib/secrets/ca.pem";
            };
            certificate = {
              path = secret x.name;
            };
            private_key = {
              owner = x.owner;
              group = x.group;
              mode = "0600";
              path = secret "${x.name}-key";
            };
            request = {
              CN = x.name;
              # The cert name itself is always a SAN, plus any extras.
              hosts = [
                x.name
              ] ++ x.SANs;
              key = {
                algo = "rsa";
                size = 2048;
              };
              names = [{
                L = "generated";
                O = "NixOS";
                OU = "services.pki.caSpec";
              }];
            };
          };
        };
      # Merge the per-cert specs into one attrset keyed by cert name.
      in foldl' (a: x: a // genCert x) {} certs;
  };
|
||||
in
{
  options.features.certs = {
    enable = mkEnableOption "Enable local certificate generation";

    caBundle = mkOption {
      # Was `types.path` with `default = null`: null is not a member of
      # types.path, so evaluating the option unset failed type-checking.
      # nullOr makes the null default valid while still accepting paths.
      type = types.nullOr types.path;
      default = null;
      description = "Directory containing ca.pem and ca-key.pem used to seed cfssl.";
    };

    # Certificates to issue; each entry describes one cert.
    certs = mkOption {
      type = types.listOf types.attrs;
      default = [];
      description = ''
        [{
          name="example";
          SANs = [ "www.example.com" "10.0.0.1" ];
          owner = "nginx";
          group = "nginx";
        }]
      '';
    };
  };

  # All effects are gated on the enable flag.
  config = mkIf cfg.enable (mkMerge [ configuration ]);
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user