# Review notes. These lines sit before the first "diff --git" header, where patch tools skip leading non-diff text.
# cluster.nix: imports ./nodes.nix, enables etcd1/etcd2, comments out the ibp1s0 udev rule and the
#   serial-getty@ttyS0 unit, moves the hpc/slurm/beegfs settings plus the /opt and /data NFS mounts into
#   `compute` and the k8s settings into `k8s-node`; both are merged with mkIf on the new
#   options.cluster.compute / options.cluster.k8sNode. The myvnc script and options.node.myvnc are removed.
# default.nix: adds a `cluster` binding, spells out features.host as name/address, drops the empty `node` set.
# hosts.nix: replaces the 10.255.240.x entries with a 10.255.241.x table and a 10.255.243.x (ib*-named) table.
# Corrected relative to the previous revision of this patch:
#   1. the FQDN aliases on the c0-11 and ib0-11 rows read c0-1.cluster.local / ib0-1.cluster.local,
#      duplicating the .101 rows and leaving c0-11.cluster.local / ib0-11.cluster.local undefined;
#   2. the added (commented-out) grafana_smtp_password line no longer repeats the password in plaintext.
# NOTE(review): the password is still visible in the removed ("-") line, which has to match the old file;
#   treat the credential as exposed and rotate it.
diff --git a/nixops/ekman/cluster.nix b/nixops/ekman/cluster.nix index d9e6aba..ef0c2b7 100644 --- a/nixops/ekman/cluster.nix +++ b/nixops/ekman/cluster.nix @@ -3,14 +3,16 @@ with lib; let cfg = config.features.host; + nodes = import ./nodes.nix; + etcdCluster = { enable = true; existing = true; nodes = { etcd0 = "10.255.241.100"; - # etcd1 = "10.255.241.80"; - # etcd2 = "10.255.241.81"; + etcd1 = "10.255.241.90"; + etcd2 = "10.255.241.80"; }; }; @@ -38,9 +40,9 @@ let # ]; }; - services.udev.extraRules = '' - KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x3666" - ''; + # services.udev.extraRules = '' + # KERNEL=="ibp1s0", SUBSYSTEM=="net", ATTR{create_child}:="0x3666" + # ''; console = { font = "Lat2-Terminus16"; @@ -102,68 +104,8 @@ let docker.enable = false; }; cachix.enable = false; - monitoring.nodeExporter.enable = false; - pki = { ca = ./ca; }; - - hpc = { - enable = true; - slurm = { - client = true; - mungeKey = ./munge.key; - mungeUid = 995; # hack - controlMachine = "ekman"; - nodeName = [ - "c0-[1-2] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN" - "ekman Sockets=2 CoresPerSocket=64 ThreadsPerCore=2 RealMemory=256000 TmpDisk=500000 State=UNKNOWN" - ]; - partitionName = [ - "batch Nodes=c0-[1-2] Default=YES MaxTime=INFINITE State=UP" - "frontend Nodes=ekman MaxTime=1:00:00 State=UP" - ]; - }; - beegfs = { - enable = false; - beegfs = { - bee0-0 = { - mgmtdHost = "mds1-0"; - connAuthFile = "/etc/beegfs/connauthfile"; - client = { - enable = false; - mountPoint = "/work"; - }; - }; - }; - }; - }; - - k8s = { - enable = true; - node.enable = true; - clusterName = "ekman"; - initca = ./ca; - cidr = "10.100.0.0/16"; - master = { - name = "ekman"; - address = "10.255.241.100"; - extraSANs = [ "ekman.local" "ekman.oceanbox.io" ]; - }; - ingressNodes = [ - "ekman.oceanbox.io" - ]; - fileserver = "mds1-0"; - charts = { - acme_email = "innovasjon@itpartner.no"; - grafana_smtp_user = "utvikling"; - 
grafana_smtp_password = "S0m3rp0m@de#21!"; - }; - }; - }; - - services.kubernetes.kubelet.extraSANs = mkSANs { - name = cfg.name; - address = cfg.address; }; networking = { @@ -177,22 +119,9 @@ let ''; }; - fileSystems = { - # "/opt" = { - # device = "10.255.241.81:/opt"; - # fsType = "nfs"; - # options = [ "soft" "rdma" "defaults" "vers=4.2" ]; - # }; - # "/data" = { - # device = "255.241.81:/data"; - # fsType = "nfs"; - # options = [ "soft" "rdma" "defaults" "vers=4.2" ]; - # }; - }; - environment.variables = {}; - systemd.services."serial-getty@ttyS0".enable = true; + # systemd.services."serial-getty@ttyS0".enable = true; environment.etc."beegfs/connauthfile" = { source = ./connauthfile; @@ -210,6 +139,86 @@ let }; }; + compute = { + features = { + hpc = { + enable = true; + slurm = { + client = true; + mungeKey = ./munge.key; + mungeUid = 996; # hack + controlMachine = "ekman"; + nodeName = [ + "c0-[1-2] Sockets=2 CoresPerSocket=64 ThreadsPerCore=1 RealMemory=256000 TmpDisk=500000 State=UNKNOWN" + "ekman Sockets=2 CoresPerSocket=64 ThreadsPerCore=2 RealMemory=256000 TmpDisk=500000 State=UNKNOWN" + ]; + partitionName = [ + "batch Nodes=c0-[1-2] Default=YES MaxTime=INFINITE State=UP" + "frontend Nodes=ekman MaxTime=1:00:00 State=UP" + ]; + }; + beegfs = { + enable = false; + beegfs = { + bee0-0 = { + mgmtdHost = "beegfs0"; + connAuthFile = "/etc/beegfs/connauthfile"; + client = { + enable = false; + mountPoint = "/work"; + }; + }; + }; + }; + }; + }; + + fileSystems = { + "/opt" = { + device = "10.255.243.80:/opt"; + fsType = "nfs"; + options = [ "soft" "rdma" "defaults" "vers=4.2" ]; + }; + "/data" = { + device = "10.255.243.80:/data"; + fsType = "nfs"; + options = [ "soft" "rdma" "defaults" "vers=4.2" ]; + }; + }; + }; + + k8s-node = { + features = { + k8s = { + enable = true; + node.enable = true; + clusterName = "ekman"; + inherit etcdCluster; + initca = ./ca; + cidr = "10.100.0.0/16"; + master = { + name = "ekman"; + address = "10.255.241.100"; + extraSANs = 
[ "ekman.local" "ekman.oceanbox.io" ]; + }; + ingressNodes = [ + "ekman.oceanbox.io" + ]; + fileserver = "mds1-0"; + charts = { + acme_email = "innovasjon@itpartner.no"; + # grafana_smtp_user = "utvikling"; + # grafana_smtp_password = "REDACTED"; + }; + }; + }; + + services.kubernetes.kubelet.extraSANs = mkSANs { + name = cfg.name; + address = cfg.address; + }; + }; + shosts = { environment.etc."ssh/shosts.equiv" = { mode = "0644"; @@ -217,8 +226,8 @@ let gid = 0; text = '' 10.255.241.80 - 10.255.241.81 - '' + builtins.foldl' (a: x: a + "10.255.240.${toString x}\n") "" (builtins.genList (n: n + 100) 17); + 10.255.241.90 + '' + builtins.foldl' (a: x: a + "${x.address}\n") "" nodes; }; programs.ssh.knownHosts = { @@ -230,19 +239,19 @@ let }; } // builtins.foldl' (a: x: let - n = toString x; - n' = toString (x + 100); + n = toString x.idx; + addr = toString (x.idx + 100); in a // { "c0-${n}" = { hostNames = [ "c0-${n}" "c0-${n}.cluster.local" - "10.255.241.${n'}" - "10.255.243.${n'}" + "10.255.241.${addr}" + "10.255.243.${addr}" ]; - publicKeyFile = ./pubkeys/c0-1.pub; + publicKeyFile = ./pubkeys/c0-${n}.pub; }; - }) {} (builtins.genList (n: n) 16); + }) {} nodes; environment.systemPackages = [ openssh-shosts ]; @@ -261,49 +270,20 @@ let doCheck = false; # the tests take hours }); - myvnc = - let - myvnc = pkgs.writeScriptBin "myvnc" '' - #!${pkgs.runtimeShell} - - uid=`id -u` - port=$((9000+$uid)) - shell=`getent passwd $(id -un) | awk -F : '{print $NF}'` - # vnc=${pkgs.tigervnc}/bin/vncserver - vnc=/nix/store/czp2b60dwk75widi8y287hr0xx1wgv2a-tigervnc-1.10.1/bin/vncserver - - case $1 in - -p|--port) shift; port=$1 ;; - kill|stop) - display=$($vnc -list | sed -n 's/^\(:[0-9]\+\).*/\1/p'| head -1) - $vnc -kill $display - exit 0 - ;; - esac - ps ax | sed '/grep/d' | grep "Xvnc.*-rfbport $port" >/dev/null 2>&1 - [ $? = 1 ] && $vnc -rfbport $port - echo "Xvnc server is running on port $port." 
- exec $shell -i - ''; - - buildCommand = '' - mkdir -p $out/bin - echo $src > $out/bin/myvnc - chmod 755 $out/bin/myvnc - ''; - in { - environment.systemPackages = [ myvnc ]; - }; - in { - options.node = { - myvnc = mkEnableOption "Enable myvnc script"; + options.cluster = { + compute = mkEnableOption "Enable compute node configs"; + }; + + options.cluster = { + k8sNode = mkEnableOption "Enable k8s node"; }; config = mkMerge [ configuration shosts - (mkIf config.node.myvnc myvnc) + (mkIf config.cluster.compute compute) + (mkIf config.cluster.k8sNode k8s-node) ]; imports = [ diff --git a/nixops/ekman/default.nix b/nixops/ekman/default.nix index 9222333..0a01f10 100644 --- a/nixops/ekman/default.nix +++ b/nixops/ekman/default.nix @@ -8,6 +8,11 @@ let nodes = import ./nodes.nix; + cluster = { + compute = true; + k8sNode = true; + }; + compute = { # deployment.tags = [ "compute" ]; @@ -39,7 +44,10 @@ let in { "${host.name}" = { features = { - inherit host; + host = { + name = host.name; + address = host.address; + }; os.externalInterface = "enp33s0f0np0"; hpc.compute = true; # k8s = { inherit etcdCluster; }; @@ -51,9 +59,6 @@ let # KERNEL=="ibp65s0", SUBSYSTEM=="net", ATTR{create_child}:="0x2222" # ''; - node = { - }; - networking = { useDHCP = false; interfaces.enp33s0f0np0 = { diff --git a/nixops/ekman/hosts.nix b/nixops/ekman/hosts.nix index 7cbfbee..f5f6f46 100644 --- a/nixops/ekman/hosts.nix +++ b/nixops/ekman/hosts.nix @@ -1,11 +1,43 @@ '' - 10.255.240.200 ekman ekman.cluster.local - 10.255.240.200 etcd0 etcd0.cluster.local - 10.255.240.201 c0-1 c0-1.cluster.local + 10.255.241.80 nfs0 nfs0.cluster.local + 10.255.241.90 beegfs0 beegfs0.cluster.local + 10.255.241.100 ekman ekman.cluster.local + 10.255.241.100 etcd0 etcd0.cluster.local + 10.255.241.101 c0-1 c0-1.cluster.local + 10.255.241.102 c0-2 c0-2.cluster.local + 10.255.241.103 c0-3 c0-3.cluster.local + 10.255.241.104 c0-4 c0-4.cluster.local + 10.255.241.105 c0-5 c0-5.cluster.local + 10.255.241.106 c0-6 
c0-6.cluster.local + 10.255.241.107 c0-7 c0-7.cluster.local + 10.255.241.108 c0-8 c0-8.cluster.local + 10.255.241.109 c0-9 c0-9.cluster.local + 10.255.241.110 c0-10 c0-10.cluster.local + 10.255.241.111 c0-11 c0-11.cluster.local + 10.255.241.112 c0-12 c0-12.cluster.local + 10.255.241.113 c0-13 c0-13.cluster.local + 10.255.241.114 c0-14 c0-14.cluster.local + 10.255.241.115 c0-15 c0-15.cluster.local + 10.255.241.116 c0-16 c0-16.cluster.local - # 10.1.61.80 bee0-0 bee0-0.cluster.local - - # 10.1.63.101 ib0-1 ib0-1.cluster.local - - # 10.1.63.80 ibmds0-0 ibmds0-0.cluster.local + 10.255.243.80 ibnfs0 ibnfs0.cluster.local + 10.255.243.90 ibbeegfs0 ibbeegfs0.cluster.local + 10.255.243.100 ibekman ibekman.cluster.local + 10.255.243.100 ibetcd0 ibetcd0.cluster.local + 10.255.243.101 ib0-1 ib0-1.cluster.local + 10.255.243.102 ib0-2 ib0-2.cluster.local + 10.255.243.103 ib0-3 ib0-3.cluster.local + 10.255.243.104 ib0-4 ib0-4.cluster.local + 10.255.243.105 ib0-5 ib0-5.cluster.local + 10.255.243.106 ib0-6 ib0-6.cluster.local + 10.255.243.107 ib0-7 ib0-7.cluster.local + 10.255.243.108 ib0-8 ib0-8.cluster.local + 10.255.243.109 ib0-9 ib0-9.cluster.local + 10.255.243.110 ib0-10 ib0-10.cluster.local + 10.255.243.111 ib0-11 ib0-11.cluster.local + 10.255.243.112 ib0-12 ib0-12.cluster.local + 10.255.243.113 ib0-13 ib0-13.cluster.local + 10.255.243.114 ib0-14 ib0-14.cluster.local + 10.255.243.115 ib0-15 ib0-15.cluster.local + 10.255.243.116 ib0-16 ib0-16.cluster.local ''