"""Copy daily AROME ``meps_det_sfc`` files from thredds.met.no to local disk.

For every day from ``START_DATE`` up to today the remote dataset is opened,
the variables listed in ``AROME_VARIABLES`` are copied into a local NetCDF
file below ``outdir``, and days that are already present locally are skipped.
"""
import os
from datetime import datetime, timedelta

# URL template; the YEAR, MONTH and DAY placeholders are substituted per date.
fname = "https://thredds.met.no/thredds/dodsC/meps25epsarchive/YEAR/MONTH/DAY/meps_det_sfc_YEARMONTHDAYT00Z.ncml"
outdir = "/data/hdd/data/AROME"

# First day to fetch (YYYY-MM-DD).
START_DATE = "2026-04-24"

# Variables copied from the remote dataset; every other variable is dropped.
AROME_VARIABLES = frozenset([
    "time",
    "longitude",
    "latitude",
    "land_area_fraction",
    "air_temperature_2m",
    "precipitation_amount_acc",
    "water_evaporation_amount",
    "relative_humidity_2m",
    "integral_of_surface_downwelling_longwave_flux_in_air_wrt_time",
    "integral_of_surface_net_downward_shortwave_flux_wrt_time",
    "air_pressure_at_sea_level",
    "x_wind_10m",
    "y_wind_10m",
])


def _parse_date(text):
    """Turn a ``YYYY-MM-DD`` string into a ``datetime`` at midnight."""
    parts = text.split("-")
    return datetime(int(parts[0]), int(parts[1]), int(parts[2]))


def generate_thredds_names(start, stop):
    """Return one remote URL per day in the inclusive range ``start``..``stop``.

    Args:
        start: First day, formatted ``YYYY-MM-DD``.
        stop: Last day, formatted ``YYYY-MM-DD``.

    Returns:
        A list of URLs built from the ``fname`` template, in chronological
        order. The list is empty when ``stop`` lies before ``start``.
    """
    first = _parse_date(start)
    day_count = (_parse_date(stop) - first).days + 1

    urls = []
    for offset in range(day_count):
        day = first + timedelta(days=offset)
        url = fname.replace("YEAR", str(day.year))
        url = url.replace("MONTH", f"{day.month:02d}")
        url = url.replace("DAY", f"{day.day:02d}")
        urls.append(url)
    return urls


def copy_thredds_file(threddsFile, savename):
    """Copy the variables in ``AROME_VARIABLES`` from a remote dataset to disk.

    The data is written to ``savename + ".part"`` and renamed to ``savename``
    only after both datasets were closed cleanly, so a failed copy never
    leaves a truncated file at the final path.

    Args:
        threddsFile: URL (or path) of the source dataset.
        savename: Path of the NetCDF file to create.

    Raises:
        Whatever netCDF4 raises when opening, reading or writing fails; the
        partial output file is removed before the exception propagates.
    """
    # Imported here so the date helpers stay importable without netCDF4.
    from netCDF4 import Dataset

    partial = savename + ".part"
    try:
        # The context managers close both datasets even when the copy fails.
        with Dataset(threddsFile) as dsin, Dataset(partial, "w") as dsout:
            for dname, the_dim in dsin.dimensions.items():
                size = None if the_dim.isunlimited() else len(the_dim)
                dsout.createDimension(dname, size)

            for v_name, varin in dsin.variables.items():
                if v_name not in AROME_VARIABLES:
                    continue

                # _FillValue has to be passed at creation time; it cannot be
                # set afterwards like the other attributes.
                fill_value = getattr(varin, "_FillValue", None)
                out_var = dsout.createVariable(
                    v_name, varin.datatype, varin.dimensions,
                    fill_value=fill_value,
                )
                out_var.setncatts({
                    key: varin.getncattr(key)
                    for key in varin.ncattrs()
                    if key != "_FillValue"
                })
                out_var[:] = varin[:]
    except BaseException:
        # Also runs on Ctrl-C, so an interrupted copy leaves nothing behind.
        try:
            os.remove(partial)
        except FileNotFoundError:
            pass
        raise
    os.replace(partial, savename)


def _local_name(url):
    """Map a remote URL to its target path inside ``outdir`` (always ``.nc``)."""
    stem = url.split("/")[-1].split(".")[0]
    return os.path.join(outdir, stem + ".nc")


def main():
    """Fetch every missing day between ``START_DATE`` and today."""
    # Fail fast on a broken environment instead of reporting each file as
    # "not found" from the per-file error handler below.
    import netCDF4  # noqa: F401

    os.makedirs(outdir, exist_ok=True)

    today = datetime.today().strftime("%Y-%m-%d")
    for url in generate_thredds_names(START_DATE, today):
        savename = _local_name(url)
        print(savename)

        # Completed files are only ever created by an atomic rename, so an
        # existing file is a finished download and need not be fetched again.
        if os.path.exists(savename):
            print(savename + " already exists locally")
            continue

        # Some days are only published under the alternative "2_5km" .nc name.
        fallback = url.replace("sfc", "2_5km").replace("ncml", "nc")
        last_error = None
        for candidate in (url, fallback):
            try:
                copy_thredds_file(candidate, savename)
                break
            except Exception as err:  # best effort: keep going with next day
                last_error = err
        else:
            print("File not found: " + fallback + " (" + repr(last_error) + ")")


if __name__ == "__main__":
    main()
#!/usr/bin/env bash

# Download the daily norshelf_qck_an_<YYYYMMDD>T00Z.nc files listed under
# https://thredds.met.no/thredds/catalog/sea_norshelf_files/YYYY/catalog.html
# for every day from start_date up to today. Files that already exist locally
# are skipped.

# safe bash settings
set -euf -o pipefail

# define start and end dates (YYYY-MM-DD)
start_date="2026-03-01"
end_date=$(date +%Y-%m-%d)

data_root="/data/hdd/data/norshelf/sea_norshelf_files"
base_url="https://thredds.met.no/thredds/fileServer/sea_norshelf_files"

# Downloads go to "<target>.part" first and are renamed once wget succeeded.
# Without this, an aborted transfer leaves a truncated file at the final path
# and every later run skips it as "already exists locally".
partial_file=""
cleanup() {
  [[ -z "${partial_file}" ]] || rm -f "${partial_file}"
}
trap cleanup EXIT

# check if thredds is reachable before attempting any downloads
if ! wget --spider --quiet "https://thredds.met.no/thredds/catalog/sea_norshelf_files/catalog.html"; then
  echo "thredds.met.no is unreachable, aborting"
  exit 1
fi

# function to print stuff in red
red() {
  printf "\e[31m%s\e[0m" "$1"
}

# Normalise start_date; ISO dates compare correctly as plain strings.
current_date=$(date -d "${start_date}" +%Y-%m-%d)
while [[ ! "${current_date}" > "${end_date}" ]]; do
  IFS=- read -r year month day <<< "${current_date}"

  target_dir="${data_root}/${year}/${month}"
  mkdir -p "${target_dir}"

  file_name="norshelf_qck_an_${year}${month}${day}T00Z.nc"
  target_file_name="${target_dir}/${file_name}"
  url="${base_url}/${year}/${month}/${file_name}"

  if [[ -f "${target_file_name}" ]]; then
    echo "${target_file_name} already exists locally"
  elif wget --spider --quiet "${url}"; then
    echo "downloading ${url}"
    partial_file="${target_file_name}.part"
    # A failing wget aborts the script (set -e); the EXIT trap then removes
    # the partial file so the day is retried on the next run.
    wget -O "${partial_file}" "${url}"
    mv "${partial_file}" "${target_file_name}"
    partial_file=""
  else
    echo "${target_file_name} $(red 'not found on server')"
  fi

  # move to next day
  current_date=$(date -d "${current_date} + 1 day" +%Y-%m-%d)
done
data + rootPath: / + staticVolume: "true" + volumeHandle: pv-ekman-data + persistentVolumeReclaimPolicy: Retain + volumeMode: Filesystem