From 6f00d54907ade38e74108bb57a07bbd26f6fd87d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20J=C3=B6rg?=
Date: Tue, 28 Apr 2026 14:44:06 +0200
Subject: [PATCH] fix(cron): Add norkyst

---
Review notes (this text sits between the "---" separator and the diffstat and
is not part of the commit):
* norkyst download_day: wget now writes to "<target>.part" and the file is
  renamed only after wget succeeds. Writing straight to the target left a
  truncated file behind after a failed transfer, and the "-f" existence check
  then skipped that day on every later run.
* norkyst CronJob: the container command now exits with the first non-zero
  status of download.sh / chown / chmod. Before, the status of the trailing
  chmod decided the result, so a failed download never reached
  restartPolicy / backoffLimit.
* The blob id on the norkyst "index" line predates these revisions.
* NOTE(review): leading whitespace and blank context lines of this patch were
  restored by hand; confirm with "git apply --check" before applying.

 raw/ekman/cronjobs/nemo/cron.yaml    |  72 ++++++++++------
 raw/ekman/cronjobs/norkyst/cron.yaml | 123 +++++++++++++++++++++++++++
 2 files changed, 167 insertions(+), 28 deletions(-)
 create mode 100644 raw/ekman/cronjobs/norkyst/cron.yaml

diff --git a/raw/ekman/cronjobs/nemo/cron.yaml b/raw/ekman/cronjobs/nemo/cron.yaml
index a475bc12..6aa126ca 100644
--- a/raw/ekman/cronjobs/nemo/cron.yaml
+++ b/raw/ekman/cronjobs/nemo/cron.yaml
@@ -22,40 +22,56 @@ data:
       echo "Running subset for $current_date to $next_date"
 
       outfile="cmems_mod_nws_phy-sal_anfc_1.5km-3D_PT1H-i_${current_date}--${next_date}.nc"
-      copernicusmarine subset \
-        --dataset-id cmems_mod_nws_phy-sal_anfc_1.5km-3D_PT1H-i \
-        -t "$current_date" \
-        -T "$next_date" \
-        -f "$outfile" \
-        -o /data/hdd/data/NEMO/
-      echo "Downloaded salt"
+      if [[ -f "/data/hdd/data/NEMO/$outfile" ]]; then
+        echo "Skipping salt (already exists)"
+      else
+        copernicusmarine subset \
+          --dataset-id cmems_mod_nws_phy-sal_anfc_1.5km-3D_PT1H-i \
+          -t "$current_date" \
+          -T "$next_date" \
+          -f "$outfile" \
+          -o /data/hdd/data/NEMO/
+        echo "Downloaded salt"
+      fi
 
       outfile="cmems_mod_nws_phy-cur_anfc_1.5km-3D_PT1H-i_${current_date}--${next_date}.nc"
-      copernicusmarine subset \
-        --dataset-id cmems_mod_nws_phy-cur_anfc_1.5km-3D_PT1H-i \
-        -t "$current_date" \
-        -T "$next_date" \
-        -f "$outfile" \
-        -o /data/hdd/data/NEMO/
-      echo "Downloaded currents"
+      if [[ -f "/data/hdd/data/NEMO/$outfile" ]]; then
+        echo "Skipping currents (already exists)"
+      else
+        copernicusmarine subset \
+          --dataset-id cmems_mod_nws_phy-cur_anfc_1.5km-3D_PT1H-i \
+          -t "$current_date" \
+          -T "$next_date" \
+          -f "$outfile" \
+          -o /data/hdd/data/NEMO/
+        echo "Downloaded currents"
+      fi
 
       outfile="cmems_mod_nws_phy-tem_anfc_1.5km-3D_PT1H-i_${current_date}--${next_date}.nc"
-      copernicusmarine subset \
-        --dataset-id cmems_mod_nws_phy-tem_anfc_1.5km-3D_PT1H-i \
-        -t "$current_date" \
-        -T "$next_date" \
-        -f "$outfile" \
-        -o /data/hdd/data/NEMO/
-      echo "Downloaded temperature"
+      if [[ -f "/data/hdd/data/NEMO/$outfile" ]]; then
+        echo "Skipping temperature (already exists)"
+      else
+        copernicusmarine subset \
+          --dataset-id cmems_mod_nws_phy-tem_anfc_1.5km-3D_PT1H-i \
+          -t "$current_date" \
+          -T "$next_date" \
+          -f "$outfile" \
+          -o /data/hdd/data/NEMO/
+        echo "Downloaded temperature"
+      fi
 
       outfile="cmems_mod_nws_phy-ssh_anfc_1.5km-2D_PT15M-i_${current_date}--${next_date}.nc"
-      copernicusmarine subset \
-        --dataset-id cmems_mod_nws_phy-ssh_anfc_1.5km-2D_PT15M-i \
-        -t "$current_date" \
-        -T "$next_date" \
-        -f "$outfile" \
-        -o /data/hdd/data/NEMO/
-      echo "Downloaded ssh"
+      if [[ -f "/data/hdd/data/NEMO/$outfile" ]]; then
+        echo "Skipping ssh (already exists)"
+      else
+        copernicusmarine subset \
+          --dataset-id cmems_mod_nws_phy-ssh_anfc_1.5km-2D_PT15M-i \
+          -t "$current_date" \
+          -T "$next_date" \
+          -f "$outfile" \
+          -o /data/hdd/data/NEMO/
+        echo "Downloaded ssh"
+      fi
 
       current_date="$next_date"
     done
diff --git a/raw/ekman/cronjobs/norkyst/cron.yaml b/raw/ekman/cronjobs/norkyst/cron.yaml
new file mode 100644
index 00000000..9027c867
--- /dev/null
+++ b/raw/ekman/cronjobs/norkyst/cron.yaml
@@ -0,0 +1,123 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: norkyst-script
+  namespace: cron
+data:
+  download.sh: |
+    #!/usr/bin/env bash
+
+    # this script downloads files from:
+    # https://thredds.met.no/thredds/catalog/fou-hi/new_norkyst800m/norkyst_v3_test/his/catalog.html
+
+    # safe bash settings
+    set -euf -o pipefail
+
+    JOBS=8 # parallel downloads
+
+    # define start and end dates (YYYY-MM-DD)
+    start_date=$(date +%Y-%m-%d)
+    end_date=$(date +%Y-%m-%d)
+
+    # check if thredds is reachable before attempting any downloads
+    if ! wget --spider --quiet "https://thredds.met.no/thredds/catalog/fou-hi/new_norkyst800m/norkyst_v3_test/his/catalog.html"; then
+      echo "thredds.met.no is unreachable, aborting"
+      exit 1
+    fi
+
+    # print $1 in red, without a trailing newline
+    red() {
+      printf "\e[31m%s\e[0m" "$1"
+    }
+
+    # download the file for one day ($1, YYYY-MM-DD) unless it already exists
+    # locally; a file that is missing on the server is only reported
+    download_day() {
+      local current_date="$1"
+      local year month day file_name target_file_name tmp_file_name url
+      year=$(date -d "${current_date}" +%Y)
+      month=$(date -d "${current_date}" +%m)
+      day=$(date -d "${current_date}" +%d)
+
+      mkdir -p "/data/hdd/data/norkyst/${year}/${month}"
+
+      file_name="norkyst800_his_sdepth_${year}${month}${day}T00Z_m00_AN.nc"
+      target_file_name="/data/hdd/data/norkyst/${year}/${month}/${file_name}"
+      tmp_file_name="${target_file_name}.part"
+      url="https://thredds.met.no/thredds/fileServer/fou-hi/new_norkyst800m/norkyst_v3_test/his/${year}/${month}/${day}/${file_name}"
+
+      if [[ ! -f "${target_file_name}" ]]; then
+        if wget --spider --quiet "${url}"; then
+          echo "downloading ${url}"
+          # wget -O creates the output file before any data arrives, so fetch
+          # into a .part file and rename on success; otherwise a failed
+          # transfer would pass the -f check above on the next run
+          if wget -O "${tmp_file_name}" "${url}"; then
+            mv "${tmp_file_name}" "${target_file_name}"
+          else
+            rm -f "${tmp_file_name}"
+            return 1
+          fi
+        else
+          echo "${target_file_name} $(red 'not found on server')"
+        fi
+      else
+        echo "${target_file_name} already exists locally"
+      fi
+    }
+
+    export -f download_day red
+
+    current_date=$(date -d "${start_date}" +%Y-%m-%d)
+    while [[ "${current_date}" < "${end_date}" || "${current_date}" == "${end_date}" ]]; do
+      echo "${current_date}"
+      current_date=$(date -d "${current_date} + 1 day" +%Y-%m-%d)
+    done | parallel -j "${JOBS}" download_day
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: norkyst
+  namespace: cron
+spec:
+  schedule: 0 13 * * * # Every day at 13:00, use https://crontab.guru
+  concurrencyPolicy: "Forbid"
+  successfulJobsHistoryLimit: 10
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      backoffLimit: 10
+      template:
+        spec:
+          restartPolicy: "OnFailure"
+          containers:
+          - name: cronpod
+            image: juselius/busynix:1.1
+            imagePullPolicy: IfNotPresent
+            command:
+            - /bin/sh
+            - -c
+            - |
+              nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash nixpkgs.parallel
+              # keep the first failure as the exit status, but still fix
+              # ownership and permissions of whatever was downloaded
+              status=0
+              bash /scripts/download.sh || status=$?
+              chown -R 5000:5000 /data/hdd/data/norkyst || status=$?
+              chmod -R g+w /data/hdd/data/norkyst || status=$?
+              exit "$status"
+            resources: {}
+            volumeMounts:
+            - name: data
+              mountPath: /data
+            - name: script
+              mountPath: /scripts
+            securityContext: {}
+          volumes:
+          - name: data
+            persistentVolumeClaim:
+              claimName: ekman-data
+          - name: script
+            configMap:
+              name: norkyst-script
+              defaultMode: 0755