---
# ConfigMap holding download.sh, the script that the "norkyst" CronJob
# defined further down mounts under /scripts and runs with bash.
apiVersion: v1
kind: ConfigMap
metadata:
  name: norkyst-script
  namespace: cron
data:
  download.sh: |
    #!/usr/bin/env bash
    # this script downloads files from:
    # https://thredds.met.no/thredds/catalog/fou-hi/new_norkyst800m/norkyst_v3_test/his/catalog.html

    # safe bash settings
    set -euf -o pipefail

    JOBS=8 # parallel downloads

    # define start and end dates (YYYY-MM-DD)
    start_date=$(date -d "yesterday" +%Y-%m-%d)
    end_date=$(date -d "yesterday" +%Y-%m-%d)

    # check if thredds is reachable before attempting any downloads
    if ! wget --spider --quiet "https://thredds.met.no/thredds/catalog/fou-hi/new_norkyst800m/norkyst_v3_test/his/catalog.html"; then
      echo "thredds.met.no is unreachable, aborting"
      exit 1
    fi

    # function to print stuff in red
    red() {
      printf "\e[31m%s\e[0m" "$1"
    }

    # download the file for one day (argument: YYYY-MM-DD) into
    # /data/hdd/data/norkyst/<year>/<month>/ unless it is already there.
    # returns non-zero when the download itself fails, so the pipeline at
    # the bottom of the script (and with it the whole job) fails as well.
    download_day() {
      local current_date="$1"
      local year month day file_name target_file_name partial_file_name url

      year=$(date -d "${current_date}" +%Y)
      month=$(date -d "${current_date}" +%m)
      day=$(date -d "${current_date}" +%d)

      mkdir -p "/data/hdd/data/norkyst/${year}/${month}"

      file_name="norkyst800_his_sdepth_${year}${month}${day}T00Z_m00_AN.nc"
      target_file_name="/data/hdd/data/norkyst/${year}/${month}/${file_name}"
      partial_file_name="${target_file_name}.part"
      url="https://thredds.met.no/thredds/fileServer/fou-hi/new_norkyst800m/norkyst_v3_test/his/${year}/${month}/${day}/${file_name}"

      if [[ ! -f "${target_file_name}" ]]; then
        if wget --spider --quiet "${url}"; then
          echo "downloading ${url}"
          # wget -O creates/truncates its output file right away, so a failed
          # download would leave an empty or partial file behind. download to
          # a .part file and rename only on success; otherwise the next run
          # would find the broken file and report "already exists locally".
          if wget --tries=5 --waitretry=60 -O "${partial_file_name}" "${url}"; then
            mv -f "${partial_file_name}" "${target_file_name}"
          else
            rm -f "${partial_file_name}"
            echo "${url} $(red 'download failed')"
            return 1
          fi
        else
          echo "${target_file_name} $(red 'not found on server')"
        fi
      else
        echo "${target_file_name} already exists locally"
      fi
    }
    # exported so the shells started by parallel can call them
    export -f download_day red

    # emit one date per line from start_date to end_date (inclusive) and let
    # parallel run download_day for each of them, ${JOBS} at a time
    current_date=$(date -d "${start_date}" +%Y-%m-%d)
    while [[ "${current_date}" < "${end_date}" || "${current_date}" == "${end_date}" ]]; do
      echo "${current_date}"
      current_date=$(date -d "${current_date} + 1 day" +%Y-%m-%d)
    done | parallel -j "${JOBS}" download_day
---
# CronJob that installs the required tools and runs download.sh once a day.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: norkyst
  namespace: cron
spec:
  # Every day at 13:00, use https://crontab.guru
  schedule: "0 13 * * *"
  concurrencyPolicy: "Allow"
  successfulJobsHistoryLimit: 10
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 3
      template:
        spec:
          restartPolicy: "Never"
          containers:
            - name: cronpod
              image: juselius/busynix:1.1
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                # Install the tools the script needs, then run it. On success
                # hand the data directory to uid/gid 10000 and make it
                # group-writable; on failure wait 30 minutes before exiting
                # non-zero so the retry does not start immediately.
                - |
                  nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash nixpkgs.parallel
                  if bash /scripts/download.sh; then
                    chown -R 10000:10000 /data/hdd/data/norkyst
                    chmod -R g+w /data/hdd/data/norkyst
                  else
                    echo "Job failed, sleeping 30 minutes before retry..."
                    sleep 1800
                    exit 1
                  fi
              resources: {}
              volumeMounts:
                - name: data
                  mountPath: /data
                - name: script
                  mountPath: /scripts
              securityContext: {}
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: ekman-data
            - name: script
              configMap:
                name: norkyst-script
                # integer field: keep it unquoted (octal notation, rwxr-xr-x)
                defaultMode: 0755