Files
manifests/raw/ekman/cronjobs/norkyst/cron.yaml
T

114 lines
3.6 KiB
YAML

# ConfigMap carrying download.sh. The norkyst CronJob in this file mounts it
# at /scripts and runs it with `bash /scripts/download.sh`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: norkyst-script
  namespace: cron
data:
  download.sh: |
    #!/usr/bin/env bash
    # this script downloads files from:
    # https://thredds.met.no/thredds/catalog/fou-hi/new_norkyst800m/norkyst_v3_test/his/catalog.html
    # into /data/hdd/data/norkyst/<year>/<month>/

    # safe bash settings
    set -euf -o pipefail

    JOBS=8 # parallel downloads

    # define start and end dates (YYYY-MM-DD); both are "yesterday", so a
    # run covers exactly one day unless these are edited
    start_date=$(date -d "yesterday" +%Y-%m-%d)
    end_date=$(date -d "yesterday" +%Y-%m-%d)

    # check if thredds is reachable before attempting any downloads
    if ! wget --spider --quiet "https://thredds.met.no/thredds/catalog/fou-hi/new_norkyst800m/norkyst_v3_test/his/catalog.html"; then
      echo "thredds.met.no is unreachable, aborting"
      exit 1
    fi

    # red TEXT
    # print TEXT in red, without a trailing newline
    red() {
      printf "\e[31m%s\e[0m" "$1"
    }

    # download_day YYYY-MM-DD
    # Fetch the file for the given day unless it already exists locally.
    # Returns wget's exit status when the transfer fails. A file that is
    # missing on the server is only reported and does not count as a failure.
    download_day() {
      local current_date="$1"
      local year month day file_name target_file_name partial_file_name url rc

      year=$(date -d "${current_date}" +%Y)
      month=$(date -d "${current_date}" +%m)
      day=$(date -d "${current_date}" +%d)

      mkdir -p "/data/hdd/data/norkyst/${year}/${month}"

      file_name="norkyst800_his_sdepth_${year}${month}${day}T00Z_m00_AN.nc"
      target_file_name="/data/hdd/data/norkyst/${year}/${month}/${file_name}"
      partial_file_name="${target_file_name}.part"
      url="https://thredds.met.no/thredds/fileServer/fou-hi/new_norkyst800m/norkyst_v3_test/his/${year}/${month}/${day}/${file_name}"

      if [[ ! -f "${target_file_name}" ]]; then
        if wget --spider --quiet "${url}"; then
          echo "downloading ${url}"
          # wget -O creates the output file before any data arrives, so a
          # failed or interrupted transfer written straight to the target
          # would leave a truncated file that the existence check above
          # accepts on every later run. Download under a temporary name and
          # rename only once wget has succeeded.
          if wget --tries=5 --waitretry=60 -O "${partial_file_name}" "${url}"; then
            mv -f "${partial_file_name}" "${target_file_name}"
          else
            rc=$?
            rm -f "${partial_file_name}"
            echo "${url} $(red 'download failed')"
            return "${rc}"
          fi
        else
          echo "${target_file_name} $(red 'not found on server')"
        fi
      else
        echo "${target_file_name} already exists locally"
      fi
    }

    # make the functions visible to the shells started by parallel
    export -f download_day red

    # emit one date per line from start_date to end_date (inclusive) and
    # hand each one to download_day, at most JOBS at a time
    current_date=$(date -d "${start_date}" +%Y-%m-%d)
    while [[ "${current_date}" < "${end_date}" || "${current_date}" == "${end_date}" ]]; do
      echo "${current_date}"
      current_date=$(date -d "${current_date} + 1 day" +%Y-%m-%d)
    done | parallel -j "${JOBS}" download_day
---
# CronJob that installs the required tools in a busynix container and runs
# /scripts/download.sh from the norkyst-script ConfigMap against the
# ekman-data volume mounted at /data.
apiVersion: batch/v1
kind: CronJob
metadata:
  namespace: cron
  name: norkyst
spec:
  # Everyday at 13:00, use https://crontab.guru
  schedule: "0 13 * * *"
  concurrencyPolicy: Allow
  failedJobsHistoryLimit: 3
  successfulJobsHistoryLimit: 10
  jobTemplate:
    spec:
      backoffLimit: 3
      template:
        spec:
          restartPolicy: Never
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: ekman-data
            - name: script
              configMap:
                name: norkyst-script
                # must stay an unquoted integer; presumably read as octal
                # by the cluster's YAML parser - confirm before changing
                defaultMode: 0755
          containers:
            - name: cronpod
              image: juselius/busynix:1.1
              imagePullPolicy: IfNotPresent
              securityContext: {}
              resources: {}
              volumeMounts:
                - mountPath: /data
                  name: data
                - mountPath: /scripts
                  name: script
              # Install the tools, then run the download script. On success
              # the data directory is handed to uid/gid 10000 and made
              # group-writable; on failure the container waits 30 minutes
              # before exiting non-zero.
              command:
                - /bin/sh
                - -c
                - |
                  nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash nixpkgs.parallel
                  if bash /scripts/download.sh; then
                    chown -R 10000:10000 /data/hdd/data/norkyst
                    chmod -R g+w /data/hdd/data/norkyst
                  else
                    echo "Job failed, sleeping 30 minutes before retry..."
                    sleep 1800
                    exit 1
                  fi