# Source listing metadata: 114 lines, 3.6 KiB, YAML
---
# ConfigMap that carries download.sh. The `norkyst` CronJob in this file mounts
# it under /scripts and runs it with bash.
apiVersion: v1
kind: ConfigMap
metadata:
  name: norkyst-script
  namespace: cron
data:
  download.sh: |
    #!/usr/bin/env bash

    # this script downloads files from:
    # https://thredds.met.no/thredds/catalog/fou-hi/new_norkyst800m/norkyst_v3_test/his/catalog.html

    # safe bash settings
    set -euf -o pipefail

    JOBS=8 # parallel downloads

    # catalog page, used only as a reachability probe
    catalog_url="https://thredds.met.no/thredds/catalog/fou-hi/new_norkyst800m/norkyst_v3_test/his/catalog.html"

    # define start and end dates (YYYY-MM-DD), both inclusive
    start_date=$(date -d "yesterday" +%Y-%m-%d)
    end_date=$(date -d "yesterday" +%Y-%m-%d)

    # check if thredds is reachable before attempting any downloads
    if ! wget --spider --quiet "${catalog_url}"; then
      echo "thredds.met.no is unreachable, aborting"
      exit 1
    fi

    # Print $1 in red, without a trailing newline.
    red() {
      printf "\e[31m%s\e[0m" "$1"
    }

    # Download the file for a single day unless it is already present locally.
    #   $1  the day to fetch, in a form `date -d` understands (the loop below
    #       passes YYYY-MM-DD)
    # Returns 0 when the file already exists, when it was downloaded, or when
    # the server does not offer it; returns 1 when the download itself fails.
    # Invoked through parallel (hence the `export -f` below), so it relies only
    # on its argument and on the exported `red` function.
    download_day() {
      local current_date="$1"
      local year month day target_dir file_name target_file_name partial_file_name url
      year=$(date -d "${current_date}" +%Y)
      month=$(date -d "${current_date}" +%m)
      day=$(date -d "${current_date}" +%d)

      target_dir="/data/hdd/data/norkyst/${year}/${month}"
      mkdir -p "${target_dir}"

      file_name="norkyst800_his_sdepth_${year}${month}${day}T00Z_m00_AN.nc"
      target_file_name="${target_dir}/${file_name}"
      partial_file_name="${target_file_name}.part"
      url="https://thredds.met.no/thredds/fileServer/fou-hi/new_norkyst800m/norkyst_v3_test/his/${year}/${month}/${day}/${file_name}"

      if [[ -f "${target_file_name}" ]]; then
        echo "${target_file_name} already exists locally"
        return 0
      fi

      # NOTE(review): a file that is missing on the server is only reported and
      # does not fail the job, so it is not retried — confirm this is intended.
      if ! wget --spider --quiet "${url}"; then
        echo "${target_file_name} $(red 'not found on server')"
        return 0
      fi

      echo "downloading ${url}"
      # Download to a side file and rename only on success. `wget -O` creates
      # the output file even when the transfer fails, and a partial file at the
      # final path would pass the "already exists locally" check on every later
      # run and never be fetched again.
      if wget --tries=5 --waitretry=60 -O "${partial_file_name}" "${url}"; then
        mv -- "${partial_file_name}" "${target_file_name}"
      else
        rm -f -- "${partial_file_name}"
        echo "${target_file_name} $(red 'download failed')"
        return 1
      fi
    }

    # make the functions visible to the shells started by parallel
    export -f download_day red

    # emit one date per line from start_date through end_date and hand each
    # line to download_day, running at most JOBS downloads at a time
    current_date=$(date -d "${start_date}" +%Y-%m-%d)
    while [[ ! "${current_date}" > "${end_date}" ]]; do
      echo "${current_date}"
      current_date=$(date -d "${current_date} + 1 day" +%Y-%m-%d)
    done | parallel -j "${JOBS}" download_day
---
# CronJob that installs the required tools with nix-env, runs
# /scripts/download.sh from the norkyst-script ConfigMap, and on success
# resets ownership and group write access on /data/hdd/data/norkyst.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: norkyst
  namespace: cron
spec:
  # Every day at 13:00; https://crontab.guru helps with the syntax.
  schedule: "0 13 * * *"
  concurrencyPolicy: Allow
  successfulJobsHistoryLimit: 10
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 3
      template:
        spec:
          restartPolicy: Never
          containers:
            - name: cronpod
              image: juselius/busynix:1.1
              imagePullPolicy: IfNotPresent
              # On failure the container waits 30 minutes before exiting
              # non-zero, which spaces out the attempts allowed by backoffLimit.
              command:
                - /bin/sh
                - -c
                - |
                  nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash nixpkgs.parallel
                  if bash /scripts/download.sh; then
                    chown -R 10000:10000 /data/hdd/data/norkyst
                    chmod -R g+w /data/hdd/data/norkyst
                  else
                    echo "Job failed, sleeping 30 minutes before retry..."
                    sleep 1800
                    exit 1
                  fi
              resources: {}
              volumeMounts:
                - name: data
                  mountPath: /data
                - name: script
                  mountPath: /scripts
              # NOTE(review): the original nesting level (container vs. pod) of
              # this empty securityContext could not be recovered — confirm.
              securityContext: {}
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: ekman-data
            - name: script
              configMap:
                name: norkyst-script
                # Octal file mode (493 decimal). Keep it unquoted: the API
                # expects an integer here.
                defaultMode: 0755