Files
manifests/raw/ekman/cronjobs/norshelf/cron.yaml
T

154 lines
4.2 KiB
YAML

# ConfigMap that ships the NorShelf download script; the CronJob in this file
# mounts it at /scripts and runs it with bash.
apiVersion: v1
kind: ConfigMap
metadata:
  name: norshelf-script
  namespace: cron
data:
  download.sh: |
    #!/usr/bin/env bash
    # this script downloads files from:
    # https://thredds.met.no/thredds/catalog/sea_norshelf_files/YYYY/catalog.html
    #
    # It walks every day from start_date up to and including today and fetches
    # the daily file for each day that is not yet present locally.

    # safe bash settings
    set -euf -o pipefail

    # define start and end dates (YYYY-MM-DD)
    start_date="2026-03-01"
    end_date=$(date +%Y-%m-%d)

    # check if thredds is reachable before attempting any downloads
    if ! wget --spider --quiet "https://thredds.met.no/thredds/catalog/sea_norshelf_files/catalog.html"; then
      echo "thredds.met.no is unreachable, aborting"
      exit 1
    fi

    # function to print stuff in red
    red() {
      printf "\e[31m%s\e[0m" "$1"
    }

    current_date=$(date -d "${start_date}" +%Y-%m-%d)

    # ISO dates (YYYY-MM-DD) sort lexicographically, so a string comparison
    # is enough to express "current_date <= end_date".
    while [[ "${current_date}" < "${end_date}" || "${current_date}" == "${end_date}" ]]; do
      year=$(date -d "${current_date}" +%Y)
      month=$(date -d "${current_date}" +%m)
      day=$(date -d "${current_date}" +%d)

      mkdir -p "/data/hdd/data/norshelf/sea_norshelf_files/${year}/${month}"

      file_name="norshelf_qck_an_${year}${month}${day}T00Z.nc"
      target_file_name="/data/hdd/data/norshelf/sea_norshelf_files/${year}/${month}/${file_name}"
      url="https://thredds.met.no/thredds/fileServer/sea_norshelf_files/${year}/${month}/${file_name}"

      if [[ ! -f "${target_file_name}" ]]; then
        if wget --spider --quiet "${url}"; then
          echo "downloading ${url}"
          # Download to a temporary name and rename only on success. wget -O
          # creates the output file even when the transfer fails, so writing
          # directly to the final path would leave a truncated file that the
          # "-f" check above would treat as complete on every later run.
          partial_file_name="${target_file_name}.part"
          if wget --tries=5 --waitretry=60 -O "${partial_file_name}" "${url}"; then
            mv -f "${partial_file_name}" "${target_file_name}"
          else
            rm -f "${partial_file_name}"
            echo "${url} $(red 'download failed')" >&2
            # abort so the job is marked failed and retried
            exit 1
          fi
        else
          echo "${target_file_name} $(red 'not found on server')"
        fi
      else
        echo "${target_file_name} already exists locally"
      fi

      # move to next day
      current_date=$(date -d "${current_date} + 1 day" +%Y-%m-%d)
    done
---
# CronJob that runs /scripts/download.sh (from the norshelf-script ConfigMap)
# once a day and then fixes ownership/permissions of the downloaded data.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: norshelf
  namespace: cron
spec:
  # Every day at 13:00; see https://crontab.guru
  schedule: "0 13 * * *"
  # Forbid: never run two jobs at once; a new run is skipped while the
  # previous one is still active. Use Allow to permit overlapping runs.
  concurrencyPolicy: "Forbid"
  successfulJobsHistoryLimit: 10
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 3
      template:
        spec:
          restartPolicy: "Never"
          containers:
            - name: cronpod
              image: juselius/busynix:1.1
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                # Install the tools the script needs, run it, and on success
                # hand the data to uid/gid 5000 with group write access. On
                # failure, wait 30 minutes before exiting non-zero so that the
                # Job's retry (backoffLimit) does not fire immediately.
                - |
                  nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash
                  if bash /scripts/download.sh; then
                    chown -R 5000:5000 /data/hdd/data/norshelf
                    chmod -R g+w /data/hdd/data/norshelf
                  else
                    echo "Job failed, sleeping 30 minutes before retry..."
                    sleep 1800
                    exit 1
                  fi
              resources: {}
              volumeMounts:
                - name: data
                  mountPath: /data
                - name: script
                  mountPath: /scripts
          # NOTE(review): empty security context; placed at pod level here,
          # confirm the intended level (pod vs. container) against the repo.
          securityContext: {}
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: ekman-data
            - name: script
              configMap:
                name: norshelf-script
                # octal file mode (rwxr-xr-x) for the mounted script
                defaultMode: 0755
---
# Claim for the shared data volume, bound explicitly to the pre-created
# PersistentVolume "pv-ekman-data" via volumeName.
# The server-populated "status" stanza is intentionally not kept in the
# manifest: it is written by the control plane, not by the applied spec.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ekman-data
  namespace: cron
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  # Empty storage class: bind to the named static volume instead of
  # triggering dynamic provisioning.
  storageClassName: ""
  volumeMode: Filesystem
  volumeName: pv-ekman-data
---
# Statically defined PersistentVolume served by the Rook CephFS CSI driver
# and reserved for the "ekman-data" claim in the "cron" namespace.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-ekman-data
spec:
  # Size, mode and access
  capacity:
    storage: 1Gi
  volumeMode: Filesystem
  accessModes: [ReadWriteMany]
  # Keep the volume (and its data) when the claim is deleted.
  persistentVolumeReclaimPolicy: Retain

  # Pre-bind this volume to exactly one claim.
  claimRef:
    apiVersion: v1
    kind: PersistentVolumeClaim
    namespace: cron
    name: ekman-data

  # CSI backend
  csi:
    driver: rook-ceph.cephfs.csi.ceph.com
    volumeHandle: pv-ekman-data
    nodeStageSecretRef:
      namespace: rook-ceph
      name: rook-csi-cephfs-node
    volumeAttributes:
      staticVolume: "true"
      clusterID: rook-ceph
      fsName: data
      rootPath: /