154 lines
4.2 KiB
YAML
154 lines
4.2 KiB
YAML
# ConfigMap carrying the NorShelf download script; the `norshelf` CronJob
# mounts it under /scripts and runs it with bash.
apiVersion: v1
kind: ConfigMap
metadata:
  name: norshelf-script
  namespace: cron
data:
  download.sh: |
    #!/usr/bin/env bash

    # Downloads the daily NorShelf files (norshelf_qck_an_YYYYMMDDT00Z.nc) from
    #   https://thredds.met.no/thredds/catalog/sea_norshelf_files/YYYY/catalog.html
    # for every day from start_date up to and including today, skipping files
    # that already exist locally. Requires GNU date (`date -d`) and wget.

    # safe bash settings: abort on errors, unset variables and failed pipeline
    # stages; -f disables globbing
    set -euf -o pipefail

    # define start and end dates (YYYY-MM-DD)
    start_date="2026-03-01"
    end_date=$(date +%Y-%m-%d)

    # local directory and remote base URL; both use a YYYY/MM sub-layout
    data_root="/data/hdd/data/norshelf/sea_norshelf_files"
    base_url="https://thredds.met.no/thredds/fileServer/sea_norshelf_files"

    # check if thredds is reachable before attempting any downloads
    if ! wget --spider --quiet "https://thredds.met.no/thredds/catalog/sea_norshelf_files/catalog.html"; then
      echo "thredds.met.no is unreachable, aborting"
      exit 1
    fi

    # print the first argument in red (no trailing newline)
    red() {
      printf "\e[31m%s\e[0m" "$1"
    }

    current_date=$(date -d "${start_date}" +%Y-%m-%d)
    # ISO dates sort lexicographically, so a string comparison is sufficient
    while [[ ! "${current_date}" > "${end_date}" ]]; do
      # current_date is always YYYY-MM-DD, so split it instead of forking date
      IFS=- read -r year month day <<< "${current_date}"

      mkdir -p "${data_root}/${year}/${month}"

      file_name="norshelf_qck_an_${year}${month}${day}T00Z.nc"
      target_file_name="${data_root}/${year}/${month}/${file_name}"
      url="${base_url}/${year}/${month}/${file_name}"

      if [[ -f "${target_file_name}" ]]; then
        echo "${target_file_name} already exists locally"
      elif wget --spider --quiet "${url}"; then
        echo "downloading ${url}"
        # Download to a temporary name and rename only on success, so a failed
        # or interrupted transfer never leaves a truncated file that a later
        # run would mistake for a finished download.
        partial_file_name="${target_file_name}.part"
        if wget --tries=5 --waitretry=60 -O "${partial_file_name}" "${url}"; then
          mv "${partial_file_name}" "${target_file_name}"
        else
          rm -f "${partial_file_name}"
          echo "${url} $(red 'download failed')"
          exit 1
        fi
      else
        echo "${target_file_name} $(red 'not found on server')"
      fi

      # move to next day
      current_date=$(date -d "${current_date} + 1 day" +%Y-%m-%d)
    done
|
|
---
|
|
# CronJob that runs /scripts/download.sh (from the norshelf-script ConfigMap)
# once a day and then fixes ownership and group permissions on the data.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: norshelf
  namespace: cron
spec:
  # Every day at 13:00, see https://crontab.guru
  schedule: "0 13 * * *"
  # Forbid: a scheduled run is skipped while the previous one is still
  # running, so at most one job runs at a time. Allow would permit overlap.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 10
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 3
      template:
        spec:
          restartPolicy: Never
          containers:
            - name: cronpod
              image: juselius/busynix:1.1
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
                - |
                  # install the tools download.sh depends on
                  nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash
                  if bash /scripts/download.sh; then
                    chown -R 5000:5000 /data/hdd/data/norshelf
                    chmod -R g+w /data/hdd/data/norshelf
                  else
                    # keep the pod alive for 30 minutes before failing, which
                    # delays the Job's next retry (see backoffLimit)
                    echo "Job failed, sleeping 30 minutes before retry..."
                    sleep 1800
                    exit 1
                  fi
              resources: {}
              volumeMounts:
                - name: data
                  mountPath: /data
                - name: script
                  mountPath: /scripts
          securityContext: {}
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: ekman-data
            - name: script
              configMap:
                name: norshelf-script
                # octal file mode for the mounted script; must remain an
                # unquoted integer because the API field is numeric
                defaultMode: 0755
|
|
---
|
|
# Claim used by the norshelf CronJob for its /data mount. It is bound
# statically to the pre-created PersistentVolume named in volumeName.
# No `status` stanza: status is populated by the cluster and is ignored when
# the object is written, so it does not belong in the manifest.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ekman-data
  namespace: cron
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi
  # empty class name: bind only to the named volume, no dynamic provisioning
  storageClassName: ""
  volumeMode: Filesystem
  volumeName: pv-ekman-data
|
|
---
|
|
# Static CephFS volume served through the rook-ceph CSI driver and reserved
# for the cron/ekman-data claim via claimRef.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: pv-ekman-data
spec:
  capacity:
    storage: 1Gi
  accessModes:
    - ReadWriteMany
  volumeMode: Filesystem
  # keep the volume object when the claim is deleted
  persistentVolumeReclaimPolicy: Retain
  # reserve this volume for one specific claim
  claimRef:
    apiVersion: v1
    kind: PersistentVolumeClaim
    namespace: cron
    name: ekman-data
  csi:
    driver: rook-ceph.cephfs.csi.ceph.com
    volumeHandle: pv-ekman-data
    nodeStageSecretRef:
      namespace: rook-ceph
      name: rook-csi-cephfs-node
    # attribute values are strings, hence the quoted "true"
    volumeAttributes:
      clusterID: rook-ceph
      fsName: data
      rootPath: /
      staticVolume: "true"
|