feat(cron): Add arome and norshelf jobs
This commit is contained in:
@@ -0,0 +1,136 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: arome-script
|
||||
namespace: cron
|
||||
data:
|
||||
download.py: |
|
||||
import os
|
||||
from netCDF4 import Dataset
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
fname ="https://thredds.met.no/thredds/dodsC/meps25epsarchive/YEAR/MONTH/DAY/meps_det_sfc_YEARMONTHDAYT00Z.ncml"
|
||||
outdir = "/data/hdd/data/AROME"
|
||||
|
||||
|
||||
def generate_thredds_names(start, stop, template=None):
    """Build one THREDDS URL per day in the inclusive range [start, stop].

    Args:
        start: First day of the range, formatted ``YYYY-MM-DD``.
        stop: Last day of the range (inclusive), formatted ``YYYY-MM-DD``.
        template: URL pattern containing the literal placeholders ``YEAR``,
            ``MONTH`` and ``DAY``; every occurrence of each placeholder is
            substituted. Defaults to the module-level ``fname`` template.

    Returns:
        A list of URLs in chronological order. The list is empty when
        ``stop`` is earlier than ``start``.

    Raises:
        ValueError: If ``start`` or ``stop`` is not a valid ``YYYY-MM-DD``
            date.
    """
    if template is None:
        # Looked up at call time so existing two-argument callers keep
        # working; pass ``template`` explicitly to avoid depending on a
        # module global that other code may rebind.
        template = fname

    current = datetime.strptime(start, "%Y-%m-%d")
    last = datetime.strptime(stop, "%Y-%m-%d")

    urls = []
    while current <= last:
        # Plain string replacement: the placeholders are literals, not
        # regular expressions. Month and day are zero-padded to two digits.
        url = template.replace("YEAR", str(current.year))
        url = url.replace("MONTH", current.strftime("%m"))
        url = url.replace("DAY", current.strftime("%d"))
        urls.append(url)
        current += timedelta(days=1)

    return urls
|
||||
|
||||
def copy_thredds_file(threddsFile, savename):
    """Copy a fixed subset of variables from one dataset into a new file.

    Opens ``threddsFile`` for reading and creates ``savename`` for writing,
    recreates every dimension of the input in the output, then copies each
    input variable whose name is in the list below, including its
    attributes and fill value.

    Args:
        threddsFile: Path or URL passed to ``Dataset`` for reading.
        savename: Path of the file to create; it is opened in ``"w"`` mode.

    Both datasets are closed when the function returns or raises.
    """
    aromeNames = {
        "time",
        "longitude",
        "latitude",
        "land_area_fraction",
        "air_temperature_2m",
        "precipitation_amount_acc",
        "water_evaporation_amount",
        "relative_humidity_2m",
        "integral_of_surface_downwelling_longwave_flux_in_air_wrt_time",
        "integral_of_surface_net_downward_shortwave_flux_wrt_time",
        "air_pressure_at_sea_level",
        "x_wind_10m",
        "y_wind_10m",
    }

    # The context managers guarantee both handles are released even when a
    # copy step fails part-way through.
    with Dataset(threddsFile) as dsin, Dataset(savename, "w") as dsout:
        for dname, the_dim in dsin.dimensions.items():
            # An unlimited dimension is created with size None.
            size = None if the_dim.isunlimited() else len(the_dim)
            dsout.createDimension(dname, size)

        for v_name, varin in dsin.variables.items():
            if v_name not in aromeNames:
                continue

            # The fill value is passed at creation time, so it is excluded
            # from the attributes copied afterwards.
            fill_value = getattr(varin, "_FillValue", None)
            outVar = dsout.createVariable(
                v_name, varin.datatype, varin.dimensions, fill_value=fill_value
            )
            outVar.setncatts(
                {k: varin.getncattr(k) for k in varin.ncattrs() if k != "_FillValue"}
            )
            outVar[:] = varin[:]
|
||||
|
||||
|
||||
# Script body: download one file per day from the fixed start date up to
# today, trying the "sfc" .ncml URL first and the "2_5km" .nc URL second.
os.makedirs(outdir, exist_ok=True)

# NOTE(review): the start date is fixed and there is no check for files that
# already exist, so every run copies every day in the range again.
fList = generate_thredds_names("2026-04-24", datetime.today().strftime("%Y-%m-%d"))
for source_url in fList:
    # The local name is derived from the primary URL even when the fallback
    # URL ends up being the one that is downloaded.
    savename = os.path.join(outdir, source_url.split("/")[-1].split(".")[0] + ".nc")
    print(savename)

    candidates = (
        source_url,
        source_url.replace("sfc", "2_5km").replace("ncml", "nc"),
    )

    # Write to a temporary name and rename on success, so a failed attempt
    # never leaves a truncated file at the final path or clobbers a good
    # file from an earlier run.
    partial = savename + ".part"
    failure = None
    for url in candidates:
        try:
            copy_thredds_file(url, partial)
        except Exception as err:
            # Best effort: remember the error, drop the partial output and
            # move on to the next candidate URL.
            failure = err
            if os.path.exists(partial):
                os.remove(partial)
        else:
            os.replace(partial, savename)
            break
    else:
        # Neither candidate could be copied; report and continue with the
        # next day.
        print("File not found: " + candidates[-1])
        print("  last error: " + repr(failure))
|
||||
---
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: arome
|
||||
namespace: cron
|
||||
spec:
|
||||
schedule: 0 6 * * * # Every day at 06:00; see https://crontab.guru
|
||||
concurrencyPolicy: "Forbid" # Forbid = only one run at a time; Allow = runs may overlap
|
||||
successfulJobsHistoryLimit: 10
|
||||
failedJobsHistoryLimit: 3
|
||||
jobTemplate:
|
||||
spec:
|
||||
backoffLimit: 10
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: "OnFailure"
|
||||
containers:
|
||||
- name: cronpod
|
||||
image: juselius/busynix:1.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
nix-shell -p 'python3.withPackages(ps: [ps.netcdf4])' --run 'python3 /scripts/download.py'
|
||||
chown -R kraken /data/hdd/data/AROME
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
- name: script
|
||||
mountPath: /scripts
|
||||
securityContext: {}
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: ekman-data
|
||||
- name: script
|
||||
configMap:
|
||||
name: arome-script
|
||||
defaultMode: 0755
|
||||
@@ -0,0 +1,146 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: norshelf-script
|
||||
namespace: cron
|
||||
data:
|
||||
download.sh: |
|
||||
#!/usr/bin/env bash

# This script downloads the daily files from:
# https://thredds.met.no/thredds/catalog/sea_norshelf_files/YYYY/catalog.html
#
# Files that already exist locally are skipped. Each download is written to a
# temporary "<name>.part" file and renamed only after wget succeeds, so an
# interrupted transfer is never mistaken for a finished file on the next run.

# safe bash settings: exit on error (-e), treat unset variables as errors (-u),
# disable globbing (-f), fail a pipeline when any stage fails (pipefail)
set -euf -o pipefail

# define start and end dates (YYYY-MM-DD)
start_date="2026-03-01"
end_date=$(date +%Y-%m-%d)

data_root="/data/hdd/data/norshelf/sea_norshelf_files"
base_url="https://thredds.met.no/thredds/fileServer/sea_norshelf_files"

# path of the download currently in progress; empty when there is none
partial_file=""

# remove an unfinished download when the script exits for any reason
cleanup() {
    if [[ -n "${partial_file}" ]]; then
        rm -f -- "${partial_file}"
    fi
}
trap cleanup EXIT

# check if thredds is reachable before attempting any downloads
if ! wget --spider --quiet "https://thredds.met.no/thredds/catalog/sea_norshelf_files/catalog.html"; then
    echo "thredds.met.no is unreachable, aborting"
    exit 1
fi

# function to print stuff in red
red() {
    printf "\e[31m%s\e[0m" "$1"
}

current_date=$(date -d "${start_date}" +%Y-%m-%d)

# ISO dates sort lexicographically, so a string comparison is enough here
while [[ ! "${current_date}" > "${end_date}" ]]; do
    year=$(date -d "${current_date}" +%Y)
    month=$(date -d "${current_date}" +%m)
    day=$(date -d "${current_date}" +%d)

    mkdir -p "${data_root}/${year}/${month}"

    file_name="norshelf_qck_an_${year}${month}${day}T00Z.nc"
    target_file_name="${data_root}/${year}/${month}/${file_name}"
    url="${base_url}/${year}/${month}/${file_name}"

    if [[ ! -f "${target_file_name}" ]]; then
        if wget --spider --quiet "${url}"; then
            echo "downloading ${url}"
            partial_file="${target_file_name}.part"
            # if wget fails, "set -e" aborts here and the EXIT trap removes
            # the partial file
            wget -O "${partial_file}" "${url}"
            mv -- "${partial_file}" "${target_file_name}"
            partial_file=""
        else
            echo "${target_file_name} $(red 'not found on server')"
        fi
    else
        echo "${target_file_name} already exists locally"
    fi

    # move to next day
    current_date=$(date -d "${current_date} + 1 day" +%Y-%m-%d)
done
|
||||
---
|
||||
apiVersion: batch/v1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: norshelf
|
||||
namespace: cron
|
||||
spec:
|
||||
schedule: 0 13 * * * # Every day at 13:00; see https://crontab.guru
|
||||
concurrencyPolicy: "Forbid" # Forbid = only one run at a time; Allow = runs may overlap
|
||||
successfulJobsHistoryLimit: 10
|
||||
failedJobsHistoryLimit: 3
|
||||
jobTemplate:
|
||||
spec:
|
||||
backoffLimit: 10
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: "OnFailure"
|
||||
containers:
|
||||
- name: cronpod
|
||||
image: juselius/busynix:1.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash
|
||||
bash /scripts/download.sh
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
- name: script
|
||||
mountPath: /scripts
|
||||
securityContext: {}
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: ekman-data
|
||||
- name: script
|
||||
configMap:
|
||||
name: norshelf-script
|
||||
defaultMode: 0755
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: ekman-data
|
||||
namespace: cron
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
storageClassName: ""
|
||||
volumeMode: Filesystem
|
||||
volumeName: pv-ekman-data
|
||||
status:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
capacity:
|
||||
storage: 1Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: pv-ekman-data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
claimRef:
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
name: ekman-data
|
||||
namespace: cron
|
||||
capacity:
|
||||
storage: 1Gi
|
||||
csi:
|
||||
driver: rook-ceph.cephfs.csi.ceph.com
|
||||
nodeStageSecretRef:
|
||||
name: rook-csi-cephfs-node
|
||||
namespace: rook-ceph
|
||||
volumeAttributes:
|
||||
clusterID: rook-ceph
|
||||
fsName: data
|
||||
rootPath: /
|
||||
staticVolume: "true"
|
||||
volumeHandle: pv-ekman-data
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
volumeMode: Filesystem
|
||||
Reference in New Issue
Block a user