feat(sorcerer): Add raid-data pvc and cronjobs
This commit is contained in:
@@ -109,8 +109,6 @@ spec:
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: "OnFailure"
|
||||
securityContext:
|
||||
runAsUser: 5000
|
||||
containers:
|
||||
- name: cronpod
|
||||
image: juselius/busynix:1.1
|
||||
@@ -120,7 +118,8 @@ spec:
|
||||
- -c
|
||||
- |
|
||||
nix-shell -p 'python3.withPackages(ps: [ps.netcdf4])' --run 'python3 /scripts/download.py'
|
||||
chown -R kraken /data/hdd/data/AROME
|
||||
chown -R 5000:5000 /data/hdd/data/AROME
|
||||
chmod -R g+w /data/hdd/data/AROME
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- name: data
|
||||
|
||||
@@ -0,0 +1,174 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: mur-script
|
||||
namespace: cron
|
||||
data:
|
||||
download.py: |
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
# Command-line interface for the downloader. Arguments are parsed at module
# load, and the resulting namespace is read by main() as the global `args`.
parser = argparse.ArgumentParser(
    description="Download MUR SST files from NASA Earthdata",
)
parser.add_argument(
    "-sd",
    "--start_date",
    required=True,
    help="Start date (YYYY-MM-DD)",
)
parser.add_argument(
    "-ed",
    "--end_date",
    required=True,
    help="End date (YYYY-MM-DD)",
)
parser.add_argument(
    "-o",
    "--out_dir",
    default="MUR_SST_nc",
    help="Output directory",
)
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="Verbose output",
)
args = parser.parse_args()
|
||||
|
||||
|
||||
def create_session():
    """Build the HTTP session that is reused for every file download.

    Returns:
        A ``requests.Session`` whose default headers set a fixed
        ``User-Agent`` and ``Accept-Encoding: identity``.
    """
    default_headers = {
        "User-Agent": "mur-sst-downloader",
        "Accept-Encoding": "identity",
    }
    http = requests.Session()
    http.headers.update(default_headers)
    return http
|
||||
|
||||
|
||||
def get_download_urls(startdate, enddate, verbose=False):
    """Ask the NASA CMR granule search for download URLs in a date range.

    The query is restricted to collection ``C1996881146-POCLOUD`` and to the
    temporal window from ``startdate`` 00:00Z to ``enddate`` 00:00Z. Only a
    single page of results (page size 365) is read.

    Args:
        startdate: Start of the window as a ``YYYY-MM-DD`` string.
        enddate: End of the window as a ``YYYY-MM-DD`` string.
        verbose: When true, print the number of URLs found.

    Returns:
        A list with the ``URL`` of every related URL whose ``Type`` is
        ``"GET DATA"``, in the order the search returned them.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    url = (
        "https://cmr.earthdata.nasa.gov/search/granules.umm_json"
        "?collection_concept_id=C1996881146-POCLOUD"
        f"&temporal={startdate}T00:00:00Z,{enddate}T00:00:00Z"
        "&pageSize=365"
    )

    # Without a timeout a stalled connection would block the job forever.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    data = r.json()

    # Keep only the "GET DATA" entries. Granules without related URLs and
    # links without a URL are skipped instead of aborting the whole listing.
    urls = [
        link["URL"]
        for item in data["items"]
        for link in item["umm"].get("RelatedUrls") or []
        if link.get("Type") == "GET DATA" and link.get("URL")
    ]

    if verbose:
        print(f"Found {len(urls)} files")

    return urls
|
||||
|
||||
|
||||
def download_file(session, url, out_dir, verbose=False, retries=3):
    """Download one URL into ``out_dir``, skipping files that already exist.

    The response body is streamed into ``<filename>.part`` and renamed to its
    final name only after the transfer finished. An interrupted download
    therefore never leaves a truncated file under the final name, which the
    "skip existing" check would otherwise treat as complete on later runs.

    Args:
        session: Object with a ``requests.Session``-style ``get`` method.
        url: Address of the file; its basename becomes the local file name.
        out_dir: Existing directory the file is written to.
        verbose: When true, print progress messages.
        retries: Maximum number of download attempts.

    Returns:
        None. A failure is reported on stdout after the last attempt instead
        of being raised.
    """
    filename = os.path.basename(url)
    local_path = os.path.join(out_dir, filename)
    partial_path = local_path + ".part"

    if os.path.exists(local_path):
        if verbose:
            print(f"Skipping existing: {filename}")
        return

    for attempt in range(retries):
        try:
            if verbose:
                print(f"Downloading ({attempt+1}/{retries}): {filename}")

            with session.get(url, stream=True, allow_redirects=True, timeout=60) as r:
                if r.status_code == 401:
                    raise Exception("Unauthorized (check .netrc credentials)")

                r.raise_for_status()

                with open(partial_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)

            # Publish the file under its final name only once it is complete.
            os.replace(partial_path, local_path)

            if verbose:
                print(f"Saved: {filename}")
            return

        except Exception as e:
            # Drop whatever was written so no truncated data is left behind.
            try:
                os.remove(partial_path)
            except FileNotFoundError:
                pass

            if attempt == retries - 1:
                print(f"Failed: {filename} -> {e}")
            elif verbose:
                print(f"Retrying {filename}... ({e})")
|
||||
|
||||
|
||||
def validate_dates(start, end):
    """Exit with status 1 unless both dates are in ``YYYY-MM-DD`` format.

    Args:
        start: Start date string to check.
        end: End date string to check.

    Returns:
        None when both strings parse.
    """
    date_format = "%Y-%m-%d"
    for candidate in (start, end):
        try:
            datetime.strptime(candidate, date_format)
        except ValueError:
            print("Error: Dates must be in YYYY-MM-DD format")
            sys.exit(1)
|
||||
|
||||
|
||||
def main():
    """Validate the requested dates, list the matching files and fetch them."""
    start, end = args.start_date, args.end_date
    validate_dates(start, end)

    # NOTE: the output directory is not created here; it has to exist already.
    session = create_session()

    for url in get_download_urls(start, end, args.verbose):
        download_file(session, url, args.out_dir, args.verbose)

    print(f"\nDone. Downloaded files to: {args.out_dir}")


# Run the downloader only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
---
|
||||
# CronJob "mur": runs /scripts/download.py from the mur-script ConfigMap for
# the window "3 days ago" .. today, then hands the output tree to uid/gid 5000
# and makes it group-writable.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: mur
  namespace: cron
spec:
  schedule: "0 6 * * *" # Every day at 06:00, use https://crontab.guru
  concurrencyPolicy: "Forbid"
  successfulJobsHistoryLimit: 10
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 10
      template:
        spec:
          restartPolicy: "OnFailure"
          containers:
            - name: cronpod
              image: juselius/busynix:1.1
              imagePullPolicy: IfNotPresent
              # The exit status of the download is saved and returned after
              # the ownership fix-up; otherwise the status of chmod would be
              # reported and a failed download would look like a success.
              command:
                - /bin/sh
                - -c
                - |
                  nix-shell -p 'python3.withPackages(ps: [ps.requests])' coreutils --run '
                    python3 /scripts/download.py \
                      -sd $(date -d "3 days ago" +%Y-%m-%d) \
                      -ed $(date +%Y-%m-%d) \
                      -o /data/hdd/data/river-data/MUR/MUR_SST_nc \
                      -v
                    status=$?
                    chown -R 5000:5000 /data/hdd/data/river-data/MUR/MUR_SST_nc
                    chmod -R g+w /data/hdd/data/river-data/MUR/MUR_SST_nc
                    exit $status
                  '
              resources: {}
              volumeMounts:
                - name: data
                  mountPath: /data
                - name: script
                  mountPath: /scripts
                # Credentials file mounted as a single file from the secret.
                - name: netrc
                  mountPath: /root/.netrc
                  subPath: .netrc
                  readOnly: true
              securityContext: {}
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: ekman-data
            - name: script
              configMap:
                name: mur-script
                defaultMode: 0755
            - name: netrc
              secret:
                secretName: mur-netrc
|
||||
@@ -0,0 +1,225 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: nemo-script
|
||||
namespace: cron
|
||||
data:
|
||||
download.sh: |
|
||||
#!/usr/bin/env bash

# Downloads one-day subsets of four datasets of the product
# https://data.marine.copernicus.eu/product/NWSHELF_ANALYSISFORECAST_PHY_004_013
# for every day from START_DATE up to (not including) END_DATE.
#
# Usage: download.sh START_DATE END_DATE   (both as YYYY-MM-DD)

set -euf -o pipefail

if [[ $# -ne 2 ]]; then
  echo "Usage: $0 START_DATE END_DATE (YYYY-MM-DD)" >&2
  exit 2
fi

START_DATE="$1"
END_DATE="$2"
OUT_DIR="/data/hdd/data/NEMO/"

# One entry per dataset: "<dataset id>:<label printed after its download>".
DATASETS=(
  "cmems_mod_nws_phy-sal_anfc_1.5km-3D_PT1H-i:salt"
  "cmems_mod_nws_phy-cur_anfc_1.5km-3D_PT1H-i:currents"
  "cmems_mod_nws_phy-tem_anfc_1.5km-3D_PT1H-i:temperature"
  "cmems_mod_nws_phy-ssh_anfc_1.5km-2D_PT15M-i:ssh"
)

# Fetch dataset $1 for the window $2 .. $3 into OUT_DIR. The output file is
# named "<dataset id>_<from>--<to>.nc".
download_subset() {
  local dataset_id="$1" from="$2" to="$3"
  copernicusmarine subset \
    --dataset-id "$dataset_id" \
    -t "$from" \
    -T "$to" \
    -f "${dataset_id}_${from}--${to}.nc" \
    -o "$OUT_DIR"
}

current_date="$START_DATE"

# ISO dates sort lexicographically, so a string comparison orders them in time.
while [[ "$current_date" < "$END_DATE" ]]; do
  next_date=$(date -I -d "$current_date + 1 day")
  echo "Running subset for $current_date to $next_date"

  for entry in "${DATASETS[@]}"; do
    download_subset "${entry%%:*}" "$current_date" "$next_date"
    echo "Downloaded ${entry##*:}"
  done

  current_date="$next_date"
done
|
||||
---
|
||||
# Nix expressions used by the nemo CronJob: a shell environment plus an
# overlay that packages copernicusmarine and arcosparse from PyPI.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nemo-nix
  namespace: cron
data:
  shell.nix: |
    # Shell with Python (including copernicusmarine), coreutils and bash,
    # built from a pinned nixpkgs tarball extended with ./default.nix.
    let
      nixpkgs = builtins.fetchTarball {
        url = "https://releases.nixos.org/nixos/25.11/nixos-25.11.9586.10e7ad5bbcb4/nixexprs.tar.xz";
        sha256 = "sha256-wjAIDqQxE+kWV2lbykQCcS+F0ArQwmN8iNw0kcj4iaA=";
      };
      overlay = import ./default.nix;
      pkgs = import nixpkgs { overlays = [ overlay ]; };
    in
    pkgs.mkShell {
      buildInputs = [
        (pkgs.python3.withPackages (_: [ pkgs.copernicusmarine ]))
        pkgs.coreutils
        pkgs.bash
      ];
    }
  default.nix: |
    # Overlay adding the two locally defined Python packages.
    final: prev: {
      arcosparse = prev.callPackage ./arcosparse.nix { };
      copernicusmarine = prev.callPackage ./copernicusmarine.nix {
        inherit (final) arcosparse;
      };
    }
  copernicusmarine.nix: |
    # copernicusmarine 2.2.2, fetched from PyPI and built with poetry-core.
    { fetchPypi, python3Packages, arcosparse }:

    python3Packages.buildPythonPackage rec {
      pname = "copernicusmarine";
      version = "2.2.2";
      format = "pyproject";

      src = fetchPypi {
        inherit pname version;
        sha256 = "sha256-5T3iH4Hh08wIao2MMveb/bVnVz0pK0PoN4CRk811P0g=";
      };

      pythonRelaxDeps = true;

      nativeBuildInputs = [ python3Packages.poetry-core ];

      # `arcosparse` below is the function argument, not python3Packages'.
      propagatedBuildInputs = with python3Packages; [
        boto3
        click
        dask
        h5netcdf
        arcosparse
        lxml
        numpy
        pydantic
        pystac
        requests
        semver
        setuptools
        tqdm
        xarray
        zarr
      ];
    }
  arcosparse.nix: |
    # arcosparse 0.4.2, fetched from PyPI and built with poetry-core.
    { fetchPypi, python3Packages }:

    python3Packages.buildPythonPackage rec {
      pname = "arcosparse";
      version = "0.4.2";
      format = "pyproject";

      src = fetchPypi {
        inherit pname version;
        sha256 = "sha256-Z8NW+dsC3uXk101kr8tzsgjAoFb4KNdGkxyFkJ5UhFA=";
      };

      pythonRelaxDeps = true;

      nativeBuildInputs = [ python3Packages.poetry-core ];

      propagatedBuildInputs = with python3Packages; [
        pyarrow
        pandas
        pystac
        tqdm
        requests
      ];
    }
|
||||
---
|
||||
# CronJob "nemo": logs in to the Copernicus Marine service, runs
# /scripts/download.sh for the window "2 days ago" .. today, then hands
# /data/hdd/data/NEMO to uid/gid 5000 and makes it group-writable.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: nemo
  namespace: cron
spec:
  schedule: "0 13 * * *" # Every day at 13:00, use https://crontab.guru
  concurrencyPolicy: "Forbid"
  successfulJobsHistoryLimit: 10
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 10
      template:
        spec:
          restartPolicy: "OnFailure"
          containers:
            - name: cronpod
              image: ghcr.io/lix-project/lix:latest
              imagePullPolicy: IfNotPresent
              # The download only starts after a successful login, and the
              # status of login/download is returned after the ownership
              # fix-up. Otherwise the status of chmod would be reported and
              # a failed run would look like a success.
              command:
                - /bin/sh
                - -c
                - |
                  nix-shell /nix-overlay/shell.nix \
                    --keep COPERNICUSMARINE_SERVICE_USERNAME \
                    --keep COPERNICUSMARINE_SERVICE_PASSWORD \
                    --run '
                      copernicusmarine login \
                        --username "$COPERNICUSMARINE_SERVICE_USERNAME" \
                        --password "$COPERNICUSMARINE_SERVICE_PASSWORD" \
                        --force-overwrite &&
                      bash /scripts/download.sh \
                        $(date -d "2 days ago" +%Y-%m-%d) \
                        $(date +%Y-%m-%d)
                      status=$?
                      chown -R 5000:5000 /data/hdd/data/NEMO
                      chmod -R g+w /data/hdd/data/NEMO
                      exit $status
                    '
              env:
                - name: COPERNICUSMARINE_SERVICE_USERNAME
                  valueFrom:
                    secretKeyRef:
                      name: nemo-credentials
                      key: username
                - name: COPERNICUSMARINE_SERVICE_PASSWORD
                  valueFrom:
                    secretKeyRef:
                      name: nemo-credentials
                      key: password
              resources: {}
              volumeMounts:
                - name: data
                  mountPath: /data
                - name: script
                  mountPath: /scripts
                - name: nix
                  mountPath: /nix-overlay
              securityContext: {}
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: ekman-data
            - name: script
              configMap:
                name: nemo-script
                defaultMode: 0755
            - name: nix
              configMap:
                name: nemo-nix
                defaultMode: 0644
|
||||
@@ -71,8 +71,6 @@ spec:
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: "OnFailure"
|
||||
securityContext:
|
||||
runAsUser: 5000
|
||||
containers:
|
||||
- name: cronpod
|
||||
image: juselius/busynix:1.1
|
||||
@@ -83,6 +81,8 @@ spec:
|
||||
- |
|
||||
nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash
|
||||
bash /scripts/download.sh
|
||||
chown -R 5000:5000 /data/hdd/data/norshelf
|
||||
chmod -R g+w /data/hdd/data/norshelf
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- name: data
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: nve-config
|
||||
namespace: cron
|
||||
data:
|
||||
appsettings.json: |
|
||||
{
|
||||
"NveUrl": "https://chartserver.nve.no/ShowData.aspx?req=getchart&ver=1.0",
|
||||
"DataDir": "/data/hdd/data/river-data"
|
||||
}
|
||||
---
|
||||
# CronJob "nve": runs `riverrun data --download`, then hands
# /data/hdd/data/river-data/Data to uid/gid 5000 and makes it group-writable.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: nve
  namespace: cron
spec:
  schedule: "0 8 * * *" # Every day at 08:00, use https://crontab.guru
  concurrencyPolicy: "Forbid"
  successfulJobsHistoryLimit: 10
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 10
      template:
        spec:
          restartPolicy: "OnFailure"
          containers:
            - name: cronpod
              image: git.oceanbox.io/oceanbox/churn/riverrun:24a8bbbc-debug
              imagePullPolicy: IfNotPresent
              # The exit status of riverrun is saved and returned after the
              # ownership fix-up; otherwise the status of chmod would be
              # reported and a failed download would look like a success.
              command:
                - /bin/sh
                - -c
                - |
                  riverrun data --download --ndays 5000
                  status=$?
                  chown -R 5000:5000 /data/hdd/data/river-data/Data
                  chmod -R g+w /data/hdd/data/river-data/Data
                  exit $status
              resources: {}
              volumeMounts:
                - name: data
                  mountPath: /data
                # Settings file mounted as a single file from the ConfigMap.
                - name: config
                  mountPath: /app/appsettings.json
                  subPath: appsettings.json
                  readOnly: true
              securityContext: {}
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: ekman-data
            - name: config
              configMap:
                name: nve-config
|
||||
Reference in New Issue
Block a user