diff --git a/raw/ekman/cronjobs/arome/cron.yaml b/raw/ekman/cronjobs/arome/cron.yaml
index f5f27247..e4b75ac6 100644
--- a/raw/ekman/cronjobs/arome/cron.yaml
+++ b/raw/ekman/cronjobs/arome/cron.yaml
@@ -109,8 +109,6 @@ spec:
       template:
         spec:
           restartPolicy: "OnFailure"
-          securityContext:
-            runAsUser: 5000
           containers:
           - name: cronpod
             image: juselius/busynix:1.1
@@ -120,7 +118,8 @@ spec:
             - -c
             - |
               nix-shell -p 'python3.withPackages(ps: [ps.netcdf4])' --run 'python3 /scripts/download.py'
-              chown -R kraken /data/hdd/data/AROME
+              chown -R 5000:5000 /data/hdd/data/AROME
+              chmod -R g+w /data/hdd/data/AROME
             resources: {}
             volumeMounts:
             - name: data
diff --git a/raw/ekman/cronjobs/mur/cron.yaml b/raw/ekman/cronjobs/mur/cron.yaml
new file mode 100644
index 00000000..5087c462
--- /dev/null
+++ b/raw/ekman/cronjobs/mur/cron.yaml
@@ -0,0 +1,200 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: mur-script
+  namespace: cron
+data:
+  download.py: |
+    import argparse
+    import os
+    import sys
+    import requests
+    from datetime import datetime
+
+    parser = argparse.ArgumentParser(description="Download MUR SST files from NASA Earthdata")
+    parser.add_argument("-sd", "--start_date", required=True, help="Start date (YYYY-MM-DD)")
+    parser.add_argument("-ed", "--end_date", required=True, help="End date (YYYY-MM-DD)")
+    parser.add_argument("-o", "--out_dir", default="MUR_SST_nc", help="Output directory")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    args = parser.parse_args()
+
+
+    def create_session():
+        session = requests.Session()
+        session.headers.update({
+            "User-Agent": "mur-sst-downloader",
+            "Accept-Encoding": "identity"
+        })
+        return session
+
+
+    def get_download_urls(startdate, enddate, verbose=False):
+        url = (
+            "https://cmr.earthdata.nasa.gov/search/granules.umm_json"
+            f"?collection_concept_id=C1996881146-POCLOUD"
+            f"&temporal={startdate}T00:00:00Z,{enddate}T00:00:00Z"
+            "&pageSize=365"
+        )
+
+        # Time-box the catalogue query so a stalled connection cannot hang the job.
+        r = requests.get(url, timeout=60)
+        r.raise_for_status()
+        data = r.json()
+
+        urls = []
+        for item in data["items"]:
+            for link in item["umm"]["RelatedUrls"]:
+                # Prefer direct HTTPS download links
+                if link.get("Type") == "GET DATA":
+                    urls.append(link["URL"])
+
+        if verbose:
+            print(f"Found {len(urls)} files")
+
+        return urls
+
+
+    def download_file(session, url, out_dir, verbose=False, retries=3):
+        """Download url into out_dir; return True if the file is available.
+
+        The body is streamed to a ".part" file and renamed only once complete,
+        so an interrupted transfer is never mistaken for a finished file by the
+        "skip existing" check on a later run.
+        """
+        filename = os.path.basename(url)
+        local_path = os.path.join(out_dir, filename)
+        tmp_path = local_path + ".part"
+
+        if os.path.exists(local_path):
+            if verbose:
+                print(f"Skipping existing: {filename}")
+            return True
+
+        for attempt in range(retries):
+            try:
+                if verbose:
+                    print(f"Downloading ({attempt+1}/{retries}): {filename}")
+
+                with session.get(url, stream=True, allow_redirects=True, timeout=60) as r:
+                    if r.status_code == 401:
+                        raise Exception("Unauthorized (check .netrc credentials)")
+
+                    r.raise_for_status()
+
+                    with open(tmp_path, "wb") as f:
+                        for chunk in r.iter_content(chunk_size=8192):
+                            if chunk:
+                                f.write(chunk)
+
+                # Publish under the final name only after a complete transfer.
+                os.replace(tmp_path, local_path)
+
+                if verbose:
+                    print(f"Saved: {filename}")
+                return True
+
+            except Exception as e:
+                # Drop the partial file so the next attempt starts clean.
+                if os.path.exists(tmp_path):
+                    os.remove(tmp_path)
+                if attempt == retries - 1:
+                    print(f"Failed: {filename} -> {e}")
+                else:
+                    if verbose:
+                        print(f"Retrying {filename}... ({e})")
+
+        return False
+
+
+    def validate_dates(start, end):
+        try:
+            datetime.strptime(start, "%Y-%m-%d")
+            datetime.strptime(end, "%Y-%m-%d")
+        except ValueError:
+            print("Error: Dates must be in YYYY-MM-DD format")
+            sys.exit(1)
+
+
+    def main():
+        validate_dates(args.start_date, args.end_date)
+
+        # os.makedirs(args.out_dir, exist_ok=True)
+
+        session = create_session()
+
+        urls = get_download_urls(args.start_date, args.end_date, args.verbose)
+
+        failed = 0
+        for url in urls:
+            if not download_file(session, url, args.out_dir, args.verbose):
+                failed += 1
+
+        if failed:
+            # Exit non-zero so the CronJob is marked failed and retried.
+            print(f"\nDone. {failed} of {len(urls)} downloads failed")
+            sys.exit(1)
+
+        print(f"\nDone. Downloaded files to: {args.out_dir}")
+
+
+    if __name__ == "__main__":
+        main()
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: mur
+  namespace: cron
+spec:
+  schedule: "0 6 * * *" # Everyday at 06:00, use https://crontab.guru
+  concurrencyPolicy: "Forbid"
+  successfulJobsHistoryLimit: 10
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      backoffLimit: 10
+      template:
+        spec:
+          restartPolicy: "OnFailure"
+          containers:
+          - name: cronpod
+            image: juselius/busynix:1.1
+            imagePullPolicy: IfNotPresent
+            command:
+            - /bin/sh
+            - -c
+            - |
+              nix-shell -p 'python3.withPackages(ps: [ps.requests])' coreutils --run '
+                python3 /scripts/download.py \
+                  -sd $(date -d "3 days ago" +%Y-%m-%d) \
+                  -ed $(date +%Y-%m-%d) \
+                  -o /data/hdd/data/river-data/MUR/MUR_SST_nc \
+                  -v
+                rc=$?
+                # Fix ownership of whatever was fetched, then report the download status.
+                chown -R 5000:5000 /data/hdd/data/river-data/MUR/MUR_SST_nc
+                chmod -R g+w /data/hdd/data/river-data/MUR/MUR_SST_nc
+                exit $rc
+              '
+            resources: {}
+            volumeMounts:
+            - name: data
+              mountPath: /data
+            - name: script
+              mountPath: /scripts
+            - name: netrc
+              mountPath: /root/.netrc
+              subPath: .netrc
+              readOnly: true
+            securityContext: {}
+          volumes:
+          - name: data
+            persistentVolumeClaim:
+              claimName: ekman-data
+          - name: script
+            configMap:
+              name: mur-script
+              defaultMode: 0755
+          - name: netrc
+            secret:
+              secretName: mur-netrc
diff --git a/raw/ekman/cronjobs/nemo/cron.yaml b/raw/ekman/cronjobs/nemo/cron.yaml
new file mode 100644
index 00000000..a475bc12
--- /dev/null
+++ b/raw/ekman/cronjobs/nemo/cron.yaml
@@ -0,0 +1,225 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nemo-script
+  namespace: cron
+data:
+  download.sh: |
+    #!/usr/bin/env bash
+
+    # this script downloads files from
+    # https://data.marine.copernicus.eu/product/NWSHELF_ANALYSISFORECAST_PHY_004_013
+
+    set -euf -o pipefail
+
+    START_DATE="$1"
+    END_DATE="$2"
+
+    current_date="$START_DATE"
+
+    while [[ "$current_date" < "$END_DATE" ]]; do
+      next_date=$(date -I -d "$current_date + 1 day")
+      echo "Running subset for $current_date to $next_date"
+
+      outfile="cmems_mod_nws_phy-sal_anfc_1.5km-3D_PT1H-i_${current_date}--${next_date}.nc"
+      copernicusmarine subset \
+        --dataset-id cmems_mod_nws_phy-sal_anfc_1.5km-3D_PT1H-i \
+        -t "$current_date" \
+        -T "$next_date" \
+        -f "$outfile" \
+        -o /data/hdd/data/NEMO/
+      echo "Downloaded salt"
+
+      outfile="cmems_mod_nws_phy-cur_anfc_1.5km-3D_PT1H-i_${current_date}--${next_date}.nc"
+      copernicusmarine subset \
+        --dataset-id cmems_mod_nws_phy-cur_anfc_1.5km-3D_PT1H-i \
+        -t "$current_date" \
+        -T "$next_date" \
+        -f "$outfile" \
+        -o /data/hdd/data/NEMO/
+      echo "Downloaded currents"
+
+      outfile="cmems_mod_nws_phy-tem_anfc_1.5km-3D_PT1H-i_${current_date}--${next_date}.nc"
+      copernicusmarine subset \
+        --dataset-id cmems_mod_nws_phy-tem_anfc_1.5km-3D_PT1H-i \
+        -t "$current_date" \
+        -T "$next_date" \
+        -f "$outfile" \
+        -o /data/hdd/data/NEMO/
+      echo "Downloaded temperature"
+
+      outfile="cmems_mod_nws_phy-ssh_anfc_1.5km-2D_PT15M-i_${current_date}--${next_date}.nc"
+      copernicusmarine subset \
+        --dataset-id cmems_mod_nws_phy-ssh_anfc_1.5km-2D_PT15M-i \
+        -t "$current_date" \
+        -T "$next_date" \
+        -f "$outfile" \
+        -o /data/hdd/data/NEMO/
+      echo "Downloaded ssh"
+
+      current_date="$next_date"
+    done
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nemo-nix
+  namespace: cron
+data:
+  shell.nix: |
+    let
+      nixpkgs = builtins.fetchTarball {
+        url = "https://releases.nixos.org/nixos/25.11/nixos-25.11.9586.10e7ad5bbcb4/nixexprs.tar.xz";
+        sha256 = "sha256-wjAIDqQxE+kWV2lbykQCcS+F0ArQwmN8iNw0kcj4iaA=";
+      };
+      pkgs = import nixpkgs { overlays = [ (import ./default.nix) ]; };
+    in pkgs.mkShell {
+      buildInputs = [
+        (pkgs.python3.withPackages (ps: [ pkgs.copernicusmarine ]))
+        pkgs.coreutils
+        pkgs.bash
+      ];
+    }
+  default.nix: |
+    final: prev: {
+      arcosparse = prev.callPackage ./arcosparse.nix { };
+      copernicusmarine = prev.callPackage ./copernicusmarine.nix {
+        arcosparse = final.arcosparse;
+      };
+    }
+  copernicusmarine.nix: |
+    {
+      fetchPypi,
+      python3Packages,
+      arcosparse,
+    }:
+    python3Packages.buildPythonPackage rec {
+      pname = "copernicusmarine";
+      version = "2.2.2";
+      format = "pyproject";
+
+      src = fetchPypi {
+        inherit version;
+        pname = "copernicusmarine";
+        sha256 = "sha256-5T3iH4Hh08wIao2MMveb/bVnVz0pK0PoN4CRk811P0g=";
+      };
+
+      pythonRelaxDeps = true;
+
+      nativeBuildInputs = [ python3Packages.poetry-core ];
+      propagatedBuildInputs = with python3Packages; [
+        boto3
+        click
+        dask
+        h5netcdf
+        arcosparse
+        lxml
+        numpy
+        pydantic
+        pystac
+        requests
+        semver
+        setuptools
+        tqdm
+        xarray
+        zarr
+      ];
+    }
+  arcosparse.nix: |
+    {
+      fetchPypi,
+      python3Packages,
+    }:
+    python3Packages.buildPythonPackage rec {
+      pname = "arcosparse";
+      version = "0.4.2";
+      format = "pyproject";
+
+      src = fetchPypi {
+        inherit version;
+        pname = "arcosparse";
+        sha256 = "sha256-Z8NW+dsC3uXk101kr8tzsgjAoFb4KNdGkxyFkJ5UhFA=";
+      };
+
+      pythonRelaxDeps = true;
+
+      nativeBuildInputs = [ python3Packages.poetry-core ];
+      propagatedBuildInputs = with python3Packages; [
+        pyarrow
+        pandas
+        pystac
+        tqdm
+        requests
+      ];
+    }
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: nemo
+  namespace: cron
+spec:
+  schedule: "0 13 * * *" # Everyday at 13:00, use https://crontab.guru
+  concurrencyPolicy: "Forbid"
+  successfulJobsHistoryLimit: 10
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      backoffLimit: 10
+      template:
+        spec:
+          restartPolicy: "OnFailure"
+          containers:
+          - name: cronpod
+            image: ghcr.io/lix-project/lix:latest
+            imagePullPolicy: IfNotPresent
+            command:
+            - /bin/sh
+            - -c
+            - |
+              nix-shell /nix-overlay/shell.nix \
+                --keep COPERNICUSMARINE_SERVICE_USERNAME \
+                --keep COPERNICUSMARINE_SERVICE_PASSWORD \
+                --run '
+                  copernicusmarine login \
+                    --username "$COPERNICUSMARINE_SERVICE_USERNAME" \
+                    --password "$COPERNICUSMARINE_SERVICE_PASSWORD" \
+                    --force-overwrite
+                  bash /scripts/download.sh \
+                    $(date -d "2 days ago" +%Y-%m-%d) \
+                    $(date +%Y-%m-%d)
+                  chown -R 5000:5000 /data/hdd/data/NEMO
+                  chmod -R g+w /data/hdd/data/NEMO
+                '
+            env:
+            - name: COPERNICUSMARINE_SERVICE_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: nemo-credentials
+                  key: username
+            - name: COPERNICUSMARINE_SERVICE_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: nemo-credentials
+                  key: password
+            resources: {}
+            volumeMounts:
+            - name: data
+              mountPath: /data
+            - name: script
+              mountPath: /scripts
+            - name: nix
+              mountPath: /nix-overlay
+            securityContext: {}
+          volumes:
+          - name: data
+            persistentVolumeClaim:
+              claimName: ekman-data
+          - name: script
+            configMap:
+              name: nemo-script
+              defaultMode: 0755
+          - name: nix
+            configMap:
+              name: nemo-nix
+              defaultMode: 0644
diff --git a/raw/ekman/cronjobs/norshelf/cron.yaml b/raw/ekman/cronjobs/norshelf/cron.yaml
index 5fc00329..23a82c6d 100644
--- a/raw/ekman/cronjobs/norshelf/cron.yaml
+++ b/raw/ekman/cronjobs/norshelf/cron.yaml
@@ -71,8 +71,6 @@ spec:
       template:
         spec:
           restartPolicy: "OnFailure"
-          securityContext:
-            runAsUser: 5000
           containers:
           - name: cronpod
             image: juselius/busynix:1.1
@@ -83,6 +81,8 @@ spec:
             - |
               nix-env -iA nixpkgs.wget nixpkgs.coreutils nixpkgs.bash
               bash /scripts/download.sh
+              chown -R 5000:5000 /data/hdd/data/norshelf
+              chmod -R g+w /data/hdd/data/norshelf
             resources: {}
             volumeMounts:
             - name: data
diff --git a/raw/ekman/cronjobs/nve/cron.yaml b/raw/ekman/cronjobs/nve/cron.yaml
new file mode 100644
index 00000000..3761120c
--- /dev/null
+++ b/raw/ekman/cronjobs/nve/cron.yaml
@@ -0,0 +1,55 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: nve-config
+  namespace: cron
+data:
+  appsettings.json: |
+    {
+      "NveUrl": "https://chartserver.nve.no/ShowData.aspx?req=getchart&ver=1.0",
+      "DataDir": "/data/hdd/data/river-data"
+    }
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: nve
+  namespace: cron
+spec:
+  schedule: "0 8 * * *" # Everyday at 08:00, use https://crontab.guru
+  concurrencyPolicy: "Forbid"
+  successfulJobsHistoryLimit: 10
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      backoffLimit: 10
+      template:
+        spec:
+          restartPolicy: "OnFailure"
+          containers:
+          - name: cronpod
+            image: git.oceanbox.io/oceanbox/churn/riverrun:24a8bbbc-debug
+            imagePullPolicy: IfNotPresent
+            command:
+            - /bin/sh
+            - -c
+            - |
+              riverrun data --download --ndays 5000
+              chown -R 5000:5000 /data/hdd/data/river-data/Data
+              chmod -R g+w /data/hdd/data/river-data/Data
+            resources: {}
+            volumeMounts:
+            - name: data
+              mountPath: /data
+            - name: config
+              mountPath: /app/appsettings.json
+              subPath: appsettings.json
+              readOnly: true
+            securityContext: {}
+          volumes:
+          - name: data
+            persistentVolumeClaim:
+              claimName: ekman-data
+          - name: config
+            configMap:
+              name: nve-config
diff --git a/values/sorcerer/kustomize/staging-ekman/deployment_patch.yaml b/values/sorcerer/kustomize/staging-ekman/deployment_patch.yaml
index d2c424d2..d51e456b 100644
--- a/values/sorcerer/kustomize/staging-ekman/deployment_patch.yaml
+++ b/values/sorcerer/kustomize/staging-ekman/deployment_patch.yaml
@@ -1,6 +1,17 @@
 # - op: replace
 #   path: /spec/template/spec/containers/0/volumeMounts/0/mountPath
 #   value: /data
+- op: add
+  path: /spec/template/spec/volumes/-
+  value:
+    name: raid-data
+    persistentVolumeClaim:
+      claimName: raid-data
+- op: add
+  path: /spec/template/spec/containers/0/volumeMounts/-
+  value:
+    name: raid-data
+    mountPath: /data/raid
 - op: add
   path: /spec/template/spec/containers/0/envFrom/-
   value:
diff --git a/values/sorcerer/kustomize/staging-ekman/kustomization.yaml b/values/sorcerer/kustomize/staging-ekman/kustomization.yaml
index e9f23010..6d642e0e 100644
--- a/values/sorcerer/kustomize/staging-ekman/kustomization.yaml
+++ b/values/sorcerer/kustomize/staging-ekman/kustomization.yaml
@@ -14,6 +14,7 @@ resources:
 - ../base
 - pv.yaml
 - pvc.yaml
+- pv-raid-data.yaml
 - secrets.yaml
 - configurations.yaml
 - keyvault.yaml
diff --git a/values/sorcerer/kustomize/staging-ekman/pv-raid-data.yaml b/values/sorcerer/kustomize/staging-ekman/pv-raid-data.yaml
new file mode 100644
index 00000000..d5878502
--- /dev/null
+++ b/values/sorcerer/kustomize/staging-ekman/pv-raid-data.yaml
@@ -0,0 +1,42 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: raid-data
+  namespace: staging-sorcerer
+spec:
+  accessModes:
+  - ReadWriteMany
+  resources:
+    requests:
+      storage: 1Gi
+  storageClassName: ""
+  volumeMode: Filesystem
+  volumeName: pv-raid-data
+status:
+  accessModes:
+  - ReadWriteMany
+  capacity:
+    storage: 1Gi
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: pv-raid-data
+spec:
+  accessModes:
+  - ReadWriteMany
+  claimRef:
+    apiVersion: v1
+    kind: PersistentVolumeClaim
+    name: raid-data
+    namespace: staging-sorcerer
+  capacity:
+    storage: 1Gi
+  mountOptions:
+  - nfsvers=4.2
+  nfs:
+    path: /vol/archives
+    server: 10.255.241.80
+  persistentVolumeReclaimPolicy: Retain
+  storageClassName: managed-nfs-storage
+  volumeMode: Filesystem