fix(cron): Move backoff inside scripts

This commit is contained in:
2026-04-29 08:58:32 +02:00
parent 773504c908
commit 738112f980
6 changed files with 100 additions and 75 deletions
+33 -32
View File
@@ -8,7 +8,6 @@ data:
import argparse
import os
import sys
import time
import requests
from datetime import datetime
@@ -54,43 +53,37 @@ data:
return urls
def download_file(session, url, out_dir, verbose=False):
    """Download ``url`` into ``out_dir`` and report whether the file is in place.

    The target file name is the basename of ``url``. A file that already
    exists in ``out_dir`` is treated as complete and is not fetched again.
    No retry is attempted here; the caller decides what to do with a failure.

    Args:
        session: Object exposing a ``requests``-style
            ``get(url, stream=..., allow_redirects=..., timeout=...)`` whose
            result is a context manager with ``status_code``,
            ``raise_for_status()`` and ``iter_content(chunk_size=...)``.
        url: Address of the file to fetch.
        out_dir: Existing directory that receives the file.
        verbose: When true, print progress messages.

    Returns:
        True if the file already existed or was downloaded completely,
        False if the download failed (the reason is printed).
    """
    filename = os.path.basename(url)
    local_path = os.path.join(out_dir, filename)

    if os.path.exists(local_path):
        if verbose:
            print(f"Skipping existing: {filename}")
        return True

    if verbose:
        print(f"Downloading: {filename}")

    # Stream into a sibling ".part" file and only move it to its final name
    # once the whole body has been written. Writing straight to local_path
    # would leave a truncated file behind on a mid-transfer error, and the
    # existence check above would then skip it on every later run.
    partial_path = local_path + ".part"
    try:
        with session.get(url, stream=True, allow_redirects=True, timeout=60) as r:
            if r.status_code == 401:
                raise Exception("Unauthorized (check .netrc credentials)")

            r.raise_for_status()

            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Same directory, so the rename never crosses a filesystem boundary.
        os.replace(partial_path, local_path)
    except Exception as e:
        # Best-effort cleanup: the partial file may not have been created.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Failed: {filename} -> {e}")
        return False

    if verbose:
        print(f"Saved: {filename}")
    return True
def validate_dates(start, end):
@@ -111,9 +104,13 @@ data:
urls = get_download_urls(args.start_date, args.end_date, args.verbose)
failed = False
for url in urls:
download_file(session, url, args.out_dir, args.verbose)
if not download_file(session, url, args.out_dir, args.verbose):
failed = True
if failed:
sys.exit(1)
print(f"\nDone. Downloaded files to: {args.out_dir}")
@@ -132,10 +129,10 @@ spec:
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 10
backoffLimit: 3
template:
spec:
restartPolicy: "OnFailure"
restartPolicy: "Never"
containers:
- name: cronpod
image: juselius/busynix:1.1
@@ -149,10 +146,14 @@ spec:
-sd $(date -d "3 days ago" +%Y-%m-%d) \
-ed $(date +%Y-%m-%d) \
-o /data/hdd/data/river-data/MUR/MUR_SST_nc \
-v
chown -R 5000:5000 /data/hdd/data/river-data/MUR/MUR_SST_nc
-v &&
chown -R 5000:5000 /data/hdd/data/river-data/MUR/MUR_SST_nc &&
chmod -R g+w /data/hdd/data/river-data/MUR/MUR_SST_nc
'
' || {
echo "Job failed, sleeping 30 minutes before retry..."
sleep 1800
exit 1
}
resources: {}
volumeMounts:
- name: data