fix(cron): Move backoff inside scripts
This commit is contained in:
@@ -8,7 +8,6 @@ data:
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import requests
|
||||
from datetime import datetime
|
||||
|
||||
@@ -54,43 +53,37 @@ data:
|
||||
return urls
|
||||
|
||||
|
||||
def download_file(session, url, out_dir, verbose=False, retries=3):
|
||||
def download_file(session, url, out_dir, verbose=False):
|
||||
filename = os.path.basename(url)
|
||||
local_path = os.path.join(out_dir, filename)
|
||||
|
||||
if os.path.exists(local_path):
|
||||
if verbose:
|
||||
print(f"Skipping existing: {filename}")
|
||||
return
|
||||
return True
|
||||
|
||||
for attempt in range(retries):
|
||||
try:
|
||||
if verbose:
|
||||
print(f"Downloading ({attempt+1}/{retries}): {filename}")
|
||||
if verbose:
|
||||
print(f"Downloading: {filename}")
|
||||
|
||||
with session.get(url, stream=True, allow_redirects=True, timeout=60) as r:
|
||||
if r.status_code == 401:
|
||||
raise Exception("Unauthorized (check .netrc credentials)")
|
||||
try:
|
||||
with session.get(url, stream=True, allow_redirects=True, timeout=60) as r:
|
||||
if r.status_code == 401:
|
||||
raise Exception("Unauthorized (check .netrc credentials)")
|
||||
|
||||
r.raise_for_status()
|
||||
r.raise_for_status()
|
||||
|
||||
with open(local_path, "wb") as f:
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
with open(local_path, "wb") as f:
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
|
||||
if verbose:
|
||||
print(f"Saved: {filename}")
|
||||
return
|
||||
if verbose:
|
||||
print(f"Saved: {filename}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
if attempt == retries - 1:
|
||||
print(f"Failed: {filename} -> {e}")
|
||||
else:
|
||||
backoff = min(2 ** attempt * 5, 60)
|
||||
if verbose:
|
||||
print(f"Retrying {filename} in {backoff}s... ({e})")
|
||||
time.sleep(backoff)
|
||||
except Exception as e:
|
||||
print(f"Failed: {filename} -> {e}")
|
||||
return False
|
||||
|
||||
|
||||
def validate_dates(start, end):
|
||||
@@ -111,9 +104,13 @@ data:
|
||||
|
||||
urls = get_download_urls(args.start_date, args.end_date, args.verbose)
|
||||
|
||||
failed = False
|
||||
for url in urls:
|
||||
download_file(session, url, args.out_dir, args.verbose)
|
||||
if not download_file(session, url, args.out_dir, args.verbose):
|
||||
failed = True
|
||||
|
||||
if failed:
|
||||
sys.exit(1)
|
||||
print(f"\nDone. Downloaded files to: {args.out_dir}")
|
||||
|
||||
|
||||
@@ -132,10 +129,10 @@ spec:
|
||||
failedJobsHistoryLimit: 3
|
||||
jobTemplate:
|
||||
spec:
|
||||
backoffLimit: 10
|
||||
backoffLimit: 3
|
||||
template:
|
||||
spec:
|
||||
restartPolicy: "OnFailure"
|
||||
restartPolicy: "Never"
|
||||
containers:
|
||||
- name: cronpod
|
||||
image: juselius/busynix:1.1
|
||||
@@ -149,10 +146,14 @@ spec:
|
||||
-sd $(date -d "3 days ago" +%Y-%m-%d) \
|
||||
-ed $(date +%Y-%m-%d) \
|
||||
-o /data/hdd/data/river-data/MUR/MUR_SST_nc \
|
||||
-v
|
||||
chown -R 5000:5000 /data/hdd/data/river-data/MUR/MUR_SST_nc
|
||||
-v &&
|
||||
chown -R 5000:5000 /data/hdd/data/river-data/MUR/MUR_SST_nc &&
|
||||
chmod -R g+w /data/hdd/data/river-data/MUR/MUR_SST_nc
|
||||
'
|
||||
' || {
|
||||
echo "Job failed, sleeping 30 minutes before retry..."
|
||||
sleep 1800
|
||||
exit 1
|
||||
}
|
||||
resources: {}
|
||||
volumeMounts:
|
||||
- name: data
|
||||
|
||||
Reference in New Issue
Block a user