"""Fetch species observations from the Artsdatabanken Artskart public API."""

import requests
import pandas as pd
import numpy as np
from pyproj import Transformer

# create transformer once (faster if used repeatedly)
_transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)

# Columns kept from each observation record, in output order.
_OBSERVATION_COLUMNS = [
    "Latitude",
    "Longitude",
    "Notes",
    "ScientificName",
    "ScientificNameId",
]


def _circle_wkt(cx, cy, radius_m, n_vertices):
    """Return a closed WKT polygon approximating a circle around (cx, cy)."""
    angles = np.linspace(0, 2 * np.pi, n_vertices, endpoint=False)
    xs = cx + radius_m * np.cos(angles)
    ys = cy + radius_m * np.sin(angles)

    # Keep float64 and format to centimetres: float32 only resolves about
    # 0.5-1 m at the 10^6..10^7 m magnitudes of EPSG:3857 coordinates.
    # "+" is the URL-encoded space that WKT puts between x and y.
    points = [f"{x:.2f}+{y:.2f}" for x, y in zip(xs, ys)]
    points.append(points[0])  # close polygon

    return "POLYGON((" + ",".join(points) + "))"


def _observations_to_frame(observations):
    """Build the result DataFrame from a list of observation records."""
    if not observations:
        # json_normalize([]) has no columns, so selecting them would fail.
        return pd.DataFrame(columns=_OBSERVATION_COLUMNS)

    # reindex returns a new frame (no chained-assignment warning) and fills
    # any column the API omitted with NaN instead of raising KeyError.
    df = pd.json_normalize(observations).reindex(columns=_OBSERVATION_COLUMNS)

    for column in ("Latitude", "Longitude"):
        values = df[column]
        # The original code expects strings with a decimal comma; only do
        # the text replacement when the column is not already numeric.
        if not pd.api.types.is_numeric_dtype(values):
            values = values.str.replace(",", ".", regex=False)
        df[column] = values.astype(float)

    return df


def main(lat, lon, radius_m,
         n_vertices=15,
         page_size=10000,
         species_summary=False,
         timeout=30):
    """
    Fetch Artsdatabanken observations inside a circular polygon.

    The circle is centred on (lat, lon), approximated by a regular polygon
    built in EPSG:3857 and sent to the Artskart observations list endpoint.
    Only the first page (``page=0``) of results is requested.

    Parameters
    ----------
    lat : float
        Latitude of the centre, EPSG:4326.
    lon : float
        Longitude of the centre, EPSG:4326.
    radius_m : float
        Radius in EPSG:3857 units.
        NOTE(review): Web-Mercator units overstate ground distance by a
        factor of 1/cos(latitude), so the ground radius is roughly
        ``radius_m * cos(lat)`` -- confirm whether callers expect this.
    n_vertices : int
        Polygon vertices approximating the circle (at least 3)
    page_size : int
        Value passed as ``pageSize`` in the request.
    species_summary : bool
        If True, also return species counts
    timeout : float or tuple
        Timeout in seconds passed to ``requests.get``.

    Returns
    -------
    pandas.DataFrame
        Columns Latitude, Longitude (floats), Notes, ScientificName and
        ScientificNameId; empty when the response holds no observations.
    (optional) pandas.Series with species counts
        Returned as the second element of a tuple when ``species_summary``
        is True, including when there are no observations.

    Raises
    ------
    ValueError
        If ``n_vertices`` is smaller than 3.
    requests.HTTPError
        If the API answers with an error status.
    """
    if n_vertices < 3:
        raise ValueError("n_vertices must be at least 3 to form a polygon")

    # -------------------
    # Build polygon
    # -------------------

    cx, cy = _transformer.transform(lon, lat)
    polygon = _circle_wkt(cx, cy, radius_m, n_vertices)

    # -------------------
    # API request
    # -------------------

    url = (
        "https://artskart.artsdatabanken.no/publicapi/api/observations/list/"
        f"?gmWktPolygon={polygon}&page=0&pageSize={page_size}"
    )

    # Without a timeout a stalled server would block the caller forever.
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()

    data = r.json()
    observations = data.get("Observations") if isinstance(data, dict) else None

    # -------------------
    # Dataframe creation
    # -------------------

    df = _observations_to_frame(observations)

    if species_summary:
        return df, df["ScientificName"].value_counts()

    return df