from functools import lru_cache
import numpy as np
import pandas as pd
import requests
# Base URL of the WOW-KNMI sites endpoint; both the station query and the
# per-station CSV export URLs in this module are built from it.
URL_WOW_KNMI = "https://wow.knmi.nl/sites"

# Valid values for the ``layer`` query parameter (the ``meteo_var`` argument
# of the public functions is validated against this tuple).
meteo_vars_wow = (
    "rain_rate",
    "temperature",
    # "weather_code",  (currently disabled)
    "wind",
    "humidity",
    "pressure_msl",
)

# Maps the Dutch column headers of the CSV export (after the " [<stn>]"
# suffix has been stripped) to the English column names used in the
# returned DataFrames.
meteo_vars_wow_translate = {
    "neerslagintensiteit (mm/uur)": "precipitation [mm]",
    "temperatuur (C)": "temperature [C]",
    "windsnelheid (m/s)": "windspeed [m/s]",
    "relatieve vochtigheid (%)": "relative humidity [%]",
    "luchtdruk (hPa)": "air pressure [hPa]",
}

# Accepted values for the ``obs_filter`` argument of get_wow_stations.
wow_filters = ("wow_observations", "official_observations")
def get_wow_stations(
    meteo_var: str = "rain_rate",
    date: pd.Timestamp | None = None,
    bbox: list[float] | None = None,
    obs_filter: str | None = None,
) -> pd.DataFrame:
    """
    Get a DataFrame with the observation stations from WOW-KNMI.

    Parameters
    ----------
    meteo_var : str, optional
        The meteorological variable to query, one of 'rain_rate',
        'temperature', 'wind', 'humidity', 'pressure_msl', by default
        'rain_rate'.
    date : pd.Timestamp, optional
        The date and time of the observations. By default 10 minutes before
        the current local time. The minutes are rounded down to a multiple
        of 10 when the request is built.
    bbox : list of float, optional
        The bounding box of the spatial query in the format [min_lon, min_lat,
        max_lon, max_lat], by default [2.5, 50.0, 8.0, 54.0] (extent of the
        Netherlands).
    obs_filter : str, optional
        The filter to apply to the observations, one of 'wow_observations',
        'official_observations', by default None which selects all stations.

    Returns
    -------
    pd.DataFrame
        One row per station, indexed by the station 'id' (which is also kept
        as a column). Contains every field returned by the API plus the
        columns 'x' and 'y' extracted from the station coordinates. When the
        API returns no stations, an empty DataFrame with the columns 'id',
        'x' and 'y' is returned.

    Raises
    ------
    ValueError
        If meteo_var or obs_filter are not valid choices.
    requests.HTTPError
        If the request to the WOW-KNMI API fails.
    """
    if meteo_var not in meteo_vars_wow:
        raise ValueError(f"{meteo_var} must be one of {meteo_vars_wow}")
    if obs_filter is None:
        obs_filter = ""
    elif obs_filter not in wow_filters:
        raise ValueError(f"{obs_filter} must be one of {wow_filters}")

    if date is None:
        date = pd.Timestamp.today() - pd.Timedelta(minutes=10)
    datestr = _wow_strftime(date)

    if bbox is None:
        bbox = [2.5, 50.0, 8.0, 54.0]  # Netherlands extent
    # two corner points "b0 b1,b2 b3"; the space is URL-encoded as %20
    bboxstr = f"{bbox[0]}%20{bbox[1]},{bbox[2]}%20{bbox[3]}"

    url = (
        f"{URL_WOW_KNMI}?bbox={bboxstr}&layer={meteo_var}"
        f"&filter={obs_filter}&date={datestr}"
    )
    r = requests.get(url, timeout=120)
    r.raise_for_status()
    sites = r.json()["sites"]

    if not sites:
        # Without this guard an empty response fails on the 2-D indexing of
        # the coordinate array below (and on the missing 'id' column).
        return pd.DataFrame(columns=["id", "x", "y"]).set_index("id", drop=False)

    # each coordinate pair is used as (y, x): element 1 -> x, element 0 -> y
    coords = np.array([site["geo"]["coordinates"] for site in sites])
    xy = pd.DataFrame({"x": coords[:, 1], "y": coords[:, 0]})

    stations = pd.concat([pd.DataFrame(sites), xy], axis=1)
    return stations.set_index("id", drop=False)
def _wow_strftime(timestamp: pd.Timestamp) -> str:
"""Convert pandast Timestamp to wow string time"""
return (
timestamp.strftime("%Y-%m-%dT%H")
+ "%3A"
+ timestamp.strftime("%M")[0]
+ "0%3A00"
)
[docs]def get_wow(
meteo_var: str,
stn: str,
xy: list[float] | None = None,
start: pd.Timestamp = None,
end: pd.Timestamp = None,
) -> tuple[pd.DataFrame, dict]:
"""
Get weather observations and metadata from a WOW-KNMI station based on the
station name or lat, lon location
Parameters
----------
meteo_var : str
The meteorological variable to query, one of 'rain_rate',
'temperature', 'wind', 'humidity', 'pressure_msl'.
stn : str
The station ID of the WOW-KNMI station.
xy : List[float], optional
The coordinates of the location to query in the format [lon, lat], by
default None. If specified, the nearest station within 1 degree will be
used.
start : pd.Timestamp, optional
The start date and time of the observations, by default None. If None,
the januari first of the year before will be used.
end : pd.Timestamp, optional
The end date and time of the observations, by default None. If None,
the yesterday will be used.
Returns
-------
Tuple[pd.DataFrame, dict]
A tuple of a dataframe with the observations and a dictionary with the
station metadata.
Raises
------
ValueError
If both stn and xy are None or if meteo_var is not valid.
"""
if stn is None and xy is None:
raise ValueError("specify KNMI station (stn) or coordinates (xy)")
if stn is None and xy is not None:
bbox = [xy[0] - 1, xy[1] - 1, xy[0] + 1, xy[1] + 1] # lat lon lat lon
stations = get_wow_stations(
meteo_var=meteo_var, date=start, bbox=bbox, obs_filter=None
)
nearest_stations = get_nearest_station_xy([xy], stations=stations, ignore=None)
stn = nearest_stations[0]
metadata = get_wow_metadata(stn=stn)
measurements = get_wow_measurements(
stn=stn, meteo_var=meteo_var, start=start, end=end
)
return measurements, metadata
@lru_cache
def get_wow_measurements(
    stn: str,
    meteo_var: str,
    start: pd.Timestamp | None = None,
    end: pd.Timestamp | None = None,
) -> pd.DataFrame:
    """
    Get measurements from a WOW-KNMI station.

    Requests that span more than 365 days are split into consecutive periods
    of at most 365 days. These are fetched one by one, concatenated and
    de-duplicated on the time index.

    Parameters
    ----------
    stn : str
        The station ID of the WOW-KNMI station.
    meteo_var : str
        The meteorological variable to query, one of 'rain_rate',
        'temperature', 'wind', 'humidity', 'pressure_msl'.
    start : pd.Timestamp, optional
        The start date and time of the observations, by default None. If None,
        January first of the previous year is used.
    end : pd.Timestamp, optional
        The end date and time of the observations, by default None. If None,
        yesterday is used.

    Returns
    -------
    pd.DataFrame
        A dataframe with the measurements, indexed by a DatetimeIndex named
        'date'. For 'rain_rate' the intensities (mm/hour) are multiplied by
        the number of hours until the next observation, and the last
        observation is dropped because it has no following interval.

    Raises
    ------
    ValueError
        If meteo_var is not valid.

    Notes
    -----
    Results are memoised with functools.lru_cache, so all arguments must be
    hashable. The same DataFrame object is handed out on every cache hit;
    copy it before modifying it in place. Calls with start or end left at
    None are cached under the None key, so the dates resolved on the first
    call are reused by later calls in the same process.
    """
    if meteo_var not in meteo_vars_wow:
        raise ValueError(f"{meteo_var} must be one of {meteo_vars_wow}")
    start, end = _start_end_to_datetime(start, end)

    if (end - start) > pd.to_timedelta(365, unit="D"):
        # split the period in chunks of at most 365 days
        edges = list(pd.date_range(start, end, freq="365D"))
        if edges[-1] != end:
            edges.append(end)
        chunks = [
            get_wow_measurements(stn, meteo_var, start=t0, end=t1)
            for t0, t1 in zip(edges[:-1], edges[1:])
        ]
        df = pd.concat(chunks)
        # neighbouring chunks share their boundary timestamp
        return df[~df.index.duplicated()]

    startstr = _wow_strftime(start)
    endstr = _wow_strftime(end)

    # get station measurements
    url = f"{URL_WOW_KNMI}/{stn}/export?start={startstr}&end={endstr}&layer={meteo_var}"
    meas = pd.read_csv(url, delimiter=";", index_col=["datum"])
    meas.index = pd.DatetimeIndex(pd.to_datetime(meas.index.values), name="date")

    # strip the " [<stn>]" suffix from the headers and translate to English
    measurements = meas.rename(
        columns={
            col: meteo_vars_wow_translate[col.replace(f" [{stn}]", "")]
            for col in meas.columns
        }
    )

    if meteo_var == "rain_rate":
        # hours between each observation and the next one; lowercase "h" is
        # used because the uppercase alias is deprecated in recent pandas
        hours = (
            (measurements.index[1:] - measurements.index[:-1])
            / pd.Timedelta(1, unit="h")
        ).values
        measurements = measurements.iloc[:-1].multiply(hours, axis=0)

    return measurements
def get_nearest_station_xy(
    xy: list[list[float]], stations: pd.DataFrame, ignore: list[str] | None = None
) -> list[str] | None:
    """Find the stations closest to the given longitude, latitude coordinates.

    Parameters
    ----------
    xy : list or numpy array
        longitude, latitude coordinates of the locations, one pair per
        location, e.g. [[10, 25], [5, 25]].
    stations : pandas DataFrame
        Candidate stations. Passed unchanged to get_nearest_station_df, which
        reads the station coordinates from the columns 'x' and 'y' and uses
        the index as station identifiers.
    ignore : list, optional
        list of stations to ignore. The default is None.

    Returns
    -------
    stns : list or None
        The identifier of the nearest station for every location, in the
        order of xy. None if there are no stations to choose from.
    """
    locations = pd.DataFrame(data=xy, columns=["lon", "lat"])
    # the column names "lon"/"lat" make get_nearest_station_df use the
    # great-circle distance instead of the Euclidean distance
    return get_nearest_station_df(
        locations=locations,
        stations=stations,
        xcol="lon",
        ycol="lat",
        ignore=ignore,
    )
[docs]def get_nearest_station_df(
locations: pd.DataFrame,
stations: pd.DataFrame,
xcol: str = "x",
ycol: str = "y",
ignore: list[str] | None = None,
) -> list[str]:
"""Find the nearest stations that measure 'meteo_var' closest to the
coordinates in 'locations'.
Parameters
----------
locations : pandas.DataFrame
DataFrame containing x and y coordinates
stations : pandas DataFrame, optional
if None stations will be obtained using the get_stations function.
The default is None.
xcol : str
The name of the column in the locations dataframe with the x values, by
default 'x'. If lon in xcol, longitude is assumed.
ycol : str
The name of the column in the locations dataframe with the y values, by
default 'y'. If lat in ycol, latitude is assumed.
ignore : list, optional
A list of station IDs to ignore, by default None.
Returns
-------
stns : list
A list of station IDs that are closest to each location.
"""
if ignore is not None:
stations = stations.drop(ignore, inplace=False)
if stations.empty:
return None
xo = pd.to_numeric(locations[xcol])
xt = pd.to_numeric(stations.x)
yo = pd.to_numeric(locations[ycol])
yt = pd.to_numeric(stations.y)
xh, xi = np.meshgrid(xt, xo)
yh, yi = np.meshgrid(yt, yo)
if "lon" in xcol.lower() or "lat" in ycol.lower():
distances = pd.DataFrame(
_latlon_distance(yh, xh, yi, xi),
index=locations.index,
columns=stations.index,
)
else:
distances = pd.DataFrame(
np.sqrt((xh - xi) ** 2 + (yh - yi) ** 2),
index=locations.index,
columns=stations.index,
)
stns = distances.idxmin(axis=1).to_list()
return stns
def _latlon_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
"""Haversine formula"""
p = 0.017453292519943295
hav = (
0.5
- np.cos((lat2 - lat1) * p) / 2
+ np.cos(lat1 * p) * np.cos(lat2 * p) * (1 - np.cos((lon2 - lon1) * p)) / 2
)
return 12742 * np.arcsin(np.sqrt(hav))
def _start_end_to_datetime(start: str | None, end: str | None) -> tuple[pd.Timestamp]:
"""Convert start and endtime to datetime.
Parameters
----------
start : str, None
start time
end : str, None
start time
Returns
-------
start : pd.Timestamp
start time
end : pd.Timestamp
end time
"""
if start is None:
start = pd.Timestamp(pd.Timestamp.today().year - 1, 1, 1)
else:
start = pd.to_datetime(start)
if end is None:
end = pd.Timestamp.today() - pd.Timedelta(1, unit="D")
else:
end = pd.to_datetime(end)
return start, end