Source code for hydropandas.io.knmi

"""Module with functions to read or download time series with observations from knmi.

The main function to download time series are:

- get_knmi_timeseries_xy: obtain a knmi station based on the xy location
- get_knmi_timeseries_stn: obtain a knmi station based on the station number

The main function to read time series txt files is:

 - read_knmi_timeseries_file: read a knmi txt file

"""

import datetime as dt
import logging
import os
import re
import tempfile
from functools import lru_cache
from io import StringIO

import numpy as np
import pandas as pd
import requests

from .. import util

logger = logging.getLogger(__name__)

URL_DAILY_PREC = "https://www.daggegevens.knmi.nl/klimatologie/monv/reeksen"
URL_DAILY_METEO = "https://www.daggegevens.knmi.nl/klimatologie/daggegevens"
URL_HOURLY_METEO = "https://www.daggegevens.knmi.nl/klimatologie/uurgegevens"


[docs]def get_knmi_obs( stn=None, fname=None, xy=None, meteo_var=None, start=None, end=None, **kwargs, ): """get knmi observation from stn, fname or nearest xy coordinates. Parameters ---------- stn : int, str or None, optional measurement station e.g. 829. The default is None. fname : str, path object, file-like object or None, optional filename of a knmi file. The default is None. xy : list, tuple or None, optional RD coördinates of a location in the Netherlands. The station nearest to this location used. The Default is None. meteo_var : str or None, optional meteo variable e.g. "RH" or "EV24". See list with all options in the hydropandas documentation. start : str, datetime or None, optional start date of observations. The default is None. end : str, datetime or None, optional end date of observations. The default is None. **kwargs: fill_missing_obs : bool, optional if True nan values in time series are filled with nearby time series. The default is False. interval : str, optional desired time interval for observations. Options are 'daily' and 'hourly'. The default is 'daily'. use_api : bool, optional if True the api is used to obtain the data, API documentation is here: https://www.knmi.nl/kennis-en-datacentrum/achtergrond/data-ophalen-vanuit-een-script if False a text file is downloaded into a temporary folder and the data is read from there. Default is True since the api is back online (July 2021). raise_exceptions : bool, optional if True you get errors when no data is returned. The default is False. Returns ------- pd.DataFrame, measurements dict, metadata Raises ------ ValueError if no meteo_var is given or stn, fname and xy are all None. """ if meteo_var is None and fname is None: raise ValueError("To get knmi data a meteo_var should be specified") elif meteo_var is not None: if not isinstance(meteo_var, str): raise (TypeError(f"meteo var should be string not {type(meteo_var)}")) settings = _get_default_settings(kwargs) start, end = _start_end_to_datetime(start, end) if stn is not None: stn = int(stn) start_str = str(start).replace(" 00:00:00", "") end_str = str(end).replace(" 00:00:00", "") logger.info( f"get data from station {stn} and variable {meteo_var} " f"from {start_str} to {end_str}" ) ts, meta = get_knmi_timeseries_stn(stn, meteo_var, settings, start, end) elif fname is not None: logger.info(f"get KNMI data from file {fname} and meteo variable {meteo_var}") ts, meta = get_knmi_timeseries_fname( str(fname), meteo_var, settings, start, end ) elif xy is not None: logger.info( f"get KNMI data from station nearest to coordinates {xy} and meteo" f"variable {meteo_var}" ) stns = get_n_nearest_stations_xy(xy, meteo_var, n=1) ts, meta = get_knmi_timeseries_stn(stns[0], meteo_var, settings, start, end) else: raise ValueError( "specify KNMI station (stn), filename (fname) or coordinates (xy)" ) return ts, meta
[docs]def get_knmi_timeseries_fname(fname, meteo_var, settings, start, end): # first try to figure out filetype by it's name if "neerslaggeg" in fname: # neerslagstation ts, meta = read_knmi_daily_rainfall_file(fname, start, end) meteo_var = "RD" elif "etmgeg" in fname: # meteo station ts, meta = read_knmi_daily_meteo_file(fname, meteo_var, start, end) elif "uurgeg" in fname: # hourly station ts, meta = read_knmi_hourly(fname, meteo_var, start, end) # if that doesn't work try to figure out by the meteo_var and settings elif meteo_var is None or meteo_var == "RD": # neerslagstation ts, meta = read_knmi_daily_rainfall_file(fname, start, end) meteo_var = "RD" elif settings["interval"] == "daily": # meteo station ts, meta = read_knmi_daily_meteo_file(fname, meteo_var, start, end) elif settings["interval"] == "hourly": # uurlijks station ts, meta = read_knmi_hourly(fname, meteo_var, start, end) else: raise ValueError( "please indicate how to read the file by specifying a meteo_var and" " an interval" ) stn = meta["station"] stations = get_stations(meteo_var=meteo_var) stn_name = get_station_name(stn, stations) # set metadata meta.update( { "x": stations.loc[stn, "x"], "y": stations.loc[stn, "y"], "name": f"{meteo_var}_{stn_name}", "source": "KNMI", "filename": fname, } ) return ts, meta
def _get_default_settings(settings=None): """adds the default settings to a dictinary with settings. If settings is None all the settings are default. If there are already settings given only the non-existing settings are added with their default value. The default settings are: fill_missing_obs = False nan values in time series are filled with nearby time series. interval = 'daily' desired time interval for observations. Can be 'daily' or 'hourly'. 'hourly' is only for precipitation ('RH') data from meteo stations. use_api : bool, optional if True the api is used to obtain the data, API documentation is here: https://www.knmi.nl/kennis-en-datacentrum/achtergrond/data-ophalen-vanuit-een-script if False a text file is downloaded into a temporary folder and the data is read from that file Default is True since the api is back online (July 2021). raise_exceptions : bool, optional if True you get errors when no data is returned. The default is False. Parameters ---------- settings : dict or None settings dictionary without default values. Returns ------- settings : dict settings dictionary with default values of no values was given in the input dictionary. """ default_settings = { "fill_missing_obs": False, "interval": "daily", "use_api": True, "raise_exceptions": True, } if settings is None: settings = {} if "fill_missing_obs" in settings.keys(): if "raise_exceptions" in settings.keys(): logger.debug("set raise_exceptions=False because fill_missing_obs is True") if settings["fill_missing_obs"] and settings["raise_exceptions"]: settings["raise_exceptions"] = False else: settings["raise_exceptions"] = False for key, value in default_settings.items(): if key not in settings.keys(): settings[key] = value return settings def _start_end_to_datetime(start, end): """convert start and endtime to datetime. Parameters ---------- start : str, datetime, None start time end : str, datetime, None start time Returns ------- start : pd.TimeStamp or None start time end : pd.TimeStamp or None end time """ if start is not None: start = pd.to_datetime(start) if end is not None: end = pd.to_datetime(end) return start, end
[docs]def get_knmi_timeseries_stn(stn, meteo_var, settings, start=None, end=None): """Get a knmi time series and metadata. Parameters ---------- stn : int measurement station e.g. 829. meteo_var : str observation type e.g. "RH" or "EV24". See list with all options in the hpd.read_knmi function. settings : dict settings for obtaining the right time series, see _get_default_settings for more information start : pd.TimeStamp or None, optional start date of observations. The default is None. end : pd.TimeStamp or None, optional end date of observations. The default is None. Returns ------- knmi_df : pandas DataFrame time series with measurements. meta : dictionary metadata from the measurement station. """ # get station stations = get_stations(meteo_var=meteo_var) stn_name = get_station_name(stn=stn, stations=stations) # raise error if hourly neerslag station data is requested if (meteo_var == "RD") and settings["interval"].startswith("hour"): message = ( "No hourly precipitation data available at precipitation station, " "use meteo_var 'RH' to obtain hourly precipitation data from " "meteo stations." ) raise ValueError(message) elif ( (meteo_var == "EV24") and settings["interval"].startswith("hour") and settings["use_api"] ): message = ( "No hourly evaporation data available through the api, " "set use_api=False." ) raise ValueError(message) elif settings["fill_missing_obs"]: # download data knmi_df, meta = fill_missing_measurements( stn, meteo_var, start, end, settings, stn_name ) return knmi_df, meta elif meteo_var in ["penman", "makkink", "hargreaves"]: # compute evaporation from data knmi_df, meta = get_evaporation(meteo_var, stn, start, end, settings) else: knmi_df, variables, station_meta = download_knmi_data( stn, meteo_var, start, end, settings, stn_name ) if str(stn) in station_meta.index: meta = station_meta.loc[f"{stn}"].to_dict() else: meta = {} # set metadata x = stations.at[stn, "x"] if stn in stations.index else np.nan y = stations.at[stn, "y"] if stn in stations.index else np.nan meta.update( { "x": x, "y": y, "station": stn, "name": f"{meteo_var}_{stn_name}", "source": "KNMI", } ) meta.update(variables) return knmi_df, meta
[docs]def get_stations(meteo_var): """get knmi stations from json files according to variable. Parameters ---------- meteo_var : str, optional type of meteodata, by default 'RH' Returns ------- pandas DataFrame with stations, names and coordinates (Lat/Lon & RD) """ dir_path = os.path.dirname(os.path.realpath(__file__)) if meteo_var == "RD": # read precipitation station data only fname = "../data/knmi_neerslagstation.json" stations = pd.read_json(os.path.join(dir_path, fname)) else: fname = "../data/knmi_meteostation.json" stations = pd.read_json(os.path.join(dir_path, fname)) return stations
[docs]def get_station_name(stn, stations=None): """Returns the station name from a KNMI station. Modifies the station name in such a way that a valid url can be obtained. Parameters ---------- stn : int station number. stations : pandas DataFrame DataFrame with the station metadata. Returns ------- str or None Name of the station or None if station is not found. """ if stations is None: stations = pd.concat([get_stations("RD"), get_stations("EV24")], axis=0) if stn not in stations.index: logger.warning(f"station {stn} not found") return None stn_name = stations.at[stn, "naam"] stn_name = stn_name.upper().replace(" ", "-").replace("(", "").replace(")", "") return stn_name
[docs]def fill_missing_measurements(stn, meteo_var, start, end, settings, stn_name=None): """fill missing measurements in knmi data. Parameters ---------- stn : int measurement station. meteo_var : str observation type. start : pd.TimeStamp start date of observations. end : pd.TimeStamp end date of observations. settings : dict settings for obtaining data. stn_name : str, optional station name. If None the name is obtained form te stn number. The default is None. Returns ------- knmi_df : pandas DataFrame data from one station from one type of observation, with additional column to see which station is used to fill the value variables : dictionary information about the observerd variables stations : pandas DataFrame information about the measurement station. """ if settings["interval"] == "hourly": raise NotImplementedError("cannot yet fill missing values in hourly data") if (start is None) or (end is None): raise ValueError( "a start and end date have to be specified if fill_missing_obs is True" ) # get the location of the stations stations = get_stations(meteo_var=meteo_var) if stn_name is None: stn_name = get_station_name(stn=stn, stations=stations) # download data from station knmi_df, variables, station_meta = download_knmi_data( stn, meteo_var, start, end, settings, stn_name ) # if the first station cannot be read, read another station as the first ignore = [stn] while knmi_df.empty: logger.info(f"station {stn} has no measurements between {start} and {end}") logger.info("trying to get measurements from nearest station") stn_lst = get_nearest_station_df( stations.loc[[stn]], meteo_var=meteo_var, ignore=ignore ) if stn_lst is None: logger.warning( f"there is no station with measurements of {meteo_var} between " f"{start} and {end}" ) return pd.DataFrame(), dict() stn = stn_lst[0] stn_name = get_station_name(stn=stn, stations=stations) knmi_df, variables, station_meta = download_knmi_data( stn, meteo_var, start, end, settings, stn_name ) ignore.append(stn) if end > knmi_df.index[-1]: # check latest date at which measurements are available at De Bilt new_end = _check_latest_measurement_date_de_bilt( meteo_var, use_api=settings["use_api"], start=start if knmi_df.empty else knmi_df.index[-1], end=end, ) if new_end < end: end = new_end logger.warning(f'changing end_date to {end.strftime("%Y-%m-%d")}') # find missing values knmi_df = _add_missing_indices(knmi_df, stn, start, end) missing = knmi_df[meteo_var].isna() logger.debug(f"station {stn} has {missing.sum()} missing measurements") knmi_df.loc[~missing, "station"] = str(stn) # fill missing values while np.any(missing) and not np.all(missing): stn_comp = get_nearest_station_df( stations.loc[[stn]], meteo_var=meteo_var, ignore=ignore ) if stn_comp is None: logger.info( "could not fill all missing measurements as there are " "no stations left to check" ) missing[:] = False break else: stn_comp = stn_comp[0] n_missing = missing.sum() logger.info( f"trying to fill {n_missing} missing measurements with station {stn_comp}" ) stn_name_comp = get_station_name(stn_comp, stations) knmi_df_comp, _, __ = download_knmi_data( stn_comp, meteo_var, start, end, settings, stn_name_comp ) if knmi_df_comp.empty: logger.warning(f"station {stn_comp} cannot be downloaded") else: # dropnans from new data knmi_df_comp = knmi_df_comp.loc[~knmi_df_comp[meteo_var].isna(), :] # get index of missing data in original timeseries missing_idx = missing.loc[missing].index # if any missing are in the new data, update if missing_idx.isin(knmi_df_comp.index).any(): # index for missing but in newly downloaded data ix_idx = missing_idx.intersection(knmi_df_comp.index) # update missing data knmi_df.loc[ix_idx, meteo_var] = knmi_df_comp.loc[ix_idx, meteo_var] # add source station number knmi_df.loc[ix_idx, "station"] = str(stn_comp) missing = knmi_df[meteo_var].isna() ignore.append(stn_comp) if str(stn) in station_meta.index: meta = station_meta.loc[f"{stn}"].to_dict() else: meta = {} # set metadata meta.update( { "x": stations.loc[stn, "x"], "y": stations.loc[stn, "y"], "station": stn, "name": f"{meteo_var}_{stn_name}", "source": "KNMI", } ) meta.update(variables) return knmi_df, meta
[docs]def download_knmi_data(stn, meteo_var, start, end, settings, stn_name=None): """download knmi data of a measurements station for certain observation type. Parameters ---------- stn : int measurement station. meteo_var : str observation type. start : pd.TimeStamp or None start date of observations. end : pd.TimeStamp or None end date of observations. settings : dict settings for obtaining data stn_name : str, optional station name. If None the name is obtained form te stn number. The default is None. Raises ------ ValueError if the data from knmi cannot not be read a ValueError is raised. Unless raise_exceptions is False Returns ------- knmi_df : pandas DataFrame data from one station from one type of observation variables : dictionary information about the observerd variables stations : pandas DataFrame information about the measurement station. """ logger.debug( f"download KNMI {meteo_var} data from station " f"{stn}-{stn_name} between {start} and {end}" ) # define variables knmi_df = pd.DataFrame() variables = {} stations = pd.DataFrame() # download and read data try: try: if settings["use_api"]: if settings["interval"].startswith("hour"): # hourly data from meteorological stations knmi_df, variables = get_knmi_hourly_api(stn, meteo_var, start, end) elif meteo_var == "RD": # daily data from rainfall-stations knmi_df, variables = get_knmi_daily_rainfall_api(stn, start, end) else: # daily data from meteorological stations knmi_df, variables, stations = get_knmi_daily_meteo_api( stn, meteo_var, start, end ) except (RuntimeError, requests.ConnectionError) as e: logger.warning( "KNMI API failed, try setting the 'use_api' argument to 'False'" ) if settings["raise_exceptions"]: raise e logger.info("Try non api method") settings = settings.copy() settings["use_api"] = False if not settings["use_api"]: if settings["interval"].startswith("hour"): # hourly data from meteorological stations raise NotImplementedError() elif meteo_var == "RD": # daily data from rainfall-stations knmi_df, variables = get_knmi_daily_rainfall_url( stn, stn_name, start, end ) else: # daily data from meteorological stations knmi_df, variables = get_knmi_daily_meteo_url( stn, meteo_var, start, end ) except (ValueError, KeyError) as e: logger.error(e) if settings["raise_exceptions"]: raise ValueError(e) if knmi_df.empty: logger.debug( "no measurements found for station " f"{stn}-{stn_name} between {start} and {end}" ) return knmi_df, variables, stations
[docs]@lru_cache() def get_knmi_daily_rainfall_api(stn, start=None, end=None): """download and read knmi daily rainfall. Parameters ---------- stn : int station number. start : pd.TimeStamp or None start time of observations. end : pd.TimeStamp or None end time of observations. Raises ------ ValueError if there is no data for the provided stn an error is raised. Returns ------- pandas DataFrame measurements. variables : dictionary additional information about the variables """ meteo_var = "RD" url = URL_DAILY_PREC params = {"vars": meteo_var, "stns": str(stn)} if start is not None: params["start"] = start.strftime("%Y%m%d") if end is not None: params["end"] = end.strftime("%Y%m%d") result = requests.get(url, params=params) if result.status_code != 200: raise requests.ConnectionError(f"Cannot connect to {url}") result_str = result.text if result_str.startswith("<!DOCTYPE html>"): raise RuntimeError("KNMI API down") f = StringIO(result_str) knmi_df, variables = read_knmi_daily_rainfall(f, meteo_var) if stn not in knmi_df["STN"].unique(): return pd.DataFrame(), variables return knmi_df[[meteo_var]], variables
[docs]@lru_cache() def get_knmi_daily_rainfall_url(stn, stn_name, start=None, end=None): """download and read knmi daily rainfall. Parameters ---------- stn : int station number. stn_name : str the name of the station in capital letters, can be tricky start : pd.TimeStamp or None start time of observations. end : pd.TimeStamp or None end time of observations. Raises ------ ValueError if there is no data for the provided stn an error is raised. Returns ------- pandas DataFrame measurements. variables : dictionary additional information about the variables """ if end is not None: end = end + dt.timedelta(days=1) stn = str(stn) url = ( "https://cdn.knmi.nl/knmi/map/page/klimatologie/" f"gegevens/monv_reeksen/neerslaggeg_{stn_name}_{stn}.zip" ) # get file name basedir = os.path.join(tempfile.gettempdir(), "knmi") if not os.path.isdir(basedir): os.mkdir(basedir) fname_zip = os.path.join(tempfile.gettempdir(), "knmi", f"neerslaggeg_{stn}.zip") fname_dir = os.path.join(tempfile.gettempdir(), "knmi", f"neerslaggeg_{stn}") fname_txt = os.path.join(fname_dir, f"neerslaggeg_{stn_name}_{stn}.txt") # download zip file r = requests.get(url, stream=True) if r.status_code != 200: raise ValueError(f"invalid url {url} please check station name {stn_name}") with open(fname_zip, "wb") as fd: for chunk in r.iter_content(chunk_size=128): fd.write(chunk) # unzip file util.unzip_file(fname_zip, fname_dir, force=True, preserve_datetime=True) return read_knmi_daily_rainfall_file(fname_txt, start=start, end=end)
[docs]def read_knmi_daily_rainfall_file(fname_txt, start=None, end=None): """read a knmi file with daily rainfall data. Parameters ---------- path : str file path of a knmi .txt file. start : pd.TimeStamp or None start time of observations. end : pd.TimeStamp or None end time of observations. Returns ------- pandas DataFrame measurements. variables : dictionary additional information about the variables """ meteo_var = "RD" with open(fname_txt, "r") as f: line = f.readline() # get meteo var for _ in range(50): if "RD " in line: # RD komt te vaak voor vandaar de spaties key, item = line.split("=") variables = {key.strip(): item.strip()} break line = f.readline() # get dataframe for _ in range(50): if "STN" in line: columns = line.strip("# ").strip("\n").split(",") columns = [x.strip(" ") for x in columns] values = f.readline() if values == "\n": values = f.readline() line = f.readline() df = pd.read_csv(f, header=None, names=columns, na_values=" ") df.set_index(pd.to_datetime(df.YYYYMMDD, format="%Y%m%d"), inplace=True) df = df.drop("YYYYMMDD", axis=1) if df.index.duplicated().sum() > 0: df = df.loc[~df.index.duplicated(keep="first")] logger.info("duplicate indices removed from RD measurements") # sometimes the last row is empty, check for that and remove it if not df.empty: if df.iloc[-1].isna().any(): logger.debug("duplicate indices removed from RD measurements") df = df.drop(index=df.index[-1]) df.loc[:, meteo_var] = df[meteo_var].astype(float) df, variables = _transform_variables(df, variables) variables["unit"] = "m" # add station to variables unique_stn = df["STN"].unique() if len(unique_stn) > 1: raise ValueError(f"multiple stations in single file {unique_stn}") else: variables["station"] = df["STN"].iloc[0] return df.loc[start:end, [meteo_var]], variables
def _read_knmi_header(f, meteo_var): variables = {} line = f.readline() for iline in range(500): if " = " in line or " : " in line: line = line.lstrip(" #").strip("\n") if " = " in line: varDes = line.split(" = ") else: varDes = line.split(" : ") if varDes[0].strip() == meteo_var: variables[varDes[0].strip()] = varDes[1].strip() if "STN,YY" in line: header = line.replace("#", "").split(",") header = [item.lstrip().rstrip() for item in header] break line = f.readline() if iline > 498: raise ValueError("cannot read measurements from file") return f, variables, header def _transform_variables(df, variables): """Transforms the timeseries to default units and settings. Does 3 things: 1. all values equal to -1 are converted to zero 2. the units are changed from 0.1 mm to 1 mm. 3. the units are changed from mm to m. Parameters ---------- df : pandas DataFrame time series. variables : dictionary description of variables in time series. Raises ------ NameError if there are columns in the DataFrame and no matching key in the variables dictionary. Returns ------- df : pandas DataFrame time series. variables : dictionary description of variables in time series. """ for key, value in variables.items(): # test if key existst in data if key not in df.columns: if key == "YYYYMMDD" or key == "HH": pass elif key == "T10N": variables.pop(key) key = "T10" else: raise NameError(key + " does not exist in data") if "(-1 voor <0.05 mm)" in value: # remove -1 for precipitation smaller than <0.05 mm df.loc[df.loc[:, key] == -1, key] = 0.0 value = value.replace("(-1 voor <0.05 mm)", "(0 voor <0.05mm)").replace( "(-1 for <0.05 mm)", "(0 for <0.05mm)" ) logger.debug(f"transform {key}, {value} lower than 0.05 mm to 0 mm") if "0.1 " in value: logger.debug(f"transform {key}, {value} from 0.1 to 1") df[key] = df[key] * 0.1 value = value.replace("0.1 ", "") if " tiende " in value: logger.debug(f"transform {key}, {value} from 0.1 to 1") df[key] = df[key] * 0.1 value = value.replace(" tiende ", " ") if " mm" in value: logger.debug(f"transform {key}, {value} from mm to m") df[key] = df[key] * 0.001 value = value.replace(" mm", " m") if " millimeters" in value: logger.debug(f"transform {key}, {value} from mm to m") df[key] = df[key] * 0.001 value = value.replace(" millimeters", " m") if "08.00 UTC" in value: logger.debug(f"transform {key}, {value} from UTC to UTC+1") # over the period 08.00 preceding day - 08.00 UTC present day df.index = df.index + pd.to_timedelta(8, unit="h") # from UT to UT+1 (standard-time in the Netherlands) df.index = df.index + pd.to_timedelta(1, unit="h") value = value.replace("08.00", "09.00").replace("UTC", "UTC+1") # Store new variable variables[key] = value return df, variables
[docs]def read_knmi_daily_rainfall(f, meteo_var): """ Read daily rainfall data from a KNMI file. Parameters ---------- f : file-like object The file object containing the KNMI data. meteo_var : str The meteorological variable to extract. Returns ------- pandas.DataFrame The DataFrame containing the extracted daily rainfall data. dict A dictionary containing information about the variables in the DataFrame. Notes ----- This function assumes that the file object `f` is already open and positioned at the start of the data. The file is expected to have a header with variable names and a corresponding data table. The DataFrame returned by this function has the following modifications: - The index is set to the datetime values derived from the 'YYYYMMDD' column. - The 'YYYYMMDD' column is dropped. - Duplicate indices are removed, keeping the first occurrence. - If the last row has missing values, it is removed. - The 'meteo_var' column is cast to float data type. - Variables are transformed using an internal function `_transform_variables`. - The unit of measurement for the 'meteo_var' variable is set to 'm'. """ f, variables, header = _read_knmi_header(f, meteo_var) df = pd.read_csv(f, header=None, names=header, na_values=" ") f.close() df.set_index(pd.to_datetime(df.YYYYMMDD, format="%Y%m%d"), inplace=True) df = df.drop("YYYYMMDD", axis=1) if df.index.duplicated().sum() > 0: df = df.loc[~df.index.duplicated(keep="first")] logger.debug("duplicate indices removed from RD measurements") # sometimes the last row is messed up, check for that and remove it if not df.empty: if df.iloc[-1].isna().any(): logger.debug("last row contains no data, remove last row") df = df.drop(index=df.index[-1]) df.loc[:, meteo_var] = df[meteo_var].astype(float) df, variables = _transform_variables(df, variables) variables["unit"] = "m" return df, variables
def _read_station_location(f): stations = None line = f.readline() for _ in range(30): if "STN" in line: titels = line.strip("# ").split() titels = [x.replace("(", "_") for x in titels] titels = [x.replace(r")", "") for x in titels] values = f.readline().strip("# ").strip().replace(":", "") values = re.split(r"\s{2,}", values) # Create pd.DataFrame for station data stations = pd.DataFrame(columns=titels, data=[values]) stations.set_index(["STN"], inplace=True) for col in stations.columns: try: stations[col] = stations[col].astype(float) except ValueError: pass break line = f.readline() if stations is None: logger.warning("could not find stations") return f, stations
[docs]@lru_cache() def get_knmi_daily_meteo_api(stn, meteo_var, start=None, end=None): """download and read knmi daily meteo data. Parameters ---------- stn : int station number. meteo_var : str e.g. 'EV24'. start : pd.TimeStamp or None start time of observations. end : pd.TimeStamp or None end time of observations. Returns ------- pandas DataFrame measurements. variables : dictionary additional information about the variables stations : pandas DataFrame additional data about the measurement station """ url = URL_DAILY_METEO params = { "vars": meteo_var, "stns": str(stn), } # modify start and end date for the api call because the dates are later # transformed, see example notebook 02_knmi_observations. if start is not None: start = start - dt.timedelta(days=1) params["start"] = start.strftime("%Y%m%d") if end is not None: end = end - dt.timedelta(days=1) params["end"] = end.strftime("%Y%m%d") result = requests.get(url, params=params) if result.status_code != 200: raise requests.ConnectionError(f"Cannot connect to {url}") result_str = result.text if result_str.startswith("<!DOCTYPE html>"): raise RuntimeError("KNMI API down") f = StringIO(result_str) knmi_df, variables, stations = read_knmi_daily_meteo(f, meteo_var) knmi_df.dropna(subset=[meteo_var], inplace=True) return knmi_df[[meteo_var]], variables, stations
[docs]@lru_cache() def get_knmi_daily_meteo_url(stn, meteo_var, start=None, end=None): """download and read knmi daily meteo data. Parameters ---------- stn : str station number. meteo_var : str e.g. 'EV24'. start : pd.TimeStamp or None start time of observations. end : pd.TimeStamp or None end time of observations. Returns ------- pandas DataFrame measurements. variables : dictionary additional information about the variables stations : pandas DataFrame additional data about the measurement station """ url = ( "https://cdn.knmi.nl/knmi/map/page/klimatologie" f"/gegevens/daggegevens/etmgeg_{stn}.zip" ) # get file name basedir = os.path.join(tempfile.gettempdir(), "knmi") if not os.path.isdir(basedir): os.mkdir(basedir) fname_zip = os.path.join(tempfile.gettempdir(), "knmi", f"etmgeg_{stn}.zip") fname_dir = os.path.join(tempfile.gettempdir(), "knmi", f"etmgeg_{stn}") fname_txt = os.path.join(fname_dir, f"etmgeg_{stn}.txt") # download zip file r = requests.get(url, stream=True) with open(fname_zip, "wb") as fd: for chunk in r.iter_content(chunk_size=128): fd.write(chunk) # unzip file util.unzip_file(fname_zip, fname_dir, force=True, preserve_datetime=True) return read_knmi_daily_meteo_file(fname_txt, meteo_var, start, end)
[docs]def read_knmi_daily_meteo_file(path, meteo_var, start=None, end=None): """read knmi daily meteo data from a file Parameters ---------- path : str file path of .txt file. meteo_var : str e.g. 'EV24'. start : pd.TimeStamp or None start time of observations. end : pd.TimeStamp or None end time of observations. Raises ------ ValueError If the meteo var is not in the file. Returns ------- pandas DataFrame measurements. variables : dictionary additional information about the variables """ variables = None with open(path, "r") as f: line = f.readline() # get meteo var for _ in range(50): if meteo_var in line: key, item = line.split("=") var_name = key.strip() variables = {var_name: item.strip()} if var_name == meteo_var: break line = f.readline() if variables is None: raise ValueError(f"could not find {meteo_var} in file {path}") # get dataframe for _ in range(50): if "STN" in line: columns = line.strip("# ").strip("\n").split(",") columns = [x.strip(" ") for x in columns] values = f.readline() if values == "\n": values = f.readline() df = pd.read_csv(f, header=None, names=columns, na_values=" ") df.set_index(pd.to_datetime(df.YYYYMMDD, format="%Y%m%d"), inplace=True) df = df.drop("YYYYMMDD", axis=1) df = df.loc[df.index.notnull(), :] # add a full day for meteorological data, so that the # timestamp is at the end of the period in the data df.index = df.index + pd.to_timedelta(1, unit="d") # from UT to UT+1 (standard-time in the Netherlands) df.index = df.index + pd.to_timedelta(1, unit="h") # add station to variables unique_stn = df["STN"].unique() if len(unique_stn) > 1: raise ValueError(f"multiple stations in single file {unique_stn}") else: station = df["STN"].iloc[0] df = df.loc[start:end, [meteo_var]] df = df.dropna() df, variables = _transform_variables(df, variables) variables["unit"] = "" variables["station"] = station break line = f.readline() return df, variables
[docs]def read_knmi_daily_meteo(f, meteo_var): f, stations = _read_station_location(f) f, variables, header = _read_knmi_header(f, meteo_var) header[0] = header[0].lstrip("# ") df = pd.read_csv(f, header=None, names=header, na_values=" ") f.close() df.set_index(pd.to_datetime(df.YYYYMMDD, format="%Y%m%d"), inplace=True) df = df.drop("YYYYMMDD", axis=1) df = df.loc[df.index.notnull(), :] # add a full day for meteorological data, so that the # timestamp is at the end of the period in the data df.index = df.index + pd.to_timedelta(1, unit="d") # from UT to UT+1 (standard-time in the Netherlands) df.index = df.index + pd.to_timedelta(1, unit="h") df, variables = _transform_variables(df, variables) variables["unit"] = "" return df, variables, stations
[docs]@lru_cache() def get_knmi_hourly_api(stn, meteo_var, start, end): """Retrieve hourly meteorological data from the KNMI API. Parameters ---------- stn : int The station number. meteo_var : str The meteorological variable to retrieve. start : pd.Timestamp or None The start date and time of the data. end : pd.Timestamp or None The end date and time of the data. Returns ------- pandas.DataFrame A DataFrame containing the requested meteorological variable data. dict A dictionary containing information about the variables in the DataFrame. Raises ------ requests.ConnectionError If there is a connection error while accessing the KNMI API. RuntimeError If the KNMI API is down or returns unexpected data. """ url = URL_HOURLY_METEO params = { "vars": meteo_var, "stns": str(stn), } # It looks like the API has an option to select the start and end hour that # you want but in reality it does not have this option. if start is None: raise ValueError("A start date is required when using hourly interval") if end is None: raise ValueError("An end date is required when using hourly interval") if (end - start).days > 4150: raise ValueError("time span for hourly data cannot be greater than 10 years") if (end - start).days < 1: raise ValueError("time span should be more than 1 day") params["end"] = end.strftime("%Y%m%d") + "24" s = start - pd.Timedelta(1, unit="H") params["start"] = s.strftime("%Y%m%d") + "01" result = requests.get(url, params=params) if result.status_code != 200: raise requests.ConnectionError(f"Cannot connect to {url}") result_str = result.text if result_str.startswith("<!DOCTYPE html>"): raise RuntimeError("KNMI API down") f = StringIO(result_str) return read_knmi_hourly(f, meteo_var, start, end)
[docs]def read_knmi_hourly(f, meteo_var, start=None, end=None): """Read hourly KNMI file. Parameters ---------- f : str or filelike object path to file or filelike object Returns ------- df : pd.DataFrame DataFrame containing data variables : dict dictionary containing metadata about the variables """ ispath = False if isinstance(f, str): f = open(f, "r") ispath = True f, variables, header = _read_knmi_header(f, meteo_var) df = pd.read_csv(f, header=None, names=header, na_values=" ") if ispath: f.close() # convert 24 to 0 if "H" in df.columns: hour_col = "H" elif "HH" in df.columns: hour_col = "HH" else: raise ValueError("could not find column with hour") df.loc[df[hour_col] == 24, hour_col] = 0 datetime = pd.to_datetime( df.YYYYMMDD.astype(str) + df[hour_col].astype(str).str.zfill(2), format="%Y%m%d%H", ) datetime.loc[datetime.dt.hour == 0] = datetime + dt.timedelta(days=1) # add station to variables unique_stn = df["STN"].unique() if len(unique_stn) > 1: raise ValueError(f"multiple stations in single file {unique_stn}") else: station = df["STN"].iloc[0] # set index df.set_index(datetime, inplace=True) df = df[[meteo_var]] variables = variables df, variables = _transform_variables(df, variables) variables["unit"] = "m" variables["station"] = station return df.loc[start:end, [meteo_var]], variables
def _check_latest_measurement_date_de_bilt( meteo_var, use_api=True, start=None, end=None ): """According to the website of the knmi it can take up to 3 weeks before precipitation data is updated. If you use the fill_missing_measurements method to fill a time series untill today, it will keep looking at all stations to find the data of these last days that probably does not exist. To prevent this, this function will find the last day there are measure- ments at knmi station de Bilt. It is assumed that if de Bilt doesn't have recent measurements, no station will have measurements for these dates. website knmi: https://www.knmi.nl/nederland-nu/klimatologie/monv/reeksen Parameters ---------- meteo_var : str meteo variable. use_api : bool, optional if True the api is used to obtain the data, API documentation is here: https://www.knmi.nl/kennis-en-datacentrum/achtergrond/data-ophalen-vanuit-een-script Default is True. start : pd.TimeStamp or None, optional start date of observations. Set to 365 days before today when None. The default is None. end : pd.TimeStamp or None, optional end date of observations. Set to 10 days after today when None. The default is None. Returns ------- last_measurement_date_debilt : pd.TimeStamp last date with measurements at station de Bilt """ if start is None: start = dt.datetime.now() - pd.Timedelta(365, unit="D") if end is None: end = dt.datetime.now() + pd.Timedelta(10, unit="D") if meteo_var == "RD": if use_api: try: knmi_df, _ = get_knmi_daily_rainfall_api(550, start, end) except (RuntimeError, requests.ConnectionError): logger.warning("KNMI API failed, switching to non-API method") knmi_df, _ = get_knmi_daily_rainfall_url(550, "DE-BILT", start, end) else: knmi_df, _ = get_knmi_daily_rainfall_url(550, "DE-BILT", start, end) else: if use_api: try: knmi_df, _, _ = get_knmi_daily_meteo_api( 260, meteo_var, start=start, end=end ) except (RuntimeError, requests.ConnectionError): logger.warning("KNMI API failed, switching to non-API method") knmi_df, _, _ = get_knmi_daily_meteo_url(260, meteo_var, start, end) else: knmi_df, _, _ = get_knmi_daily_meteo_url(260, meteo_var, start, end) knmi_df = knmi_df.dropna() end_str = end.strftime("%Y-%m-%d") if knmi_df.empty: start_str = start.strftime("%Y-%m-%d") raise ValueError( "knmi station de Bilt has no measurements between " f"{start_str} and {end_str} for variable {meteo_var}." ) last_measurement_date_debilt = knmi_df.index[-1] logger.debug( f"last {meteo_var} measurement available at the Bilt until {end_str} is from" f' {last_measurement_date_debilt.strftime("%Y-%m-%d")}' ) logger.debug( f"assuming no {meteo_var} measurements are available at " "other stations after this date" ) return last_measurement_date_debilt
[docs]def get_nearest_station_df( locations, xcol="x", ycol="y", stations=None, meteo_var="RH", ignore=None ): """Find the KNMI stations that measure 'meteo_var' closest to the coordinates in 'locations'. Parameters ---------- locations : pandas.DataFrame DataFrame containing x and y coordinates xcol : str name of the column in the locations dataframe with the x values ycol : str name of the column in the locations dataframe with the y values stations : pandas DataFrame, optional if None stations will be obtained using the get_stations function. The default is None. meteo_var : str measurement variable e.g. 'RH' or 'EV24' ignore : list, optional list of stations to ignore. The default is None. Returns ------- stns : list station numbers. """ if stations is None: stations = get_stations(meteo_var=meteo_var) if ignore is not None: stations.drop(ignore, inplace=True) if stations.empty: return None xo = pd.to_numeric(locations[xcol]) xt = pd.to_numeric(stations.x) yo = pd.to_numeric(locations[ycol]) yt = pd.to_numeric(stations.y) xh, xi = np.meshgrid(xt, xo) yh, yi = np.meshgrid(yt, yo) distances = pd.DataFrame( np.sqrt((xh - xi) ** 2 + (yh - yi) ** 2), index=locations.index, columns=stations.index, ) stns = distances.idxmin(axis=1).to_list() return stns
[docs]def get_nearest_station_xy(xy, stations=None, meteo_var="RH", ignore=None): """find the KNMI stations that measure 'meteo_var' closest to the given x and y coordinates. Parameters ---------- xy : list or numpy array, optional xy coordinates of the locations. e.g. [[10,25], [5,25]] stations : pandas DataFrame, optional if None stations will be obtained using the get_stations function. The default is None. meteo_var : str measurement variable e.g. 'RH' or 'EV24' ignore : list, optional list of stations to ignore. The default is None. Returns ------- stns : list station numbers. Notes ----- assumes you have a structured rectangular grid. """ locations = pd.DataFrame(data=xy, columns=["x", "y"]) stns = get_nearest_station_df( locations, xcol="x", ycol="y", stations=stations, meteo_var=meteo_var, ignore=ignore, ) return stns
[docs]def get_n_nearest_stations_xy(xy, meteo_var, n=1, stations=None, ignore=None): """Find the N nearest KNMI stations that measure variable 'meteo_var' to the x, y coordinates. Parameters ---------- xy : list, tuple or numpy.array of int or float sinlge pair of xy coordinates. e.g. (150_000., 400_000.) meteo_var : str measurement variable e.g. 'RH' or 'EV24' n : int, optional number of stations you want to return. The default is 1. stations : pandas DataFrame, optional if None stations will be obtained using the get_stations function. The default is None. ignore : list, optional list of stations to ignore. The default is None. Returns ------- list station numbers. """ if stations is None: stations = get_stations(meteo_var=meteo_var) if ignore is not None: stations.drop(ignore, inplace=True) if stations.empty: return None distance = np.sqrt((stations.x - xy[0]) ** 2 + (stations.y - xy[1]) ** 2) stns = distance.nsmallest(n).index.to_list() return stns
def _add_missing_indices(knmi_df, stn, start, end): """When downloading KNMI data you don't always get a DataFrame with the periods that you provided in your request. Thus the index does not cover the complete period that you are interested in. This function adds the missing period to the index of the DataFrame. Parameters ---------- knmi_df : pandas DataFrame data from one station from one type of observation, with additional column to see which station is used to fill the value stn : int or str measurement station. start : pd.TimeStamp start time of observations. end : pd.TimeStamp end time of observations. Returns ------- knmi_df : pandas DataFrame data from one station from one type of observation """ # check if given dates are more or less similar than measurement dates if (knmi_df.index[0] - start).days < 2: new_start = knmi_df.index[0] else: new_start = pd.Timestamp( year=start.year, month=start.month, day=start.day, hour=knmi_df.index[0].hour, minute=knmi_df.index[0].minute, second=knmi_df.index[0].second, ) logger.info(f"station {stn} has no measurements before {knmi_df.index[0]}") if (end - knmi_df.index[-1]).days < 2: new_end = knmi_df.index[-1] else: new_end = pd.Timestamp( year=end.year, month=end.month, day=end.day, hour=knmi_df.index[-1].hour, minute=knmi_df.index[-1].minute, second=knmi_df.index[-1].second, ) logger.info(f"station {stn} has no measurements after {knmi_df.index[-1]}") # add missing indices new_index = pd.date_range(new_start, new_end, freq="D") knmi_df = knmi_df.reindex(new_index) return knmi_df
[docs]def get_knmi_obslist( locations=None, stns=None, xy=None, meteo_vars=("RH",), starts=None, ends=None, ObsClasses=None, **kwargs, ): """Get a list of observations of knmi stations. Either specify a list of knmi stations (stns) or a dataframe with x, y coordinates (locations). Parameters ---------- locations : pandas DataFrame or None dataframe with x and y coordinates. The default is None stns : list of str or None list of knmi stations. The default is None xy : list or numpy array, optional xy coordinates of the locations. e.g. [[10,25], [5,25]] meteo_vars : list or tuple of str meteo variables e.g. ["RH", "EV24"]. The default is ("RH") starts : None, str, datetime or list, optional start date of observations per meteo variable. The start date is included in the time series. If start is None the start date will be January 1st of the previous year. if start is str it will be converted to datetime if start is a list it should be the same length as meteo_vars and the start time for each variable. The default is None ends : list of str, datetime or None end date of observations per meteo variable. The end date is included in the time series. If end is None the start date will be January 1st of the previous year. if end is a str it will be converted to datetime if end is a list it should be the same length as meteo_vars and the end time for each meteo variable. The default is None ObsClasses : list of type or None class of the observations, can be PrecipitationObs or EvaporationObs. The default is None. **kwargs: fill_missing_obs : bool, optional if True nan values in time series are filled with nearby time series. The default is False. interval : str, optional desired time interval for observations. Options are 'daily' and 'hourly'. The default is 'daily'. use_api : bool, optional if True the api is used to obtain the data, API documentation is here: https://www.knmi.nl/kennis-en-datacentrum/achtergrond/data-ophalen-vanuit-een-script if False a text file is downloaded into a temporary folder and the data is read from there. Default is True since the api is back online (July 2021). raise_exceptions : bool, optional if True you get errors when no data is returned. The default is False. Returns ------- obs_list : list of obsevation objects collection of multiple point observations """ settings = _get_default_settings(kwargs) if isinstance(meteo_vars, str): meteo_vars = [meteo_vars] if starts is None: starts = [None] * len(meteo_vars) elif isinstance(starts, (str, dt.datetime)): starts = [starts] * len(meteo_vars) elif isinstance(starts, list): pass else: raise TypeError("must be None, str, dt.datetime or list") if ends is None: ends = [None] * len(meteo_vars) elif isinstance(ends, (str, dt.datetime)): ends = [ends] * len(meteo_vars) elif isinstance(ends, list): pass else: raise TypeError("must be None, str, dt.datetime or list") if not isinstance(xy, (tuple, list, np.ndarray, type(None))): raise TypeError("must be tuple, list or numpy.ndarray") obs_list = [] for meteo_var, start, end, ObsClass in zip(meteo_vars, starts, ends, ObsClasses): start, end = _start_end_to_datetime(start, end) # get stations if stns is None: stations = get_stations(meteo_var=meteo_var) if (locations is None) and (xy is not None): _stns = get_nearest_station_xy( xy, stations=stations, meteo_var=meteo_var ) elif locations is not None: _stns = get_nearest_station_df( locations, stations=stations, meteo_var=meteo_var ) else: raise ValueError( "stns, location and xy are all None. " "Please specify one of these arguments." ) _stns = np.unique(_stns) if locations is not None: xy = locations[["x", "y"]].values else: _stns = stns for stn in _stns: o = ObsClass.from_knmi( meteo_var=meteo_var, stn=stn, start=start, end=end, **settings, ) obs_list.append(o) return obs_list
[docs]def get_evaporation(meteo_var, stn=260, start=None, end=None, settings=None): """Collect different types of (reference) evaporation from KNMI weather stations Parameters ---------- meteo_var : str Choice between 'penman', 'makkink' or 'hargraves'. stn : str station number, defaults to 260 De Bilt start : pd.TimeStamp start time of observations. end : pd.TimeStamp end time of observations. settings : dict or None, optional settings for the time series Returns ------ pandas.DataFrame """ if meteo_var == "hargreaves": if not settings["use_api"]: raise NotImplementedError( "cannot use hargreaves without the api because hargreaves needs " "the lattitude" ) d = {} mvs = ["TG", "TN", "TX"] for mv in mvs: ts, meta = get_knmi_timeseries_stn( stn, meteo_var=mv, start=start, end=end, settings=settings ) d[mv] = ts.squeeze() et = hargreaves( d["TG"], d["TN"], d["TX"], d["TG"].index, meta["LAT_north"], ).to_frame(name=meteo_var) elif meteo_var == "makkink": d = {} mvs = ["TG", "Q"] for mv in mvs: ts, meta = get_knmi_timeseries_stn( stn, meteo_var=mv, start=start, end=end, settings=settings ) d[mv] = ts.squeeze() et = makkink(d["TG"], d["Q"]).to_frame(name=meteo_var) elif meteo_var == "penman": if not settings["use_api"]: raise NotImplementedError( "cannot use penman without the api because penman needs the " "lattitude and height of the meteo station" ) d = {} mvs = ["TG", "TN", "TX", "Q", "FG", "UG"] for mv in mvs: ts, meta = get_knmi_timeseries_stn( stn, meteo_var=mv, start=start, end=end, settings=settings ) d[mv] = ts.squeeze() et = penman( d["TG"], d["TN"], d["TX"], d["Q"], d["FG"], d["UG"], d["TG"].index, meta["LAT_north"], meta["ALT_m"], ).to_frame(name=meteo_var) else: raise ValueError( "Provide valid argument for meteo_var -> 'hargreaves', 'makkink' or " "'penman'" ) stn_name = get_station_name(meta["station"], stations=get_stations(meteo_var)) meta["name"] = f"{meteo_var}_{stn_name}" meta["unit"] = "m" return et, meta
[docs]def makkink(tmean, K): """Estimate of Makkink reference evaporation according to KNMI. Parameters ---------- tmean : tmean : pandas.Series Daily mean temperature in Celsius K : pandas.Series Global radiation estimate in J/cm2 Returns ------- pandas.Series """ a = 0.646 + 0.0006 * tmean b = 1 + tmean / 237.3 c = 7.5 * np.log(10) * 6.107 * 10 ** (7.5 * (1 - 1 / b)) et = 0.0065 * (1 - a / (c / (237.3 * b * b) + a)) / (2501 - 2.38 * tmean) * K return et
[docs]def penman( tmean, tmin, tmax, K, wind, rh, dates, z=1.0, lat=52.1, G=0.0, wh=10.0, tdew=None ): """Estimate of Penman reference evaporation according to Allen et al 1990. Parameters ---------- tmean : pandas.Series Daily mean temperature in Celsius tmin : pandas.Series Daily minimum temperature in Celsius tmax : pandas.Series Daily maximum temperature in Celsius K : pandas.Series Global radiation estimate in J/cm2 wind : pandas.Series Daily mean wind speed in m/s rh : pandas.Series Relative humidity in % dates : pandas.Series Dates z : float, optional Elevation of station in m, by default 1.0 lat : float, optional Latitude of station, by default 52.1 G : float, optional Ground flux in MJ/m2, by default 0.0 wh : float, optional Height of wind measurement in m, by default 10.0 tdew : pandas.Series Dew point temperature in C, by default None Returns ------- pandas.Series """ K = K * 1e4 * 0.000012 # J/cm2 to W/m2/d P = 101.3 * ((293 - 0.0065 * z) / 293) ** 5.26 # kPa gamma = 0.665e-3 * P # kPa/C tg = (tmax - tmin) / 2 # C s = 4098 * (0.6108 * np.exp(17.27 * tg / (tg + 237.3)) / (tg + 237.3) ** 2) # kPa/C es0 = 0.6108 * np.exp(17.27 * tmean / (tmean + 237.3)) # kPa esmax = 0.6108 * np.exp(17.27 * tmax / (tmax + 237.3)) # kPa esmin = 0.6108 * np.exp(17.27 * tmin / (tmin + 237.3)) # kPa es = (esmax + esmin) / 2 # kpa if tdew: ea = 0.6108 * np.exp(17.27 * tdew / (tdew + 237.3)) # kPa else: ea = es0 * rh / 100 # kPa u2 = wind * 4.87 / np.log(67.8 * wh - 5.42) # m/s J = pd.Series([int(date.strftime("%j")) for date in dates], index=dates) dr = 1 + 0.033 * np.cos(2 * np.pi * J / 365) # - delt = 0.409 * np.sin(2 * np.pi * J / 365 - 1.39) # rad phi = lat * np.pi / 180 # rad ws = np.arccos(-np.tan(phi) * np.tan(delt)) # rad Kext = ( 1366 * dr / np.pi * (ws * np.sin(phi) * np.sin(delt) + np.cos(phi) * np.cos(delt) * np.sin(ws)) ) # W/m2/d K0 = Kext * (0.75 + 2e-5 * z) # W/m2/d Lout_in = ( 5.67e-8 * (((tmax + 273) ** 4 + (tmin + 273) ** 4) / 2) * (0.34 - 0.14 * np.sqrt(ea)) * (1.35 * K / K0 - 0.35) ) # W/m2/d Q = (1 - 0.23) * K + Lout_in # W/m2/d Q = Q * 0.0864 # W/m2/d to MJ/m2 straling = 0.408 * s * (Q - G) windterm = gamma * 900 / (tmean + 273) * u2 * (es - ea) denom = s + gamma * (1 + 0.34 * u2) et = (straling + windterm) / denom * 1e-3 et[et < 0] = 0 return et
[docs]def hargreaves(tmean, tmin, tmax, dates, lat=52.1, x=None): """Estimate of Hargraves potential evaporation according to Allen et al. 1990. Parameters ---------- tmean : pandas.Series Daily mean temperature tmin : pandas.Series Daily minimum temperature tmax : pandas.Series Daily maximum temperature dates : pandas.Series.index Dates lat : float, optional Latitude of station, by default 52.1 x : _type_, optional Optional parameter to scale evaporation estimate, by default None Returns ------- pandas.Series """ J = pd.Series([int(date.strftime("%j")) for date in dates], index=dates) dr = 1 + 0.033 * np.cos(2 * np.pi * J / 365) delt = 0.409 * np.sin(2 * np.pi * J / 365 - 1.39) phi = lat * np.pi / 180 ws = np.arccos(-np.tan(phi) * np.tan(delt)) Kext = ( 12 * 60 * 0.0820 * dr / np.pi * (ws * np.sin(phi) * np.sin(delt) + np.cos(phi) * np.cos(delt) * np.sin(ws)) ) # MJ/m2/d Kext = 0.408 * Kext # MJ/m2/d to mm/d et = 0.0023 * (tmean + 273 + 17.8) / np.sqrt(tmax - tmin) * Kext * 1e-3 if x: et = x[0] + x[1] * et return et