# Source code for hydropandas.io.menyanthes

import logging
import os

import numpy as np
from pandas import DataFrame, Series, Timedelta, Timestamp
from scipy.io import loadmat

from ..observation import GroundwaterObs, WaterlvlObs

logger = logging.getLogger(__name__)


def matlab2datetime(tindex):
    """Convert a MATLAB serial date number to a timestamp rounded to seconds.

    Parameters
    ----------
    tindex : float
        The MATLAB serial date number to convert. The integer part is the
        day count (January 1, 0000 is day 1), the fractional part is the
        time of day expressed as a fraction of a day.

    Returns
    -------
    pandas.Timestamp
        The equivalent timestamp, rounded to the nearest second.

    Notes
    -----
    Python ordinals count January 1 of year 0001 as day 1, whereas MATLAB
    counts January 1 of year 0000 as day 1. A MATLAB date number is
    therefore 366 days larger than the Python ordinal of the same day,
    which is why 366 days are subtracted below.

    Rounding removes the sub-second noise introduced by representing the
    time of day as a floating point fraction.

    Examples
    --------
    >>> matlab2datetime(719529.496527778)
    Timestamp('1970-01-01 11:55:00')
    >>> matlab2datetime(737426.5)
    Timestamp('2019-01-01 12:00:00')
    """
    day = Timestamp.fromordinal(int(tindex))
    # time of day minus the offset between the MATLAB and Python day counts
    dayfrac = Timedelta(days=float(tindex) % 1) - Timedelta(days=366)
    return (day + dayfrac).round("s")
def read_file(path, ObsClass, load_oseries=True, load_stresses=True):
    """Read data from a Menyanthes file and create observation objects.

    Parameters
    ----------
    path : str
        Full path of the Menyanthes file (.men) to read.
    ObsClass : GroundwaterObs or WaterlvlObs
        Class of observation object to create. Any other class is handled
        with a minimal set of metadata (name, x, y, source, unit) and an
        empty unit.
    load_oseries : bool, optional
        Flag indicating whether to load observation series or not, by
        default True.
    load_stresses : bool, optional
        Flag indicating whether to load stresses or not, by default True.

    Returns
    -------
    obs_list : list
        List of observation objects created from the Menyanthes file,
        observation series first, followed by the stresses.

    Raises
    ------
    KeyError
        If a record has no ``values`` field, or if a stress record lacks
        a name or x/y coordinates.

    Notes
    -----
    A missing file is only logged here; opening the file is left to
    ``scipy.io.loadmat``, which raises its own error when it cannot
    resolve the path.
    """
    logger.info(f"reading menyanthes file {path}")

    if ObsClass is GroundwaterObs:
        _rename_dic = {
            "xcoord": "x",
            "ycoord": "y",
            "upfiltlev": "screen_top",
            "lowfiltlev": "screen_bottom",
            "surflev": "ground_level",
            "filtnr": "tube_nr",
            "measpointlev": "tube_top",
        }
        _keys_o = [
            "name",
            "x",
            "y",
            "source",
            "unit",
            "monitoring_well",
            "tube_nr",
            "metadata_available",
            "ground_level",
            "tube_top",
            "screen_top",
            "screen_bottom",
        ]
        unit = "m NAP"
    elif ObsClass is WaterlvlObs:
        _rename_dic = {"xcoord": "x", "ycoord": "y", "measpointlev": "tube_top"}
        _keys_o = ["name", "x", "y", "source", "unit", "monitoring_well"]
        unit = "m NAP"
    else:
        _rename_dic = {
            "xcoord": "x",
            "ycoord": "y",
        }
        _keys_o = ["name", "x", "y", "source", "unit"]
        unit = ""

    # Report a missing file through the module logger instead of stdout.
    # No exception is raised here: loadmat may still resolve the path
    # (its ``appendmat`` default can add a ``.mat`` extension) and raises
    # itself when it cannot.
    if not os.path.isfile(path):
        logger.warning(f"Could not find file {path}")

    mat = loadmat(path, struct_as_record=False, squeeze_me=True, chars_as_strings=True)

    obs_list = []

    if load_oseries and "H" in mat:
        for metadata in read_oseries(mat).values():
            df = _menyanthes_record_to_frame(metadata, unit, _rename_dic)
            # only pass the attributes that are actually available
            meta_o = {k: metadata[k] for k in _keys_o if k in metadata}
            obs_list.append(ObsClass(df, meta=metadata, **meta_o, filename=path))

    if load_stresses and "IN" in mat:
        for metadata in read_stresses(mat).values():
            df = _menyanthes_record_to_frame(metadata, unit, _rename_dic)
            obs_list.append(
                ObsClass(
                    df,
                    meta=metadata,
                    name=metadata["name"],
                    x=metadata["x"],
                    y=metadata["y"],
                    source=metadata["source"],
                    unit=metadata["unit"],
                    filename=path,
                )
            )

    return obs_list


def _menyanthes_record_to_frame(metadata, unit, rename_dic):
    """Split a Menyanthes record into its measurements and its metadata.

    The record is modified in place: the fixed Menyanthes attributes are
    added, the ``values`` entry is removed and the keys listed in
    `rename_dic` are renamed.

    Parameters
    ----------
    metadata : dict
        Record as returned by read_oseries or read_stresses.
    unit : str
        Unit stored in the metadata.
    rename_dic : dict
        Mapping of Menyanthes field names to the names used for the
        observation attributes.

    Returns
    -------
    pandas.DataFrame
        The measurements of the record in a single column named "values".
    """
    metadata["projection"] = "epsg:28992"
    metadata["metadata_available"] = True
    metadata["source"] = "Menyanthes"
    metadata["unit"] = unit

    df = DataFrame(metadata.pop("values"), columns=["values"])

    for old_key, new_key in rename_dic.items():
        if old_key in metadata:
            metadata[new_key] = metadata.pop(old_key)

    return df
def read_oseries(mat):
    """Read the oseries from a mat file from menyanthes.

    Parameters
    ----------
    mat : dict
        A dictionary object containing the Menyanthes file data. The
        observation series are read from ``mat["H"]``, which is either a
        single struct or an array of structs. The dictionary and the
        structs are not modified.

    Returns
    -------
    dict
        A dictionary with the oseries names as keys. Each value is a
        dictionary with the fields of the struct (keys in lower case,
        empty arrays replaced by NaN), the resolved name under ``name``
        and, when the struct has a ``values`` field, the measurements as
        a pandas Series under ``values``.

    Notes
    -----
    The name of an oseries is taken from the ``name`` field, otherwise
    from the ``Name`` field, otherwise "H" followed by the position of
    the series. An empty name is replaced by the ``tnocode`` field. The
    resolved name is also stored in the record, so the ``name`` entry
    always matches the dictionary key.

    The index of the measurements is rounded to seconds.

    Examples
    --------
    >>> mat = loadmat('menyanthes_file.mat')
    >>> d_h = read_oseries(mat)
    """
    records = mat["H"]
    # A file with a single series yields a bare struct instead of an
    # array (e.g. when loaded with squeeze_me=True as read_file does).
    # Wrap it locally so the caller's dictionary is left untouched.
    if not isinstance(records, np.ndarray):
        records = [records]

    d_h = {}

    for i, H in enumerate(records):
        if hasattr(H, "name"):
            oseries_name = H.name
        elif hasattr(H, "Name"):
            oseries_name = H.Name
        else:
            oseries_name = "H" + str(i)  # Give it the index name
        if len(oseries_name) == 0:
            oseries_name = H.tnocode

        logger.info(f"reading oseries -> {oseries_name}")

        data = {}

        for field in H._fieldnames:
            val = getattr(H, field)

            if field != "values":
                # if value is an empty numpy array set value to nan
                if isinstance(val, np.ndarray) and val.size == 0:
                    val = np.nan
                data[field.lower()] = val
                continue

            if val.size == 0:
                # when diver-files are used, values will be empty;
                # an explicit dtype keeps the result independent of
                # the pandas version
                series = Series(dtype=float)
            else:
                # a single measurement may have been squeezed to 1-D
                values = np.atleast_2d(val)
                tindex = [matlab2datetime(t) for t in values[:, 0]]
                # measurement is used as is
                series = Series(values[:, 1], index=tindex)
                # round on seconds, to get rid of conversion milliseconds
                series.index = series.index.round("s")
            data["values"] = series

        # keep the stored name consistent with the dictionary key
        data["name"] = oseries_name
        d_h[oseries_name] = data

    return d_h
def read_stresses(mat):
    """Read the stresses from a mat file from menyanthes.

    Parameters
    ----------
    mat : dict
        A dictionary object containing the mat file. The stresses are
        read from ``mat["IN"]``, which is either a single struct or an
        array of structs. The dictionary and the structs are not
        modified.

    Returns
    -------
    dict
        A dictionary with the stress names as keys. Each value is a
        dictionary with the fields of the struct (keys in lower case,
        empty arrays replaced by NaN), the resolved name under ``name``
        and, when the struct has a ``values`` field, the measurements as
        a pandas Series under ``values``.

    Notes
    -----
    The name of a stress is taken from the ``name`` field, otherwise
    from the ``Name`` field, otherwise "IN" followed by the position of
    the stress. An empty name is replaced by the ``tnocode`` field. The
    resolved name is also stored in the record, so the ``name`` entry
    always matches the dictionary key.

    The index of the measurements is rounded to seconds.
    """
    records = mat["IN"]
    # A file with a single stress yields a bare struct instead of an
    # array (e.g. when loaded with squeeze_me=True as read_file does);
    # a bare struct cannot be iterated, so wrap it in a list.
    if not isinstance(records, np.ndarray):
        records = [records]

    d_in = {}

    for i, IN in enumerate(records):
        if hasattr(IN, "name"):
            stress_name = IN.name
        elif hasattr(IN, "Name"):
            stress_name = IN.Name
        else:
            stress_name = "IN" + str(i)  # Give it the index name
        if len(stress_name) == 0:
            stress_name = IN.tnocode

        logger.info(f"reading stress -> {stress_name}")

        data = {}

        for field in IN._fieldnames:
            val = getattr(IN, field)

            if field != "values":
                # if value is an empty numpy array set value to nan
                if isinstance(val, np.ndarray) and val.size == 0:
                    val = np.nan
                data[field.lower()] = val
                continue

            if val.size == 0:
                # when diver-files are used, values will be empty;
                # an explicit dtype keeps the result independent of
                # the pandas version
                series = Series(dtype=float)
            else:
                # a single measurement may have been squeezed to 1-D
                values = np.atleast_2d(val)
                tindex = [matlab2datetime(t) for t in values[:, 0]]
                # measurement is used as is
                series = Series(values[:, 1], index=tindex)
                # round on seconds, to get rid of conversion milliseconds
                series.index = series.index.round("s")
            data["values"] = series

        # keep the stored name consistent with the dictionary key
        data["name"] = stress_name
        d_in[stress_name] = data

    return d_in