# Source code for hydropandas.io.wiski

import logging
import os

import numpy as np
import pandas as pd

from ..util import get_files

logger = logging.getLogger(__name__)


def _read_wiski_header(f, header_sep=":", header_identifier="#", end_header_str=None):
    line = f.readline()
    header = dict()
    while header_identifier in line:
        prop, val = line.split(header_sep)
        prop = prop.strip().replace(header_identifier, "")
        val = val.strip()
        try:
            val = float(val)
        except ValueError:
            pass
        header[prop] = val
        line = f.readline()
        if end_header_str is not None:
            if end_header_str in line:
                break

    return line, header


def read_wiski_file(
    path,
    sep=";",
    header_sep=None,
    header_identifier="#",
    read_series=True,
    translate_dic=None,
    tz_localize=True,
    unit="",
    **kwargs,
):
    r"""Read the header and, optionally, the time series from a WISKI file.

    Parameters
    ----------
    path : str
        The path of the file to be read.
    sep : str, optional
        The delimiter between the columns of the data table, by default ";".
        If it equals the regular expression ``\s+`` the line with the column
        names is split on tabs.
    header_sep : str, optional
        The separator between name and value on a header line. With the
        default of None the default separator of the header parser (":") is
        used.
    header_identifier : str, optional
        The character that identifies header lines, by default "#".
    read_series : bool, optional
        Whether to read the time series data from the file, by default True.
    translate_dic : dict, optional
        Mapping of a new header key to the name of an existing header field,
        e.g. ``{"name": "Station Number"}``. The value of the existing field
        is copied to the new key. By default None (no translation).
    tz_localize : bool, optional
        If True (default) and the data has a datetime index, the timezone
        information is removed from that index (``tz_localize(None)``). Any
        other type of index is left untouched.
    unit : str, optional
        The unit of the measurements, stored in the metadata. By default "".
    **kwargs
        ``end_header_str`` is removed from the keyword arguments and used to
        stop the header parsing at the first line that contains this string.
        All other keyword arguments are passed to ``pandas.read_csv``.

    Returns
    -------
    data : pandas.DataFrame or None
        The time series data from the file, None if ``read_series`` is False.
    metadata : dict
        Always contains "source" and "unit". "monitoring_well", "x", "y" and
        "name" are added when the header has the fields "Station Site", "x",
        "y" and "name" respectively.

    Raises
    ------
    KeyError
        If ``translate_dic`` refers to a header field that is not in the file.
    """
    logger.info("reading -> {}".format(os.path.split(path)[-1]))

    if translate_dic is None:
        translate_dic = {}

    # optional marker to manually break off the header parsing
    end_header_str = kwargs.pop("end_header_str", None)

    header_kwargs = {
        "header_identifier": header_identifier,
        "end_header_str": end_header_str,
    }
    if header_sep is not None:
        # only override the parser's own default when explicitly requested
        header_kwargs["header_sep"] = header_sep

    with open(path, "r") as f:
        # read header; `line` is the first line after the header and holds
        # the column names of the data table
        line, header = _read_wiski_header(f, **header_kwargs)

        # copy header fields to the requested names
        for key, item in translate_dic.items():
            header[key] = header[item]

        # get column names of data
        if sep == r"\s+":
            columns = line.split("\t")
        else:
            columns = line.split(sep)
        columns = [icol.strip("\n").replace("[", "_[") for icol in columns]
        validator = np.lib._iotools.NameValidator()
        columns = validator(columns)

        if read_series:
            # the file handle is positioned at the first data row by now
            data = pd.read_csv(
                f,
                sep=sep,
                header=None,
                names=columns,
                **kwargs,
            )

            # tz_localize only exists on a datetime index; without index_col /
            # parse_dates in kwargs pandas returns a plain integer index
            if tz_localize and isinstance(data.index, pd.DatetimeIndex):
                data.index = data.index.tz_localize(None)

            # convert the first column whose name starts with "value" to float
            value_col = next(
                (icol for icol in data.columns if icol.lower().startswith("value")),
                None,
            )
            if value_col is None:
                logger.warning(
                    "no column starting with 'value' found in %s, "
                    "no column was converted to numeric",
                    path,
                )
            else:
                data[value_col] = pd.to_numeric(data[value_col], errors="coerce")
        else:
            data = None

    # translate some header keys
    header_to_metadata = {
        "Station Site": "monitoring_well",
        "x": "x",
        "y": "y",
        "name": "name",
    }
    metadata = {"source": "wiski", "unit": unit}
    # the other header keys are deliberately not added: keys with spaces etc.
    # do not work because the metadata is added as attributes later on
    for key, val in header.items():
        meta_key = header_to_metadata.get(key)
        if meta_key is not None:
            metadata[meta_key] = val

    return data, metadata


def read_wiski_dir(
    dirname,
    ObsClass=None,
    suffix=".csv",
    unpackdir=None,
    force_unpack=False,
    preserve_datetime=False,
    keep_all_obs=True,
    **kwargs,
):
    """Read the WISKI files in a directory into a list of observation objects.

    Parameters
    ----------
    dirname : str
        The path of the directory containing the WISKI CSV files.
    ObsClass : type
        The observation class used to create the observation objects; its
        ``from_wiski`` method is called for every file. Required in practice:
        with the default of None reading the first file fails with an
        AttributeError.
    suffix : str, optional
        The file extension of the WISKI CSV files. Default is ".csv".
    unpackdir : str, optional
        The directory to which the files should be unpacked. Only forwarded
        to ``get_files`` when it is not None. Default is None.
    force_unpack : bool, optional
        If True, forces the files to be unpacked even if they are already in
        the target directory. Forwarded to ``get_files``. Default is False.
    preserve_datetime : bool, optional
        If True, preserves the original modification times of the files when
        unpacking them. Forwarded to ``get_files``. Default is False.
    keep_all_obs : bool, optional
        If True, observations are kept even if they have no metadata
        available. Default is True.
    **kwargs
        Additional keyword arguments passed to the ``from_wiski`` method of
        ``ObsClass``.

    Returns
    -------
    list
        The observation objects created from the files in the directory.

    Raises
    ------
    FileNotFoundError
        If no files with the given suffix are found.
    """
    # get files
    get_files_kwargs = {
        "ext": suffix,
        "force_unpack": force_unpack,
        "preserve_datetime": preserve_datetime,
    }
    if unpackdir is not None:
        # NOTE(review): assumes get_files accepts an `unpackdir` keyword, as
        # its other unpack options suggest -- confirm against ..util.get_files.
        # It is only passed when set, so the default call is unchanged.
        get_files_kwargs["unpackdir"] = unpackdir
    dirname, unzip_fnames = get_files(dirname, **get_files_kwargs)

    if not unzip_fnames:
        raise FileNotFoundError(
            "no files were found in '{}' that end with '{}'".format(
                os.path.join(dirname), suffix
            )
        )

    # gather all obs in list
    obs_list = []
    n_files = len(unzip_fnames)
    for i, fname in enumerate(unzip_fnames, start=1):
        logger.info("reading {0}/{1} -> {2}".format(i, n_files, fname))
        obs = ObsClass.from_wiski(os.path.join(dirname, fname), **kwargs)
        if obs.metadata_available or keep_all_obs:
            obs_list.append(obs)
        else:
            logger.info(f"not added to collection -> {fname}")

    return obs_list