# Source code for hydropandas.io.solinst

import io
import logging
import os
import zipfile

import numpy as np
import pandas as pd
from pyproj import Transformer

logger = logging.getLogger(__name__)


def _read_xle_group(source, xpath):
    """Return the first record matched by ``xpath`` as a pandas Series."""
    return pd.read_xml(source, xpath=xpath).T.iloc[:, 0]


def _parse_coordinate(value):
    """Convert a coordinate written with a decimal comma to float.

    Non-string values are returned unchanged.
    """
    if isinstance(value, str):
        return float(value.replace(",", "."))
    return value


def read_solinst_file(
    path,
    transform_coords=True,
):
    """Read Solinst logger file (XLE)

    Parameters
    ----------
    path : str
        path to Solinst file (.xle or .xml), or to a .zip archive that
        contains a member named ``<archive name>.xle``
    transform_coords : boolean
        convert coordinates from WGS84 (EPSG:4326) to RD (EPSG:28992)

    Returns
    -------
    df : pandas.DataFrame
        DataFrame containing the logged observations, indexed by
        ``date_time``, with one column per channel named
        ``<identification>_<unit>`` (lower case)
    meta : dict
        dict containing the merged File_info, Instrument_info and
        Instrument_info_data_header entries, plus the keys ``x``, ``y``,
        ``filename``, ``source``, ``name``, ``location``, ``unit`` and
        ``metadata_available``

    Raises
    ------
    NotImplementedError
        if the file extension is not .xle, .xml or .zip
    KeyError
        if a .zip archive does not contain ``<archive name>.xle``

    Notes
    -----
    ``meta["filename"]`` always holds the path passed in (as a string). For
    .zip input the archive member is read into memory and the archive is
    closed again, so no open file handle is kept in the metadata.
    """

    # open file
    path = str(path)
    name = os.path.splitext(os.path.basename(path))[0]
    if path.endswith((".xle", ".xml")):
        raw = None
    elif path.endswith(".zip"):
        # read the intended member once and close the archive right away
        with zipfile.ZipFile(path) as zf:
            raw = zf.read(f"{name}.xle")
    else:
        raise NotImplementedError(
            f"File type '{os.path.splitext(path)[-1]}' not supported!"
        )

    def _new_source():
        # pandas consumes a buffer when parsing, so hand out a fresh one for
        # every read; plain files are simply passed by path
        return path if raw is None else io.BytesIO(raw)

    logger.info("reading -> %s", path)

    # read channel 1 and channel 2 data headers
    series_ch1_data_header = _read_xle_group(
        _new_source(), "/Body_xle/Ch1_data_header"
    )
    series_ch2_data_header = _read_xle_group(
        _new_source(), "/Body_xle/Ch2_data_header"
    )
    colname_ch1 = (
        series_ch1_data_header.Identification.lower()
        + "_"
        + series_ch1_data_header.Unit.lower()
    )
    colname_ch2 = (
        series_ch2_data_header.Identification.lower()
        + "_"
        + series_ch2_data_header.Unit.lower()
    )

    # read observations
    df = pd.read_xml(_new_source(), xpath="/Body_xle/Data/Log")
    df = df.rename(columns={"ch1": colname_ch1, "ch2": colname_ch2})
    date_time = pd.to_datetime(df["Date"] + " " + df["Time"])
    drop_cols = ["id", "Date", "Time"]
    if "ms" in df.columns:
        # some files store the sub-second part in a separate column
        date_time = date_time + pd.to_timedelta(df["ms"], unit="ms")
        drop_cols.append("ms")
    df["date_time"] = date_time
    df = df.set_index("date_time").drop(columns=drop_cols)

    # parse meta into dict, per group in XLE file; later groups override
    # earlier ones when keys collide
    meta = {}
    for xpath in (
        "/Body_xle/File_info",
        "/Body_xle/Instrument_info",
        "/Body_xle/Instrument_info_data_header",
    ):
        meta.update(_read_xle_group(_new_source(), xpath).to_dict())

    if transform_coords:
        # lat and lon has 0,000 when location is not supplied
        # replace comma with point first
        lat = meta["Latitude"] = _parse_coordinate(meta["Latitude"])
        lon = meta["Longtitude"] = _parse_coordinate(meta["Longtitude"])
        # a missing (NaN) coordinate is treated like the 0/0 placeholder
        # instead of being pushed through the coordinate transformation
        has_location = (
            not (pd.isna(lat) or pd.isna(lon)) and lat != 0 and lon != 0
        )
        if has_location:
            # NOTE: check EPSG:28992 definition and whether location is showing up in
            # the right spot.
            transformer = Transformer.from_crs("epsg:4326", "epsg:28992")
            x, y = transformer.transform(lat, lon)
            x = np.round(x, 2)
            y = np.round(y, 2)
        else:
            logger.warning("file has no location included")
            x = None
            y = None
    else:
        # NOTE(review): values are passed through untouched here (possibly as
        # decimal-comma strings), with latitude in x and longitude in y --
        # confirm this is what callers expect.
        x = meta["Latitude"]
        y = meta["Longtitude"]
    meta["x"] = x
    meta["y"] = y
    meta["filename"] = path
    meta["source"] = meta["Created_by"]
    meta["name"] = name
    meta["location"] = name
    meta["unit"] = series_ch1_data_header.Unit.lower()
    meta["metadata_available"] = True

    return df, meta