Source code for hydropandas.io.solinst

import logging
import os
import zipfile

import numpy as np
import pandas as pd
from pyproj import Transformer

logger = logging.getLogger(__name__)


[docs]def read_solinst_file( path, transform_coords=True, ): """Read Solinst logger file (XLE) Parameters ---------- path : str path to Solinst file (.xle) transform_coords : boolean convert coordinates from WGS84 to RD Returns ------- df : pandas.DataFrame DataFrame containing file content meta : dict, optional dict containing meta """ # open file path = str(path) name = os.path.splitext(os.path.basename(path))[0] if path.endswith((".xle", ".xml")): f = path elif path.endswith(".zip"): zf = zipfile.ZipFile(path) f = zf.open(f"{name}.xle") else: raise NotImplementedError( f"File type '{os.path.splitext(path)[-1]}' not supported!" ) logger.info(f"reading -> {f}") # read channel 1 data header df_ch1_data_header = pd.read_xml(path, xpath="/Body_xle/Ch1_data_header") series_ch1_data_header = df_ch1_data_header.T.iloc[:, 0] colname_ch1 = ( series_ch1_data_header.Identification.lower() + "_" + series_ch1_data_header.Unit.lower() ) # read channel 2 data header df_ch2_data_header = pd.read_xml(path, xpath="/Body_xle/Ch2_data_header") series_ch2_data_header = df_ch2_data_header.T.iloc[:, 0] colname_ch2 = ( series_ch2_data_header.Identification.lower() + "_" + series_ch2_data_header.Unit.lower() ) # read observations df = pd.read_xml( path, xpath="/Body_xle/Data/Log", ) df.rename(columns={"ch1": colname_ch1, "ch2": colname_ch2}, inplace=True) if "ms" in df.columns: df["date_time"] = pd.to_datetime( df["Date"] + " " + df["Time"] ) + pd.to_timedelta(df["ms"], unit="ms") drop_cols = ["id", "Date", "Time", "ms"] else: df["date_time"] = pd.to_datetime(df["Date"] + " " + df["Time"]) drop_cols = ["id", "Date", "Time"] df.set_index("date_time", inplace=True) df.drop(columns=drop_cols, inplace=True) # parse meta into dict, per group in XLE file meta = {} # read file info df_file_info = pd.read_xml(path, xpath="/Body_xle/File_info") dict_file_info = df_file_info.T.iloc[:, 0].to_dict() # read instrument info df_instrument_info = pd.read_xml(path, xpath="/Body_xle/Instrument_info") dict_instrument_info = df_instrument_info.T.iloc[:, 0].to_dict() # read instrument info df_instrument_info_data_header = pd.read_xml( path, xpath="/Body_xle/Instrument_info_data_header" ) dict_instrument_info_data_header = df_instrument_info_data_header.T.iloc[ :, 0 ].to_dict() meta = { **dict_file_info, **dict_instrument_info, **dict_instrument_info_data_header, } if transform_coords: # lat and lon has 0,000 when location is not supplied # replace comma with point first if isinstance(meta["Latitude"], str): meta["Latitude"] = float(meta["Latitude"].replace(",", ".")) if isinstance(meta["Longtitude"], str): meta["Longtitude"] = float(meta["Longtitude"].replace(",", ".")) if (meta["Latitude"] != 0) & (meta["Longtitude"] != 0): # NOTE: check EPSG:28992 definition and whether location is showing up in # the right spot. transformer = Transformer.from_crs("epsg:4326", "epsg:28992") x, y = transformer.transform(meta["Latitude"], meta["Longtitude"]) x = np.round(x, 2) y = np.round(y, 2) else: logger.warning("file has no location included") x = None y = None else: x = meta["Latitude"] y = meta["Longtitude"] meta["x"] = x meta["y"] = y meta["filename"] = f meta["source"] = meta["Created_by"] meta["name"] = name meta["location"] = name meta["unit"] = series_ch1_data_header.Unit.lower() meta["metadata_available"] = True return df, meta