Source code for hydropandas.extensions.plots

import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.gridspec import GridSpec
from tqdm import tqdm

from ..observation import GroundwaterObs
from . import accessor

logger = logging.getLogger(__name__)


[docs]@accessor.register_obscollection_accessor("plots") class CollectionPlots: def __init__(self, oc_obj): """Object containing plotting methods for ObsCollections. Parameters ---------- oc : ObsCollection ObsCollection instance """ self._obj = oc_obj
[docs] def interactive_plots( self, savedir="figures", tmin=None, tmax=None, per_location=True, **kwargs, ): """Create interactive plots of the observations using bokeh. Parameters ---------- savedir : str directory used for the folium map and bokeh plots tmin : dt.datetime, optional start date for timeseries plot tmax : dt.datetime, optional end date for timeseries plot per_location : bool, optional if True plot multiple observations at the same location in one figure **kwargs : will be passed to the Obs.interactive_plot method, options include: - cols : list of str or None - hoover_names : list of str - plot_freq : list of str - plot_legend_names : list of str - markers : list of str - hoover_names : list of str - plot_colors : list of str - ylabel : str - add_screen_to_legend : boolean """ _color_cycle = ( "blue", "olive", "lime", "red", "orange", "yellow", "purple", "silver", "powderblue", "salmon", "tan", ) if per_location: plot_names = self._obj.groupby("location").count().index else: plot_names = self._obj.index for name in plot_names: if per_location: oc = self._obj.loc[self._obj.location == name, "obs"].sort_index() else: oc = self._obj.loc[[name], "obs"] p = None for i, o in enumerate(oc.values): if i == 10: raise NotImplementedError( "cannot add more than 10 lines to a single plot" ) try: p = o.plots.interactive_plot( savedir=savedir, p=p, tmin=tmin, tmax=tmax, plot_colors=[_color_cycle[i]], return_filename=False, **kwargs, ) logger.debug(f"created iplot -> {o.name}") except ValueError: logger.error(f"{o.name} has no data between {tmin} and {tmax}") o.meta["iplot_fname"] = None
[docs] def interactive_map( self, plot_dir="figures", m=None, tiles="OpenStreetMap", fname=None, per_location=True, color="blue", legend_name=None, add_legend=True, map_label="", map_label_size=20, col_name_lat="lat", col_name_lon="lon", zoom_start=13, popup_width=620, create_interactive_plots=True, **kwargs, ): """Create an interactive map with interactive plots using folium and bokeh. Notes ----- Some notes on this method: - if you want to have multiple obs collections on one folium map, only the last one should have add_legend = True to create a correct legend - the color of the observation point on the map is now the same color as the line of the observation measurements. Also a built-in color cycle is used for different measurements at the same location. Parameters ---------- plot_dir : str directory used for the folium map and bokeh plots m : folium.Map, str, optional current map to add observations too, if None a new map is created tiles : str, optional background tiles, default is openstreetmap fname : str, optional name of the folium map per_location : bool, optional if True plot multiple observations at the same location in one figure color : str, optional color of the observation points on the map legend_name : str, optional the name of the observation points shown in the map legend add_legend : boolean, optional add a legend to a plot map_label : str, optional add a label to the observations on the map, the label should be 1. the attribute of an observation 2. the key in the meta attribute of the observation 3. a generic label for each observation in this collection. A label is only added if map_label is not ''. The default is ''. map_label_size : int, optional label size of the map_label in pt. col_name_lat : str, optional name of the column in the obs_collection dic with the lat values of the observation points col_name_lon : str, optional see col_name_lat zoom_start : int, optional start zoom level of the folium ma create_interactive_plots : boolean, optional if True interactive plots will be created, if False the iplot_fname in the meta ditctionary of the observations is used. popup_width : int, optional popup width in pixels, optional. **kwargs : will be passed to the interactive_plots method options are: - cols : list of str or None - hoover_names : list of str - plot_legend_names : list of str - plot_freq : list of str - markers : list of str - hoover_names : list of str - plot_colors : list of str - ylabel : str - add_screen_to_legend : boolean - tmin : dt.datetime - tmax : dt.datetime Returns ------- m : folium.Map the folium map """ import branca import folium from folium.features import DivIcon # check for empty observations if all(o.empty for o in self._obj.obs.values): logger.warning("all observations in the collection are empty") for oname in self._obj.index: self._obj.set_metadata_value( oname, "iplot_fname", None, add_to_meta=True ) empty_obs = True else: empty_obs = False # create interactive bokeh plots if create_interactive_plots and not empty_obs: self._obj.plots.interactive_plots( savedir=plot_dir, per_location=per_location, width=popup_width, **kwargs ) # check if observation collection has lat and lon values if (col_name_lat not in self._obj.columns) and ( col_name_lon not in self._obj.columns ): self._obj.geo.set_lat_lon() # determine start location of map northing = np.mean( (self._obj[col_name_lat].min(), self._obj[col_name_lat].max()) ) easting = np.mean( (self._obj[col_name_lon].min(), self._obj[col_name_lon].max()) ) # create map if no map is given if m is None: m = folium.Map([northing, easting], zoom_start=zoom_start, tiles=tiles) # get oc name if no legend name is given if legend_name is None: legend_name = self._obj.name # add the point observations with plots to the map group_name = f'<span style=\\"color: {color};\\">{legend_name}</span>' group = folium.FeatureGroup(name=group_name) if per_location: plot_names = self._obj.groupby("location").count().index else: plot_names = self._obj.index for name in plot_names: if per_location: oc = self._obj.loc[self._obj.location == name, "obs"].sort_index() o = oc.iloc[-1] name = o.name else: o = self._obj.loc[name, "obs"] if o.meta["iplot_fname"] is not None: with open(o.meta["iplot_fname"], "r") as f: bokeh_html = f.read() iframe = branca.element.IFrame( html=bokeh_html, width=popup_width, height=int(popup_width / 1.5) ) popup = folium.Popup(iframe, max_width=popup_width) folium.CircleMarker( [ self._obj.loc[o.name, col_name_lat], self._obj.loc[o.name, col_name_lon], ], icon=folium.Icon(icon="signal"), fill=True, color=color, popup=popup, ).add_to(group) if map_label != "": if map_label in o._get_meta_attr(): map_label_val = getattr(o, map_label) elif map_label in o.meta: map_label_val = o.meta[map_label] else: map_label_val = map_label folium.map.Marker( [ self._obj.loc[name, col_name_lat], self._obj.loc[name, col_name_lon], ], icon=DivIcon( icon_size=(150, 36), icon_anchor=(0, 0), html=f'<div style="font-size: {map_label_size}pt">{map_label_val}</div>', ), ).add_to(group) else: logger.info(f"no iplot available for {o.name}") folium.CircleMarker( [ self._obj.loc[o.name, col_name_lat], self._obj.loc[o.name, col_name_lon], ], icon=folium.Icon(icon="signal"), fill=True, color=color, ).add_to(group) group.add_to(m) # add legend if add_legend: folium.map.LayerControl("topright", collapsed=False).add_to(m) # save map # filename and path if fname is not None: if not fname.endswith(".html"): fname = fname + ".html" if not os.path.exists(plot_dir): os.mkdir(plot_dir) m.save(os.path.join(plot_dir, fname)) return m
[docs] def section_plot( self, tmin=None, tmax=None, cols=(None,), section_colname_x=None, section_label_x=None, section_well_layout_color="gray", section_markersize=100, ylabel="auto", fn_save=None, check_obs_close_to_screen_bottom=True, plot_well_layout_markers=True, plot_obs=True, ): """Create plot with well layout (left) en observations (right). Parameters ---------- tmin : dt.datetime, optional start date for timeseries plot tmax : dt.datetime, optional end date for timeseries plot cols : tuple of str or None, optional the columns of the observation to plot. The first numeric column is used if cols is None, by default None. section_colname_x : str, optional column used for x position on section plot, when None order collection is used section_label_x : str, optional label applied to x-axis in section plot section_well_layout_color : str, optional color of well layout, default is gray section_markersize : int, optional size of makers in sectionplot ylabel: str or list, optional when 'auto' column unit in collection is ylabel, otherwise first element of list is label of section plot, second element of observation plot fn_save : str, optional filename to save plot check_obs_close_to_screen_bottom : bool, optional plots a horizontal line when minimum observation is close to screen_bottom plot_well_layout_markers : bool, optional plots ground level, top tube, screen levels and sandtrap via makers. Default is True plot_obs : bool, optional Plots observation. Default is True TODO: - speficy colors via extra column in ObsCollection - addtional visual checks: maximum observation is close to or above ground level, maximum observation is close to or above tube top minimum observation is close to or below tube bottom (sand trap) - include some interactive Bokeh fancy - apply an offset when two wells are at same location - limit y-axis of section plot to observations only - remove the checking (if obs are near bottom) from this function - moving the legend outside the plot - set xlim of observation plot more tight when tmin is not specified """ # prepare column for x location in section plot if section_colname_x is None: # use order in ObsCollection plot_x = np.arange(len(self._obj)) else: plot_x = self._obj[section_colname_x].to_numpy() # create figure fig = plt.figure(figsize=(15, 5)) if plot_obs: # make figure with section plot on left, and righthand plot for obs gs = GridSpec( 1, 2, width_ratios=[1, 3], ) ax_section = fig.add_subplot(gs[0]) ax_obs = fig.add_subplot(gs[1]) axes = [ax_section, ax_obs] else: # make figure with only one plot on left, observations are not plotted ax_section = fig.add_subplot(111) axes = [ax_section] # check observation types otypes = self._obj._infer_otype() if plot_well_layout_markers and GroundwaterObs in otypes: # plot well layout via markers ax_section.scatter( plot_x, self._obj.tube_top.values, section_markersize, label="tube top", marker="*", facecolors="none", color=section_well_layout_color, ) ax_section.scatter( plot_x, self._obj.ground_level.values, section_markersize, label="ground level", marker="_", color=section_well_layout_color, ) ax_section.scatter( plot_x, self._obj.screen_top.values, section_markersize / 2, label="screen top", marker="x", color=section_well_layout_color, ) ax_section.scatter( plot_x, self._obj.screen_bottom.values, section_markersize, label="screen bottom", marker="+", color=section_well_layout_color, ) # loop over all wells, plot observations and details in section plot for counter, name in enumerate(self._obj.index): if plot_obs: # which column to plot? Create a local copy to avoid modifying original cols_local = list(cols) for i, col in enumerate(cols_local): if col is None: cols_local[i] = self._obj.loc[ name, "obs" ]._get_first_numeric_col_name() # create plot dataframe plot_df = self._obj.loc[name, "obs"][tmin:tmax][cols_local].copy() if plot_df.empty or plot_df[cols_local].isna().all().all(): logger.warning(f"{name} has no data between {tmin} and {tmax}") continue # PART 1: plot timeseries of observations # one or multiple columns to plot? if len(cols_local) == 1: p = ax_obs.plot(plot_df, label=name) else: for col in cols_local: p = ax_obs.plot(plot_df[col], label=f"{name}, {col}") plot_color = p[0].get_color() if check_obs_close_to_screen_bottom: # add horizonal line to plot when minimum observation in first plot # column is close to bottom of screen offset = 0.1 if GroundwaterObs in otypes and self._obj.loc[ name, "screen_bottom" ] > (plot_df[cols_local[0]].dropna().min() - offset): ax_obs.axhline( y=self._obj.loc[name, "screen_bottom"], ls="--", lw=4, alpha=0.5, label=( f"screen bottom of {name}\n" "is close to minimum observation" ), color=plot_color, ) else: plot_color = "hotpink" # PART 2: fancy section plot with lines along tube if GroundwaterObs in otypes: # highlight filter on section plot ax_section.plot( [plot_x[counter]] * 2, [ self._obj.loc[name, "screen_top"], self._obj.loc[name, "screen_bottom"], ], color="k", lw=3, ls="-", ) # highlight blind tube on section plot ax_section.plot( [plot_x[counter]] * 2, [ self._obj.loc[name, "screen_top"], self._obj.loc[name, "tube_top"], ], color=plot_color, lw=3, ) # add sandtrap when present if "tube_bottom" in self._obj.columns: ax_section.plot( [plot_x[counter]] * 2, [ self._obj.loc[name, "screen_bottom"], self._obj.loc[name, "tube_bottom"], ], color=plot_color, lw=3, ) if plot_obs: # PART 3: fancy section plot with bandwith of observations ax_section.scatter( plot_x[counter], plot_df.quantile(q=0.95), section_markersize, marker=(3, 0, 0), color="blue", alpha=0.5, label="95% observation", zorder=100, ) ax_section.scatter( plot_x[counter], plot_df.median(), section_markersize, marker=(4, 0, 90), color="green", alpha=0.5, label="median", zorder=100, ) ax_section.scatter( plot_x[counter], plot_df.quantile(q=0.05), section_markersize, marker=(3, 0, 180), color="red", alpha=0.5, label="5% observation", zorder=100, ) logger.info(f"created sectionplot -> {name}") # layout if section_label_x is None: # use name as xtick and rotate ax_section.set_xticks( plot_x, self._obj.index, rotation="vertical", fontsize="small", ) else: ax_section.set_xlabel(section_label_x) if plot_obs: ax_obs.set_xlim(left=tmin, right=tmax) # rotate labels on observation axis ax_obs.set_xticks( ax_obs.get_xticks(), ax_obs.get_xticklabels(), rotation="vertical", fontsize="small", ) if ylabel == "auto": # has collection uniform unit? if len(self._obj.unit.unique()) == 1: ylabel = self._obj.unit.unique()[0] else: logger.error( "Collection has more than one unit. Plot has both on one y-axis." ) ylabel = ", ".join(map(str, self._obj.unit.unique())) for ax in axes: ax.set_ylabel(ylabel) else: try: ax_section.set_ylabel(ylabel[0]) ax_obs.set_ylabel(ylabel[1]) except ValueError: logger.error(f"Invalid value for ylabel {ylabel}. Plot has no ylabels.") # add layout to both plots for ax in axes: ax.grid() # simple legend handles, labels = ax.get_legend_handles_labels() by_label = dict(zip(labels, handles)) ax.legend(by_label.values(), by_label.keys()) fig.suptitle(self._obj.name) if fn_save is not None: try: fig.tight_layout() fig.savefig( fn_save, dpi=250, ) except ValueError: logger.error(f"Save of figure {name} failed ({fn_save}).") return fig, axes
[docs] def series_per_group( self, plot_column, by=None, savefig=True, outputdir=".", naming_method=None, units_for_well_screen_depth="mNAP", ): """Plot time series per group. The default groupby is based on identical x, y coordinates, so plots unique time series per location. Parameters ---------- plot_column : str name of column containing time series data by : (list of) str or (list of) array-like groupby parameters, default is None which sets groupby to columns ["x", "y"]. savefig : bool, optional save figures, by default True, if False returns axes handles to plots. outputdir : str, optional path to output directory, by default the current directory (".") naming_method : str, optional method to determine file names for plots, default is None, which uses the following template: "series_by_{groupby columns}_group_{groupby values}.png". If a string is passed, it is interpreted as a column name and the filename contains the unique values from that column per group: "series_group_{unique values in column}.png" The final option includes "infer_name_monitoring_well", which attempts to obtain the name of the monitoring well from the final name in the group, by splitting the string on "-" or "_", and using the first part. For example, if the name is "PB001-001", the filename will be "PB001.png". units_for_well_screen_depth : str, optional if the observations are GroundWaterObs, the units of the screen depth are added to the legend. This keyword argument defines the units for the screen depth. Default is 'mNAP'. Returns ------- axes : list returns list of axes handles if savefig=False """ if by is None: by = ["x", "y"] gr = self._obj.groupby(by=by) axes = [] for groupname, group in tqdm( gr, desc="Plotting series per group", total=len(gr) ): f, ax = plt.subplots(1, 1, figsize=(10, 3)) for name, row in group.iterrows(): if isinstance(row.obs, GroundwaterObs): if units_for_well_screen_depth == "mNAP": lbl = ( f"{name} (NAP{row['screen_top']:+.1f}" f"-{row['screen_bottom']:+.1f} m)" ) else: lbl = ( f"{name} ({row['screen_top']:.1f}" f"-{row['screen_bottom']:.1f} " f"{units_for_well_screen_depth})" ) else: lbl = f"{name}" ax.plot( row.obs.index, row.obs[plot_column], label=lbl, ) ax.legend( loc=(0, 1), frameon=False, ncol=min(group.index.size, 3), fontsize="x-small", ) ax.set_ylabel(row["unit"]) ax.grid(True) f.tight_layout() if savefig: if isinstance(by, list): by_name = "-".join(by) groupname = "-".join(groupname) else: by_name = by if naming_method is None: filename = f"series_by_{by_name}_group_{groupname}.png" elif naming_method == "infer_name_monitoring_well": if "-" in name: filename = f"{name.split('-')[0]}.png" elif "_" in name: filename = f"{name.split('_')[0]}.png" else: filename = f"{name}.png" elif isinstance(naming_method, str): filename = ( f"series_by_{by_name}_group_" f"{'-'.join(group[naming_method].unique().tolist())}.png" ) f.savefig( os.path.join(outputdir, filename), bbox_inches="tight", dpi=150 ) plt.close(f) else: axes.append(ax) if not savefig: return axes
[docs]@accessor.register_obs_accessor("plots") class ObsPlots: def __init__(self, obs): self._obj = obs
[docs] def interactive_plot( self, savedir=None, cols=(None,), markers=("line",), p=None, plot_legend_names=("",), plot_freq=(None,), tmin=None, tmax=None, hoover_names=(None,), hoover_date_format="%Y-%m-%d", ylabel=None, plot_colors=("blue",), add_screen_to_legend=False, width=600, return_filename=False, ): """Create an interactive plot of the observations using bokeh. Todo: - add options for hoovers, markers, linestyle Parameters ---------- savedir : str, optional directory used for the folium map and bokeh plots cols : tuple of str or None, optional the columns of the observation to plot. The first numeric column is used if cols is None, by default None. markers : list of str, optional type of markers that can be used for plot, 'line' and 'circle' are supported p : bokeh.plotting.figure, optional reference to existing figure, if p is None a new figure is created plot_legend_names : list of str, optional legend in bokeh plot plot_freq : list of str, optional bokeh plot is resampled with this frequency to reduce the size tmin : dt.datetime, optional start date for timeseries plot tmax : dt.datetime, optional end date for timeseries plot hoover_names : list of str, optional names will be displayed together with the cols values when hoovering over plot hoover_date_format : str, optional date format to use when hoovering over a plot ylabel : str or None, optional label on the y-axis. If None the unit attribute of the observation is used. plot_colors : list of str, optional plot_colors used for the plots add_screen_to_legend : boolean, optional if True the attributes screen_top and screen_bottom are added to the legend name width : int, optional width of the interactive figure. return_filename : boolean, optional if True filename will be returned Returns ------- fname_plot : str or bokeh plot filename of the bokeh plot or reference to bokeh plot """ from bokeh.models import ColumnDataSource, HoverTool, Range1d from bokeh.plotting import figure, save from bokeh.resources import CDN xlim = None hoover_names = list(hoover_names) cols = list(cols) for i, col in enumerate(cols): if col is None: cols[i] = self._obj._get_first_numeric_col_name() if hoover_names[i] is None: hoover_names[i] = cols[i] # create plot dataframe plot_df = self._obj[tmin:tmax].copy() plot_df["date"] = plot_df.index.strftime(hoover_date_format) if plot_df.empty or plot_df[cols].isna().all().all(): raise ValueError(f"{self._obj.name} has no data between {tmin} and {tmax}") elif len(plot_df) == 1: markers = ["circle"] * len(cols) # set xlim because there is only one measurement obsdate = plot_df.index[0] xlim = Range1d( start=obsdate - pd.Timedelta(days=365), end=obsdate + pd.Timedelta(days=365), ) # create plot if p is None: p = figure( width=width, height=int(width / 1.5), x_axis_type="datetime", title="" ) if ylabel is None: ylabel = self._obj.unit p.yaxis.axis_label = ylabel # get x axis xcol = self._obj.index.name if xcol is None: xcol = "index" # get color if len(plot_colors) < len(cols): plot_colors = list(plot_colors) * len(cols) # get base for hoover tooltips plots = [] tooltips = [] tooltips.append(("date", "@date")) # plot multiple columns for i, column in enumerate(cols): # legend name if add_screen_to_legend: lname = f"{plot_legend_names[i]} {self._obj.name} (NAP {self._obj.screen_bottom:.2f} - {self._obj.screen_top:.2f})" else: lname = f"{plot_legend_names[i]} {self._obj.name}" # resample data if plot_freq[i] is None: source = ColumnDataSource(plot_df[[column, "date"]]) else: source = ColumnDataSource( plot_df[[column, "date"]].resample(plot_freq[i]).first() ) # plot data if markers[i] in ["line", "l"]: plots.append( p.line( xcol, column, source=source, color=plot_colors[i], legend_label=lname, alpha=0.8, muted_alpha=0.2, ) ) elif markers[i] in ["circle", "c"]: plots.append( p.circle( xcol, column, source=source, color=plot_colors[i], legend_label=lname, alpha=0.8, muted_alpha=0.2, ) ) else: raise NotImplementedError( f"marker '{markers[i]}' invalid. Only line and" "circle are currently available" ) # add columns to hoover tooltips tooltips_p = tooltips.copy() tooltips_p.append((hoover_names[i], f"@{column}")) hover = HoverTool(renderers=[plots[i]], tooltips=tooltips_p, mode="vline") p.add_tools(hover) # set xlim if xlim is not None: p.x_range = xlim p.legend.location = "top_left" p.legend.click_policy = "mute" # save plot if savedir is not None: if not os.path.isdir(savedir): os.makedirs(savedir) self._obj.meta["iplot_fname"] = os.path.join( savedir, str(self._obj.name) + ".html" ) save(p, self._obj.meta["iplot_fname"], resources=CDN, title=self._obj.name) if return_filename: return self._obj.meta["iplot_fname"] else: return p