# Source code for covizpy.plot_summary

import pandas as pd
import altair as alt
from dateutil.parser import parse


def plot_summary(
    df,
    var="location",
    val="new_cases",
    fun="sum",
    date_from=None,
    date_to=None,
    top_n=5,
):
    """Generate summary plot

    Create a horizontal bar chart summarising a specified variable and value
    within a time period. The input dataframe is never modified.

    Parameters
    ----------
    df : Pandas dataframe
        Pandas dataframe of the selected covid data from get_data(). It must
        contain the columns 'date' and 'iso_code' in addition to ``var`` and
        ``val``.
    var : str, optional
        Qualitative values to segment data. Must be a categorical variable.
        Also known as a 'dimension'. By default 'location'
    val : str, optional
        Quantitative values to be aggregated. Must be numeric variable.
        Also known as a 'measure'. By default 'new_cases'
    fun : str, optional
        Aggregation function for val, by default 'sum'
    date_from : str, optional
        Start date of the data range with format 'YYYY-MM-DD'. By default
        'None' is used to represent 7 days prior to today's date
    date_to : str, optional
        End date of data range with format 'YYYY-MM-DD'. By default 'None'
        is used to represent today's date
    top_n : int, optional
        Specify number of qualitative values to show, by default 5

    Returns
    -------
    altair.Chart
        Altair bar plot for the specified variables and period

    Raises
    ------
    FileNotFoundError
        If ``df`` is not a pandas dataframe.
    TypeError
        If ``var``, ``val`` or ``fun`` is not a str, if ``var`` is not a
        categorical column, or if ``val`` is not a numeric column.
    ValueError
        If ``top_n`` is not a positive integer, if a date cannot be parsed,
        if ``date_to`` is earlier than ``date_from``, or if ``date_to`` is
        later than today.

    Example
    -------
    >>> plot_summary(df, var="location", val="new_cases", fun="sum",
    ...              date_from="2022-01-01", date_to="2022-01-15",
    ...              top_n=10)  # doctest: +SKIP
    """
    # Exception Handling
    if not isinstance(df, pd.DataFrame):
        raise FileNotFoundError(
            "Data not found! \nThere may be a problem with data URL."
        )

    if not isinstance(var, str):
        raise TypeError("var needs to be of str type!")

    if not isinstance(val, str):
        raise TypeError("val needs to be of str type!")

    if not isinstance(fun, str):
        raise TypeError("fun needs to be of str type!")

    if df[var].dtype.kind != "O":
        raise TypeError("var needs to be a categorical variable!")

    if df[val].dtype.kind == "O":
        raise TypeError("val needs to be a numeric variable!")

    if not isinstance(top_n, int) or top_n <= 0:
        raise ValueError("top_n must be an integer bigger than zero")

    # Resolve each date bound exactly once so that validation and filtering
    # always agree on the value (unparseable strings raise ValueError here).
    today = pd.Timestamp.today().normalize()
    if date_to is None:
        date_to = today
    else:
        date_to = pd.to_datetime(date_to)
    if date_from is None:
        date_from = today - pd.to_timedelta(7, unit="d")
    else:
        date_from = pd.to_datetime(date_from)

    if date_to < date_from:
        raise ValueError(
            "Invalid values: date_from should be smaller or equal to date_to (or today's date if date_to is not specified)."
        )

    if date_to > today:
        raise ValueError("Invalid values: date_to should be smaller or equal to today.")

    # Convert 'date' to date format on a new frame; assigning into ``df``
    # directly would change the caller's dataframe as a side effect.
    data = df.assign(date=pd.to_datetime(df["date"]))

    # Filter by date (both bounds inclusive)
    data = data[data["date"].between(date_from, date_to)]

    # Remove aggregated locations
    data = data[~data["iso_code"].str.startswith("OWID")]

    # Aggregation
    df_plot = data.groupby(var).agg({val: fun})[val].nlargest(top_n)
    df_plot = df_plot.to_frame().reset_index()

    y_lab = var.replace("_", " ").title()
    x_lab = val.replace("_", " ").title()

    if date_from == date_to:
        subtitle = f"from {date_from.strftime('%Y-%m-%d')}"
    else:
        subtitle = (
            f"from {date_from.strftime('%Y-%m-%d')} to {date_to.strftime('%Y-%m-%d')}"
        )

    title = alt.TitleParams(f"Top {top_n} {y_lab} by {x_lab}", subtitle=[subtitle])

    return (
        alt.Chart(df_plot, title=title)
        .mark_bar()
        .encode(
            y=alt.Y(var, sort="x", title=y_lab),
            x=alt.X(val, title=x_lab),
            color=alt.Color(var, legend=None),
        )
    )