Ben Chuanlong Du's Blog

It is never too late to learn.

NdOverlay Plots in HoloViews

In [67]:
import numpy as np
import holoviews as hv

# Load the bokeh plotting extension for HoloViews; the options defined later
# in this notebook are registered against the "bokeh" backend.
hv.extension("bokeh")
In [68]:
import pandas as pd
from matplotlib.cbook import get_sample_data

# Read the sample CSV through matplotlib's get_sample_data helper. With its
# default arguments the helper hands back an open file object, so read it
# inside a ``with`` block to make sure the handle is closed afterwards.
# ``fname`` stays bound (to the now-closed file) after the block.
with get_sample_data("percent_bachelors_degrees_women_usa.csv") as fname:
    gender_degree_data = pd.read_csv(fname)

# Plot title; the trailing newline is part of the string passed to relabel().
title = (
    "Percentage of Bachelor's degrees conferred to women "
    "in the U.S.A. by major (1970-2011)\n"
)

# Twenty hex colors used for the plot: cycled over the curves and reused as
# the colormap for the text labels.
color_sequence = [
    "#1f77b4", "#aec7e8",
    "#ff7f0e", "#ffbb78",
    "#2ca02c", "#98df8a",
    "#d62728", "#ff9896",
    "#9467bd", "#c5b0d5",
    "#8c564b", "#c49c94",
    "#e377c2", "#f7b6d2",
    "#7f7f7f", "#c7c7c7",
    "#bcbd22", "#dbdb8d",
    "#17becf", "#9edae5",
]

# Per-degree vertical nudges added to the "conferred" value of each label.
# Degrees that are not listed here receive no offset (looked up with a
# default of 0).
y_offsets = dict(
    [
        ("Foreign Languages", 0.5),
        ("English", -0.5),
        ("Communications and Journalism", 0.75),
        ("Art and Performance", -0.25),
        ("Agriculture", 1.25),
        ("Social Sciences and History", 0.25),
        ("Business", -0.75),
        ("Math and Statistics", 0.75),
        ("Architecture", -0.75),
        ("Computer Science", 0.75),
        ("Engineering", -0.25),
    ]
)

# Use pd.melt to unpivot the table: every column other than "Year" becomes a
# value of the new "Degree" column, with its cell value stored in "conferred".
# read_csv already returned a DataFrame, so no extra pd.DataFrame() wrap is needed.
df = pd.melt(
    gender_degree_data, id_vars="Year", var_name="Degree", value_name="conferred"
)
# Turn the former column names into display labels: underscores become spaces
# and each word is title-cased (literal replacement, not a regex).
df["Degree"] = df["Degree"].str.replace("_", " ", regex=False).str.title()


# Define a formatter that works for both bokeh and matplotlib
def percent_format(x):
    """Format a value as a whole-number percentage string such as ``"42%"``.

    Args:
        x: The value to format. Normally a real number; objects that only
            support integer conversion are handled by the fallback path.

    Returns:
        The value followed by a percent sign. The primary path rounds to zero
        decimal places (``"{:0.0f}"``); the fallback path uses ``%d``, which
        converts to an integer instead.

    Raises:
        TypeError: If ``x`` cannot be formatted by either path.
    """
    try:
        return "{:0.0f}%".format(x)
    except (TypeError, ValueError):
        # In %-style formatting a literal percent sign must be written "%%";
        # a lone trailing "%" raises "ValueError: incomplete format".
        return "%d%%" % x


# Value dimension for the plotted quantity: ticks are rendered through
# percent_format and the axis range is fixed to 0-90.
vdim = hv.Dimension(
    "conferred",
    range=(0, 90),
    value_format=percent_format,
)

# Wrap the long-form frame in a Dataset, build one Curve over "Year" per
# "Degree" group, then overlay the resulting curves.
ds = hv.Dataset(df, vdims=vdim)
curves = ds.to(hv.Curve, "Year", groupby="Degree")
curves = curves.overlay()

# Largest "Year" value in the dataset; the text labels are taken from the
# rows of this year.
max_year = ds["Year"].max()


def offset(row):
    """Return a copy of ``row`` with its "conferred" value nudged for labelling.

    The nudge is looked up in the module-level ``y_offsets`` mapping by the
    row's ``Degree``; degrees without an entry are shifted by 0.

    Args:
        row: A row with ``Degree`` and ``"conferred"`` entries, as passed by
            ``DataFrame.apply(..., axis=1)``.

    Returns:
        A new row object carrying the shifted "conferred" value.
    """
    # Work on a copy: mutating the object handed to an apply() callback is
    # not supported by pandas and can lead to unexpected behavior.
    shifted = row.copy()
    shifted["conferred"] += y_offsets.get(shifted.Degree, 0)
    return shifted


# Select the rows of the final year, nudge each one with offset(), and put
# the degree name as a text label at the resulting (Year, conferred) position.
label_df = df.loc[df["Year"] == max_year].apply(offset, axis=1)
labels = hv.Labels(label_df, kdims=["Year", "conferred"], vdims="Degree")
In [69]:
def grid_cb(plot, element):
    """Hook that restyles the grid of the figure stored in ``plot.handles``.

    Hides the x grid, draws the y grid with a [6, 4] dash pattern at width 3,
    and limits the grid bounds to (1970, 2010).

    Args:
        plot: Object whose ``handles["plot"]`` entry is the figure to modify.
        element: Unused; accepted because the hook is called with it.
    """
    figure = plot.handles["plot"]
    x_grid, y_grid = figure.xgrid, figure.ygrid

    x_grid.visible = False
    y_grid.grid_line_dash = [6, 4]
    y_grid.grid_line_width = 3
    figure.grid.bounds = (1970, 2010)


# Define some custom options for bokeh
# Fetch the options object for the bokeh backend; the assignments below set
# defaults on it per element type (NdOverlay, Curve).
options = hv.Store.options(backend="bokeh")
# "plot" option for NdOverlay: batched=False.
options.NdOverlay = hv.Options("plot", batched=False)
# "plot" options for Curve: no frame, no axis labels (labelled=[]), a hover
# tool, a 900x900 figure, no legend, decade x ticks, and grid_cb registered
# as a finalize hook to restyle the grid.
options.Curve = hv.Options(
    "plot",
    show_frame=False,
    labelled=[],
    tools=["hover"],
    finalize_hooks=[grid_cb],
    height=900,
    width=900,
    show_legend=False,
    xticks=[1970, 1980, 1990, 2000, 2010],
)
# "style" options for Curve: colors cycle through color_sequence, line width 2.
# NOTE(review): this is a second assignment to options.Curve; presumably
# HoloViews keeps the "plot" group set above and only updates the "style"
# group rather than overwriting it - confirm against the HoloViews version in use.
options.Curve = hv.Options("style", color=hv.Cycle(values=color_sequence), line_width=2)

# Compose the final figure and leave it as the cell's last expression so it is
# displayed: the curves (with the Year range redefined to 1970-2030) overlaid
# with the degree labels, colored per Degree from color_sequence and
# left-aligned, then relabelled with the title.
# NOTE(review): the range extends past the 2010 tick, presumably to leave room
# for the labels to the right of the last data point - confirm.
(
    curves.redim(Year=dict(range=(1970, 2030)))
    * labels.options(color_index="Degree", cmap=color_sequence, text_align="left")
).relabel(title)
WARNING:param.main: conferred dimension formatter could not be converted to tick formatter. Ensure pscript is installed ("conda install pscript" or "pip install pscript")
Out[69]:
In [ ]:
 

Comments