"""
Module for applying conditional formatting to DataFrames and Series.
"""
from collections import defaultdict
from contextlib import contextmanager
import copy
from functools import partial
from itertools import product
from typing import (
Any,
Callable,
DefaultDict,
Dict,
List,
Optional,
Sequence,
Tuple,
Union,
)
from uuid import uuid1
import numpy as np
from pandas._config import get_option
from pandas._libs import lib
from pandas._typing import Axis, FrameOrSeries, FrameOrSeriesUnion, Label
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc
from pandas.core.dtypes.common import is_float
import pandas as pd
from pandas.api.types import is_dict_like, is_list_like
import pandas.core.common as com
from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame
from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice
jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
try:
from matplotlib import colors
import matplotlib.pyplot as plt
has_mpl = True
except ImportError:
has_mpl = False
no_mpl_message = "{0} requires matplotlib."
@contextmanager
def _mpl(func: Callable):
    """
    Context manager that hands out the matplotlib modules.

    Parameters
    ----------
    func : callable
        The function requesting matplotlib; only its ``__name__`` is used,
        to build the error message.

    Yields
    ------
    tuple
        ``(plt, colors)`` as imported at module import time.

    Raises
    ------
    ImportError
        If matplotlib could not be imported when this module was loaded.
    """
    # Fail fast when the optional dependency is missing.
    if not has_mpl:
        raise ImportError(no_mpl_message.format(func.__name__))
    yield plt, colors
class Styler:
"""
Helps style a DataFrame or Series according to the data with HTML and CSS.
Parameters
----------
data : Series or DataFrame
Data to be styled - either a Series or DataFrame.
precision : int
Precision to round floats to, defaults to pd.options.display.precision.
table_styles : list-like, default None
List of {selector: (attr, value)} dicts; see Notes.
uuid : str, default None
A unique identifier to avoid CSS collisions; generated automatically.
caption : str, default None
Caption to attach to the table.
table_attributes : str, default None
Items that show up in the opening ``<table>`` tag
in addition to automatic (by default) id.
cell_ids : bool, default True
If True, each cell will have an ``id`` attribute in their HTML tag.
The ``id`` takes the form ``T_<uuid>_row<num_row>_col<num_col>``
where ``<uuid>`` is the unique identifier, ``<num_row>`` is the row
number and ``<num_col>`` is the column number.
na_rep : str, optional
Representation for missing values.
If ``na_rep`` is None, no special formatting is applied.
.. versionadded:: 1.0.0
Attributes
----------
env : Jinja2 jinja2.Environment
template : Jinja2 Template
loader : Jinja2 Loader
See Also
--------
DataFrame.style : Return a Styler object containing methods for building
a styled HTML representation for the DataFrame.
Notes
-----
Most styling will be done by passing style functions into
``Styler.apply`` or ``Styler.applymap``. Style functions should
return values with strings containing CSS ``'attr: value'`` that will
be applied to the indicated cells.
If using in the Jupyter notebook, Styler has defined a ``_repr_html_``
to automatically render itself. Otherwise call Styler.render to get
the generated HTML.
CSS classes are attached to the generated HTML
* Index and Column names include ``index_name`` and ``level<k>``
  where `k` is its level in a MultiIndex
* Index label cells include

  * ``row_heading``
  * ``row<n>`` where `n` is the numeric position of the row
  * ``level<k>`` where `k` is the level in a MultiIndex

* Column label cells include

  * ``col_heading``
  * ``col<n>`` where `n` is the numeric position of the column
  * ``level<k>`` where `k` is the level in a MultiIndex

* Blank cells include ``blank``
* Data cells include ``data``
"""
loader = jinja2.PackageLoader("pandas", "io/formats/templates")
env = jinja2.Environment(loader=loader, trim_blocks=True)
template = env.get_template("html.tpl")
def __init__(
    self,
    data: FrameOrSeriesUnion,
    precision: Optional[int] = None,
    table_styles: Optional[List[Dict[str, List[Tuple[str, str]]]]] = None,
    uuid: Optional[str] = None,
    caption: Optional[str] = None,
    table_attributes: Optional[str] = None,
    cell_ids: bool = True,
    na_rep: Optional[str] = None,
):
    """
    Validate ``data`` and set up the initial (empty) styling state.

    Raises
    ------
    TypeError
        If ``data`` is neither a Series nor a DataFrame.
    ValueError
        If the index or the columns of ``data`` contain duplicates.
    """
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        raise TypeError("``data`` must be a Series or DataFrame")
    # A Series is styled as a one-column frame.
    frame = data.to_frame() if data.ndim == 1 else data
    if not (frame.index.is_unique and frame.columns.is_unique):
        raise ValueError("style is not supported for non-unique indices.")

    # (row position, column position) -> list of "attr: value" strings
    self.ctx: DefaultDict[Tuple[int, int], List[str]] = defaultdict(list)
    # queued (method getter, args, kwargs) triples, executed by _compute
    self._todo: List[Tuple[Callable, Tuple, Dict]] = []

    self.data = frame
    self.index = frame.index
    self.columns = frame.columns

    self.uuid = uuid
    self.table_styles = table_styles
    self.caption = caption
    self.precision = (
        get_option("display.precision") if precision is None else precision
    )
    self.table_attributes = table_attributes
    self.hidden_index = False
    self.hidden_columns: Sequence[int] = []
    self.cell_ids = cell_ids
    self.na_rep = na_rep

    def default_display_func(x):
        # Reads self.na_rep / self.precision at call time, so later changes
        # to those attributes are picked up.
        if self.na_rep is not None and pd.isna(x):
            return self.na_rep
        if is_float(x):
            return f"{x:.{self.precision}f}"
        return x

    # display_funcs maps (row, col) -> formatting function
    self._display_funcs: DefaultDict[
        Tuple[int, int], Callable[[Any], str]
    ] = defaultdict(lambda: default_display_func)
def _repr_html_(self) -> str:
    """
    Return the rendered HTML; used by Jupyter's rich display machinery.
    """
    html = self.render()
    return html
@doc(NDFrame.to_excel, klass="Styler")
def to_excel(
    self,
    excel_writer,
    sheet_name: str = "Sheet1",
    na_rep: str = "",
    float_format: Optional[str] = None,
    columns: Optional[Sequence[Label]] = None,
    header: Union[Sequence[Label], bool] = True,
    index: bool = True,
    index_label: Optional[Union[Label, Sequence[Label]]] = None,
    startrow: int = 0,
    startcol: int = 0,
    engine: Optional[str] = None,
    merge_cells: bool = True,
    encoding: Optional[str] = None,
    inf_rep: str = "inf",
    verbose: bool = True,
    freeze_panes: Optional[Tuple[int, int]] = None,
) -> None:
    # No docstring here on purpose: the ``doc`` decorator builds ``__doc__``.
    # Imported lazily, inside the method.
    from pandas.io.formats.excel import ExcelFormatter

    # Formatting options go to the formatter ...
    excel_formatter = ExcelFormatter(
        self,
        na_rep=na_rep,
        float_format=float_format,
        cols=columns,
        header=header,
        index=index,
        index_label=index_label,
        merge_cells=merge_cells,
        inf_rep=inf_rep,
    )
    # ... placement and engine options go to the write call.
    # ``encoding`` and ``verbose`` are accepted but not forwarded.
    excel_formatter.write(
        excel_writer,
        sheet_name=sheet_name,
        startrow=startrow,
        startcol=startcol,
        freeze_panes=freeze_panes,
        engine=engine,
    )
def _translate(self):
    """
    Convert the DataFrame in `self.data` and the attrs from `_build_styles`
    into a dictionary of {head, body, uuid, cellstyle}.

    Returns
    -------
    dict
        Keys: ``head``, ``cellstyle``, ``body``, ``uuid``, ``precision``,
        ``table_styles``, ``caption`` and ``table_attributes``.

    Notes
    -----
    ``self.ctx`` is only read here; cells without a style are looked up
    with ``.get`` so that the defaultdict is not filled with empty entries
    as a side effect of rendering.
    """
    table_styles = self.table_styles or []
    caption = self.caption
    ctx = self.ctx
    precision = self.precision
    hidden_index = self.hidden_index
    hidden_columns = self.hidden_columns
    uuid = self.uuid or str(uuid1()).replace("-", "_")
    ROW_HEADING_CLASS = "row_heading"
    COL_HEADING_CLASS = "col_heading"
    INDEX_NAME_CLASS = "index_name"

    DATA_CLASS = "data"
    BLANK_CLASS = "blank"
    BLANK_VALUE = ""

    def format_attr(pair):
        return f"{pair['key']}={pair['value']}"

    # for sparsifying a MultiIndex
    idx_lengths = _get_level_lengths(self.index)
    col_lengths = _get_level_lengths(self.columns, hidden_columns)

    cell_context = dict()

    n_rlvls = self.data.index.nlevels
    n_clvls = self.data.columns.nlevels
    rlabels = self.data.index.tolist()
    clabels = self.data.columns.tolist()

    if n_rlvls == 1:
        rlabels = [[x] for x in rlabels]
    if n_clvls == 1:
        clabels = [[x] for x in clabels]
    # transpose: one tuple of labels per column level
    clabels = list(zip(*clabels))

    cellstyle_map = defaultdict(list)
    head = []

    for r in range(n_clvls):
        # Blank for Index columns...
        row_es = [
            {
                "type": "th",
                "value": BLANK_VALUE,
                "display_value": BLANK_VALUE,
                "is_visible": not hidden_index,
                "class": " ".join([BLANK_CLASS]),
            }
        ] * (n_rlvls - 1)

        # ... except maybe the last for columns.names
        name = self.data.columns.names[r]
        cs = [
            BLANK_CLASS if name is None else INDEX_NAME_CLASS,
            f"level{r}",
        ]
        name = BLANK_VALUE if name is None else name
        row_es.append(
            {
                "type": "th",
                "value": name,
                "display_value": name,
                "class": " ".join(cs),
                "is_visible": not hidden_index,
            }
        )

        if clabels:
            for c, value in enumerate(clabels[r]):
                cs = [
                    COL_HEADING_CLASS,
                    f"level{r}",
                    f"col{c}",
                ]
                cs.extend(
                    cell_context.get("col_headings", {}).get(r, {}).get(c, [])
                )
                es = {
                    "type": "th",
                    "value": value,
                    "display_value": value,
                    "class": " ".join(cs),
                    "is_visible": _is_visible(c, r, col_lengths),
                }
                colspan = col_lengths.get((r, c), 0)
                if colspan > 1:
                    es["attributes"] = [
                        format_attr({"key": "colspan", "value": colspan})
                    ]
                row_es.append(es)
            head.append(row_es)

    if (
        self.data.index.names
        and com.any_not_none(*self.data.index.names)
        and not hidden_index
    ):
        index_header_row = []

        for c, name in enumerate(self.data.index.names):
            cs = [INDEX_NAME_CLASS, f"level{c}"]
            name = "" if name is None else name
            index_header_row.append(
                {"type": "th", "value": name, "class": " ".join(cs)}
            )

        # Number of visible data columns. Taken from the frame itself
        # rather than clabels[0], which does not exist when the frame has
        # no columns at all.
        n_visible_cols = len(self.data.columns) - len(hidden_columns)
        index_header_row.extend(
            [{"type": "th", "value": BLANK_VALUE, "class": " ".join([BLANK_CLASS])}]
            * n_visible_cols
        )

        head.append(index_header_row)

    body = []
    for r, idx in enumerate(self.data.index):
        row_es = []
        for c, value in enumerate(rlabels[r]):
            rid = [
                ROW_HEADING_CLASS,
                f"level{c}",
                f"row{r}",
            ]
            es = {
                "type": "th",
                "is_visible": (_is_visible(r, c, idx_lengths) and not hidden_index),
                "value": value,
                "display_value": value,
                "id": "_".join(rid[1:]),
                "class": " ".join(rid),
            }
            rowspan = idx_lengths.get((c, r), 0)
            if rowspan > 1:
                es["attributes"] = [
                    format_attr({"key": "rowspan", "value": rowspan})
                ]
            row_es.append(es)

        for c, col in enumerate(self.data.columns):
            cs = [DATA_CLASS, f"row{r}", f"col{c}"]
            cs.extend(cell_context.get("data", {}).get(r, {}).get(c, []))
            formatter = self._display_funcs[(r, c)]
            value = self.data.iloc[r, c]
            row_dict = {
                "type": "td",
                "value": value,
                "class": " ".join(cs),
                "display_value": formatter(value),
                "is_visible": (c not in hidden_columns),
            }
            # add an id when cell ids are requested or the cell has a style
            props = []
            if self.cell_ids or (r, c) in ctx:
                row_dict["id"] = "_".join(cs[1:])
                # .get() instead of ctx[r, c]: indexing the defaultdict
                # would insert an empty list for every unstyled cell.
                for x in ctx.get((r, c), ()):
                    # have to handle empty styles like ['']
                    if x.count(":"):
                        # split on the first colon only, so values that
                        # themselves contain ':' (e.g. URLs) stay intact
                        props.append(tuple(x.split(":", 1)))
                    else:
                        props.append(("", ""))
            row_es.append(row_dict)
            cellstyle_map[tuple(props)].append(f"row{r}_col{c}")
        body.append(row_es)

    # group cells that share exactly the same list of properties
    cellstyle = [
        {"props": list(props), "selectors": selectors}
        for props, selectors in cellstyle_map.items()
    ]

    table_attr = self.table_attributes
    use_mathjax = get_option("display.html.use_mathjax")
    if not use_mathjax:
        table_attr = table_attr or ""
        if 'class="' in table_attr:
            table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ')
        else:
            table_attr += ' class="tex2jax_ignore"'

    return dict(
        head=head,
        cellstyle=cellstyle,
        body=body,
        uuid=uuid,
        precision=precision,
        table_styles=table_styles,
        caption=caption,
        table_attributes=table_attr,
    )
def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> "Styler":
    """
    Format the text display value of cells.

    Parameters
    ----------
    formatter : str, callable, dict or None
        If ``formatter`` is None, the default formatter is used.
    subset : IndexSlice
        An argument to ``DataFrame.loc`` that restricts which elements
        ``formatter`` is applied to.
    na_rep : str, optional
        Representation for missing values.
        If ``na_rep`` is None, no special formatting is applied.

        .. versionadded:: 1.0.0

    Returns
    -------
    self : Styler

    Notes
    -----
    ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where
    ``a`` is one of

    - str: this will be wrapped in: ``a.format(x)``
    - callable: called with the value of an individual cell

    The default display value for floats is the fixed-point (``f``)
    format with ``Styler.precision`` decimal places.

    When ``formatter`` is a dict, its keys select the columns and
    ``subset`` only restricts the rows.

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b'])
    >>> df.style.format("{:.2%}")
    >>> df['c'] = ['a', 'b', 'c', 'd']
    >>> df.style.format({'c': str.upper})
    """
    if formatter is None:
        assert self._display_funcs.default_factory is not None
        formatter = self._display_funcs.default_factory()

    # Resolve the target cells to integer positions.
    if subset is not None:
        subset = _non_reducing_slice(subset)
        if len(subset) == 1:
            subset = subset, self.data.columns
        selected = self.data.loc[subset]
        row_locs = self.data.index.get_indexer_for(selected.index)
        col_locs = self.data.columns.get_indexer_for(selected.columns)
    else:
        row_locs = range(len(self.data))
        col_locs = range(len(self.data.columns))

    if not is_dict_like(formatter):
        # single scalar to format all cells with
        wrapped = _maybe_wrap_formatter(formatter, na_rep)
        for cell in product(row_locs, col_locs):
            self._display_funcs[cell] = wrapped
        return self

    for col, col_formatter in formatter.items():
        # formatter must be callable, so '{}' are converted to lambdas
        wrapped = _maybe_wrap_formatter(col_formatter, na_rep)
        col_num = self.data.columns.get_indexer_for([col])[0]
        for row_num in row_locs:
            self._display_funcs[(row_num, col_num)] = wrapped
    return self
def render(self, **kwargs) -> str:
    """
    Render the built up styles to HTML.

    Parameters
    ----------
    **kwargs
        Any additional keyword arguments are passed
        through to ``self.template.render``.
        This is useful when you need to provide
        additional variables for a custom template.

    Returns
    -------
    rendered : str
        The rendered HTML.

    Notes
    -----
    ``Styler`` objects have defined the ``_repr_html_`` method
    which automatically calls ``self.render()`` when it's the
    last item in a Notebook cell. When calling ``Styler.render()``
    directly, wrap the result in ``IPython.display.HTML`` to view
    the rendered HTML in the notebook.

    Pandas uses the following keys in render. Arguments passed
    in ``**kwargs`` take precedence, so think carefully if you want
    to override them:

    * head
    * cellstyle
    * body
    * uuid
    * precision
    * table_styles
    * caption
    * table_attributes
    """
    # Run the queued style functions before building the template context.
    self._compute()
    # TODO: namespace all the pandas keys
    context = self._translate()

    # Drop style groups whose props are all empty, e.g. [('', '')];
    # every cell has a class, but not every cell has a style.
    non_empty = []
    for group in context["cellstyle"]:
        if any(any(prop) for prop in group["props"]):
            non_empty.append(group)
    context["cellstyle"] = non_empty

    # caller-supplied values override the pandas-generated ones
    context.update(kwargs)
    return self.template.render(**context)
def _update_ctx(self, attrs: DataFrame) -> None:
    """
    Update the state of the Styler.

    Appends to ``self.ctx``, a mapping of
    {(row position, column position): ['<property>: <value>', ...]}.

    Parameters
    ----------
    attrs : DataFrame
        Should contain strings of '<property1>: <value1>; <property2>: <value2>'.
        Falsy cells are skipped and trailing ';' characters are ignored.
        Its labels must be present in ``self.index`` / ``self.columns``.
    """
    # label -> integer position lookups for the styled frame
    col_pos = {label: pos for pos, label in enumerate(self.columns)}
    row_pos = {label: pos for pos, label in enumerate(self.index)}

    for col_label in attrs.columns:
        j = col_pos[col_label]
        for row_label, css in attrs[[col_label]].itertuples():
            # strip trailing ';' first, then skip anything left empty
            css = css.rstrip(";") if css else css
            if not css:
                continue
            i = row_pos[row_label]
            self.ctx[(i, j)].extend(css.split(";"))
def _copy(self, deepcopy: bool = False) -> "Styler":
    """
    Build a new Styler over the same data with the same settings.

    Parameters
    ----------
    deepcopy : bool, default False
        If True, ``ctx``, ``_todo`` and ``hidden_columns`` are deep-copied;
        otherwise the new Styler shares those objects with this one.

    Returns
    -------
    Styler

    Notes
    -----
    ``self.data`` and ``table_styles`` are passed by reference in both
    modes. Display functions set through ``format`` are not carried over.
    """
    styler = Styler(
        self.data,
        precision=self.precision,
        caption=self.caption,
        uuid=self.uuid,
        table_styles=self.table_styles,
        # table_attributes and cell_ids were previously dropped, so a copy
        # silently reverted to the constructor defaults.
        table_attributes=self.table_attributes,
        cell_ids=self.cell_ids,
        na_rep=self.na_rep,
    )
    # Hidden index/columns are part of the visible result; keep them too.
    styler.hidden_index = self.hidden_index
    if deepcopy:
        styler.ctx = copy.deepcopy(self.ctx)
        styler._todo = copy.deepcopy(self._todo)
        styler.hidden_columns = copy.deepcopy(self.hidden_columns)
    else:
        styler.ctx = self.ctx
        styler._todo = self._todo
        styler.hidden_columns = self.hidden_columns
    return styler
def __copy__(self) -> "Styler":
    """
    Shallow copy: delegates to ``_copy`` with ``deepcopy=False``.
    """
    duplicate = self._copy(deepcopy=False)
    return duplicate
def __deepcopy__(self, memo) -> "Styler":
    """
    Deep copy: delegates to ``_copy`` with ``deepcopy=True``.

    ``memo`` is accepted for the ``copy.deepcopy`` protocol but not used.
    """
    duplicate = self._copy(deepcopy=True)
    return duplicate
def clear(self) -> None:
    """
    Reset the styler, removing any previously applied styles.

    ``ctx`` is emptied in place, while ``_todo`` is rebound to a new
    empty list.

    Returns None.
    """
    # Rebind rather than mutate: the old list object is left untouched.
    self._todo = []
    self.ctx.clear()
def _compute(self):
    """
    Execute the style functions built up in `self._todo`.

    Relies on the convention that all style functions go through
    .apply or .applymap. They append styles to apply as tuples of
    (application method, *args, **kwargs).

    Returns
    -------
    The return value of the last queued call, or ``self`` when nothing
    is queued.
    """
    outcome = self
    for get_method, positional, keywords in self._todo:
        # each entry stores a getter that resolves the method on this instance
        bound = get_method(self)
        outcome = bound(*positional, **keywords)
    return outcome
def _apply(
    self,
    func: Callable[..., "Styler"],
    axis: Optional[Axis] = 0,
    subset=None,
    **kwargs,
) -> "Styler":
    """
    Run ``func`` over the (sub)frame and record the resulting styles.

    Raises
    ------
    TypeError
        If ``axis`` is None and ``func`` does not return a DataFrame.
    ValueError
        If ``axis`` is None and the result's labels differ from the input,
        or if the result has a different shape than the selected data.
    """
    if subset is None:
        subset = slice(None)
    subset = _non_reducing_slice(subset)
    data = self.data.loc[subset]

    if axis is None:
        # table-wise: func sees the whole selection at once
        result = func(data, **kwargs)
        if not isinstance(result, pd.DataFrame):
            raise TypeError(
                f"Function {func!r} must return a DataFrame when "
                f"passed to `Styler.apply` with axis=None"
            )
        same_labels = result.index.equals(data.index) and result.columns.equals(
            data.columns
        )
        if not same_labels:
            raise ValueError(
                f"Result of {func!r} must have identical "
                f"index and columns as the input"
            )
    else:
        # column-wise / row-wise: restore the column labels afterwards
        result = data.apply(func, axis=axis, result_type="expand", **kwargs)
        result.columns = data.columns

    expected_shape = self.data.loc[subset].shape
    if result.shape != expected_shape:
        raise ValueError(
            f"Function {func!r} returned the wrong shape.\n"
            f"Result has shape: {result.shape}\n"
            f"Expected shape: {expected_shape}"
        )
    self._update_ctx(result)
    return self
def apply(
    self,
    func: Callable[..., "Styler"],
    axis: Optional[Axis] = 0,
    subset=None,
    **kwargs,
) -> "Styler":
    """
    Apply a function column-wise, row-wise, or table-wise.

    Updates the HTML representation with the result. The call is only
    queued here; it is executed when the Styler is rendered.

    Parameters
    ----------
    func : function
        ``func`` should take a Series or DataFrame (depending
        on ``axis``), and return an object with the same shape.
        Must return a DataFrame with identical index and
        column labels when ``axis=None``.
    axis : {0 or 'index', 1 or 'columns', None}, default 0
        Apply to each column (``axis=0`` or ``'index'``), to each row
        (``axis=1`` or ``'columns'``), or to the entire DataFrame at once
        with ``axis=None``.
    subset : IndexSlice
        A valid indexer to limit ``data`` to *before* applying the
        function. Consider using a pandas.IndexSlice.
    **kwargs : dict
        Pass along to ``func``.

    Returns
    -------
    self : Styler

    Notes
    -----
    The output shape of ``func`` should match the input, i.e. if
    ``x`` is the input row, column, or table (depending on ``axis``),
    then ``func(x).shape == x.shape`` should be true.

    This is similar to ``DataFrame.apply``, except that ``axis=None``
    applies the function to the entire DataFrame at once,
    rather than column-wise or row-wise.

    Examples
    --------
    >>> def highlight_max(x):
    ...     return ['background-color: yellow' if v == x.max() else ''
    ...             for v in x]
    ...
    >>> df = pd.DataFrame(np.random.randn(5, 2))
    >>> df.style.apply(highlight_max)
    """
    # Store a getter rather than a bound method, so the queued entry can be
    # executed against whichever instance later runs it.
    entry = (lambda instance: instance._apply, (func, axis, subset), kwargs)
    self._todo.append(entry)
    return self
def _applymap(self, func: Callable, subset=None, **kwargs) -> "Styler":
    """
    Run ``func`` on every selected cell and record the resulting styles.
    """
    # DataFrame.applymap is called with a single callable here, so the
    # keyword arguments are bound up front.
    bound_func = partial(func, **kwargs)
    selection = pd.IndexSlice[:] if subset is None else subset
    selection = _non_reducing_slice(selection)
    styles = self.data.loc[selection].applymap(bound_func)
    self._update_ctx(styles)
    return self
def applymap(self, func: Callable, subset=None, **kwargs) -> "Styler":
    """
    Apply a function elementwise.

    Updates the HTML representation with the result. The call is only
    queued here; it is executed when the Styler is rendered.

    Parameters
    ----------
    func : function
        ``func`` should take a scalar and return a scalar.
    subset : IndexSlice
        A valid indexer to limit ``data`` to *before* applying the
        function. Consider using a pandas.IndexSlice.
    **kwargs : dict
        Pass along to ``func``.

    Returns
    -------
    self : Styler

    See Also
    --------
    Styler.where
    """
    # Store a getter rather than a bound method, so the queued entry can be
    # executed against whichever instance later runs it.
    entry = (lambda instance: instance._applymap, (func, subset), kwargs)
    self._todo.append(entry)
    return self
def where(
    self,
    cond: Callable,
    value: str,
    other: Optional[str] = None,
    subset=None,
    **kwargs,
) -> "Styler":
    """
    Apply a function elementwise.

    Updates the HTML representation with a style which is
    selected in accordance with the return value of a function.

    Parameters
    ----------
    cond : callable
        ``cond`` should take a scalar and return a boolean.
    value : str
        Applied when ``cond`` returns true.
    other : str
        Applied when ``cond`` returns false. Defaults to the empty string.
    subset : IndexSlice
        A valid indexer to limit ``data`` to *before* applying the
        function. Consider using a pandas.IndexSlice.
    **kwargs : dict
        Pass along to ``cond``.

    Returns
    -------
    self : Styler

    See Also
    --------
    Styler.applymap
    """
    if other is None:
        other = ""

    # kwargs belong to ``cond``. Forwarding them to ``applymap`` would bind
    # them onto the one-argument wrapper below and fail with a TypeError as
    # soon as any keyword is supplied.
    return self.applymap(
        lambda val: value if cond(val, **kwargs) else other, subset=subset
    )
def set_precision(self, precision: int) -> "Styler":
    """
    Set the precision used to render.

    Parameters
    ----------
    precision : int
        Stored on ``self.precision``.

    Returns
    -------
    self : Styler
        The same instance, to allow method chaining.
    """
    setattr(self, "precision", precision)
    return self
def set_table_attributes(self, attributes: str) -> "Styler":
    """
    Set the table attributes.

    These are the items that show up in the opening ``<table>`` tag
    in addition to the automatic (by default) id.

    Parameters
    ----------
    attributes : str
        Stored on ``self.table_attributes``, replacing any previous value.

    Returns
    -------
    self : Styler
        The same instance, to allow method chaining.

    Examples
    --------
    >>> df = pd.DataFrame(np.random.randn(10, 4))
    >>> df.style.set_table_attributes('class="pure-table"')
    # ... <table class="pure-table"> ...
    """
    setattr(self, "table_attributes", attributes)
    return self
def export(self) -> List[Tuple[Callable, Tuple, Dict]]:
    """
    Export the styles applied to the current Styler.

    Can be applied to a second Styler with ``Styler.use``.

    Returns
    -------
    styles : list
        A new list holding the queued style entries. Changing the returned
        list does not change this Styler's queue.

    See Also
    --------
    Styler.use
    """
    # Return a shallow copy so callers cannot mutate the internal queue
    # through the exported list.
    return list(self._todo)
def use(self, styles: List[Tuple[Callable, Tuple, Dict]]) -> "Styler":
    """
    Set the styles on the current Styler.

    Possibly uses styles from ``Styler.export``. The entries are added
    after any styles already queued on this Styler.

    Parameters
    ----------
    styles : list
        List of style functions.

    Returns
    -------
    self : Styler

    See Also
    --------
    Styler.export
    """
    # in-place extension of the existing queue
    self._todo += styles
    return self
def set_uuid(self, uuid: str) -> "Styler":
    """
    Set the uuid for a Styler.

    Parameters
    ----------
    uuid : str
        Stored on ``self.uuid``.

    Returns
    -------
    self : Styler
        The same instance, to allow method chaining.
    """
    setattr(self, "uuid", uuid)
    return self
def set_caption(self, caption: str) -> "Styler":
    """
    Set the caption on a Styler.

    Parameters
    ----------
    caption : str
        Stored on ``self.caption``.

    Returns
    -------
    self : Styler
        The same instance, to allow method chaining.
    """
    setattr(self, "caption", caption)
    return self
def set_table_styles(self, table_styles) -> "Styler":
"""
Set the table styles on a Styler.
These are placed in a ``