# Source: craftbeerpi4-pione/venv/lib/python3.8/site-packages/pandas/io/formats/latex.py
# (373 lines, 13 KiB, Python; extracted from a repository web view)

"""
Module for formatting output data in Latex.
"""
2021-01-30 22:29:33 +01:00
from typing import IO, List, Optional, Tuple
import numpy as np
from pandas.core.dtypes.generic import ABCMultiIndex
2021-01-30 22:29:33 +01:00
from pandas.io.formats.format import DataFrameFormatter, TableFormatter
2021-01-30 22:29:33 +01:00
class LatexFormatter(TableFormatter):
    r"""
    Used to render a DataFrame to a LaTeX tabular/longtable environment output.

    Parameters
    ----------
    formatter : `DataFrameFormatter`
        Supplies the frame and the formatting options read by this class
        (``frame``, ``bold_rows``, ``escape``, ``index``, ``header``, ...).
    column_format : str, default None
        The columns format as specified in `LaTeX table format
        <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3 columns.
        When None, it is derived from the frame dtypes ('r' for numeric
        columns, 'l' otherwise, plus one 'l' per index level if the index
        is printed).
    longtable : bool, default False
        Use a longtable environment instead of tabular.
    multicolumn : bool, default False
        Merge header cells followed by blank cells into ``\multicolumn``
        entries.
    multicolumn_format : str, default None
        Alignment specifier placed inside the ``\multicolumn`` entries.
    multirow : bool, default False
        Merge index cells followed by blank cells into ``\multirow`` entries
        (only applied when the index has more than one level) and close
        each block with a ``\cline``.
    caption : str, default None
        Text written into ``\caption{...}``.
    label : str, default None
        Text written into ``\label{...}``.

    See Also
    --------
    HTMLFormatter
    """

    # Per-character LaTeX escapes. A single translate pass is equivalent to
    # escaping the backslash first and then the remaining characters, because
    # none of the replacement texts contains a character that is itself
    # escaped by a later entry.
    _ESCAPE_TABLE = str.maketrans(
        {
            "\\": "\\textbackslash ",
            "_": "\\_",
            "%": "\\%",
            "$": "\\$",
            "#": "\\#",
            "{": "\\{",
            "}": "\\}",
            "~": "\\textasciitilde ",
            "^": "\\textasciicircum ",
            "&": "\\&",
        }
    )

    def __init__(
        self,
        formatter: DataFrameFormatter,
        column_format: Optional[str] = None,
        longtable: bool = False,
        multicolumn: bool = False,
        multicolumn_format: Optional[str] = None,
        multirow: bool = False,
        caption: Optional[str] = None,
        label: Optional[str] = None,
    ):
        self.fmt = formatter
        self.frame = self.fmt.frame
        self.bold_rows = self.fmt.bold_rows
        self.column_format = column_format
        self.longtable = longtable
        self.multicolumn = multicolumn
        self.multicolumn_format = multicolumn_format
        self.multirow = multirow
        self.caption = caption
        self.label = label
        self.escape = self.fmt.escape

    def write_result(self, buf: IO[str]) -> None:
        """
        Render a DataFrame to a LaTeX tabular, longtable, or table/tabular
        environment output.

        Parameters
        ----------
        buf : writable text buffer
            Every piece of the rendered table is passed to ``buf.write``.

        Raises
        ------
        AssertionError
            If ``column_format`` was given but is not a string.
        """
        strcols = self._get_strcols()
        column_format = self._get_column_format()

        if self.longtable:
            self._write_longtable_begin(buf, column_format)
        else:
            self._write_tabular_begin(buf, column_format)

        buf.write("\\toprule\n")

        ilevels = self.frame.index.nlevels
        clevels = self.frame.columns.nlevels
        # number of header rows: one per column level, plus one more row
        # when index names are shown
        nlevels = clevels
        if self.fmt.has_index_names and self.fmt.show_index_names:
            nlevels += 1

        # transpose the per-column string lists into per-row tuples
        strrows = list(zip(*strcols))
        self.clinebuf: List[List[int]] = []

        for i, row in enumerate(strrows):
            if i == nlevels and self.fmt.header:
                self._write_header_end(buf, len(row))

            if self.escape:
                crow = self._escape_row(row)
            else:
                crow = [x if x else "{}" for x in row]

            if self.bold_rows and self.fmt.index:
                # bold row labels
                crow = [
                    f"\\textbf{{{x}}}"
                    if j < ilevels and x.strip() not in ["", "{}"]
                    else x
                    for j, x in enumerate(crow)
                ]
            if i < clevels and self.fmt.header and self.multicolumn:
                # sum up columns to multicolumns
                crow = self._format_multicolumn(crow, ilevels)
            if i >= nlevels and self.fmt.index and self.multirow and ilevels > 1:
                # sum up rows to multirows
                crow = self._format_multirow(crow, ilevels, i, strrows)

            buf.write(" & ".join(crow))
            buf.write(" \\\\\n")
            if self.multirow and i < len(strrows) - 1:
                self._print_cline(buf, i, len(strcols))

        if self.longtable:
            self._write_longtable_end(buf)
        else:
            self._write_tabular_end(buf)

    def _get_strcols(self) -> List[List[str]]:
        """
        Build the string representation of the table, one list per column.
        """
        frame = self.frame
        if len(frame.columns) == 0 or len(frame.index) == 0:
            info_line = (
                f"Empty {type(frame).__name__}\n"
                f"Columns: {frame.columns}\n"
                f"Index: {frame.index}"
            )
            strcols = [[info_line]]
        else:
            strcols = self.fmt._to_str_columns()

        # reestablish the MultiIndex that has been joined by _to_str_column
        if self.fmt.index and isinstance(frame.index, ABCMultiIndex):
            out = frame.index.format(
                adjoin=False,
                sparsify=self.fmt.sparsify,
                names=self.fmt.has_index_names,
                na_rep=self.fmt.na_rep,
            )

            # index.format will sparsify repeated entries with empty strings
            # so pad these with some empty space
            def pad_empties(x):
                for pad in reversed(x):
                    if pad:
                        break
                return [x[0]] + [i if i else " " * len(pad) for i in x[1:]]

            out = (pad_empties(i) for i in out)

            # Add empty spaces for each column level
            clevels = frame.columns.nlevels
            out = [[" " * len(i[-1])] * clevels + i for i in out]

            # Add the column names to the last index column
            cnames = frame.columns.names
            if any(cnames):
                new_names = [i if i else "{}" for i in cnames]
                out[frame.index.nlevels - 1][:clevels] = new_names

            # Get rid of old multiindex column and add new ones
            strcols = out + strcols[1:]
        return strcols

    def _get_column_format(self) -> str:
        """
        Return the user-supplied column format, or derive one from the dtypes.
        """
        if self.column_format is None:
            dtypes = self.frame.dtypes._values
            # right-align numeric columns, left-align everything else
            column_format = "".join(
                "r" if issubclass(dtype.type, np.number) else "l"
                for dtype in dtypes
            )
            if self.fmt.index:
                column_format = "l" * self.frame.index.nlevels + column_format
            return column_format
        if not isinstance(self.column_format, str):  # pragma: no cover
            # report the type of the attribute; no local holds the value here
            raise AssertionError(
                "column_format must be str or unicode, "
                f"not {type(self.column_format)}"
            )
        return self.column_format

    def _write_header_end(self, buf: IO[str], ncols: int) -> None:
        """
        Write the rule closing the header, plus the longtable head/foot setup.
        """
        buf.write("\\midrule\n")  # End of header
        if self.longtable:
            buf.write("\\endhead\n")
            buf.write("\\midrule\n")
            # the second literal is not an f-string, so its doubled braces
            # are written to the output as-is
            buf.write(
                f"\\multicolumn{{{ncols}}}{{r}}"
                "{{Continued on next page}} \\\\\n"
            )
            buf.write("\\midrule\n")
            buf.write("\\endfoot\n\n")
            buf.write("\\bottomrule\n")
            buf.write("\\endlastfoot\n")

    def _escape_row(self, row) -> List[str]:
        """
        Escape LaTeX special characters in every cell of ``row``.

        Empty cells and cells that already equal ``"{}"`` become ``"{}"``.
        """
        return [
            x.translate(self._ESCAPE_TABLE) if (x and x != "{}") else "{}"
            for x in row
        ]

    def _format_multicolumn(self, row: List[str], ilevels: int) -> List[str]:
        r"""
        Combine columns belonging to a group to a single multicolumn entry
        according to self.multicolumn_format

        e.g.:
        a &  &  & b & c &
        will become
        \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}

        The first ``ilevels`` cells (the index columns) are copied unchanged.
        """
        row2 = list(row[:ilevels])
        ncol = 1
        coltext = ""

        def append_col():
            # write multicolumn if needed
            if ncol > 1:
                row2.append(
                    f"\\multicolumn{{{ncol:d}}}{{{self.multicolumn_format}}}"
                    f"{{{coltext.strip()}}}"
                )
            # don't modify where not needed
            else:
                row2.append(coltext)

        for c in row[ilevels:]:
            # if next col has text, write the previous
            if c.strip():
                if coltext:
                    append_col()
                coltext = c
                ncol = 1
            # if not, add it to the previous multicolumn
            else:
                ncol += 1
        # write last column name
        if coltext:
            append_col()
        return row2

    def _format_multirow(
        self, row: List[str], ilevels: int, i: int, rows: List[Tuple[str, ...]]
    ) -> List[str]:
        r"""
        Check following rows, whether row should be a multirow

        e.g.:     becomes:
        a & 0 &   \multirow{2}{*}{a} & 0 &
          & 1 &     & 1 &
        b & 0 &   \cline{1-2}
                  b & 0 &

        ``row`` is modified in place and returned; the row index at which
        each block ends is recorded in ``self.clinebuf``.
        """
        for j in range(ilevels):
            if row[j].strip():
                nrow = 1
                # count the directly following rows whose cell j is blank
                for r in rows[i + 1 :]:
                    if not r[j].strip():
                        nrow += 1
                    else:
                        break
                if nrow > 1:
                    # overwrite non-multirow entry
                    row[j] = f"\\multirow{{{nrow:d}}}{{*}}{{{row[j].strip()}}}"
                    # save when to end the current block with \cline
                    self.clinebuf.append([i + nrow - 1, j + 1])
        return row

    def _print_cline(self, buf: IO[str], i: int, icol: int) -> None:
        """
        Print clines after multirow-blocks are finished.

        Writes one cline for every ``self.clinebuf`` entry that ends at row
        ``i`` (spanning from the recorded column to ``icol``), then drops
        those entries from the buffer.
        """
        for cl in self.clinebuf:
            if cl[0] == i:
                buf.write(f"\\cline{{{cl[1]:d}-{icol:d}}}\n")
        # remove entries that have been written to buffer
        self.clinebuf = [x for x in self.clinebuf if x[0] != i]

    def _write_tabular_begin(self, buf, column_format: str):
        """
        Write the beginning of a tabular environment or
        nested table/tabular environments including caption and label.

        Parameters
        ----------
        buf : writable text buffer
            Object whose ``write`` method receives the output.
        column_format : str
            The columns format as specified in `LaTeX table format
            <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl'
            for 3 columns
        """
        if self.caption is not None or self.label is not None:
            # then write output in a nested table/tabular environment
            caption_ = (
                "" if self.caption is None else f"\n\\caption{{{self.caption}}}"
            )
            label_ = "" if self.label is None else f"\n\\label{{{self.label}}}"
            buf.write(f"\\begin{{table}}\n\\centering{caption_}{label_}\n")

        buf.write(f"\\begin{{tabular}}{{{column_format}}}\n")

    def _write_tabular_end(self, buf):
        """
        Write the end of a tabular environment or nested table/tabular
        environment.

        Parameters
        ----------
        buf : writable text buffer
            Object whose ``write`` method receives the output.
        """
        buf.write("\\bottomrule\n")
        buf.write("\\end{tabular}\n")
        # close the outer table environment only if one was opened
        if self.caption is not None or self.label is not None:
            buf.write("\\end{table}\n")

    def _write_longtable_begin(self, buf, column_format: str):
        """
        Write the beginning of a longtable environment including caption and
        label if provided by user.

        Parameters
        ----------
        buf : writable text buffer
            Object whose ``write`` method receives the output.
        column_format : str
            The columns format as specified in `LaTeX table format
            <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl'
            for 3 columns
        """
        buf.write(f"\\begin{{longtable}}{{{column_format}}}\n")

        if self.caption is not None or self.label is not None:
            if self.caption is not None:
                buf.write(f"\\caption{{{self.caption}}}")
            if self.label is not None:
                buf.write(f"\\label{{{self.label}}}")

            # a double-backslash is required at the end of the line
            # as discussed here:
            # https://tex.stackexchange.com/questions/219138
            buf.write("\\\\\n")

    @staticmethod
    def _write_longtable_end(buf):
        """
        Write the end of a longtable environment.

        Parameters
        ----------
        buf : writable text buffer
            Object whose ``write`` method receives the output.
        """
        buf.write("\\end{longtable}\n")