2021-01-09 15:20:56 +01:00
|
|
|
"""
|
|
|
|
Module for formatting output data in LaTeX.
|
|
|
|
"""
|
2021-01-30 22:29:33 +01:00
|
|
|
from typing import IO, List, Optional, Tuple
|
2021-01-09 15:20:56 +01:00
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
from pandas.core.dtypes.generic import ABCMultiIndex
|
|
|
|
|
2021-01-30 22:29:33 +01:00
|
|
|
from pandas.io.formats.format import DataFrameFormatter, TableFormatter
|
2021-01-09 15:20:56 +01:00
|
|
|
|
|
|
|
|
2021-01-30 22:29:33 +01:00
|
|
|
class LatexFormatter(TableFormatter):
    """
    Used to render a DataFrame to a LaTeX tabular/longtable environment output.

    Parameters
    ----------
    formatter : `DataFrameFormatter`
        Supplies the frame to render together with the display options read
        by this class (``index``, ``header``, ``bold_rows``, ``escape``, ...).
    column_format : str, default None
        The columns format as specified in `LaTeX table format
        <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3 columns.
        When None, 'r' is used for numeric columns and 'l' for all others
        (index levels included).
    longtable : bool, default False
        Use a longtable environment instead of tabular.
    multicolumn : bool, default False
        Merge a header cell and the blank cells following it into a single
        multicolumn entry.
    multicolumn_format : str, default None
        Alignment specifier placed in every generated multicolumn entry.
    multirow : bool, default False
        Merge an index cell and the blank cells below it into a single
        multirow entry and close each such block with a cline.
    caption : str, default None
        Text of the caption command written with the table.
    label : str, default None
        Text of the label command written with the table.

    See Also
    --------
    HTMLFormatter
    """

    # Single-pass equivalent of replacing the backslash first and the other
    # special characters afterwards: none of the replacement texts contains a
    # character that a later replacement would touch again.
    _ESCAPE_TABLE = str.maketrans(
        {
            "\\": "\\textbackslash ",
            "_": "\\_",
            "%": "\\%",
            "$": "\\$",
            "#": "\\#",
            "{": "\\{",
            "}": "\\}",
            "~": "\\textasciitilde ",
            "^": "\\textasciicircum ",
            "&": "\\&",
        }
    )

    def __init__(
        self,
        formatter: DataFrameFormatter,
        column_format: Optional[str] = None,
        longtable: bool = False,
        multicolumn: bool = False,
        multicolumn_format: Optional[str] = None,
        multirow: bool = False,
        caption: Optional[str] = None,
        label: Optional[str] = None,
    ):
        self.fmt = formatter
        self.frame = self.fmt.frame
        self.bold_rows = self.fmt.bold_rows
        self.column_format = column_format
        self.longtable = longtable
        self.multicolumn = multicolumn
        self.multicolumn_format = multicolumn_format
        self.multirow = multirow
        self.caption = caption
        self.label = label
        self.escape = self.fmt.escape

    def write_result(self, buf: IO[str]) -> None:
        """
        Render a DataFrame to a LaTeX tabular, longtable, or table/tabular
        environment output.

        Parameters
        ----------
        buf : writable text stream
            Receives the generated LaTeX through ``buf.write``.

        Raises
        ------
        AssertionError
            If ``column_format`` is neither None nor a str.
        """
        strcols = self._get_strcols()
        column_format = self._get_column_format()

        if self.longtable:
            self._write_longtable_begin(buf, column_format)
        else:
            self._write_tabular_begin(buf, column_format)

        buf.write("\\toprule\n")

        ilevels = self.frame.index.nlevels
        clevels = self.frame.columns.nlevels
        # number of leading rows that make up the header
        nlevels = clevels
        if self.fmt.has_index_names and self.fmt.show_index_names:
            nlevels += 1
        # transpose the per-column strings into rows
        strrows = list(zip(*strcols))
        self.clinebuf: List[List[int]] = []

        for i, row in enumerate(strrows):
            if i == nlevels and self.fmt.header:
                buf.write("\\midrule\n")  # End of header
                if self.longtable:
                    buf.write("\\endhead\n")
                    buf.write("\\midrule\n")
                    # the second literal is not an f-string: the doubled
                    # braces are written to the output as-is
                    buf.write(
                        f"\\multicolumn{{{len(row)}}}{{r}}"
                        "{{Continued on next page}} \\\\\n"
                    )
                    buf.write("\\midrule\n")
                    buf.write("\\endfoot\n\n")
                    buf.write("\\bottomrule\n")
                    buf.write("\\endlastfoot\n")
            if self.escape:
                crow = [self._escape_cell(x) for x in row]
            else:
                crow = [x if x else "{}" for x in row]
            if self.bold_rows and self.fmt.index:
                # bold row labels, leaving blank / placeholder cells alone
                crow = [
                    f"\\textbf{{{x}}}"
                    if j < ilevels and x.strip() not in ["", "{}"]
                    else x
                    for j, x in enumerate(crow)
                ]
            if i < clevels and self.fmt.header and self.multicolumn:
                # sum up columns to multicolumns
                crow = self._format_multicolumn(crow, ilevels)
            if i >= nlevels and self.fmt.index and self.multirow and ilevels > 1:
                # sum up rows to multirows
                crow = self._format_multirow(crow, ilevels, i, strrows)
            buf.write(" & ".join(crow))
            buf.write(" \\\\\n")
            if self.multirow and i < len(strrows) - 1:
                self._print_cline(buf, i, len(strcols))

        if self.longtable:
            self._write_longtable_end(buf)
        else:
            self._write_tabular_end(buf)

    def _get_strcols(self):
        """
        Return the cell strings of the table, one list per output column.

        An empty frame yields a single column holding one informational cell.
        For a MultiIndex the joined index column produced by the formatter is
        replaced by one column per index level.
        """
        if len(self.frame.columns) == 0 or len(self.frame.index) == 0:
            info_line = (
                f"Empty {type(self.frame).__name__}\n"
                f"Columns: {self.frame.columns}\n"
                f"Index: {self.frame.index}"
            )
            strcols = [[info_line]]
        else:
            strcols = self.fmt._to_str_columns()

        # reestablish the MultiIndex that has been joined by _to_str_column
        if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex):
            out = self.frame.index.format(
                adjoin=False,
                sparsify=self.fmt.sparsify,
                names=self.fmt.has_index_names,
                na_rep=self.fmt.na_rep,
            )

            # index.format will sparsify repeated entries with empty strings
            # so pad these with some empty space
            def pad_empties(x):
                # width of the last non-empty entry (0 if there is none)
                pad = next((entry for entry in reversed(x) if entry), "")
                return [x[0]] + [i if i else " " * len(pad) for i in x[1:]]

            out = (pad_empties(i) for i in out)

            # Add empty spaces for each column level
            clevels = self.frame.columns.nlevels
            out = [[" " * len(i[-1])] * clevels + i for i in out]

            # Add the column names to the last index column
            cnames = self.frame.columns.names
            if any(cnames):
                new_names = [i if i else "{}" for i in cnames]
                out[self.frame.index.nlevels - 1][:clevels] = new_names

            # Get rid of old multiindex column and add new ones
            strcols = out + strcols[1:]
        return strcols

    def _get_column_format(self) -> str:
        """
        Return the column specification for the table environment.

        A user supplied ``column_format`` is returned unchanged. Otherwise the
        specification is derived from the frame dtypes: 'r' for numeric
        columns, 'l' for everything else, preceded by one 'l' per index level
        when the index is shown.

        Raises
        ------
        AssertionError
            If ``column_format`` is neither None nor a str.
        """
        if self.column_format is None:
            dtypes = self.frame.dtypes._values
            column_format = "".join(
                "r" if issubclass(dtype.type, np.number) else "l"
                for dtype in dtypes
            )
            if self.fmt.index:
                index_format = "l" * self.frame.index.nlevels
                column_format = index_format + column_format
            return column_format
        if not isinstance(self.column_format, str):  # pragma: no cover
            # report the type of the attribute; no local has been bound on
            # this path
            raise AssertionError(
                "column_format must be str or unicode, "
                f"not {type(self.column_format)}"
            )
        return self.column_format

    def _escape_cell(self, cell: str) -> str:
        """
        Escape the LaTeX special characters of a single cell.

        Empty cells and the ``{}`` placeholder are returned as ``{}``.
        """
        if cell and cell != "{}":
            return cell.translate(self._ESCAPE_TABLE)
        return "{}"

    def _format_multicolumn(self, row: List[str], ilevels: int) -> List[str]:
        r"""
        Combine columns belonging to a group to a single multicolumn entry
        according to self.multicolumn_format

        e.g.:
        a &  &  & b & c &
        will become
        \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}

        Parameters
        ----------
        row : list of str
            Cells of one header row; the first ``ilevels`` cells are copied
            through unchanged.
        ilevels : int
            Number of leading index cells in ``row``.

        Returns
        -------
        list of str
            New row in which each run of a text cell followed by blank cells
            is collapsed into one entry.
        """
        row2 = list(row[:ilevels])
        ncol = 1
        coltext = ""

        def append_col():
            # write multicolumn if needed
            if ncol > 1:
                row2.append(
                    f"\\multicolumn{{{ncol:d}}}{{{self.multicolumn_format}}}"
                    f"{{{coltext.strip()}}}"
                )
            # don't modify where not needed
            else:
                row2.append(coltext)

        for c in row[ilevels:]:
            # if next col has text, write the previous
            if c.strip():
                if coltext:
                    append_col()
                coltext = c
                ncol = 1
            # if not, add it to the previous multicolumn
            else:
                ncol += 1
        # write last column name
        if coltext:
            append_col()
        return row2

    def _format_multirow(
        self, row: List[str], ilevels: int, i: int, rows: List[Tuple[str, ...]]
    ) -> List[str]:
        r"""
        Check following rows, whether row should be a multirow

        e.g.:     becomes:
        a & 0 &   \multirow{2}{*}{a} & 0 &
          & 1 &     & 1 &
        b & 0 &   \cline{1-2}
                  b & 0 &

        Parameters
        ----------
        row : list of str
            Cells of the current row; modified in place and returned.
        ilevels : int
            Number of leading index cells to inspect.
        i : int
            Position of ``row`` within ``rows``.
        rows : list of tuple of str
            All rows of the table, used to count blank cells below.

        Returns
        -------
        list of str
            ``row`` with multirow entries substituted where needed.
        """
        for j in range(ilevels):
            if not row[j].strip():
                continue
            # count this cell plus the run of blank cells directly below it
            nrow = 1
            for r in rows[i + 1 :]:
                if r[j].strip():
                    break
                nrow += 1
            if nrow > 1:
                # overwrite non-multirow entry
                row[j] = f"\\multirow{{{nrow:d}}}{{*}}{{{row[j].strip()}}}"
                # save when to end the current block with \cline
                self.clinebuf.append([i + nrow - 1, j + 1])
        return row

    def _print_cline(self, buf: IO[str], i: int, icol: int) -> None:
        """
        Print clines after multirow-blocks are finished.

        Parameters
        ----------
        buf : writable text stream
        i : int
            Index of the row that has just been written.
        icol : int
            Last column covered by the cline.
        """
        for cl in self.clinebuf:
            if cl[0] == i:
                buf.write(f"\\cline{{{cl[1]:d}-{icol:d}}}\n")
        # remove entries that have been written to buffer
        self.clinebuf = [x for x in self.clinebuf if x[0] != i]

    def _write_tabular_begin(self, buf: IO[str], column_format: str) -> None:
        """
        Write the beginning of a tabular environment or
        nested table/tabular environments including caption and label.

        Parameters
        ----------
        buf : writable text stream
            Receives the output through ``buf.write``.
        column_format : str
            The columns format as specified in `LaTeX table format
            <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl'
            for 3 columns
        """
        if self.caption is not None or self.label is not None:
            # then write output in a nested table/tabular environment
            caption_ = (
                "" if self.caption is None else f"\n\\caption{{{self.caption}}}"
            )
            label_ = "" if self.label is None else f"\n\\label{{{self.label}}}"
            buf.write(f"\\begin{{table}}\n\\centering{caption_}{label_}\n")

        buf.write(f"\\begin{{tabular}}{{{column_format}}}\n")

    def _write_tabular_end(self, buf: IO[str]) -> None:
        """
        Write the end of a tabular environment or nested table/tabular
        environment.

        Parameters
        ----------
        buf : writable text stream
            Receives the output through ``buf.write``.
        """
        buf.write("\\bottomrule\n")
        buf.write("\\end{tabular}\n")
        # close the outer table environment only if one was opened
        if self.caption is not None or self.label is not None:
            buf.write("\\end{table}\n")

    def _write_longtable_begin(self, buf: IO[str], column_format: str) -> None:
        """
        Write the beginning of a longtable environment including caption and
        label if provided by user.

        Parameters
        ----------
        buf : writable text stream
            Receives the output through ``buf.write``.
        column_format : str
            The columns format as specified in `LaTeX table format
            <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl'
            for 3 columns
        """
        buf.write(f"\\begin{{longtable}}{{{column_format}}}\n")

        if self.caption is not None or self.label is not None:
            if self.caption is not None:
                buf.write(f"\\caption{{{self.caption}}}")

            if self.label is not None:
                buf.write(f"\\label{{{self.label}}}")

            # a double-backslash is required at the end of the line
            # as discussed here:
            # https://tex.stackexchange.com/questions/219138
            buf.write("\\\\\n")

    @staticmethod
    def _write_longtable_end(buf: IO[str]) -> None:
        """
        Write the end of a longtable environment.

        Parameters
        ----------
        buf : writable text stream
            Receives the output through ``buf.write``.
        """
        buf.write("\\end{longtable}\n")
|