craftbeerpi4-pione/venv3/lib/python3.7/site-packages/pandas/io/formats/info.py

from abc import ABCMeta, abstractmethod
import sys
from typing import IO, TYPE_CHECKING, List, Optional, Tuple, Union

from pandas._config import get_option

from pandas._typing import Dtype, FrameOrSeries

from pandas.core.indexes.api import Index

from pandas.io.formats import format as fmt
from pandas.io.formats.printing import pprint_thing

if TYPE_CHECKING:
    from pandas.core.series import Series  # noqa: F401


def _put_str(s: Union[str, Dtype], space: int) -> str:
    """
    Make string of specified length, padding to the right if necessary.

    Parameters
    ----------
    s : Union[str, Dtype]
        String to be formatted.
    space : int
        Length to force string to be of.

    Returns
    -------
    str
        String coerced to given length.

    Examples
    --------
    >>> pd.io.formats.info._put_str("panda", 6)
    'panda '
    >>> pd.io.formats.info._put_str("panda", 4)
    'pand'
    """
    return str(s)[:space].ljust(space)


def _sizeof_fmt(num: Union[int, float], size_qualifier: str) -> str:
    """
    Return size in human readable format.

    Parameters
    ----------
    num : int
        Size in bytes.
    size_qualifier : str
        Either empty, or '+' (if lower bound).

    Returns
    -------
    str
        Size in human readable format.

    Examples
    --------
    >>> _sizeof_fmt(23028, '')
    '22.5 KB'

    >>> _sizeof_fmt(23028, '+')
    '22.5+ KB'
    """
    for x in ["bytes", "KB", "MB", "GB", "TB"]:
        if num < 1024.0:
            return f"{num:3.1f}{size_qualifier} {x}"
        num /= 1024.0
    return f"{num:3.1f}{size_qualifier} PB"


class BaseInfo(metaclass=ABCMeta):
    def __init__(
        self,
        data: FrameOrSeries,
        verbose: Optional[bool] = None,
        buf: Optional[IO[str]] = None,
        max_cols: Optional[int] = None,
        memory_usage: Optional[Union[bool, str]] = None,
        null_counts: Optional[bool] = None,
    ):
        if buf is None:  # pragma: no cover
            buf = sys.stdout
        if memory_usage is None:
            memory_usage = get_option("display.memory_usage")

        self.data = data
        self.verbose = verbose
        self.buf = buf
        self.max_cols = max_cols
        self.memory_usage = memory_usage
        self.null_counts = null_counts

    @abstractmethod
    def _get_mem_usage(self, deep: bool) -> int:
        """
        Get memory usage in bytes.

        Parameters
        ----------
        deep : bool
            If True, introspect the data deeply by interrogating object dtypes
            for system-level memory consumption, and include it in the returned
            values.

        Returns
        -------
        mem_usage : int
            Object's total memory usage in bytes.
        """
        pass

    @abstractmethod
    def _get_ids_and_dtypes(self) -> Tuple["Index", "Series"]:
        """
        Get column names and dtypes.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        dtypes : Series
            Dtype of each of the DataFrame's columns.
        """
        pass

    @abstractmethod
    def _verbose_repr(
        self, lines: List[str], ids: "Index", dtypes: "Series", show_counts: bool
    ) -> None:
        """
        Append name, non-null count (optional), and dtype for each column to `lines`.

        Parameters
        ----------
        lines : List[str]
            Lines that will contain `info` representation.
        ids : Index
            The DataFrame's column names.
        dtypes : Series
            The DataFrame's columns' dtypes.
        show_counts : bool
            If True, count of non-NA cells for each column will be appended to `lines`.
        """
        pass

    @abstractmethod
    def _non_verbose_repr(self, lines: List[str], ids: "Index") -> None:
        """
        Append short summary of columns' names to `lines`.

        Parameters
        ----------
        lines : List[str]
            Lines that will contain `info` representation.
        ids : Index
            The DataFrame's column names.
        """
        pass

    def info(self) -> None:
        """
        Print a concise summary of a %(klass)s.

        This method prints information about a %(klass)s including
        the index dtype%(type_sub)s, non-null values and memory usage.

        Parameters
        ----------
        data : %(klass)s
            %(klass)s to print information about.
        verbose : bool, optional
            Whether to print the full summary. By default, the setting in
            ``pandas.options.display.max_info_columns`` is followed.
        buf : writable buffer, defaults to sys.stdout
            Where to send the output. By default, the output is printed to
            sys.stdout. Pass a writable buffer if you need to further process
            the output.
        %(max_cols_sub)s
        memory_usage : bool, str, optional
            Specifies whether total memory usage of the %(klass)s
            elements (including the index) should be displayed. By default,
            this follows the ``pandas.options.display.memory_usage`` setting.

            True always show memory usage. False never shows memory usage.
            A value of 'deep' is equivalent to "True with deep introspection".
            Memory usage is shown in human-readable units (base-2
            representation). Without deep introspection a memory estimation is
            made based in column dtype and number of rows assuming values
            consume the same memory amount for corresponding dtypes. With deep
            memory introspection, a real memory usage calculation is performed
            at the cost of computational resources.
        null_counts : bool, optional
            Whether to show the non-null counts. By default, this is shown
            only if the %(klass)s is smaller than
            ``pandas.options.display.max_info_rows`` and
            ``pandas.options.display.max_info_columns``. A value of True always
            shows the counts, and False never shows the counts.

        Returns
        -------
        None
            This method prints a summary of a %(klass)s and returns None.

        See Also
        --------
        %(see_also_sub)s

        Examples
        --------
        %(examples_sub)s
        """
        lines = []

        lines.append(str(type(self.data)))
        lines.append(self.data.index._summary())

        ids, dtypes = self._get_ids_and_dtypes()
        col_count = len(ids)

        if col_count == 0:
            lines.append(f"Empty {type(self.data).__name__}")
            fmt.buffer_put_lines(self.buf, lines)
            return

        # hack
        max_cols = self.max_cols
        if max_cols is None:
            max_cols = get_option("display.max_info_columns", col_count + 1)

        max_rows = get_option("display.max_info_rows", len(self.data) + 1)

        if self.null_counts is None:
            show_counts = (col_count <= max_cols) and (len(self.data) < max_rows)
        else:
            show_counts = self.null_counts
        exceeds_info_cols = col_count > max_cols

        if self.verbose:
            self._verbose_repr(lines, ids, dtypes, show_counts)
        elif self.verbose is False:  # specifically set to False, not necessarily None
            self._non_verbose_repr(lines, ids)
        else:
            if exceeds_info_cols:
                self._non_verbose_repr(lines, ids)
            else:
                self._verbose_repr(lines, ids, dtypes, show_counts)

        # groupby dtype.name to collect e.g. Categorical columns
        counts = dtypes.value_counts().groupby(lambda x: x.name).sum()
        collected_dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
        lines.append(f"dtypes: {', '.join(collected_dtypes)}")

        if self.memory_usage:
            # append memory usage of df to display
            size_qualifier = ""
            if self.memory_usage == "deep":
                deep = True
            else:
                # size_qualifier is just a best effort; not guaranteed to catch
                # all cases (e.g., it misses categorical data even with object
                # categories)
                deep = False
                if "object" in counts or self.data.index._is_memory_usage_qualified():
                    size_qualifier = "+"
            mem_usage = self._get_mem_usage(deep=deep)
            lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
        fmt.buffer_put_lines(self.buf, lines)


class DataFrameInfo(BaseInfo):
    def _get_mem_usage(self, deep: bool) -> int:
        return self.data.memory_usage(index=True, deep=deep).sum()

    def _get_ids_and_dtypes(self) -> Tuple["Index", "Series"]:
        return self.data.columns, self.data.dtypes

    def _verbose_repr(
        self, lines: List[str], ids: "Index", dtypes: "Series", show_counts: bool
    ) -> None:
        col_count = len(ids)
        lines.append(f"Data columns (total {col_count} columns):")

        id_head = " # "
        column_head = "Column"
        col_space = 2

        max_col = max(len(pprint_thing(k)) for k in ids)
        len_column = len(pprint_thing(column_head))
        space = max(max_col, len_column) + col_space

        max_id = len(pprint_thing(col_count))
        len_id = len(pprint_thing(id_head))
        space_num = max(max_id, len_id) + col_space

        header = _put_str(id_head, space_num) + _put_str(column_head, space)
        if show_counts:
            counts = self.data.count()
            if col_count != len(counts):  # pragma: no cover
                raise AssertionError(
                    f"Columns must equal counts ({col_count} != {len(counts)})"
                )
            count_header = "Non-Null Count"
            len_count = len(count_header)
            non_null = " non-null"
            max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null)
            space_count = max(len_count, max_count) + col_space
            count_temp = "{count}" + non_null
        else:
            count_header = ""
            space_count = len(count_header)
            len_count = space_count
            count_temp = "{count}"

        dtype_header = "Dtype"
        len_dtype = len(dtype_header)
        max_dtypes = max(len(pprint_thing(k)) for k in dtypes)
        space_dtype = max(len_dtype, max_dtypes)
        header += _put_str(count_header, space_count) + _put_str(
            dtype_header, space_dtype
        )

        lines.append(header)
        lines.append(
            _put_str("-" * len_id, space_num)
            + _put_str("-" * len_column, space)
            + _put_str("-" * len_count, space_count)
            + _put_str("-" * len_dtype, space_dtype)
        )

        for i, col in enumerate(ids):
            dtype = dtypes.iloc[i]
            col = pprint_thing(col)

            line_no = _put_str(f" {i}", space_num)
            count = ""
            if show_counts:
                count = counts.iloc[i]

            lines.append(
                line_no
                + _put_str(col, space)
                + _put_str(count_temp.format(count=count), space_count)
                + _put_str(dtype, space_dtype)
            )

    def _non_verbose_repr(self, lines: List[str], ids: "Index") -> None:
        lines.append(ids._summary(name="Columns"))
"fix cli main_dir error" 2021-03-03 23:49:41 +01:00			`from abc import ABCMeta, abstractmethod`
			`import sys`
			`from typing import IO, TYPE_CHECKING, List, Optional, Tuple, Union`

			`from pandas._config import get_option`

			`from pandas._typing import Dtype, FrameOrSeries`

			`from pandas.core.indexes.api import Index`

			`from pandas.io.formats import format as fmt`
			`from pandas.io.formats.printing import pprint_thing`

			`if TYPE_CHECKING:`
			`from pandas.core.series import Series # noqa: F401`


			`def _put_str(s: Union[str, Dtype], space: int) -> str:`
			`"""`
			`Make string of specified length, padding to the right if necessary.`

			`Parameters`
			`----------`
			`s : Union[str, Dtype]`
			`String to be formatted.`
			`space : int`
			`Length to force string to be of.`

			`Returns`
			`-------`
			`str`
			`String coerced to given length.`

			`Examples`
			`--------`
			`>>> pd.io.formats.info._put_str("panda", 6)`
			`'panda '`
			`>>> pd.io.formats.info._put_str("panda", 4)`
			`'pand'`
			`"""`
			`return str(s)[:space].ljust(space)`


			`def _sizeof_fmt(num: Union[int, float], size_qualifier: str) -> str:`
			`"""`
			`Return size in human readable format.`

			`Parameters`
			`----------`
			`num : int`
			`Size in bytes.`
			`size_qualifier : str`
			`Either empty, or '+' (if lower bound).`

			`Returns`
			`-------`
			`str`
			`Size in human readable format.`

			`Examples`
			`--------`
			`>>> _sizeof_fmt(23028, '')`
			`'22.5 KB'`

			`>>> _sizeof_fmt(23028, '+')`
			`'22.5+ KB'`
			`"""`
			`for x in ["bytes", "KB", "MB", "GB", "TB"]:`
			`if num < 1024.0:`
			`return f"{num:3.1f}{size_qualifier} {x}"`
			`num /= 1024.0`
			`return f"{num:3.1f}{size_qualifier} PB"`


			`class BaseInfo(metaclass=ABCMeta):`
			`def __init__(`
			`self,`
			`data: FrameOrSeries,`
			`verbose: Optional[bool] = None,`
			`buf: Optional[IO[str]] = None,`
			`max_cols: Optional[int] = None,`
			`memory_usage: Optional[Union[bool, str]] = None,`
			`null_counts: Optional[bool] = None,`
			`):`
			`if buf is None: # pragma: no cover`
			`buf = sys.stdout`
			`if memory_usage is None:`
			`memory_usage = get_option("display.memory_usage")`

			`self.data = data`
			`self.verbose = verbose`
			`self.buf = buf`
			`self.max_cols = max_cols`
			`self.memory_usage = memory_usage`
			`self.null_counts = null_counts`

			`@abstractmethod`
			`def _get_mem_usage(self, deep: bool) -> int:`
			`"""`
			`Get memory usage in bytes.`

			`Parameters`
			`----------`
			`deep : bool`
			`If True, introspect the data deeply by interrogating object dtypes`
			`for system-level memory consumption, and include it in the returned`
			`values.`

			`Returns`
			`-------`
			`mem_usage : int`
			`Object's total memory usage in bytes.`
			`"""`
			`pass`

			`@abstractmethod`
			`def _get_ids_and_dtypes(self) -> Tuple["Index", "Series"]:`
			`"""`
			`Get column names and dtypes.`

			`Returns`
			`-------`
			`ids : Index`
			`DataFrame's column names.`
			`dtypes : Series`
			`Dtype of each of the DataFrame's columns.`
			`"""`
			`pass`

			`@abstractmethod`
			`def _verbose_repr(`
			`self, lines: List[str], ids: "Index", dtypes: "Series", show_counts: bool`
			`) -> None:`
			`"""`
			Append name, non-null count (optional), and dtype for each column to `lines`.

			`Parameters`
			`----------`
			`lines : List[str]`
			Lines that will contain `info` representation.
			`ids : Index`
			`The DataFrame's column names.`
			`dtypes : Series`
			`The DataFrame's columns' dtypes.`
			`show_counts : bool`
			If True, count of non-NA cells for each column will be appended to `lines`.
			`"""`
			`pass`

			`@abstractmethod`
			`def _non_verbose_repr(self, lines: List[str], ids: "Index") -> None:`
			`"""`
			Append short summary of columns' names to `lines`.

			`Parameters`
			`----------`
			`lines : List[str]`
			Lines that will contain `info` representation.
			`ids : Index`
			`The DataFrame's column names.`
			`"""`
			`pass`

			`def info(self) -> None:`
			`"""`
			`Print a concise summary of a %(klass)s.`

			`This method prints information about a %(klass)s including`
			`the index dtype%(type_sub)s, non-null values and memory usage.`

			`Parameters`
			`----------`
			`data : %(klass)s`
			`%(klass)s to print information about.`
			`verbose : bool, optional`
			`Whether to print the full summary. By default, the setting in`
			``pandas.options.display.max_info_columns`` is followed.
			`buf : writable buffer, defaults to sys.stdout`
			`Where to send the output. By default, the output is printed to`
			`sys.stdout. Pass a writable buffer if you need to further process`
			`the output.`
			`%(max_cols_sub)s`
			`memory_usage : bool, str, optional`
			`Specifies whether total memory usage of the %(klass)s`
			`elements (including the index) should be displayed. By default,`
			this follows the ``pandas.options.display.memory_usage`` setting.

			`True always show memory usage. False never shows memory usage.`
			`A value of 'deep' is equivalent to "True with deep introspection".`
			`Memory usage is shown in human-readable units (base-2`
			`representation). Without deep introspection a memory estimation is`
			`made based in column dtype and number of rows assuming values`
			`consume the same memory amount for corresponding dtypes. With deep`
			`memory introspection, a real memory usage calculation is performed`
			`at the cost of computational resources.`
			`null_counts : bool, optional`
			`Whether to show the non-null counts. By default, this is shown`
			`only if the %(klass)s is smaller than`
			``pandas.options.display.max_info_rows`` and
			``pandas.options.display.max_info_columns``. A value of True always
			`shows the counts, and False never shows the counts.`

			`Returns`
			`-------`
			`None`
			`This method prints a summary of a %(klass)s and returns None.`

			`See Also`
			`--------`
			`%(see_also_sub)s`

			`Examples`
			`--------`
			`%(examples_sub)s`
			`"""`
			`lines = []`

			`lines.append(str(type(self.data)))`
			`lines.append(self.data.index._summary())`

			`ids, dtypes = self._get_ids_and_dtypes()`
			`col_count = len(ids)`

			`if col_count == 0:`
			`lines.append(f"Empty {type(self.data).__name__}")`
			`fmt.buffer_put_lines(self.buf, lines)`
			`return`

			`# hack`
			`max_cols = self.max_cols`
			`if max_cols is None:`
			`max_cols = get_option("display.max_info_columns", col_count + 1)`

			`max_rows = get_option("display.max_info_rows", len(self.data) + 1)`

			`if self.null_counts is None:`
			`show_counts = (col_count <= max_cols) and (len(self.data) < max_rows)`
			`else:`
			`show_counts = self.null_counts`
			`exceeds_info_cols = col_count > max_cols`

			`if self.verbose:`
			`self._verbose_repr(lines, ids, dtypes, show_counts)`
			`elif self.verbose is False: # specifically set to False, not necessarily None`
			`self._non_verbose_repr(lines, ids)`
			`else:`
			`if exceeds_info_cols:`
			`self._non_verbose_repr(lines, ids)`
			`else:`
			`self._verbose_repr(lines, ids, dtypes, show_counts)`

			`# groupby dtype.name to collect e.g. Categorical columns`
			`counts = dtypes.value_counts().groupby(lambda x: x.name).sum()`
			`collected_dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]`
			`lines.append(f"dtypes: {', '.join(collected_dtypes)}")`

			`if self.memory_usage:`
			`# append memory usage of df to display`
			`size_qualifier = ""`
			`if self.memory_usage == "deep":`
			`deep = True`
			`else:`
			`# size_qualifier is just a best effort; not guaranteed to catch`
			`# all cases (e.g., it misses categorical data even with object`
			`# categories)`
			`deep = False`
			`if "object" in counts or self.data.index._is_memory_usage_qualified():`
			`size_qualifier = "+"`
			`mem_usage = self._get_mem_usage(deep=deep)`
			`lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")`
			`fmt.buffer_put_lines(self.buf, lines)`


			`class DataFrameInfo(BaseInfo):`
			`def _get_mem_usage(self, deep: bool) -> int:`
			`return self.data.memory_usage(index=True, deep=deep).sum()`

			`def _get_ids_and_dtypes(self) -> Tuple["Index", "Series"]:`
			`return self.data.columns, self.data.dtypes`

			`def _verbose_repr(`
			`self, lines: List[str], ids: "Index", dtypes: "Series", show_counts: bool`
			`) -> None:`
			`col_count = len(ids)`
			`lines.append(f"Data columns (total {col_count} columns):")`

			`id_head = " # "`
			`column_head = "Column"`
			`col_space = 2`

			`max_col = max(len(pprint_thing(k)) for k in ids)`
			`len_column = len(pprint_thing(column_head))`
			`space = max(max_col, len_column) + col_space`

			`max_id = len(pprint_thing(col_count))`
			`len_id = len(pprint_thing(id_head))`
			`space_num = max(max_id, len_id) + col_space`

			`header = _put_str(id_head, space_num) + _put_str(column_head, space)`
			`if show_counts:`
			`counts = self.data.count()`
			`if col_count != len(counts): # pragma: no cover`
			`raise AssertionError(`
			`f"Columns must equal counts ({col_count} != {len(counts)})"`
			`)`
			`count_header = "Non-Null Count"`
			`len_count = len(count_header)`
			`non_null = " non-null"`
			`max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null)`
			`space_count = max(len_count, max_count) + col_space`
			`count_temp = "{count}" + non_null`
			`else:`
			`count_header = ""`
			`space_count = len(count_header)`
			`len_count = space_count`
			`count_temp = "{count}"`

			`dtype_header = "Dtype"`
			`len_dtype = len(dtype_header)`
			`max_dtypes = max(len(pprint_thing(k)) for k in dtypes)`
			`space_dtype = max(len_dtype, max_dtypes)`
			`header += _put_str(count_header, space_count) + _put_str(`
			`dtype_header, space_dtype`
			`)`

			`lines.append(header)`
			`lines.append(`
			`_put_str("-" * len_id, space_num)`
			`+ _put_str("-" * len_column, space)`
			`+ _put_str("-" * len_count, space_count)`
			`+ _put_str("-" * len_dtype, space_dtype)`
			`)`

			`for i, col in enumerate(ids):`
			`dtype = dtypes.iloc[i]`
			`col = pprint_thing(col)`

			`line_no = _put_str(f" {i}", space_num)`
			`count = ""`
			`if show_counts:`
			`count = counts.iloc[i]`

			`lines.append(`
			`line_no`
			`+ _put_str(col, space)`
			`+ _put_str(count_temp.format(count=count), space_count)`
			`+ _put_str(dtype, space_dtype)`
			`)`

			`def _non_verbose_repr(self, lines: List[str], ids: "Index") -> None:`
			`lines.append(ids._summary(name="Columns"))`