mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Refactor df.info() for better readability
This commit is contained in:
parent
bc201e22dd
commit
dabb327b8b
@ -154,7 +154,8 @@ class DataFrame(NDFrame):
|
|||||||
_query_compiler=_query_compiler,
|
_query_compiler=_query_compiler,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_columns(self) -> pd.Index:
|
@property
|
||||||
|
def columns(self) -> pd.Index:
|
||||||
"""
|
"""
|
||||||
The column labels of the DataFrame.
|
The column labels of the DataFrame.
|
||||||
|
|
||||||
@ -182,8 +183,6 @@ class DataFrame(NDFrame):
|
|||||||
"""
|
"""
|
||||||
return self._query_compiler.columns
|
return self._query_compiler.columns
|
||||||
|
|
||||||
columns = property(_get_columns)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def empty(self) -> bool:
|
def empty(self) -> bool:
|
||||||
"""Determines if the DataFrame is empty.
|
"""Determines if the DataFrame is empty.
|
||||||
@ -808,7 +807,12 @@ class DataFrame(NDFrame):
|
|||||||
return f"{name}: {len(self)} entries{index_summary}"
|
return f"{name}: {len(self)} entries{index_summary}"
|
||||||
|
|
||||||
def info(
|
def info(
|
||||||
self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None
|
self,
|
||||||
|
verbose: Optional[bool] = None,
|
||||||
|
buf: Optional[StringIO] = None,
|
||||||
|
max_cols: Optional[int] = None,
|
||||||
|
memory_usage: Optional[bool] = None,
|
||||||
|
show_counts: Optional[bool] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Print a concise summary of a DataFrame.
|
Print a concise summary of a DataFrame.
|
||||||
@ -844,42 +848,41 @@ class DataFrame(NDFrame):
|
|||||||
|
|
||||||
lines = [str(type(self)), self._index_summary()]
|
lines = [str(type(self)), self._index_summary()]
|
||||||
|
|
||||||
if len(self.columns) == 0:
|
columns: pd.Index = self.columns
|
||||||
|
number_of_columns: int = len(columns)
|
||||||
|
|
||||||
|
if number_of_columns == 0:
|
||||||
lines.append(f"Empty {type(self).__name__}")
|
lines.append(f"Empty {type(self).__name__}")
|
||||||
fmt.buffer_put_lines(buf, lines)
|
fmt.buffer_put_lines(buf, lines)
|
||||||
return
|
return
|
||||||
|
|
||||||
cols = self.columns
|
|
||||||
col_count = len(self.columns)
|
|
||||||
|
|
||||||
# hack
|
# hack
|
||||||
if max_cols is None:
|
if max_cols is None:
|
||||||
max_cols = pd.get_option("display.max_info_columns", len(self.columns) + 1)
|
max_cols = pd.get_option("display.max_info_columns", number_of_columns + 1)
|
||||||
|
|
||||||
max_rows = pd.get_option("display.max_info_rows", len(self) + 1)
|
max_rows = pd.get_option("display.max_info_rows", len(self) + 1)
|
||||||
|
|
||||||
if null_counts is None:
|
if show_counts is None:
|
||||||
show_counts = (len(self.columns) <= max_cols) and (len(self) < max_rows)
|
show_counts = (number_of_columns <= max_cols) and (len(self) < max_rows)
|
||||||
else:
|
|
||||||
show_counts = null_counts
|
exceeds_info_cols = number_of_columns > max_cols
|
||||||
exceeds_info_cols = len(self.columns) > max_cols
|
|
||||||
|
|
||||||
# From pandas.DataFrame
|
# From pandas.DataFrame
|
||||||
def _put_str(s, space) -> str:
|
def _put_str(s, space) -> str:
|
||||||
return f"{s}"[:space].ljust(space)
|
return f"{s}"[:space].ljust(space)
|
||||||
|
|
||||||
def _verbose_repr() -> None:
|
def _verbose_repr(number_of_columns: int) -> None:
|
||||||
lines.append(f"Data columns (total {len(self.columns)} columns):")
|
lines.append(f"Data columns (total {number_of_columns} columns):")
|
||||||
|
|
||||||
id_head = " # "
|
id_head = " # "
|
||||||
column_head = "Column"
|
column_head = "Column"
|
||||||
col_space = 2
|
col_space = 2
|
||||||
|
|
||||||
max_col = max(len(pprint_thing(k)) for k in cols)
|
max_col = max(len(pprint_thing(k)) for k in columns)
|
||||||
len_column = len(pprint_thing(column_head))
|
len_column = len(pprint_thing(column_head))
|
||||||
space = max(max_col, len_column) + col_space
|
space = max(max_col, len_column) + col_space
|
||||||
|
|
||||||
max_id = len(pprint_thing(col_count))
|
max_id = len(pprint_thing(number_of_columns))
|
||||||
len_id = len(pprint_thing(id_head))
|
len_id = len(pprint_thing(id_head))
|
||||||
space_num = max(max_id, len_id) + col_space
|
space_num = max(max_id, len_id) + col_space
|
||||||
counts = None
|
counts = None
|
||||||
@ -887,9 +890,9 @@ class DataFrame(NDFrame):
|
|||||||
header = _put_str(id_head, space_num) + _put_str(column_head, space)
|
header = _put_str(id_head, space_num) + _put_str(column_head, space)
|
||||||
if show_counts:
|
if show_counts:
|
||||||
counts = self.count()
|
counts = self.count()
|
||||||
if len(cols) != len(counts): # pragma: no cover
|
if number_of_columns != len(counts): # pragma: no cover
|
||||||
raise AssertionError(
|
raise AssertionError(
|
||||||
f"Columns must equal counts ({len(cols):d} != {len(counts):d})"
|
f"Columns must equal counts ({number_of_columns:d} != {len(counts):d})"
|
||||||
)
|
)
|
||||||
count_header = "Non-Null Count"
|
count_header = "Non-Null Count"
|
||||||
len_count = len(count_header)
|
len_count = len(count_header)
|
||||||
@ -920,7 +923,7 @@ class DataFrame(NDFrame):
|
|||||||
)
|
)
|
||||||
|
|
||||||
dtypes = self.dtypes
|
dtypes = self.dtypes
|
||||||
for i, col in enumerate(self.columns):
|
for i, col in enumerate(columns):
|
||||||
dtype = dtypes.iloc[i]
|
dtype = dtypes.iloc[i]
|
||||||
col = pprint_thing(col)
|
col = pprint_thing(col)
|
||||||
|
|
||||||
@ -938,7 +941,7 @@ class DataFrame(NDFrame):
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _non_verbose_repr() -> None:
|
def _non_verbose_repr() -> None:
|
||||||
lines.append(self.columns._summary(name="Columns"))
|
lines.append(columns._summary(name="Columns"))
|
||||||
|
|
||||||
def _sizeof_fmt(num: float, size_qualifier: str) -> str:
|
def _sizeof_fmt(num: float, size_qualifier: str) -> str:
|
||||||
# returns size in human readable format
|
# returns size in human readable format
|
||||||
@ -949,14 +952,13 @@ class DataFrame(NDFrame):
|
|||||||
return f"{num:3.3f}{size_qualifier} PB"
|
return f"{num:3.3f}{size_qualifier} PB"
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
_verbose_repr()
|
_verbose_repr(number_of_columns)
|
||||||
elif verbose is False: # explicitly set to False, not merely left as None
|
elif verbose is False: # explicitly set to False, not merely left as None
|
||||||
_non_verbose_repr()
|
_non_verbose_repr()
|
||||||
else:
|
else:
|
||||||
if exceeds_info_cols:
|
_non_verbose_repr() if exceeds_info_cols else _verbose_repr(
|
||||||
_non_verbose_repr()
|
number_of_columns
|
||||||
else:
|
)
|
||||||
_verbose_repr()
|
|
||||||
|
|
||||||
# pandas 0.25.1 uses get_dtype_counts() here. This
|
# pandas 0.25.1 uses get_dtype_counts() here. This
|
||||||
# returns a Series with strings as the index NOT dtypes.
|
# returns a Series with strings as the index NOT dtypes.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user