From dabb327b8bbc638642423407df41791ab67b03a6 Mon Sep 17 00:00:00 2001 From: "P. Sai Vinay" <33659563+V1NAY8@users.noreply.github.com> Date: Wed, 29 Sep 2021 01:42:29 +0530 Subject: [PATCH] Refactor df.info() for better readability --- eland/dataframe.py | 56 ++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/eland/dataframe.py b/eland/dataframe.py index 5c4fa3a..d6857bd 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -154,7 +154,8 @@ class DataFrame(NDFrame): _query_compiler=_query_compiler, ) - def _get_columns(self) -> pd.Index: + @property + def columns(self) -> pd.Index: """ The column labels of the DataFrame. @@ -182,8 +183,6 @@ class DataFrame(NDFrame): """ return self._query_compiler.columns - columns = property(_get_columns) - @property def empty(self) -> bool: """Determines if the DataFrame is empty. @@ -808,7 +807,12 @@ class DataFrame(NDFrame): return f"{name}: {len(self)} entries{index_summary}" def info( - self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None + self, + verbose: Optional[bool] = None, + buf: Optional[StringIO] = None, + max_cols: Optional[int] = None, + memory_usage: Optional[bool] = None, + show_counts: Optional[bool] = None, ) -> None: """ Print a concise summary of a DataFrame. @@ -844,42 +848,41 @@ class DataFrame(NDFrame): lines = [str(type(self)), self._index_summary()] - if len(self.columns) == 0: + columns: pd.Index = self.columns + number_of_columns: int = len(columns) + + if number_of_columns == 0: lines.append(f"Empty {type(self).__name__}") fmt.buffer_put_lines(buf, lines) return - cols = self.columns - col_count = len(self.columns) - # hack if max_cols is None: - max_cols = pd.get_option("display.max_info_columns", len(self.columns) + 1) + max_cols = pd.get_option("display.max_info_columns", number_of_columns + 1) max_rows = pd.get_option("display.max_info_rows", len(self) + 1) - if null_counts is None: - show_counts = (len(self.columns) <= max_cols) and (len(self) < max_rows) - else: - show_counts = null_counts - exceeds_info_cols = len(self.columns) > max_cols + if show_counts is None: + show_counts = (number_of_columns <= max_cols) and (len(self) < max_rows) + + exceeds_info_cols = number_of_columns > max_cols # From pandas.DataFrame def _put_str(s, space) -> str: return f"{s}"[:space].ljust(space) - def _verbose_repr() -> None: - lines.append(f"Data columns (total {len(self.columns)} columns):") + def _verbose_repr(number_of_columns: int) -> None: + lines.append(f"Data columns (total {number_of_columns} columns):") id_head = " # " column_head = "Column" col_space = 2 - max_col = max(len(pprint_thing(k)) for k in cols) + max_col = max(len(pprint_thing(k)) for k in columns) len_column = len(pprint_thing(column_head)) space = max(max_col, len_column) + col_space - max_id = len(pprint_thing(col_count)) + max_id = len(pprint_thing(number_of_columns)) len_id = len(pprint_thing(id_head)) space_num = max(max_id, len_id) + col_space counts = None @@ -887,9 +890,9 @@ class DataFrame(NDFrame): header = _put_str(id_head, space_num) + _put_str(column_head, space) if show_counts: counts = self.count() - if len(cols) != len(counts): # pragma: no cover + if number_of_columns != len(counts): # pragma: no cover raise AssertionError( - f"Columns must equal counts ({len(cols):d} != {len(counts):d})" + f"Columns must equal counts ({number_of_columns:d} != {len(counts):d})" ) count_header = "Non-Null Count" len_count = len(count_header) @@ -920,7 +923,7 @@ class DataFrame(NDFrame): ) dtypes = self.dtypes - for i, col in enumerate(self.columns): + for i, col in enumerate(columns): dtype = dtypes.iloc[i] col = pprint_thing(col) @@ -938,7 +941,7 @@ class DataFrame(NDFrame): ) def _non_verbose_repr() -> None: - lines.append(self.columns._summary(name="Columns")) + lines.append(columns._summary(name="Columns")) def _sizeof_fmt(num: float, size_qualifier: str) -> str: # returns size in human readable format @@ -949,14 +952,13 @@ class DataFrame(NDFrame): return f"{num:3.3f}{size_qualifier} PB" if verbose: - _verbose_repr() + _verbose_repr(number_of_columns) elif verbose is False: # specifically set to False, not nesc None _non_verbose_repr() else: - if exceeds_info_cols: - _non_verbose_repr() - else: - _verbose_repr() + _non_verbose_repr() if exceeds_info_cols else _verbose_repr( + number_of_columns + ) # pandas 0.25.1 uses get_dtype_counts() here. This # returns a Series with strings as the index NOT dtypes.