Add Elasticsearch storage usage to df.info()

This commit is contained in:
P. Sai Vinay 2020-11-16 21:37:28 +05:30 committed by GitHub
parent 789f8959bc
commit 56f6ba6c8b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 7 deletions

View File

@@ -121,6 +121,7 @@ Data columns (total 27 columns):
26 timestamp 13059 non-null datetime64[ns] 26 timestamp 13059 non-null datetime64[ns]
dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17) dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)
memory usage: 80.0 bytes memory usage: 80.0 bytes
Elasticsearch storage usage: 5.043 MB
# Filtering of rows using comparisons # Filtering of rows using comparisons
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head() >>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()

View File

@@ -88,7 +88,7 @@
"eland.dataframe.DataFrame" "eland.dataframe.DataFrame"
] ]
}, },
"execution_count": 1, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@@ -3122,7 +3122,8 @@
" 25 dayOfWeek 13059 non-null int64 \n", " 25 dayOfWeek 13059 non-null int64 \n",
" 26 timestamp 13059 non-null datetime64[ns]\n", " 26 timestamp 13059 non-null datetime64[ns]\n",
"dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n", "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n",
"memory usage: 64.0 bytes\n" "memory usage: 64.000 bytes\n",
"Elasticsearch storage usage: 5.043 MB\n"
] ]
} }
], ],
@@ -4065,7 +4066,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.6" "version": "3.8.5"
}, },
"pycharm": { "pycharm": {
"stem_cell": { "stem_cell": {

View File

@@ -831,6 +831,7 @@ class DataFrame(NDFrame):
1 geoip.city_name 4094 non-null object 1 geoip.city_name 4094 non-null object
dtypes: object(2) dtypes: object(2)
memory usage: ... memory usage: ...
Elasticsearch storage usage: ...
""" """
if buf is None: # pragma: no cover if buf is None: # pragma: no cover
buf = sys.stdout buf = sys.stdout
@@ -940,9 +941,9 @@ class DataFrame(NDFrame):
# returns size in human readable format # returns size in human readable format
for x in ["bytes", "KB", "MB", "GB", "TB"]: for x in ["bytes", "KB", "MB", "GB", "TB"]:
if num < 1024.0: if num < 1024.0:
return f"{num:3.1f}{size_qualifier} {x}" return f"{num:3.3f}{size_qualifier} {x}"
num /= 1024.0 num /= 1024.0
return f"{num:3.1f}{size_qualifier} PB" return f"{num:3.3f}{size_qualifier} PB"
if verbose: if verbose:
_verbose_repr() _verbose_repr()
@@ -972,7 +973,13 @@ class DataFrame(NDFrame):
# TODO - this is different from pd.DataFrame as we shouldn't # TODO - this is different from pd.DataFrame as we shouldn't
# really hold much in memory. For now just approximate with getsizeof + ignore deep # really hold much in memory. For now just approximate with getsizeof + ignore deep
mem_usage = sys.getsizeof(self) mem_usage = sys.getsizeof(self)
lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}")
storage_usage = self._query_compiler._client.indices.stats(
index=self._query_compiler._index_pattern, metric=["store"]
)["_all"]["total"]["store"]["size_in_bytes"]
lines.append(
f"Elasticsearch storage usage: {_sizeof_fmt(storage_usage,size_qualifier)}\n"
)
fmt.buffer_put_lines(buf, lines) fmt.buffer_put_lines(buf, lines)

View File

@@ -2870,7 +2870,8 @@
" 25 dayOfWeek 13059 non-null int64 \n", " 25 dayOfWeek 13059 non-null int64 \n",
" 26 timestamp 13059 non-null datetime64[ns]\n", " 26 timestamp 13059 non-null datetime64[ns]\n",
"dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n", "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n",
"memory usage: 64.0 bytes\n" "memory usage: 64.000 bytes\n",
"Elasticsearch storage usage: 5.043 MB\n"
] ]
} }
], ],