Add Elasticsearch storage usage to df.info()

P. Sai Vinay 2020-11-16 21:37:28 +05:30 committed by GitHub
parent 789f8959bc
commit 56f6ba6c8b
4 changed files with 17 additions and 7 deletions

@@ -121,6 +121,7 @@ Data columns (total 27 columns):
26 timestamp 13059 non-null datetime64[ns]
dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)
memory usage: 80.0 bytes
+Elasticsearch storage usage: 5.043 MB
# Filtering of rows using comparisons
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
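
For reference, a minimal usage sketch (not part of the commit) showing where the new line appears for a user; the host and index name are assumptions for a local cluster with the Kibana "flights" sample data, and the exact constructor arguments vary by eland version:

>>> import eland as ed
>>> df = ed.DataFrame("localhost:9200", "flights")  # assumed host and index name
>>> df.info()  # the report now ends with an "Elasticsearch storage usage: ..." line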

@@ -88,7 +88,7 @@
"eland.dataframe.DataFrame"
]
},
"execution_count": 1,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -3122,7 +3122,8 @@
" 25 dayOfWeek 13059 non-null int64 \n",
" 26 timestamp 13059 non-null datetime64[ns]\n",
"dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n",
"memory usage: 64.0 bytes\n"
"memory usage: 64.000 bytes\n",
"Elasticsearch storage usage: 5.043 MB\n"
]
}
],
@@ -4065,7 +4066,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"version": "3.8.5"
},
"pycharm": {
"stem_cell": {

@@ -831,6 +831,7 @@ class DataFrame(NDFrame):
1 geoip.city_name 4094 non-null object
dtypes: object(2)
memory usage: ...
+Elasticsearch storage usage: ...
"""
if buf is None:  # pragma: no cover
    buf = sys.stdout
@@ -940,9 +941,9 @@
    # returns size in human readable format
    for x in ["bytes", "KB", "MB", "GB", "TB"]:
        if num < 1024.0:
-            return f"{num:3.1f}{size_qualifier} {x}"
+            return f"{num:3.3f}{size_qualifier} {x}"
        num /= 1024.0
-    return f"{num:3.1f}{size_qualifier} PB"
+    return f"{num:3.3f}{size_qualifier} PB"

if verbose:
    _verbose_repr()
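
The only change to the size helper above is the format precision: 3.1f becomes 3.3f, so sizes print with three decimal places (e.g. "5.043 MB" in the notebook output, where 3.1f would have shown "5.0 MB"). A standalone sketch of the same logic, renamed sizeof_fmt here for illustration; size_qualifier defaults to "" in this sketch, whereas info() passes it in:

def sizeof_fmt(num, size_qualifier=""):
    # Walk up the unit ladder, dividing by 1024 until the value fits,
    # and print three decimal places as in this commit.
    for unit in ["bytes", "KB", "MB", "GB", "TB"]:
        if num < 1024.0:
            return f"{num:3.3f}{size_qualifier} {unit}"
        num /= 1024.0
    return f"{num:3.3f}{size_qualifier} PB"

print(sizeof_fmt(5_288_000))  # -> "5.043 MB"
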
@@ -972,7 +973,13 @@
# TODO - this is different from pd.DataFrame as we shouldn't
# really hold much in memory. For now just approximate with getsizeof + ignore deep
mem_usage = sys.getsizeof(self)
-lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
+lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}")
+storage_usage = self._query_compiler._client.indices.stats(
+    index=self._query_compiler._index_pattern, metric=["store"]
+)["_all"]["total"]["store"]["size_in_bytes"]
+lines.append(
+    f"Elasticsearch storage usage: {_sizeof_fmt(storage_usage, size_qualifier)}\n"
+)
fmt.buffer_put_lines(buf, lines)
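
The storage figure itself comes from the index stats API: the call above requests only the "store" metric and reads size_in_bytes from the _all.total section of the response before passing it through the same _sizeof_fmt helper. A minimal sketch of the equivalent lookup with the elasticsearch-py client directly; the host URL and index pattern are placeholders, and client constructor arguments differ across versions:

from elasticsearch import Elasticsearch

# Assumed local cluster and index pattern; adjust for your setup.
es = Elasticsearch("http://localhost:9200")

stats = es.indices.stats(index="kibana_sample_data_flights", metric=["store"])
size_in_bytes = stats["_all"]["total"]["store"]["size_in_bytes"]
print(f"Elasticsearch storage usage: {size_in_bytes} bytes")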

@@ -2870,7 +2870,8 @@
" 25 dayOfWeek 13059 non-null int64 \n",
" 26 timestamp 13059 non-null datetime64[ns]\n",
"dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n",
"memory usage: 64.0 bytes\n"
"memory usage: 64.000 bytes\n",
"Elasticsearch storage usage: 5.043 MB\n"
]
}
],