mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Refactoring of plotting + fixes for multiple charts (#117)
* Refactoring of plotting + fixes for multiple charts Updates to plotting inline with pandas 0.25.3 Enables plotting of multiple histograms on the same figure. * Fix to setup.py to allow submodules + reformat of code and better Series.hist docs
This commit is contained in:
parent
46b428d59b
commit
409cb043c8
@ -753,7 +753,7 @@
|
|||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<eland.index.Index at 0x11ffd7f90>"
|
"<eland.index.Index at 0x11a122310>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 17,
|
"execution_count": 17,
|
||||||
@ -2707,7 +2707,7 @@
|
|||||||
" <td>410.008918</td>\n",
|
" <td>410.008918</td>\n",
|
||||||
" <td>2470.545974</td>\n",
|
" <td>2470.545974</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>251.698552</td>\n",
|
" <td>251.682199</td>\n",
|
||||||
" <td>1.000000</td>\n",
|
" <td>1.000000</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
@ -2724,7 +2724,7 @@
|
|||||||
" <td>9735.660463</td>\n",
|
" <td>9735.660463</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>720.534532</td>\n",
|
" <td>720.534532</td>\n",
|
||||||
" <td>4.254967</td>\n",
|
" <td>4.288079</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>max</th>\n",
|
" <th>max</th>\n",
|
||||||
@ -2745,9 +2745,9 @@
|
|||||||
"mean 628.253689 7092.142457 ... 511.127842 2.835975\n",
|
"mean 628.253689 7092.142457 ... 511.127842 2.835975\n",
|
||||||
"std 266.386661 4578.263193 ... 334.741135 1.939365\n",
|
"std 266.386661 4578.263193 ... 334.741135 1.939365\n",
|
||||||
"min 100.020531 0.000000 ... 0.000000 0.000000\n",
|
"min 100.020531 0.000000 ... 0.000000 0.000000\n",
|
||||||
"25% 410.008918 2470.545974 ... 251.698552 1.000000\n",
|
"25% 410.008918 2470.545974 ... 251.682199 1.000000\n",
|
||||||
"50% 640.387285 7612.072403 ... 503.148975 3.000000\n",
|
"50% 640.387285 7612.072403 ... 503.148975 3.000000\n",
|
||||||
"75% 842.233478 9735.660463 ... 720.534532 4.254967\n",
|
"75% 842.233478 9735.660463 ... 720.534532 4.288079\n",
|
||||||
"max 1199.729004 19881.482422 ... 1902.901978 6.000000\n",
|
"max 1199.729004 19881.482422 ... 1902.901978 6.000000\n",
|
||||||
"\n",
|
"\n",
|
||||||
"[8 rows x 7 columns]"
|
"[8 rows x 7 columns]"
|
||||||
|
@ -1023,21 +1023,21 @@
|
|||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>25%</th>\n",
|
" <th>25%</th>\n",
|
||||||
" <td>14215.123301</td>\n",
|
" <td>14221.960201</td>\n",
|
||||||
" <td>1.000000</td>\n",
|
" <td>1.000000</td>\n",
|
||||||
" <td>1.250100</td>\n",
|
" <td>1.250000</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>50%</th>\n",
|
" <th>50%</th>\n",
|
||||||
" <td>15654.828552</td>\n",
|
" <td>15671.712170</td>\n",
|
||||||
" <td>2.000000</td>\n",
|
" <td>2.000000</td>\n",
|
||||||
" <td>2.510000</td>\n",
|
" <td>2.510000</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>75%</th>\n",
|
" <th>75%</th>\n",
|
||||||
" <td>17218.003301</td>\n",
|
" <td>17214.376367</td>\n",
|
||||||
" <td>6.570576</td>\n",
|
" <td>6.615042</td>\n",
|
||||||
" <td>4.210000</td>\n",
|
" <td>4.210533</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>max</th>\n",
|
" <th>max</th>\n",
|
||||||
@ -1055,9 +1055,9 @@
|
|||||||
"mean 15590.776680 7.464000 4.103233\n",
|
"mean 15590.776680 7.464000 4.103233\n",
|
||||||
"std 1764.025160 85.924387 20.104873\n",
|
"std 1764.025160 85.924387 20.104873\n",
|
||||||
"min 12347.000000 -9360.000000 0.000000\n",
|
"min 12347.000000 -9360.000000 0.000000\n",
|
||||||
"25% 14215.123301 1.000000 1.250100\n",
|
"25% 14221.960201 1.000000 1.250000\n",
|
||||||
"50% 15654.828552 2.000000 2.510000\n",
|
"50% 15671.712170 2.000000 2.510000\n",
|
||||||
"75% 17218.003301 6.570576 4.210000\n",
|
"75% 17214.376367 6.615042 4.210533\n",
|
||||||
"max 18239.000000 2880.000000 950.990000"
|
"max 18239.000000 2880.000000 950.990000"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -34,7 +34,6 @@ In general, the data resides in elasticsearch and not in memory, which allows el
|
|||||||
* :doc:`reference/general_utility_functions`
|
* :doc:`reference/general_utility_functions`
|
||||||
* :doc:`reference/dataframe`
|
* :doc:`reference/dataframe`
|
||||||
* :doc:`reference/series`
|
* :doc:`reference/series`
|
||||||
* :doc:`reference/index`
|
|
||||||
* :doc:`reference/indexing`
|
* :doc:`reference/indexing`
|
||||||
|
|
||||||
* :doc:`implementation/index`
|
* :doc:`implementation/index`
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 45 KiB |
BIN
docs/source/reference/api/eland-Series-hist-2.png
Normal file
BIN
docs/source/reference/api/eland-Series-hist-2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 16 KiB |
@ -4,5 +4,5 @@ eland.Series.hist
|
|||||||
.. currentmodule:: eland
|
.. currentmodule:: eland
|
||||||
|
|
||||||
.. automethod:: Series.hist
|
.. automethod:: Series.hist
|
||||||
.. image:: eland-Series-hist-1.png
|
.. image:: eland-Series-hist-2.png
|
||||||
|
|
||||||
|
@ -23,7 +23,6 @@ from eland.field_mappings import *
|
|||||||
from eland.query import *
|
from eland.query import *
|
||||||
from eland.operations import *
|
from eland.operations import *
|
||||||
from eland.query_compiler import *
|
from eland.query_compiler import *
|
||||||
from eland.plotting import *
|
|
||||||
from eland.ndframe import *
|
from eland.ndframe import *
|
||||||
from eland.series import *
|
from eland.series import *
|
||||||
from eland.dataframe import *
|
from eland.dataframe import *
|
||||||
|
@ -439,7 +439,8 @@ class FieldMappings:
|
|||||||
|
|
||||||
# extract relevant fields and convert to dict
|
# extract relevant fields and convert to dict
|
||||||
# <class 'dict'>: {'category.keyword': 'category', 'currency': 'currency', ...
|
# <class 'dict'>: {'category.keyword': 'category', 'currency': 'currency', ...
|
||||||
return OrderedDict(aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data'])
|
return OrderedDict(
|
||||||
|
aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data'])
|
||||||
|
|
||||||
def date_field_format(self, es_field_name):
|
def date_field_format(self, es_field_name):
|
||||||
"""
|
"""
|
||||||
|
@ -1,133 +0,0 @@
|
|||||||
# Copyright 2019 Elasticsearch BV
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
import pandas.core.common as com
|
|
||||||
from pandas.core.dtypes.generic import (
|
|
||||||
ABCIndexClass)
|
|
||||||
from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
|
|
||||||
|
|
||||||
|
|
||||||
def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None,
|
|
||||||
xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
|
|
||||||
sharey=False, figsize=None, layout=None, bins=10, **kwds):
|
|
||||||
"""
|
|
||||||
See :pandas_api_docs:`pandas.DataFrame.hist` for usage.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1``
|
|
||||||
|
|
||||||
Ideally, we'd call `hist_frame` directly with histogram data,
|
|
||||||
but weights are applied to ALL series. For example, we can
|
|
||||||
plot a histogram of pre-binned data via:
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
counts, bins = np.histogram(data)
|
|
||||||
plt.hist(bins[:-1], bins, weights=counts)
|
|
||||||
|
|
||||||
However,
|
|
||||||
|
|
||||||
.. code-block:: python
|
|
||||||
|
|
||||||
ax.hist(data[col].dropna().values, bins=bins, **kwds)
|
|
||||||
|
|
||||||
is for ``[col]`` and weights are a single array.
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
>>> df = ed.DataFrame('localhost', 'flights')
|
|
||||||
>>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) # doctest: +SKIP
|
|
||||||
"""
|
|
||||||
# Start with empty pandas data frame derived from
|
|
||||||
ed_df_bins, ed_df_weights = ed_df._hist(num_bins=bins)
|
|
||||||
|
|
||||||
if by is not None:
|
|
||||||
raise NotImplementedError("TODO")
|
|
||||||
|
|
||||||
if column is not None:
|
|
||||||
if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
|
|
||||||
column = [column]
|
|
||||||
ed_df_bins = ed_df_bins[column]
|
|
||||||
ed_df_weights = ed_df_weights[column]
|
|
||||||
naxes = len(ed_df_bins.columns)
|
|
||||||
|
|
||||||
fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False,
|
|
||||||
sharex=sharex, sharey=sharey, figsize=figsize,
|
|
||||||
layout=layout)
|
|
||||||
_axes = _flatten(axes)
|
|
||||||
|
|
||||||
for i, col in enumerate(com.try_sort(ed_df_bins.columns)):
|
|
||||||
ax = _axes[i]
|
|
||||||
|
|
||||||
# pandas code
|
|
||||||
# pandas / plotting / _core.py: 2410
|
|
||||||
# ax.hist(data[col].dropna().values, bins=bins, **kwds)
|
|
||||||
|
|
||||||
ax.hist(ed_df_bins[col][:-1], bins=ed_df_bins[col], weights=ed_df_weights[col], **kwds)
|
|
||||||
ax.set_title(col)
|
|
||||||
ax.grid(grid)
|
|
||||||
|
|
||||||
_set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
|
|
||||||
ylabelsize=ylabelsize, yrot=yrot)
|
|
||||||
fig.subplots_adjust(wspace=0.3, hspace=0.3)
|
|
||||||
|
|
||||||
return axes
|
|
||||||
|
|
||||||
def ed_hist_series(ed_s, column=None, by=None, grid=True, xlabelsize=None,
|
|
||||||
xrot=None, ylabelsize=None, yrot=None, ax=None,
|
|
||||||
figsize=None, layout=None, bins=10, **kwds):
|
|
||||||
"""
|
|
||||||
See :pandas_api_docs:`pandas.Series.hist` for usage.
|
|
||||||
|
|
||||||
Notes
|
|
||||||
-----
|
|
||||||
Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1``
|
|
||||||
|
|
||||||
|
|
||||||
Examples
|
|
||||||
--------
|
|
||||||
>>> df = ed.DataFrame('localhost', 'ecommerce')
|
|
||||||
>>> hist = df['taxful_total_price'].hist(figsize=[10,10]) # doctest: +SKIP
|
|
||||||
"""
|
|
||||||
# this is mostly the same code as above, it has been split out
|
|
||||||
# to a series specific method now so we can expand series plotting
|
|
||||||
|
|
||||||
|
|
||||||
# Start with empty pandas data frame derived from
|
|
||||||
ed_s_bins, ed_s_weights = ed_s._hist(num_bins=bins)
|
|
||||||
|
|
||||||
if by is not None:
|
|
||||||
raise NotImplementedError("TODO")
|
|
||||||
|
|
||||||
# raise error rather than warning when series is not plottable
|
|
||||||
if ed_s_bins.empty:
|
|
||||||
raise ValueError("{} has no meaningful histogram interval. All values 0."
|
|
||||||
.format(ed_s.name))
|
|
||||||
|
|
||||||
naxes = len(ed_s_bins.columns)
|
|
||||||
fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, figsize=figsize, layout=layout)
|
|
||||||
_axes = _flatten(axes)
|
|
||||||
for i, col in enumerate(com.try_sort(ed_s_bins.columns)):
|
|
||||||
ax = _axes[i]
|
|
||||||
ax.hist(ed_s_bins[col][:-1], bins=ed_s_bins[col], weights=ed_s_weights[col], **kwds)
|
|
||||||
ax.grid(grid)
|
|
||||||
|
|
||||||
_set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
|
|
||||||
ylabelsize=ylabelsize, yrot=yrot)
|
|
||||||
fig.subplots_adjust(wspace=0.3, hspace=0.3)
|
|
||||||
|
|
||||||
return axes
|
|
28
eland/plotting/__init__.py
Normal file
28
eland/plotting/__init__.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""
|
||||||
|
Public plotting API
|
||||||
|
|
||||||
|
Based on https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/plotting/__init__.py
|
||||||
|
but only supporting a subset of plotting methods (for now).
|
||||||
|
"""
|
||||||
|
from eland.plotting._core import (
|
||||||
|
ed_hist_frame,
|
||||||
|
ed_hist_series,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Names exported by ``from eland.plotting import *``.
# NOTE: the variable must be spelled ``__all__`` (two trailing underscores);
# any other spelling is just an ordinary module attribute that the import
# machinery ignores.
__all__ = [
    "ed_hist_frame",
    "ed_hist_series",
]
|
127
eland/plotting/_core.py
Normal file
127
eland/plotting/_core.py
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
from eland.plotting._matplotlib.hist import hist_series, hist_frame
|
||||||
|
|
||||||
|
|
||||||
|
def ed_hist_series(
    self,
    by=None,
    ax=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    figsize=None,
    bins=10,
    **kwds
):
    """
    Draw histogram of the input series using matplotlib.

    This is a thin wrapper: every argument is forwarded unchanged to the
    matplotlib backend function ``hist_series``.

    See :pandas_api_docs:`pandas.Series.hist` for usage.

    Notes
    -----
    Derived from ``pandas.plotting._core.hist_series 0.25.3``

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> df = ed.DataFrame('localhost', 'flights')
    >>> df[df.OriginWeather == 'Sunny']['FlightTimeMin'].hist(alpha=0.5, density=True) # doctest: +SKIP
    >>> df[df.OriginWeather != 'Sunny']['FlightTimeMin'].hist(alpha=0.5, density=True) # doctest: +SKIP
    >>> plt.show() # doctest: +SKIP

    """
    # Collect the named options once, then hand them to the backend together
    # with any extra keyword arguments supplied by the caller.
    plot_options = dict(
        by=by,
        ax=ax,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        figsize=figsize,
        bins=bins,
    )
    return hist_series(self, **plot_options, **kwds)
|
||||||
|
|
||||||
|
|
||||||
|
def ed_hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    **kwds
):
    """
    Make histograms of the DataFrame's columns.

    This is a thin wrapper: every argument is forwarded unchanged to the
    matplotlib backend function ``hist_frame``.

    See :pandas_api_docs:`pandas.DataFrame.hist` for usage.

    Notes
    -----
    Derived from ``pandas.plotting._core.hist_frame 0.25.3``

    Ideally, we'd call the pandas method `hist_frame` directly
    with histogram data, but weights are applied to ALL series.
    For example, we can plot a histogram of pre-binned data via:

    .. code-block:: python

        counts, bins = np.histogram(data)
        plt.hist(bins[:-1], bins, weights=counts)

    However,

    .. code-block:: python

        ax.hist(data[col].dropna().values, bins=bins, **kwds)

    is for ``[col]`` and weights are a single array.

    Examples
    --------
    >>> df = ed.DataFrame('localhost', 'flights')
    >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) # doctest: +SKIP
    """
    # Collect the named options once, then hand them to the backend together
    # with any extra keyword arguments supplied by the caller.
    plot_options = dict(
        column=column,
        by=by,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        ax=ax,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
        bins=bins,
    )
    return hist_frame(data, **plot_options, **kwds)
|
40
eland/plotting/_matplotlib/__init__.py
Normal file
40
eland/plotting/_matplotlib/__init__.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""
|
||||||
|
Public plotting API
|
||||||
|
|
||||||
|
Based on https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/plotting/__init__.py
|
||||||
|
but only supporting a subset of plotting methods (for now).
|
||||||
|
"""
|
||||||
|
from eland.plotting._matplotlib.hist import (
|
||||||
|
hist_frame,
|
||||||
|
hist_series,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Names exported by ``from eland.plotting._matplotlib import *``.
# NOTE: the variable must be spelled ``__all__`` (two trailing underscores);
# any other spelling is just an ordinary module attribute that the import
# machinery ignores.
__all__ = [
    "hist_frame",
    "hist_series",
]
|
131
eland/plotting/_matplotlib/hist.py
Normal file
131
eland/plotting/_matplotlib/hist.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
# Copyright 2019 Elasticsearch BV
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas.core.common as com
|
||||||
|
from pandas.core.dtypes.generic import ABCIndexClass
|
||||||
|
from pandas.plotting._matplotlib import converter
|
||||||
|
from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
|
||||||
|
|
||||||
|
|
||||||
|
def hist_series(
    self,
    by=None,
    ax=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    figsize=None,
    bins=10,
    **kwds
):
    """
    Plot a histogram of a single series from pre-binned data.

    The bin edges and per-bin weights come from ``self._hist(num_bins=bins)``
    and are drawn with ``ax.hist(edges[:-1], bins=edges, weights=weights)``.

    Parameters
    ----------
    self
        Object providing ``_hist(num_bins=...)``.
    by
        Must be ``None``; grouping is not implemented.
    ax
        Axes to draw on. Defaults to the current axes of the target figure.
    grid
        Passed to ``ax.grid``.
    xlabelsize, xrot, ylabelsize, yrot
        Tick label properties, applied through ``_set_ticks_props``.
    figsize
        Figure size in inches; the target figure is resized if it differs.
    bins
        Number of histogram bins requested from ``_hist``.
    **kwds
        ``figure`` (optional) selects the target figure; everything else is
        passed on to ``ax.hist``.

    Returns
    -------
    The axes the histogram was drawn on.

    Raises
    ------
    NotImplementedError
        If ``by`` is not ``None``.
    ValueError
        If a ``layout`` keyword is supplied.
    AssertionError
        If ``ax`` does not belong to the target figure.
    """
    import matplotlib.pyplot as plt

    # Grouped histograms are not implemented yet.
    if by is not None:
        raise NotImplementedError("TODO")

    if kwds.get("layout", None) is not None:
        raise ValueError("The 'layout' keyword is not supported when 'by' is None")

    # hack until the plotting interface is a bit more unified
    # The fallback figure is resolved before looking at ``kwds`` so that the
    # pyplot state is touched exactly as before, even if ``figure`` is given.
    if plt.get_fignums():
        fallback_fig = plt.gcf()
    else:
        fallback_fig = plt.figure(figsize=figsize)
    fig = kwds.pop("figure", fallback_fig)

    if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()):
        fig.set_size_inches(*figsize, forward=True)

    if ax is None:
        ax = fig.gca()
    elif ax.get_figure() != fig:
        raise AssertionError("passed axis not bound to passed figure")

    bin_edges, bin_weights = self._hist(num_bins=bins)
    # As this is a series, squeeze the results down to arrays
    bin_edges = bin_edges.squeeze()
    bin_weights = bin_weights.squeeze()

    # Pre-binned data: one sample per bin (its left edge), weighted by count.
    ax.hist(bin_edges[:-1], bins=bin_edges, weights=bin_weights, **kwds)
    ax.grid(grid)
    axes = np.array([ax])

    _set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )

    # Unwrap a one-element 1-D array so callers get the axes object itself.
    if hasattr(axes, "ndim") and axes.ndim == 1 and len(axes) == 1:
        return axes[0]
    return axes
|
||||||
|
|
||||||
|
|
||||||
|
def hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    **kwds
):
    """
    Plot one histogram per column from pre-binned data.

    The bin edges and per-bin weights come from ``data._hist(num_bins=bins)``;
    each column is drawn on its own subplot with
    ``ax.hist(edges[:-1], bins=edges, weights=weights)``.

    Parameters
    ----------
    data
        Object providing ``_hist(num_bins=...)``.
    column
        Column label, or list/array/index of labels, restricting what is
        plotted. ``None`` plots every column returned by ``_hist``.
    by
        Must be ``None``; grouping is not implemented.
    grid
        Passed to each subplot's ``grid``.
    xlabelsize, xrot, ylabelsize, yrot
        Tick label properties, applied through ``_set_ticks_props``.
    ax, sharex, sharey, figsize, layout
        Passed to ``_subplots`` when creating the subplot grid.
    bins
        Number of histogram bins requested from ``_hist``.
    **kwds
        Passed on to each ``ax.hist`` call.

    Returns
    -------
    The array of axes created by ``_subplots``.

    Raises
    ------
    NotImplementedError
        If ``by`` is not ``None``.
    ValueError
        If there are no columns to plot.
    """
    # Bin edges and weights, one column per plottable field.
    ed_df_bins, ed_df_weights = data._hist(num_bins=bins)

    converter._WARN = False  # no warning for pandas plots
    if by is not None:
        raise NotImplementedError("TODO")

    if column is not None:
        if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
            column = [column]
        ed_df_bins = ed_df_bins[column]
        ed_df_weights = ed_df_weights[column]
    naxes = len(ed_df_bins.columns)

    if naxes == 0:
        raise ValueError("hist method requires numerical columns, nothing to plot.")

    fig, axes = _subplots(
        naxes=naxes,
        ax=ax,
        squeeze=False,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
    )
    _axes = _flatten(axes)

    # Iterate over the columns that were actually binned (and kept by the
    # ``column`` filter), not ``data.columns``: the subplot grid was sized
    # from ``ed_df_bins`` and only those columns exist in the bin/weight
    # frames, so iterating ``data.columns`` can raise KeyError/IndexError.
    for i, col in enumerate(com.try_sort(ed_df_bins.columns)):
        ax = _axes[i]
        # Pre-binned data: one sample per bin (its left edge), weighted by count.
        ax.hist(ed_df_bins[col][:-1], bins=ed_df_bins[col], weights=ed_df_weights[col], **kwds)
        ax.set_title(col)
        ax.grid(grid)

    _set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )
    fig.subplots_adjust(wspace=0.3, hspace=0.3)

    return axes
|
@ -149,7 +149,6 @@ class Query:
|
|||||||
if interval != 0:
|
if interval != 0:
|
||||||
self._aggs[name] = agg
|
self._aggs[name] = agg
|
||||||
|
|
||||||
|
|
||||||
def to_search_body(self):
|
def to_search_body(self):
|
||||||
if self._query.empty():
|
if self._query.empty():
|
||||||
if self._aggs:
|
if self._aggs:
|
||||||
|
@ -732,11 +732,13 @@ def elasticsearch_date_to_pandas_date(value: Union[int, str], date_format: str)
|
|||||||
# TODO investigate how we could generate this just once for a bulk read.
|
# TODO investigate how we could generate this just once for a bulk read.
|
||||||
return pd.to_datetime(value)
|
return pd.to_datetime(value)
|
||||||
|
|
||||||
|
|
||||||
class FieldMappingCache:
|
class FieldMappingCache:
|
||||||
"""
|
"""
|
||||||
Very simple dict cache for field mappings. This improves performance > 3 times on large datasets as
|
Very simple dict cache for field mappings. This improves performance > 3 times on large datasets as
|
||||||
DataFrame access is slower than dict access.
|
DataFrame access is slower than dict access.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, mappings):
|
def __init__(self, mappings):
|
||||||
self._mappings = mappings
|
self._mappings = mappings
|
||||||
|
|
||||||
@ -764,4 +766,3 @@ class FieldMappingCache:
|
|||||||
self._date_field_format[es_field_name] = es_date_field_format
|
self._date_field_format[es_field_name] = es_date_field_format
|
||||||
|
|
||||||
return es_date_field_format
|
return es_date_field_format
|
||||||
|
|
||||||
|
@ -37,11 +37,11 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas.io.common import _expand_user, _stringify_path
|
from pandas.io.common import _expand_user, _stringify_path
|
||||||
|
|
||||||
|
import eland.plotting
|
||||||
from eland import NDFrame
|
from eland import NDFrame
|
||||||
from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber
|
from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber
|
||||||
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
|
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
|
||||||
from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
|
from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
|
||||||
import eland.plotting as gfx
|
|
||||||
|
|
||||||
|
|
||||||
def _get_method_name():
|
def _get_method_name():
|
||||||
@ -108,7 +108,7 @@ class Series(NDFrame):
|
|||||||
index_field=index_field,
|
index_field=index_field,
|
||||||
query_compiler=query_compiler)
|
query_compiler=query_compiler)
|
||||||
|
|
||||||
hist = gfx.ed_hist_series
|
hist = eland.plotting.ed_hist_series
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def empty(self):
|
def empty(self):
|
||||||
|
@ -19,7 +19,6 @@ import numpy as np
|
|||||||
|
|
||||||
import eland as ed
|
import eland as ed
|
||||||
from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT
|
from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT
|
||||||
|
|
||||||
from eland.tests.common import TestData
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
|
||||||
|
@ -39,6 +39,7 @@ def test_plot_hist(fig_test, fig_ref):
|
|||||||
ed_ax = fig_test.subplots()
|
ed_ax = fig_test.subplots()
|
||||||
ed_flights.hist(ax=ed_ax)
|
ed_flights.hist(ax=ed_ax)
|
||||||
|
|
||||||
|
|
||||||
@check_figures_equal(extensions=['png'])
|
@check_figures_equal(extensions=['png'])
|
||||||
def test_plot_filtered_hist(fig_test, fig_ref):
|
def test_plot_filtered_hist(fig_test, fig_ref):
|
||||||
test_data = TestData()
|
test_data = TestData()
|
||||||
@ -49,8 +50,6 @@ def test_plot_filtered_hist(fig_test, fig_ref):
|
|||||||
pd_flights = pd_flights[pd_flights.FlightDelayMin > 0]
|
pd_flights = pd_flights[pd_flights.FlightDelayMin > 0]
|
||||||
ed_flights = ed_flights[ed_flights.FlightDelayMin > 0]
|
ed_flights = ed_flights[ed_flights.FlightDelayMin > 0]
|
||||||
|
|
||||||
print(ed_flights.head())
|
|
||||||
|
|
||||||
# This throws a userwarning
|
# This throws a userwarning
|
||||||
# (https://github.com/pandas-dev/pandas/blob/171c71611886aab8549a8620c5b0071a129ad685/pandas/plotting/_matplotlib/tools.py#L222)
|
# (https://github.com/pandas-dev/pandas/blob/171c71611886aab8549a8620c5b0071a129ad685/pandas/plotting/_matplotlib/tools.py#L222)
|
||||||
with pytest.warns(UserWarning):
|
with pytest.warns(UserWarning):
|
||||||
|
@ -25,8 +25,32 @@ def test_plot_hist(fig_test, fig_ref):
|
|||||||
pd_flights = test_data.pd_flights()['FlightDelayMin']
|
pd_flights = test_data.pd_flights()['FlightDelayMin']
|
||||||
ed_flights = test_data.ed_flights()['FlightDelayMin']
|
ed_flights = test_data.ed_flights()['FlightDelayMin']
|
||||||
|
|
||||||
pd_ax = fig_ref.subplots()
|
pd_flights.hist(figure=fig_ref)
|
||||||
ed_ax = fig_test.subplots()
|
ed_flights.hist(figure=fig_test)
|
||||||
|
|
||||||
pd_flights.hist(ax=pd_ax)
|
|
||||||
ed_flights.hist(ax=ed_ax)
|
@check_figures_equal(extensions=['png'])
def test_plot_multiple_hists(fig_test, fig_ref):
    """
    Overlay two filtered ``FlightDelayMin`` histograms on a single figure.

    The pandas histograms are drawn on ``fig_ref`` and the eland histograms
    on ``fig_test``.
    """
    test_data = TestData()

    pd_flights = test_data.pd_flights()
    ed_flights = test_data.ed_flights()

    # Reference (pandas) first, then the figure under test (eland); both get
    # the same pair of overlaid histograms split on ticket price.
    for flights, figure in ((pd_flights, fig_ref), (ed_flights, fig_test)):
        cheaper = flights[flights.AvgTicketPrice < 250]['FlightDelayMin']
        cheaper.hist(figure=figure, alpha=0.5, density=True)

        dearer = flights[flights.AvgTicketPrice > 250]['FlightDelayMin']
        dearer.hist(figure=figure, alpha=0.5, density=True)
|
||||||
|
|
||||||
|
@check_figures_equal(extensions=['png'])
def test_plot_multiple_hists_pretty(fig_test, fig_ref):
    """
    Overlay ``FlightTimeMin`` histograms for sunny and non-sunny origins.

    The pandas histograms are drawn on ``fig_ref`` and the eland histograms
    on ``fig_test``.
    """
    test_data = TestData()

    pd_flights = test_data.pd_flights()
    ed_flights = test_data.ed_flights()

    # Reference (pandas) first, then the figure under test (eland); both get
    # the same pair of overlaid histograms split on origin weather.
    for flights, figure in ((pd_flights, fig_ref), (ed_flights, fig_test)):
        sunny = flights[flights.OriginWeather == 'Sunny']['FlightTimeMin']
        sunny.hist(figure=figure, alpha=0.5, density=True)

        not_sunny = flights[flights.OriginWeather != 'Sunny']['FlightTimeMin']
        not_sunny.hist(figure=figure, alpha=0.5, density=True)
|
||||||
|
@ -16,8 +16,8 @@
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas.util.testing import assert_almost_equal
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from pandas.util.testing import assert_almost_equal
|
||||||
|
|
||||||
from eland.tests.common import TestData
|
from eland.tests.common import TestData
|
||||||
|
|
||||||
|
4
setup.py
4
setup.py
@ -15,7 +15,7 @@
|
|||||||
from codecs import open
|
from codecs import open
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
from setuptools import setup
|
from setuptools import setup, find_packages
|
||||||
|
|
||||||
here = path.abspath(path.dirname(__file__))
|
here = path.abspath(path.dirname(__file__))
|
||||||
about = {}
|
about = {}
|
||||||
@ -183,7 +183,7 @@ setup(
|
|||||||
license='Apache 2.0',
|
license='Apache 2.0',
|
||||||
classifiers=CLASSIFIERS,
|
classifiers=CLASSIFIERS,
|
||||||
keywords='elastic eland pandas python',
|
keywords='elastic eland pandas python',
|
||||||
packages=['eland'],
|
packages=find_packages(include=["eland", "eland.*"]),
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'elasticsearch>=7.0.5',
|
'elasticsearch>=7.0.5',
|
||||||
'pandas==0.25.3',
|
'pandas==0.25.3',
|
||||||
|
Loading…
x
Reference in New Issue
Block a user