Refactoring of plotting + fixes for multiple charts (#117)

* Refactoring of plotting + fixes for multiple charts

Updates plotting to be in line with pandas 0.25.3.
Enables plotting of multiple histograms on the
same figure.

* Fix to setup.py to allow submodules

+ reformat of code and better Series.hist docs
This commit is contained in:
stevedodson 2020-01-29 07:07:56 +00:00 committed by GitHub
parent 46b428d59b
commit 409cb043c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 379 additions and 165 deletions

View File

@ -753,7 +753,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"<eland.index.Index at 0x11ffd7f90>" "<eland.index.Index at 0x11a122310>"
] ]
}, },
"execution_count": 17, "execution_count": 17,
@ -2707,7 +2707,7 @@
" <td>410.008918</td>\n", " <td>410.008918</td>\n",
" <td>2470.545974</td>\n", " <td>2470.545974</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>251.698552</td>\n", " <td>251.682199</td>\n",
" <td>1.000000</td>\n", " <td>1.000000</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -2724,7 +2724,7 @@
" <td>9735.660463</td>\n", " <td>9735.660463</td>\n",
" <td>...</td>\n", " <td>...</td>\n",
" <td>720.534532</td>\n", " <td>720.534532</td>\n",
" <td>4.254967</td>\n", " <td>4.288079</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>max</th>\n", " <th>max</th>\n",
@ -2745,9 +2745,9 @@
"mean 628.253689 7092.142457 ... 511.127842 2.835975\n", "mean 628.253689 7092.142457 ... 511.127842 2.835975\n",
"std 266.386661 4578.263193 ... 334.741135 1.939365\n", "std 266.386661 4578.263193 ... 334.741135 1.939365\n",
"min 100.020531 0.000000 ... 0.000000 0.000000\n", "min 100.020531 0.000000 ... 0.000000 0.000000\n",
"25% 410.008918 2470.545974 ... 251.698552 1.000000\n", "25% 410.008918 2470.545974 ... 251.682199 1.000000\n",
"50% 640.387285 7612.072403 ... 503.148975 3.000000\n", "50% 640.387285 7612.072403 ... 503.148975 3.000000\n",
"75% 842.233478 9735.660463 ... 720.534532 4.254967\n", "75% 842.233478 9735.660463 ... 720.534532 4.288079\n",
"max 1199.729004 19881.482422 ... 1902.901978 6.000000\n", "max 1199.729004 19881.482422 ... 1902.901978 6.000000\n",
"\n", "\n",
"[8 rows x 7 columns]" "[8 rows x 7 columns]"

View File

@ -1023,21 +1023,21 @@
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>25%</th>\n", " <th>25%</th>\n",
" <td>14215.123301</td>\n", " <td>14221.960201</td>\n",
" <td>1.000000</td>\n", " <td>1.000000</td>\n",
" <td>1.250100</td>\n", " <td>1.250000</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>50%</th>\n", " <th>50%</th>\n",
" <td>15654.828552</td>\n", " <td>15671.712170</td>\n",
" <td>2.000000</td>\n", " <td>2.000000</td>\n",
" <td>2.510000</td>\n", " <td>2.510000</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>75%</th>\n", " <th>75%</th>\n",
" <td>17218.003301</td>\n", " <td>17214.376367</td>\n",
" <td>6.570576</td>\n", " <td>6.615042</td>\n",
" <td>4.210000</td>\n", " <td>4.210533</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>max</th>\n", " <th>max</th>\n",
@ -1055,9 +1055,9 @@
"mean 15590.776680 7.464000 4.103233\n", "mean 15590.776680 7.464000 4.103233\n",
"std 1764.025160 85.924387 20.104873\n", "std 1764.025160 85.924387 20.104873\n",
"min 12347.000000 -9360.000000 0.000000\n", "min 12347.000000 -9360.000000 0.000000\n",
"25% 14215.123301 1.000000 1.250100\n", "25% 14221.960201 1.000000 1.250000\n",
"50% 15654.828552 2.000000 2.510000\n", "50% 15671.712170 2.000000 2.510000\n",
"75% 17218.003301 6.570576 4.210000\n", "75% 17214.376367 6.615042 4.210533\n",
"max 18239.000000 2880.000000 950.990000" "max 18239.000000 2880.000000 950.990000"
] ]
}, },

View File

@ -34,7 +34,6 @@ In general, the data resides in elasticsearch and not in memory, which allows el
* :doc:`reference/general_utility_functions` * :doc:`reference/general_utility_functions`
* :doc:`reference/dataframe` * :doc:`reference/dataframe`
* :doc:`reference/series` * :doc:`reference/series`
* :doc:`reference/index`
* :doc:`reference/indexing` * :doc:`reference/indexing`
* :doc:`implementation/index` * :doc:`implementation/index`

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

View File

@ -4,5 +4,5 @@ eland.Series.hist
.. currentmodule:: eland .. currentmodule:: eland
.. automethod:: Series.hist .. automethod:: Series.hist
.. image:: eland-Series-hist-1.png .. image:: eland-Series-hist-2.png

View File

@ -23,7 +23,6 @@ from eland.field_mappings import *
from eland.query import * from eland.query import *
from eland.operations import * from eland.operations import *
from eland.query_compiler import * from eland.query_compiler import *
from eland.plotting import *
from eland.ndframe import * from eland.ndframe import *
from eland.series import * from eland.series import *
from eland.dataframe import * from eland.dataframe import *

View File

@ -439,7 +439,8 @@ class FieldMappings:
# extract relevant fields and convert to dict # extract relevant fields and convert to dict
# <class 'dict'>: {'category.keyword': 'category', 'currency': 'currency', ... # <class 'dict'>: {'category.keyword': 'category', 'currency': 'currency', ...
return OrderedDict(aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data']) return OrderedDict(
aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data'])
def date_field_format(self, es_field_name): def date_field_format(self, es_field_name):
""" """

View File

@ -1,133 +0,0 @@
# Copyright 2019 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas.core.common as com
from pandas.core.dtypes.generic import (
ABCIndexClass)
from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None,
                  xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
                  sharey=False, figsize=None, layout=None, bins=10, **kwds):
    """
    Draw one histogram subplot per column of pre-binned data from ``ed_df``.

    See :pandas_api_docs:`pandas.DataFrame.hist` for usage.

    Notes
    -----
    Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1``

    Ideally, we'd call `hist_frame` directly with histogram data,
    but weights are applied to ALL series. For example, we can
    plot a histogram of pre-binned data via:

    .. code-block:: python

        counts, bins = np.histogram(data)
        plt.hist(bins[:-1], bins, weights=counts)

    However,

    .. code-block:: python

        ax.hist(data[col].dropna().values, bins=bins, **kwds)

    is for ``[col]`` and weights are a single array.

    Parameters
    ----------
    ed_df
        Object providing ``_hist(num_bins=...)``, which returns a
        ``(bins, weights)`` pair of column-indexed frames.
    column
        Single column label or list-like of labels restricting what is drawn.
    by
        Not supported; any value other than ``None`` raises.
    grid
        Passed to ``Axes.grid`` for every subplot.
    xlabelsize, xrot, ylabelsize, yrot
        Forwarded to pandas' ``_set_ticks_props``.
    ax, sharex, sharey, figsize, layout
        Forwarded to pandas' ``_subplots``.
    bins
        Passed to ``ed_df._hist`` as ``num_bins``.
    **kwds
        Forwarded to ``Axes.hist``.

    Returns
    -------
    The axes array produced by ``_subplots(..., squeeze=False)``.

    Raises
    ------
    NotImplementedError
        If ``by`` is not ``None``.

    Examples
    --------
    >>> df = ed.DataFrame('localhost', 'flights')
    >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) # doctest: +SKIP
    """
    # Bin edges and matching weights, each addressable by column name.
    bin_edges, bin_weights = ed_df._hist(num_bins=bins)

    if by is not None:
        raise NotImplementedError("TODO")

    if column is not None:
        # Normalise a single label to a one-element selection.
        is_list_like = isinstance(column, (list, np.ndarray, ABCIndexClass))
        selection = column if is_list_like else [column]
        bin_edges = bin_edges[selection]
        bin_weights = bin_weights[selection]

    fig, axes = _subplots(
        naxes=len(bin_edges.columns),
        ax=ax,
        squeeze=False,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
    )
    flat_axes = _flatten(axes)

    for position, name in enumerate(com.try_sort(bin_edges.columns)):
        subplot = flat_axes[position]
        edges = bin_edges[name]
        # pandas code
        # pandas / plotting / _core.py: 2410
        # ax.hist(data[col].dropna().values, bins=bins, **kwds)
        subplot.hist(edges[:-1], bins=edges, weights=bin_weights[name], **kwds)
        subplot.set_title(name)
        subplot.grid(grid)

    _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
                     ylabelsize=ylabelsize, yrot=yrot)
    fig.subplots_adjust(wspace=0.3, hspace=0.3)

    return axes
def ed_hist_series(ed_s, column=None, by=None, grid=True, xlabelsize=None,
                   xrot=None, ylabelsize=None, yrot=None, ax=None,
                   figsize=None, layout=None, bins=10, **kwds):
    """
    Draw a histogram from the pre-binned data of ``ed_s``.

    See :pandas_api_docs:`pandas.Series.hist` for usage.

    Notes
    -----
    Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1``

    Parameters
    ----------
    ed_s
        Object providing ``_hist(num_bins=...)`` and a ``name`` attribute.
    column
        Accepted for signature parity; not used by this function.
    by
        Not supported; any value other than ``None`` raises.
    grid
        Passed to ``Axes.grid``.
    xlabelsize, xrot, ylabelsize, yrot
        Forwarded to pandas' ``_set_ticks_props``.
    ax, figsize, layout
        Forwarded to pandas' ``_subplots``.
    bins
        Passed to ``ed_s._hist`` as ``num_bins``.
    **kwds
        Forwarded to ``Axes.hist``.

    Returns
    -------
    The axes array produced by ``_subplots(..., squeeze=False)``.

    Raises
    ------
    NotImplementedError
        If ``by`` is not ``None``.
    ValueError
        If the bins frame returned by ``_hist`` is empty.

    Examples
    --------
    >>> df = ed.DataFrame('localhost', 'ecommerce')
    >>> hist = df['taxful_total_price'].hist(figsize=[10,10]) # doctest: +SKIP
    """
    # Kept separate from the frame variant so series plotting can grow
    # independently, even though the flow is currently almost the same.
    bin_edges, bin_weights = ed_s._hist(num_bins=bins)

    if by is not None:
        raise NotImplementedError("TODO")

    # An unplottable series is an error here, not merely a warning.
    if bin_edges.empty:
        message = "{} has no meaningful histogram interval. All values 0.".format(ed_s.name)
        raise ValueError(message)

    fig, axes = _subplots(
        naxes=len(bin_edges.columns),
        ax=ax,
        squeeze=False,
        figsize=figsize,
        layout=layout,
    )
    flat_axes = _flatten(axes)

    for position, name in enumerate(com.try_sort(bin_edges.columns)):
        subplot = flat_axes[position]
        edges = bin_edges[name]
        subplot.hist(edges[:-1], bins=edges, weights=bin_weights[name], **kwds)
        subplot.grid(grid)

    _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
                     ylabelsize=ylabelsize, yrot=yrot)
    fig.subplots_adjust(wspace=0.3, hspace=0.3)

    return axes

View File

@ -0,0 +1,28 @@
# Copyright 2019 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Public plotting API
Based on https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/plotting/__init__.py
but only supporting a subset of plotting methods (for now).
"""
from eland.plotting._core import (
ed_hist_frame,
ed_hist_series,
)
# Public API of this package. The name must be exactly ``__all__`` (two
# trailing underscores); the earlier ``__all___`` spelling only created an
# ordinary, unused module variable and declared nothing.
__all__ = [
    "ed_hist_frame",
    "ed_hist_series",
]

127
eland/plotting/_core.py Normal file
View File

@ -0,0 +1,127 @@
# Copyright 2019 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from eland.plotting._matplotlib.hist import hist_series, hist_frame
def ed_hist_series(
    self,
    by=None,
    ax=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    figsize=None,
    bins=10,
    **kwds
):
    """
    Draw histogram of the input series using matplotlib.

    Thin public entry point: every argument is handed unchanged to the
    matplotlib backend function ``hist_series``.

    See :pandas_api_docs:`pandas.Series.hist` for usage.

    Notes
    -----
    Derived from ``pandas.plotting._core.hist_series 0.25.3``

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> df = ed.DataFrame('localhost', 'flights')
    >>> df[df.OriginWeather == 'Sunny']['FlightTimeMin'].hist(alpha=0.5, density=True) # doctest: +SKIP
    >>> df[df.OriginWeather != 'Sunny']['FlightTimeMin'].hist(alpha=0.5, density=True) # doctest: +SKIP
    >>> plt.show() # doctest: +SKIP
    """
    options = dict(
        by=by,
        ax=ax,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        figsize=figsize,
        bins=bins,
    )
    # Named parameters can never also appear in ``kwds``, so this merge
    # cannot overwrite any of the explicit options above.
    options.update(kwds)
    return hist_series(self, **options)
def ed_hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    **kwds
):
    """
    Make a histogram of the DataFrame's columns.

    Thin public entry point: every argument is handed unchanged to the
    matplotlib backend function ``hist_frame``.

    See :pandas_api_docs:`pandas.DataFrame.hist` for usage.

    Notes
    -----
    Derived from ``pandas.plotting._core.hist_frame 0.25.3``

    Ideally, we'd call the pandas method `hist_frame` directly
    with histogram data, but weights are applied to ALL series.
    For example, we can plot a histogram of pre-binned data via:

    .. code-block:: python

        counts, bins = np.histogram(data)
        plt.hist(bins[:-1], bins, weights=counts)

    However,

    .. code-block:: python

        ax.hist(data[col].dropna().values, bins=bins, **kwds)

    is for ``[col]`` and weights are a single array.

    Examples
    --------
    >>> df = ed.DataFrame('localhost', 'flights')
    >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) # doctest: +SKIP
    """
    options = dict(
        column=column,
        by=by,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        ax=ax,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
        bins=bins,
    )
    # Named parameters can never also appear in ``kwds``, so this merge
    # cannot overwrite any of the explicit options above.
    options.update(kwds)
    return hist_frame(data, **options)

View File

@ -0,0 +1,40 @@
# Copyright 2019 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Public plotting API
Based on https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/plotting/__init__.py
but only supporting a subset of plotting methods (for now).
"""
from eland.plotting._matplotlib.hist import (
hist_frame,
hist_series,
)
# Public API of this package. The name must be exactly ``__all__`` (two
# trailing underscores); the earlier ``__all___`` spelling only created an
# ordinary, unused module variable and declared nothing.
__all__ = [
    "hist_frame",
    "hist_series",
]

View File

@ -0,0 +1,131 @@
# Copyright 2019 Elasticsearch BV
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import pandas.core.common as com
from pandas.core.dtypes.generic import ABCIndexClass
from pandas.plotting._matplotlib import converter
from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
def hist_series(
    self,
    by=None,
    ax=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    figsize=None,
    bins=10,
    **kwds
):
    """
    Draw a histogram of pre-binned series data on a matplotlib axes.

    Binning is delegated to ``self._hist``; the bins and weights it returns
    are squeezed and drawn with ``Axes.hist``.

    Parameters
    ----------
    self
        Object providing ``_hist(num_bins=...)`` returning ``(bins, weights)``.
    by
        Not supported; any value other than ``None`` raises.
    ax
        Axes to draw on. Defaults to ``fig.gca()`` of the chosen figure.
    grid
        Passed to ``Axes.grid``.
    xlabelsize, xrot, ylabelsize, yrot
        Forwarded to pandas' ``_set_ticks_props``.
    figsize
        Size for a newly created figure; an existing figure whose size
        differs is resized to it.
    bins
        Passed to ``self._hist`` as ``num_bins``.
    **kwds
        ``figure`` is popped and used as the target figure; ``layout`` is
        rejected; everything else is forwarded to ``Axes.hist``.

    Returns
    -------
    The axes that was drawn on.

    Raises
    ------
    NotImplementedError
        If ``by`` is not ``None``.
    ValueError
        If a ``layout`` keyword is supplied.
    AssertionError
        If ``ax`` does not belong to the chosen figure.
    """
    import matplotlib.pyplot as plt

    if by is not None:
        raise NotImplementedError("TODO")

    if kwds.get("layout", None) is not None:
        raise ValueError(
            "The 'layout' keyword is not supported when " "'by' is None"
        )

    # hack until the plotting interface is a bit more unified
    # The fallback figure is resolved eagerly, before looking at ``kwds``,
    # so a figure may be created even when one was passed in.
    fallback_figure = plt.gcf() if plt.get_fignums() else plt.figure(figsize=figsize)
    fig = kwds.pop("figure", fallback_figure)

    if figsize is not None:
        if tuple(figsize) != tuple(fig.get_size_inches()):
            fig.set_size_inches(*figsize, forward=True)

    if ax is None:
        ax = fig.gca()
    elif ax.get_figure() != fig:
        raise AssertionError("passed axis not bound to passed figure")

    edges, weights = self._hist(num_bins=bins)
    # As this is a series, squeeze Series to arrays
    edges = edges.squeeze()
    weights = weights.squeeze()

    ax.hist(edges[:-1], bins=edges, weights=weights, **kwds)
    ax.grid(grid)

    axes = np.array([ax])
    _set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )

    # Unwrap a one-element 1-D array so callers get the axes itself.
    is_single = hasattr(axes, "ndim") and axes.ndim == 1 and len(axes) == 1
    return axes[0] if is_single else axes
def hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    **kwds
):
    """
    Draw one histogram subplot per column of pre-binned frame data.

    Binning is delegated to ``data._hist``; each column of the bins and
    weights it returns is drawn on its own subplot with ``Axes.hist``.

    Parameters
    ----------
    data
        Object providing ``_hist(num_bins=...)``, which returns a
        ``(bins, weights)`` pair of column-indexed frames.
    column
        Single column label or list-like of labels restricting what is drawn.
    by
        Not supported; any value other than ``None`` raises.
    grid
        Passed to ``Axes.grid`` for every subplot.
    xlabelsize, xrot, ylabelsize, yrot
        Forwarded to pandas' ``_set_ticks_props``.
    ax, sharex, sharey, figsize, layout
        Forwarded to pandas' ``_subplots``.
    bins
        Passed to ``data._hist`` as ``num_bins``.
    **kwds
        Forwarded to ``Axes.hist``.

    Returns
    -------
    The axes array produced by ``_subplots(..., squeeze=False)``.

    Raises
    ------
    NotImplementedError
        If ``by`` is not ``None``.
    ValueError
        If there are no columns left to plot.
    """
    # Pre-binned histogram data; both frames are indexed by column name below.
    ed_df_bins, ed_df_weights = data._hist(num_bins=bins)

    converter._WARN = False  # no warning for pandas plots
    if by is not None:
        raise NotImplementedError("TODO")

    if column is not None:
        if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
            column = [column]
        ed_df_bins = ed_df_bins[column]
        ed_df_weights = ed_df_weights[column]
    naxes = len(ed_df_bins.columns)

    if naxes == 0:
        raise ValueError("hist method requires numerical columns, " "nothing to plot.")

    fig, axes = _subplots(
        naxes=naxes,
        ax=ax,
        squeeze=False,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
    )
    _axes = _flatten(axes)

    # Iterate the columns that actually have histogram data, not
    # ``data.columns``: after a ``column=`` selection (or when ``_hist``
    # returns fewer columns than ``data`` has) the latter would look up
    # missing columns and index past the ``naxes`` subplots created above.
    for i, col in enumerate(com.try_sort(ed_df_bins.columns)):
        ax = _axes[i]
        ax.hist(ed_df_bins[col][:-1], bins=ed_df_bins[col], weights=ed_df_weights[col], **kwds)
        ax.set_title(col)
        ax.grid(grid)

    _set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )
    fig.subplots_adjust(wspace=0.3, hspace=0.3)

    return axes

View File

@ -149,7 +149,6 @@ class Query:
if interval != 0: if interval != 0:
self._aggs[name] = agg self._aggs[name] = agg
def to_search_body(self): def to_search_body(self):
if self._query.empty(): if self._query.empty():
if self._aggs: if self._aggs:

View File

@ -732,11 +732,13 @@ def elasticsearch_date_to_pandas_date(value: Union[int, str], date_format: str)
# TODO investigate how we could generate this just once for a bulk read. # TODO investigate how we could generate this just once for a bulk read.
return pd.to_datetime(value) return pd.to_datetime(value)
class FieldMappingCache: class FieldMappingCache:
""" """
Very simple dict cache for field mappings. This improves performance > 3 times on large datasets as Very simple dict cache for field mappings. This improves performance > 3 times on large datasets as
DataFrame access is slower than dict access. DataFrame access is slower than dict access.
""" """
def __init__(self, mappings): def __init__(self, mappings):
self._mappings = mappings self._mappings = mappings
@ -764,4 +766,3 @@ class FieldMappingCache:
self._date_field_format[es_field_name] = es_date_field_format self._date_field_format[es_field_name] = es_date_field_format
return es_date_field_format return es_date_field_format

View File

@ -37,11 +37,11 @@ import numpy as np
import pandas as pd import pandas as pd
from pandas.io.common import _expand_user, _stringify_path from pandas.io.common import _expand_user, _stringify_path
import eland.plotting
from eland import NDFrame from eland import NDFrame
from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
import eland.plotting as gfx
def _get_method_name(): def _get_method_name():
@ -108,7 +108,7 @@ class Series(NDFrame):
index_field=index_field, index_field=index_field,
query_compiler=query_compiler) query_compiler=query_compiler)
hist = gfx.ed_hist_series hist = eland.plotting.ed_hist_series
@property @property
def empty(self): def empty(self):

View File

@ -19,7 +19,6 @@ import numpy as np
import eland as ed import eland as ed
from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT
from eland.tests.common import TestData from eland.tests.common import TestData

View File

@ -39,6 +39,7 @@ def test_plot_hist(fig_test, fig_ref):
ed_ax = fig_test.subplots() ed_ax = fig_test.subplots()
ed_flights.hist(ax=ed_ax) ed_flights.hist(ax=ed_ax)
@check_figures_equal(extensions=['png']) @check_figures_equal(extensions=['png'])
def test_plot_filtered_hist(fig_test, fig_ref): def test_plot_filtered_hist(fig_test, fig_ref):
test_data = TestData() test_data = TestData()
@ -49,8 +50,6 @@ def test_plot_filtered_hist(fig_test, fig_ref):
pd_flights = pd_flights[pd_flights.FlightDelayMin > 0] pd_flights = pd_flights[pd_flights.FlightDelayMin > 0]
ed_flights = ed_flights[ed_flights.FlightDelayMin > 0] ed_flights = ed_flights[ed_flights.FlightDelayMin > 0]
print(ed_flights.head())
# This throws a userwarning # This throws a userwarning
# (https://github.com/pandas-dev/pandas/blob/171c71611886aab8549a8620c5b0071a129ad685/pandas/plotting/_matplotlib/tools.py#L222) # (https://github.com/pandas-dev/pandas/blob/171c71611886aab8549a8620c5b0071a129ad685/pandas/plotting/_matplotlib/tools.py#L222)
with pytest.warns(UserWarning): with pytest.warns(UserWarning):

View File

@ -25,8 +25,32 @@ def test_plot_hist(fig_test, fig_ref):
pd_flights = test_data.pd_flights()['FlightDelayMin'] pd_flights = test_data.pd_flights()['FlightDelayMin']
ed_flights = test_data.ed_flights()['FlightDelayMin'] ed_flights = test_data.ed_flights()['FlightDelayMin']
pd_ax = fig_ref.subplots() pd_flights.hist(figure=fig_ref)
ed_ax = fig_test.subplots() ed_flights.hist(figure=fig_test)
pd_flights.hist(ax=pd_ax)
ed_flights.hist(ax=ed_ax) @check_figures_equal(extensions=['png'])
def test_plot_multiple_hists(fig_test, fig_ref):
    """Overlay two price-filtered ``FlightDelayMin`` histograms per figure.

    The pandas plots go on ``fig_ref`` and the eland plots on ``fig_test``.
    """
    fixtures = TestData()
    pd_df = fixtures.pd_flights()
    ed_df = fixtures.ed_flights()

    # Shared styling so the overlaid histograms stay visible.
    style = {"alpha": 0.5, "density": True}

    # Reference figure (pandas)
    pd_df[pd_df.AvgTicketPrice < 250]['FlightDelayMin'].hist(figure=fig_ref, **style)
    pd_df[pd_df.AvgTicketPrice > 250]['FlightDelayMin'].hist(figure=fig_ref, **style)

    # Figure under test (eland)
    ed_df[ed_df.AvgTicketPrice < 250]['FlightDelayMin'].hist(figure=fig_test, **style)
    ed_df[ed_df.AvgTicketPrice > 250]['FlightDelayMin'].hist(figure=fig_test, **style)
@check_figures_equal(extensions=['png'])
def test_plot_multiple_hists_pretty(fig_test, fig_ref):
    """Overlay sunny vs. non-sunny ``FlightTimeMin`` histograms per figure.

    The pandas plots go on ``fig_ref`` and the eland plots on ``fig_test``;
    the decorator compares the two rendered figures.
    """
    fixtures = TestData()
    pd_df = fixtures.pd_flights()
    ed_df = fixtures.ed_flights()

    # Shared styling so the overlaid histograms stay visible.
    style = {"alpha": 0.5, "density": True}

    # Reference figure (pandas)
    pd_df[pd_df.OriginWeather == 'Sunny']['FlightTimeMin'].hist(figure=fig_ref, **style)
    pd_df[pd_df.OriginWeather != 'Sunny']['FlightTimeMin'].hist(figure=fig_ref, **style)

    # Figure under test (eland)
    ed_df[ed_df.OriginWeather == 'Sunny']['FlightTimeMin'].hist(figure=fig_test, **style)
    ed_df[ed_df.OriginWeather != 'Sunny']['FlightTimeMin'].hist(figure=fig_test, **style)

View File

@ -16,8 +16,8 @@
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from pandas.util.testing import assert_almost_equal
import pytest import pytest
from pandas.util.testing import assert_almost_equal
from eland.tests.common import TestData from eland.tests.common import TestData

View File

@ -15,7 +15,7 @@
from codecs import open from codecs import open
from os import path from os import path
from setuptools import setup from setuptools import setup, find_packages
here = path.abspath(path.dirname(__file__)) here = path.abspath(path.dirname(__file__))
about = {} about = {}
@ -183,7 +183,7 @@ setup(
license='Apache 2.0', license='Apache 2.0',
classifiers=CLASSIFIERS, classifiers=CLASSIFIERS,
keywords='elastic eland pandas python', keywords='elastic eland pandas python',
packages=['eland'], packages=find_packages(include=["eland", "eland.*"]),
install_requires=[ install_requires=[
'elasticsearch>=7.0.5', 'elasticsearch>=7.0.5',
'pandas==0.25.3', 'pandas==0.25.3',