diff --git a/docs/source/examples/demo_notebook.ipynb b/docs/source/examples/demo_notebook.ipynb index bc9a13c..4fe6ed4 100644 --- a/docs/source/examples/demo_notebook.ipynb +++ b/docs/source/examples/demo_notebook.ipynb @@ -753,7 +753,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 17, @@ -2707,7 +2707,7 @@ " 410.008918\n", " 2470.545974\n", " ...\n", - " 251.698552\n", + " 251.682199\n", " 1.000000\n", " \n", " \n", @@ -2724,7 +2724,7 @@ " 9735.660463\n", " ...\n", " 720.534532\n", - " 4.254967\n", + " 4.288079\n", " \n", " \n", " max\n", @@ -2745,9 +2745,9 @@ "mean 628.253689 7092.142457 ... 511.127842 2.835975\n", "std 266.386661 4578.263193 ... 334.741135 1.939365\n", "min 100.020531 0.000000 ... 0.000000 0.000000\n", - "25% 410.008918 2470.545974 ... 251.698552 1.000000\n", + "25% 410.008918 2470.545974 ... 251.682199 1.000000\n", "50% 640.387285 7612.072403 ... 503.148975 3.000000\n", - "75% 842.233478 9735.660463 ... 720.534532 4.254967\n", + "75% 842.233478 9735.660463 ... 720.534532 4.288079\n", "max 1199.729004 19881.482422 ... 
1902.901978 6.000000\n", "\n", "[8 rows x 7 columns]" diff --git a/docs/source/examples/online_retail_analysis.ipynb b/docs/source/examples/online_retail_analysis.ipynb index a8ed6c5..c66322d 100644 --- a/docs/source/examples/online_retail_analysis.ipynb +++ b/docs/source/examples/online_retail_analysis.ipynb @@ -1023,21 +1023,21 @@ " \n", " \n", " 25%\n", - " 14215.123301\n", + " 14221.960201\n", " 1.000000\n", - " 1.250100\n", + " 1.250000\n", " \n", " \n", " 50%\n", - " 15654.828552\n", + " 15671.712170\n", " 2.000000\n", " 2.510000\n", " \n", " \n", " 75%\n", - " 17218.003301\n", - " 6.570576\n", - " 4.210000\n", + " 17214.376367\n", + " 6.615042\n", + " 4.210533\n", " \n", " \n", " max\n", @@ -1055,9 +1055,9 @@ "mean 15590.776680 7.464000 4.103233\n", "std 1764.025160 85.924387 20.104873\n", "min 12347.000000 -9360.000000 0.000000\n", - "25% 14215.123301 1.000000 1.250100\n", - "50% 15654.828552 2.000000 2.510000\n", - "75% 17218.003301 6.570576 4.210000\n", + "25% 14221.960201 1.000000 1.250000\n", + "50% 15671.712170 2.000000 2.510000\n", + "75% 17214.376367 6.615042 4.210533\n", "max 18239.000000 2880.000000 950.990000" ] }, diff --git a/docs/source/index.rst b/docs/source/index.rst index 08b7525..8696873 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -34,7 +34,6 @@ In general, the data resides in elasticsearch and not in memory, which allows el * :doc:`reference/general_utility_functions` * :doc:`reference/dataframe` * :doc:`reference/series` - * :doc:`reference/index` * :doc:`reference/indexing` * :doc:`implementation/index` diff --git a/docs/source/reference/api/eland-Series-hist-1.png b/docs/source/reference/api/eland-Series-hist-1.png deleted file mode 100644 index d1c0465..0000000 Binary files a/docs/source/reference/api/eland-Series-hist-1.png and /dev/null differ diff --git a/docs/source/reference/api/eland-Series-hist-2.png b/docs/source/reference/api/eland-Series-hist-2.png new file mode 100644 index 0000000..07af463 Binary 
files /dev/null and b/docs/source/reference/api/eland-Series-hist-2.png differ diff --git a/docs/source/reference/api/eland.Series.hist.rst b/docs/source/reference/api/eland.Series.hist.rst index 2b1378f..8abdb05 100644 --- a/docs/source/reference/api/eland.Series.hist.rst +++ b/docs/source/reference/api/eland.Series.hist.rst @@ -4,5 +4,5 @@ eland.Series.hist .. currentmodule:: eland .. automethod:: Series.hist -.. image:: eland-Series-hist-1.png +.. image:: eland-Series-hist-2.png diff --git a/eland/__init__.py b/eland/__init__.py index 3a8a0c2..36a599b 100644 --- a/eland/__init__.py +++ b/eland/__init__.py @@ -23,7 +23,6 @@ from eland.field_mappings import * from eland.query import * from eland.operations import * from eland.query_compiler import * -from eland.plotting import * from eland.ndframe import * from eland.series import * from eland.dataframe import * diff --git a/eland/field_mappings.py b/eland/field_mappings.py index 28ccdad..1e0a8ba 100644 --- a/eland/field_mappings.py +++ b/eland/field_mappings.py @@ -439,7 +439,8 @@ class FieldMappings: # extract relevant fields and convert to dict # : {'category.keyword': 'category', 'currency': 'currency', ... - return OrderedDict(aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data']) + return OrderedDict( + aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data']) def date_field_format(self, es_field_name): """ diff --git a/eland/plotting.py b/eland/plotting.py deleted file mode 100644 index 3c34132..0000000 --- a/eland/plotting.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright 2019 Elasticsearch BV -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np - -import pandas.core.common as com -from pandas.core.dtypes.generic import ( - ABCIndexClass) -from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots - - -def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, - sharey=False, figsize=None, layout=None, bins=10, **kwds): - """ - See :pandas_api_docs:`pandas.DataFrame.hist` for usage. - - Notes - ----- - Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1`` - - Ideally, we'd call `hist_frame` directly with histogram data, - but weights are applied to ALL series. For example, we can - plot a histogram of pre-binned data via: - - .. code-block:: python - - counts, bins = np.histogram(data) - plt.hist(bins[:-1], bins, weights=counts) - - However, - - .. code-block:: python - - ax.hist(data[col].dropna().values, bins=bins, **kwds) - - is for ``[col]`` and weights are a single array. 
- - Examples - -------- - >>> df = ed.DataFrame('localhost', 'flights') - >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) # doctest: +SKIP - """ - # Start with empty pandas data frame derived from - ed_df_bins, ed_df_weights = ed_df._hist(num_bins=bins) - - if by is not None: - raise NotImplementedError("TODO") - - if column is not None: - if not isinstance(column, (list, np.ndarray, ABCIndexClass)): - column = [column] - ed_df_bins = ed_df_bins[column] - ed_df_weights = ed_df_weights[column] - naxes = len(ed_df_bins.columns) - - fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, - sharex=sharex, sharey=sharey, figsize=figsize, - layout=layout) - _axes = _flatten(axes) - - for i, col in enumerate(com.try_sort(ed_df_bins.columns)): - ax = _axes[i] - - # pandas code - # pandas / plotting / _core.py: 2410 - # ax.hist(data[col].dropna().values, bins=bins, **kwds) - - ax.hist(ed_df_bins[col][:-1], bins=ed_df_bins[col], weights=ed_df_weights[col], **kwds) - ax.set_title(col) - ax.grid(grid) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - fig.subplots_adjust(wspace=0.3, hspace=0.3) - - return axes - -def ed_hist_series(ed_s, column=None, by=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, ax=None, - figsize=None, layout=None, bins=10, **kwds): - """ - See :pandas_api_docs:`pandas.Series.hist` for usage. 
- - Notes - ----- - Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1`` - - - Examples - -------- - >>> df = ed.DataFrame('localhost', 'ecommerce') - >>> hist = df['taxful_total_price'].hist(figsize=[10,10]) # doctest: +SKIP - """ - # this is mostly the same code as above, it has been split out - # to a series specific method now so we can expand series plotting - - - # Start with empty pandas data frame derived from - ed_s_bins, ed_s_weights = ed_s._hist(num_bins=bins) - - if by is not None: - raise NotImplementedError("TODO") - - # raise error rather than warning when series is not plottable - if ed_s_bins.empty: - raise ValueError("{} has no meaningful histogram interval. All values 0." - .format(ed_s.name)) - - naxes = len(ed_s_bins.columns) - fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, figsize=figsize, layout=layout) - _axes = _flatten(axes) - for i, col in enumerate(com.try_sort(ed_s_bins.columns)): - ax = _axes[i] - ax.hist(ed_s_bins[col][:-1], bins=ed_s_bins[col], weights=ed_s_weights[col], **kwds) - ax.grid(grid) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - fig.subplots_adjust(wspace=0.3, hspace=0.3) - - return axes \ No newline at end of file diff --git a/eland/plotting/__init__.py b/eland/plotting/__init__.py new file mode 100644 index 0000000..9176b31 --- /dev/null +++ b/eland/plotting/__init__.py @@ -0,0 +1,28 @@ +# Copyright 2019 Elasticsearch BV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +Public plotting API + +Based on https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/plotting/__init__.py +but only supporting a subset of plotting methods (for now). +""" +from eland.plotting._core import ( + ed_hist_frame, + ed_hist_series, +) + +__all__ = [ + "ed_hist_frame", + "ed_hist_series", +] diff --git a/eland/plotting/_core.py b/eland/plotting/_core.py new file mode 100644 index 0000000..f3844ac --- /dev/null +++ b/eland/plotting/_core.py @@ -0,0 +1,127 @@ +# Copyright 2019 Elasticsearch BV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from eland.plotting._matplotlib.hist import hist_series, hist_frame + + +def ed_hist_series( + self, + by=None, + ax=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + figsize=None, + bins=10, + **kwds +): + """ + Draw histogram of the input series using matplotlib. + + See :pandas_api_docs:`pandas.Series.hist` for usage. 
+ + Notes + ----- + Derived from ``pandas.plotting._core.hist_series 0.25.3`` + + Examples + -------- + >>> import matplotlib.pyplot as plt + >>> df = ed.DataFrame('localhost', 'flights') + >>> df[df.OriginWeather == 'Sunny']['FlightTimeMin'].hist(alpha=0.5, density=True) # doctest: +SKIP + >>> df[df.OriginWeather != 'Sunny']['FlightTimeMin'].hist(alpha=0.5, density=True) # doctest: +SKIP + >>> plt.show() # doctest: +SKIP + + """ + return hist_series( + self, + by=by, + ax=ax, + grid=grid, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + figsize=figsize, + bins=bins, + **kwds + ) + + +def ed_hist_frame( + data, + column=None, + by=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + ax=None, + sharex=False, + sharey=False, + figsize=None, + layout=None, + bins=10, + **kwds +): + """ + Make a histogram of the DataFrame's columns. + + See :pandas_api_docs:`pandas.DataFrame.hist` for usage. + + Notes + ----- + Derived from ``pandas.plotting._core.hist_frame 0.25.3`` + + Ideally, we'd call the pandas method `hist_frame` directly + with histogram data, but weights are applied to ALL series. + For example, we can plot a histogram of pre-binned data via: + + .. code-block:: python + + counts, bins = np.histogram(data) + plt.hist(bins[:-1], bins, weights=counts) + + However, + + .. code-block:: python + + ax.hist(data[col].dropna().values, bins=bins, **kwds) + + is for ``[col]`` and weights are a single array. 
+ + Examples + -------- + >>> df = ed.DataFrame('localhost', 'flights') + >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) # doctest: +SKIP + """ + return hist_frame( + data, + column=column, + by=by, + grid=grid, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + ax=ax, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + bins=bins, + **kwds + ) diff --git a/eland/plotting/_matplotlib/__init__.py b/eland/plotting/_matplotlib/__init__.py new file mode 100644 index 0000000..edade50 --- /dev/null +++ b/eland/plotting/_matplotlib/__init__.py @@ -0,0 +1,40 @@ +# Copyright 2019 Elasticsearch BV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Public plotting API + +Based on https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/plotting/__init__.py +but only supporting a subset of plotting methods (for now). 
+""" +from eland.plotting._matplotlib.hist import ( + hist_frame, + hist_series, +) + +__all__ = [ + "hist_frame", + "hist_series", +] diff --git a/eland/plotting/_matplotlib/hist.py b/eland/plotting/_matplotlib/hist.py new file mode 100644 index 0000000..31819d2 --- /dev/null +++ b/eland/plotting/_matplotlib/hist.py @@ -0,0 +1,131 @@ +# Copyright 2019 Elasticsearch BV +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import pandas.core.common as com +from pandas.core.dtypes.generic import ABCIndexClass +from pandas.plotting._matplotlib import converter +from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots + + +def hist_series( + self, + by=None, + ax=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + figsize=None, + bins=10, + **kwds +): + import matplotlib.pyplot as plt + + if by is None: + if kwds.get("layout", None) is not None: + raise ValueError( + "The 'layout' keyword is not supported when " "'by' is None" + ) + # hack until the plotting interface is a bit more unified + fig = kwds.pop( + "figure", plt.gcf() if plt.get_fignums() else plt.figure(figsize=figsize) + ) + if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()): + fig.set_size_inches(*figsize, forward=True) + if ax is None: + ax = fig.gca() + elif ax.get_figure() != fig: + raise AssertionError("passed axis not bound to passed figure") + + self_bins, self_weights = self._hist(num_bins=bins) + # As 
this is a series, squeeze Series to arrays + self_bins = self_bins.squeeze() + self_weights = self_weights.squeeze() + + ax.hist(self_bins[:-1], bins=self_bins, weights=self_weights, **kwds) + ax.grid(grid) + axes = np.array([ax]) + + _set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + + else: + raise NotImplementedError("TODO") + + if hasattr(axes, "ndim"): + if axes.ndim == 1 and len(axes) == 1: + return axes[0] + return axes + + +def hist_frame( + data, + column=None, + by=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + ax=None, + sharex=False, + sharey=False, + figsize=None, + layout=None, + bins=10, + **kwds +): + # Pre-binned histogram data: bin edges and per-bin weights, one column per field + ed_df_bins, ed_df_weights = data._hist(num_bins=bins) + + converter._WARN = False # no warning for pandas plots + if by is not None: + raise NotImplementedError("TODO") + + if column is not None: + if not isinstance(column, (list, np.ndarray, ABCIndexClass)): + column = [column] + ed_df_bins = ed_df_bins[column] + ed_df_weights = ed_df_weights[column] + naxes = len(ed_df_bins.columns) + + if naxes == 0: + raise ValueError("hist method requires numerical columns, " "nothing to plot.") + + fig, axes = _subplots( + naxes=naxes, + ax=ax, + squeeze=False, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + ) + _axes = _flatten(axes) + + for i, col in enumerate(com.try_sort(ed_df_bins.columns)): + ax = _axes[i] + ax.hist(ed_df_bins[col][:-1], bins=ed_df_bins[col], weights=ed_df_weights[col], **kwds) + ax.set_title(col) + ax.grid(grid) + + _set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + fig.subplots_adjust(wspace=0.3, hspace=0.3) + + return axes diff --git a/eland/query.py b/eland/query.py index f120da7..1e12bf9 100644 --- a/eland/query.py +++ b/eland/query.py @@ -149,7 +149,6 @@ class Query: if interval != 0: self._aggs[name] = agg - def to_search_body(self): 
if self._query.empty(): if self._aggs: diff --git a/eland/query_compiler.py b/eland/query_compiler.py index ed6f04b..6dd3cdf 100644 --- a/eland/query_compiler.py +++ b/eland/query_compiler.py @@ -732,11 +732,13 @@ def elasticsearch_date_to_pandas_date(value: Union[int, str], date_format: str) # TODO investigate how we could generate this just once for a bulk read. return pd.to_datetime(value) + class FieldMappingCache: """ Very simple dict cache for field mappings. This improves performance > 3 times on large datasets as DataFrame access is slower than dict access. """ + def __init__(self, mappings): self._mappings = mappings @@ -764,4 +766,3 @@ class FieldMappingCache: self._date_field_format[es_field_name] = es_date_field_format return es_date_field_format - diff --git a/eland/series.py b/eland/series.py index aa358e2..cc22845 100644 --- a/eland/series.py +++ b/eland/series.py @@ -37,11 +37,11 @@ import numpy as np import pandas as pd from pandas.io.common import _expand_user, _stringify_path +import eland.plotting from eland import NDFrame from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn -import eland.plotting as gfx def _get_method_name(): @@ -108,7 +108,7 @@ class Series(NDFrame): index_field=index_field, query_compiler=query_compiler) - hist = gfx.ed_hist_series + hist = eland.plotting.ed_hist_series @property def empty(self): diff --git a/eland/tests/field_mappings/test_scripted_fields_pytest.py b/eland/tests/field_mappings/test_scripted_fields_pytest.py index a728038..b5cc248 100644 --- a/eland/tests/field_mappings/test_scripted_fields_pytest.py +++ b/eland/tests/field_mappings/test_scripted_fields_pytest.py @@ -19,7 +19,6 @@ import numpy as np import eland as ed from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT - from eland.tests.common 
import TestData diff --git a/eland/tests/plotting/test_dataframe_hist_pytest.py b/eland/tests/plotting/test_dataframe_hist_pytest.py index c88734c..4e0a4c4 100644 --- a/eland/tests/plotting/test_dataframe_hist_pytest.py +++ b/eland/tests/plotting/test_dataframe_hist_pytest.py @@ -39,6 +39,7 @@ def test_plot_hist(fig_test, fig_ref): ed_ax = fig_test.subplots() ed_flights.hist(ax=ed_ax) + @check_figures_equal(extensions=['png']) def test_plot_filtered_hist(fig_test, fig_ref): test_data = TestData() @@ -49,8 +50,6 @@ def test_plot_filtered_hist(fig_test, fig_ref): pd_flights = pd_flights[pd_flights.FlightDelayMin > 0] ed_flights = ed_flights[ed_flights.FlightDelayMin > 0] - print(ed_flights.head()) - # This throws a userwarning # (https://github.com/pandas-dev/pandas/blob/171c71611886aab8549a8620c5b0071a129ad685/pandas/plotting/_matplotlib/tools.py#L222) with pytest.warns(UserWarning): diff --git a/eland/tests/plotting/test_series_hist_pytest.py b/eland/tests/plotting/test_series_hist_pytest.py index 9ec4e64..c6ad7bf 100644 --- a/eland/tests/plotting/test_series_hist_pytest.py +++ b/eland/tests/plotting/test_series_hist_pytest.py @@ -25,8 +25,32 @@ def test_plot_hist(fig_test, fig_ref): pd_flights = test_data.pd_flights()['FlightDelayMin'] ed_flights = test_data.ed_flights()['FlightDelayMin'] - pd_ax = fig_ref.subplots() - ed_ax = fig_test.subplots() + pd_flights.hist(figure=fig_ref) + ed_flights.hist(figure=fig_test) - pd_flights.hist(ax=pd_ax) - ed_flights.hist(ax=ed_ax) + +@check_figures_equal(extensions=['png']) +def test_plot_multiple_hists(fig_test, fig_ref): + test_data = TestData() + + pd_flights = test_data.pd_flights() + ed_flights = test_data.ed_flights() + + pd_flights[pd_flights.AvgTicketPrice < 250]['FlightDelayMin'].hist(figure=fig_ref, alpha=0.5, density=True) + pd_flights[pd_flights.AvgTicketPrice > 250]['FlightDelayMin'].hist(figure=fig_ref, alpha=0.5, density=True) + + ed_flights[ed_flights.AvgTicketPrice < 
250]['FlightDelayMin'].hist(figure=fig_test, alpha=0.5, density=True) + ed_flights[ed_flights.AvgTicketPrice > 250]['FlightDelayMin'].hist(figure=fig_test, alpha=0.5, density=True) + +@check_figures_equal(extensions=['png']) +def test_plot_multiple_hists_pretty(fig_test, fig_ref): + test_data = TestData() + + pd_flights = test_data.pd_flights() + ed_flights = test_data.ed_flights() + + pd_flights[pd_flights.OriginWeather == 'Sunny']['FlightTimeMin'].hist(figure=fig_ref, alpha=0.5, density=True) + pd_flights[pd_flights.OriginWeather != 'Sunny']['FlightTimeMin'].hist(figure=fig_ref, alpha=0.5, density=True) + + ed_flights[ed_flights.OriginWeather == 'Sunny']['FlightTimeMin'].hist(figure=fig_test, alpha=0.5, density=True) + ed_flights[ed_flights.OriginWeather != 'Sunny']['FlightTimeMin'].hist(figure=fig_test, alpha=0.5, density=True) diff --git a/eland/tests/series/test_hist_pytest.py b/eland/tests/series/test_hist_pytest.py index 7c84553..576c81e 100644 --- a/eland/tests/series/test_hist_pytest.py +++ b/eland/tests/series/test_hist_pytest.py @@ -16,8 +16,8 @@ import numpy as np import pandas as pd -from pandas.util.testing import assert_almost_equal import pytest +from pandas.util.testing import assert_almost_equal from eland.tests.common import TestData diff --git a/setup.py b/setup.py index 48b7150..aad92ec 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ from codecs import open from os import path -from setuptools import setup +from setuptools import setup, find_packages here = path.abspath(path.dirname(__file__)) about = {} @@ -183,7 +183,7 @@ setup( license='Apache 2.0', classifiers=CLASSIFIERS, keywords='elastic eland pandas python', - packages=['eland'], + packages=find_packages(include=["eland", "eland.*"]), install_requires=[ 'elasticsearch>=7.0.5', 'pandas==0.25.3',