Refactoring of plotting + fixes for multiple charts (#117)

* Refactoring of plotting + fixes for multiple charts. Updates to plotting in line with pandas 0.25.3. Enables plotting of multiple histograms on the same figure. * Fix to setup.py to allow submodules + reformat of code and better Series.hist docs
2025-07-11 00:02:14 +08:00 · 2020-01-29 07:07:56 +00:00 · 2020-01-29 07:07:56 +00:00 · 409cb043c8
commit 409cb043c8
parent 46b428d59b
21 changed files with 379 additions and 165 deletions
--- a/docs/source/examples/demo_notebook.ipynb
+++ b/docs/source/examples/demo_notebook.ipynb
@ -753,7 +753,7 @@
    {
     "data": {
      "text/plain": [
-       "<eland.index.Index at 0x11ffd7f90>"
+       "<eland.index.Index at 0x11a122310>"
      ]
     },
     "execution_count": 17,
@ -2707,7 +2707,7 @@
       "      <td>410.008918</td>\n",
       "      <td>2470.545974</td>\n",
       "      <td>...</td>\n",
-       "      <td>251.698552</td>\n",
+       "      <td>251.682199</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
@ -2724,7 +2724,7 @@
       "      <td>9735.660463</td>\n",
       "      <td>...</td>\n",
       "      <td>720.534532</td>\n",
-       "      <td>4.254967</td>\n",
+       "      <td>4.288079</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
@ -2745,9 +2745,9 @@
       "mean       628.253689         7092.142457  ...     511.127842      2.835975\n",
       "std        266.386661         4578.263193  ...     334.741135      1.939365\n",
       "min        100.020531            0.000000  ...       0.000000      0.000000\n",
-       "25%        410.008918         2470.545974  ...     251.698552      1.000000\n",
+       "25%        410.008918         2470.545974  ...     251.682199      1.000000\n",
       "50%        640.387285         7612.072403  ...     503.148975      3.000000\n",
-       "75%        842.233478         9735.660463  ...     720.534532      4.254967\n",
+       "75%        842.233478         9735.660463  ...     720.534532      4.288079\n",
       "max       1199.729004        19881.482422  ...    1902.901978      6.000000\n",
       "\n",
       "[8 rows x 7 columns]"
--- a/docs/source/examples/online_retail_analysis.ipynb
+++ b/docs/source/examples/online_retail_analysis.ipynb
@ -1023,21 +1023,21 @@
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
-       "      <td>14215.123301</td>\n",
+       "      <td>14221.960201</td>\n",
       "      <td>1.000000</td>\n",
-       "      <td>1.250100</td>\n",
+       "      <td>1.250000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
-       "      <td>15654.828552</td>\n",
+       "      <td>15671.712170</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.510000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
-       "      <td>17218.003301</td>\n",
+       "      <td>17214.376367</td>\n",
-       "      <td>6.570576</td>\n",
+       "      <td>6.615042</td>\n",
-       "      <td>4.210000</td>\n",
+       "      <td>4.210533</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
@ -1055,9 +1055,9 @@
       "mean   15590.776680      7.464000      4.103233\n",
       "std     1764.025160     85.924387     20.104873\n",
       "min    12347.000000  -9360.000000      0.000000\n",
-       "25%    14215.123301      1.000000      1.250100\n",
+       "25%    14221.960201      1.000000      1.250000\n",
-       "50%    15654.828552      2.000000      2.510000\n",
+       "50%    15671.712170      2.000000      2.510000\n",
-       "75%    17218.003301      6.570576      4.210000\n",
+       "75%    17214.376367      6.615042      4.210533\n",
       "max    18239.000000   2880.000000    950.990000"
      ]
     },
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@ -34,7 +34,6 @@ In general, the data resides in elasticsearch and not in memory, which allows el
  * :doc:`reference/general_utility_functions`
  * :doc:`reference/dataframe`
  * :doc:`reference/series`
  * :doc:`reference/index`
  * :doc:`reference/indexing`
 * :doc:`implementation/index`
--- a/docs/source/reference/api/eland-Series-hist-1.png
+++ b/docs/source/reference/api/eland-Series-hist-1.png
--- a/docs/source/reference/api/eland-Series-hist-2.png
+++ b/docs/source/reference/api/eland-Series-hist-2.png
--- a/docs/source/reference/api/eland.Series.hist.rst
+++ b/docs/source/reference/api/eland.Series.hist.rst
@ -4,5 +4,5 @@ eland.Series.hist
 .. currentmodule:: eland
 .. automethod:: Series.hist
-.. image:: eland-Series-hist-1.png
+.. image:: eland-Series-hist-2.png
--- a/eland/init.py
+++ b/eland/init.py
@ -23,7 +23,6 @@ from eland.field_mappings import *
 from eland.query import *
 from eland.operations import *
 from eland.query_compiler import *
 from eland.plotting import *
 from eland.ndframe import *
 from eland.series import *
 from eland.dataframe import *
--- a/eland/field_mappings.py
+++ b/eland/field_mappings.py
@ -439,7 +439,8 @@ class FieldMappings:
        # extract relevant fields and convert to dict
        # <class 'dict'>: {'category.keyword': 'category', 'currency': 'currency', ...
-        return OrderedDict(aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data'])
+        return OrderedDict(
            aggregatables[['aggregatable_es_field_name', 'es_field_name']].to_dict(orient='split')['data'])
    def date_field_format(self, es_field_name):
        """
--- a/eland/plotting.py
+++ b/eland/plotting.py
@ -1,133 +0,0 @@
 #  Copyright 2019 Elasticsearch BV
 #
 #      Licensed under the Apache License, Version 2.0 (the "License");
 #      you may not use this file except in compliance with the License.
 #      You may obtain a copy of the License at
 #
 #          http://www.apache.org/licenses/LICENSE-2.0
 #
 #      Unless required by applicable law or agreed to in writing, software
 #      distributed under the License is distributed on an "AS IS" BASIS,
 #      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #      See the License for the specific language governing permissions and
 #      limitations under the License.
 import numpy as np
 import pandas.core.common as com
 from pandas.core.dtypes.generic import (
    ABCIndexClass)
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
 def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None,
                  xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
                  sharey=False, figsize=None, layout=None, bins=10, **kwds):
    """
    See :pandas_api_docs:`pandas.DataFrame.hist` for usage.
    Notes
    -----
    Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1``
    Ideally, we'd call `hist_frame` directly with histogram data,
    but weights are applied to ALL series. For example, we can
    plot a histogram of pre-binned data via:
    .. code-block:: python
        counts, bins = np.histogram(data)
        plt.hist(bins[:-1], bins, weights=counts)
    However,
    .. code-block:: python
        ax.hist(data[col].dropna().values, bins=bins, **kwds)
    is for ``[col]`` and weights are a single array.
    Examples
    --------
    >>> df = ed.DataFrame('localhost', 'flights')
    >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) # doctest: +SKIP
    """
    # Start with empty pandas data frame derived from
    ed_df_bins, ed_df_weights = ed_df._hist(num_bins=bins)
    if by is not None:
        raise NotImplementedError("TODO")
    if column is not None:
        if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
            column = [column]
        ed_df_bins = ed_df_bins[column]
        ed_df_weights = ed_df_weights[column]
    naxes = len(ed_df_bins.columns)
    fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False,
                          sharex=sharex, sharey=sharey, figsize=figsize,
                          layout=layout)
    _axes = _flatten(axes)
    for i, col in enumerate(com.try_sort(ed_df_bins.columns)):
        ax = _axes[i]
        # pandas code
        # pandas / plotting / _core.py: 2410
        # ax.hist(data[col].dropna().values, bins=bins, **kwds)
        ax.hist(ed_df_bins[col][:-1], bins=ed_df_bins[col], weights=ed_df_weights[col], **kwds)
        ax.set_title(col)
        ax.grid(grid)
    _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
                     ylabelsize=ylabelsize, yrot=yrot)
    fig.subplots_adjust(wspace=0.3, hspace=0.3)
    return axes
 def ed_hist_series(ed_s, column=None, by=None, grid=True, xlabelsize=None,
                  xrot=None, ylabelsize=None, yrot=None, ax=None,
                  figsize=None, layout=None, bins=10, **kwds):
    """
    See :pandas_api_docs:`pandas.Series.hist` for usage.
    Notes
    -----
    Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1``
    Examples
    --------
    >>> df = ed.DataFrame('localhost', 'ecommerce')
    >>> hist = df['taxful_total_price'].hist(figsize=[10,10]) # doctest: +SKIP
    """
    # this is mostly the same code as above, it has been split out
    # to a series specific method now so we can expand series plotting
    # Start with empty pandas data frame derived from
    ed_s_bins, ed_s_weights = ed_s._hist(num_bins=bins)
    if by is not None:
        raise NotImplementedError("TODO")
    # raise error rather than warning when series is not plottable
    if ed_s_bins.empty:
        raise ValueError("{} has no meaningful histogram interval. All values 0."
                        .format(ed_s.name))
    naxes = len(ed_s_bins.columns)
    fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, figsize=figsize, layout=layout)
    _axes = _flatten(axes)
    for i, col in enumerate(com.try_sort(ed_s_bins.columns)):
        ax = _axes[i]
        ax.hist(ed_s_bins[col][:-1], bins=ed_s_bins[col], weights=ed_s_weights[col], **kwds)
        ax.grid(grid)
    _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot,
                     ylabelsize=ylabelsize, yrot=yrot)
    fig.subplots_adjust(wspace=0.3, hspace=0.3)
    return axes
--- a/eland/plotting/init.py
+++ b/eland/plotting/init.py
@ -0,0 +1,28 @@
 #  Copyright 2019 Elasticsearch BV
 #
 #      Licensed under the Apache License, Version 2.0 (the "License");
 #      you may not use this file except in compliance with the License.
 #      You may obtain a copy of the License at
 #
 #          http://www.apache.org/licenses/LICENSE-2.0
 #
 #      Unless required by applicable law or agreed to in writing, software
 #      distributed under the License is distributed on an "AS IS" BASIS,
 #      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #      See the License for the specific language governing permissions and
 #      limitations under the License.
 """
 Public plotting API
 Based on https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/plotting/__init__.py
 but only supporting a subset of plotting methods (for now).
 """
 from eland.plotting._core import (
    ed_hist_frame,
    ed_hist_series,
 )
 # Names exported by ``from eland.plotting import *``.
 # NOTE: the attribute must be spelled ``__all__`` (two trailing underscores);
 # any other spelling is an ordinary variable that the import machinery ignores.
 __all__ = [
    "ed_hist_frame",
    "ed_hist_series",
 ]
--- a/eland/plotting/_core.py
+++ b/eland/plotting/_core.py
@ -0,0 +1,127 @@
 #  Copyright 2019 Elasticsearch BV
 #
 #      Licensed under the Apache License, Version 2.0 (the "License");
 #      you may not use this file except in compliance with the License.
 #      You may obtain a copy of the License at
 #
 #          http://www.apache.org/licenses/LICENSE-2.0
 #
 #      Unless required by applicable law or agreed to in writing, software
 #      distributed under the License is distributed on an "AS IS" BASIS,
 #      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #      See the License for the specific language governing permissions and
 #      limitations under the License.
 from eland.plotting._matplotlib.hist import hist_series, hist_frame
 def ed_hist_series(
        self,
        by=None,
        ax=None,
        grid=True,
        xlabelsize=None,
        xrot=None,
        ylabelsize=None,
        yrot=None,
        figsize=None,
        bins=10,
        **kwds
 ):
    """
    Draw histogram of the input series using matplotlib.
    See :pandas_api_docs:`pandas.Series.hist` for usage.
    All arguments are forwarded unchanged to
    ``eland.plotting._matplotlib.hist.hist_series`` and its result is returned.
    Notes
    -----
    Derived from ``pandas.plotting._core.hist_series 0.25.3``
    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> df = ed.DataFrame('localhost', 'flights')
    >>> df[df.OriginWeather == 'Sunny']['FlightTimeMin'].hist(alpha=0.5, density=True) # doctest: +SKIP
    >>> df[df.OriginWeather != 'Sunny']['FlightTimeMin'].hist(alpha=0.5, density=True) # doctest: +SKIP
    >>> plt.show() # doctest: +SKIP
    """
    # Thin public wrapper: the matplotlib-specific work lives in hist_series.
    return hist_series(
        self,
        by=by,
        ax=ax,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        figsize=figsize,
        bins=bins,
        **kwds
    )
 def ed_hist_frame(
        data,
        column=None,
        by=None,
        grid=True,
        xlabelsize=None,
        xrot=None,
        ylabelsize=None,
        yrot=None,
        ax=None,
        sharex=False,
        sharey=False,
        figsize=None,
        layout=None,
        bins=10,
        **kwds
 ):
    """
    Make a histogram of the DataFrame's columns.
    See :pandas_api_docs:`pandas.DataFrame.hist` for usage.
    All arguments are forwarded unchanged to
    ``eland.plotting._matplotlib.hist.hist_frame`` and its result is returned.
    Notes
    -----
    Derived from ``pandas.plotting._core.hist_frame 0.25.3``
    Ideally, we'd call the pandas method `hist_frame` directly
    with histogram data, but weights are applied to ALL series.
    For example, we can plot a histogram of pre-binned data via:
    .. code-block:: python
        counts, bins = np.histogram(data)
        plt.hist(bins[:-1], bins, weights=counts)
    However,
    .. code-block:: python
        ax.hist(data[col].dropna().values, bins=bins, **kwds)
    is for ``[col]`` and weights are a single array.
    Examples
    --------
    >>> df = ed.DataFrame('localhost', 'flights')
    >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10]) # doctest: +SKIP
    """
    # Thin public wrapper: the matplotlib-specific work lives in hist_frame.
    return hist_frame(
        data,
        column=column,
        by=by,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        ax=ax,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
        bins=bins,
        **kwds
    )
--- a/eland/plotting/_matplotlib/init.py
+++ b/eland/plotting/_matplotlib/init.py
@ -0,0 +1,40 @@
 #  Copyright 2019 Elasticsearch BV
 #
 #      Licensed under the Apache License, Version 2.0 (the "License");
 #      you may not use this file except in compliance with the License.
 #      You may obtain a copy of the License at
 #
 #          http://www.apache.org/licenses/LICENSE-2.0
 #
 #      Unless required by applicable law or agreed to in writing, software
 #      distributed under the License is distributed on an "AS IS" BASIS,
 #      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #      See the License for the specific language governing permissions and
 #      limitations under the License.
 #
 #      Licensed under the Apache License, Version 2.0 (the "License");
 #      you may not use this file except in compliance with the License.
 #      You may obtain a copy of the License at
 #
 #          http://www.apache.org/licenses/LICENSE-2.0
 #
 #      Unless required by applicable law or agreed to in writing, software
 #      distributed under the License is distributed on an "AS IS" BASIS,
 #      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #      See the License for the specific language governing permissions and
 #      limitations under the License.
 """
 Public plotting API
 Based on https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/plotting/__init__.py
 but only supporting a subset of plotting methods (for now).
 """
 from eland.plotting._matplotlib.hist import (
    hist_frame,
    hist_series,
 )
 # Names exported by ``from eland.plotting._matplotlib import *``.
 # NOTE: the attribute must be spelled ``__all__`` (two trailing underscores);
 # any other spelling is an ordinary variable that the import machinery ignores.
 __all__ = [
    "hist_frame",
    "hist_series",
 ]
--- a/eland/plotting/_matplotlib/hist.py
+++ b/eland/plotting/_matplotlib/hist.py
@ -0,0 +1,131 @@
 #  Copyright 2019 Elasticsearch BV
 #
 #      Licensed under the Apache License, Version 2.0 (the "License");
 #      you may not use this file except in compliance with the License.
 #      You may obtain a copy of the License at
 #
 #          http://www.apache.org/licenses/LICENSE-2.0
 #
 #      Unless required by applicable law or agreed to in writing, software
 #      distributed under the License is distributed on an "AS IS" BASIS,
 #      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #      See the License for the specific language governing permissions and
 #      limitations under the License.
 import numpy as np
 import pandas.core.common as com
 from pandas.core.dtypes.generic import ABCIndexClass
 from pandas.plotting._matplotlib import converter
 from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
 def hist_series(
        self,
        by=None,
        ax=None,
        grid=True,
        xlabelsize=None,
        xrot=None,
        ylabelsize=None,
        yrot=None,
        figsize=None,
        bins=10,
        **kwds
 ):
    """
    Plot a histogram of a single series from pre-binned data.
    Bin edges and weights are obtained from ``self._hist(num_bins=bins)`` and
    drawn with ``ax.hist``. Remaining ``kwds`` go to ``ax.hist``, except for
    ``figure``, which selects the figure to draw on.
    Returns
    -------
    The axes the histogram was drawn on.
    Raises
    ------
    NotImplementedError
        If ``by`` is given (grouping is not supported).
    ValueError
        If a non-None ``layout`` keyword is given.
    AssertionError
        If ``ax`` does not belong to the figure in use.
    """
    import matplotlib.pyplot as plt
    # Grouped histograms are not supported.
    if by is not None:
        raise NotImplementedError("TODO")
    if kwds.get("layout") is not None:
        raise ValueError(
            "The 'layout' keyword is not supported when 'by' is None"
        )
    # hack until the plotting interface is a bit more unified
    # The fallback figure is resolved even when ``figure`` is supplied.
    if plt.get_fignums():
        fallback_fig = plt.gcf()
    else:
        fallback_fig = plt.figure(figsize=figsize)
    fig = kwds.pop("figure", fallback_fig)
    needs_resize = (
        figsize is not None and tuple(figsize) != tuple(fig.get_size_inches())
    )
    if needs_resize:
        fig.set_size_inches(*figsize, forward=True)
    if ax is None:
        ax = fig.gca()
    elif ax.get_figure() != fig:
        raise AssertionError("passed axis not bound to passed figure")
    bin_frame, weight_frame = self._hist(num_bins=bins)
    # A series has a single column, so squeeze the results down to 1-D
    bin_edges = bin_frame.squeeze()
    bin_weights = weight_frame.squeeze()
    # Plot pre-binned data: left edges as samples, counts as weights
    ax.hist(bin_edges[:-1], bins=bin_edges, weights=bin_weights, **kwds)
    ax.grid(grid)
    axes = np.array([ax])
    _set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )
    # Unwrap a one-element 1-D array so callers get the axes object itself
    if hasattr(axes, "ndim") and axes.ndim == 1 and len(axes) == 1:
        return axes[0]
    return axes
 def hist_frame(
        data,
        column=None,
        by=None,
        grid=True,
        xlabelsize=None,
        xrot=None,
        ylabelsize=None,
        yrot=None,
        ax=None,
        sharex=False,
        sharey=False,
        figsize=None,
        layout=None,
        bins=10,
        **kwds
 ):
    """
    Plot one histogram per column of ``data`` from pre-binned data.
    Bin edges and weights are obtained from ``data._hist(num_bins=bins)``
    (presumably two pandas DataFrames keyed by column name - confirm against
    ``_hist``). Each plotted column gets its own subplot, titled with the
    column name.
    Parameters
    ----------
    column : label or list-like, optional
        Restrict plotting to these columns; a single label is wrapped in a list.
    by : must be None
        Grouping is not supported.
    kwds
        Passed on to ``ax.hist`` for every column.
    Other parameters are passed to the pandas subplot and tick helpers.
    Returns
    -------
    The axes array created by ``_subplots`` (``squeeze=False``).
    Raises
    ------
    NotImplementedError
        If ``by`` is given.
    ValueError
        If there are no columns to plot.
    """
    # Fail before querying for histogram data when the call is unsupported.
    if by is not None:
        raise NotImplementedError("TODO")
    # Bins and weights arrive pre-aggregated rather than being computed here
    ed_df_bins, ed_df_weights = data._hist(num_bins=bins)
    converter._WARN = False  # no warning for pandas plots
    if column is not None:
        if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
            column = [column]
        ed_df_bins = ed_df_bins[column]
        ed_df_weights = ed_df_weights[column]
    naxes = len(ed_df_bins.columns)
    if naxes == 0:
        raise ValueError("hist method requires numerical columns, " "nothing to plot.")
    fig, axes = _subplots(
        naxes=naxes,
        ax=ax,
        squeeze=False,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
    )
    _axes = _flatten(axes)
    # Iterate the columns that actually have bins (after any ``column``
    # selection). Iterating ``data.columns`` would visit columns with no
    # histogram data and run past the ``naxes`` subplots created above.
    for i, col in enumerate(com.try_sort(ed_df_bins.columns)):
        ax = _axes[i]
        # Plot pre-binned data: left edges as samples, counts as weights
        ax.hist(ed_df_bins[col][:-1], bins=ed_df_bins[col], weights=ed_df_weights[col], **kwds)
        ax.set_title(col)
        ax.grid(grid)
    _set_ticks_props(
        axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
    )
    fig.subplots_adjust(wspace=0.3, hspace=0.3)
    return axes
--- a/eland/query.py
+++ b/eland/query.py
@ -149,7 +149,6 @@ class Query:
        if interval != 0:
            self._aggs[name] = agg
    def to_search_body(self):
        if self._query.empty():
            if self._aggs:
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@ -732,11 +732,13 @@ def elasticsearch_date_to_pandas_date(value: Union[int, str], date_format: str)
        # TODO investigate how we could generate this just once for a bulk read.
        return pd.to_datetime(value)
 class FieldMappingCache:
    """
    Very simple dict cache for field mappings. This improves performance > 3 times on large datasets as
    DataFrame access is slower than dict access.
    """
    def __init__(self, mappings):
        self._mappings = mappings
@ -764,4 +766,3 @@ class FieldMappingCache:
        self._date_field_format[es_field_name] = es_date_field_format
        return es_date_field_format
--- a/eland/series.py
+++ b/eland/series.py
@ -37,11 +37,11 @@ import numpy as np
 import pandas as pd
 from pandas.io.common import _expand_user, _stringify_path
 import eland.plotting
 from eland import NDFrame
 from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber
 from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
 from eland.filter import NotFilter, Equal, Greater, Less, GreaterEqual, LessEqual, ScriptFilter, IsIn
 import eland.plotting as gfx
 def _get_method_name():
@ -108,7 +108,7 @@ class Series(NDFrame):
            index_field=index_field,
            query_compiler=query_compiler)
-    hist = gfx.ed_hist_series
+    hist = eland.plotting.ed_hist_series
    @property
    def empty(self):
--- a/eland/tests/field_mappings/test_scripted_fields_pytest.py
+++ b/eland/tests/field_mappings/test_scripted_fields_pytest.py
@ -19,7 +19,6 @@ import numpy as np
 import eland as ed
 from eland.tests import FLIGHTS_INDEX_NAME, ES_TEST_CLIENT
 from eland.tests.common import TestData
--- a/eland/tests/plotting/test_dataframe_hist_pytest.py
+++ b/eland/tests/plotting/test_dataframe_hist_pytest.py
@ -39,6 +39,7 @@ def test_plot_hist(fig_test, fig_ref):
        ed_ax = fig_test.subplots()
        ed_flights.hist(ax=ed_ax)
@check_figures_equal(extensions=['png'])
 def test_plot_filtered_hist(fig_test, fig_ref):
    test_data = TestData()
@ -49,8 +50,6 @@ def test_plot_filtered_hist(fig_test, fig_ref):
    pd_flights = pd_flights[pd_flights.FlightDelayMin > 0]
    ed_flights = ed_flights[ed_flights.FlightDelayMin > 0]
    print(ed_flights.head())
    # This throws a userwarning
    # (https://github.com/pandas-dev/pandas/blob/171c71611886aab8549a8620c5b0071a129ad685/pandas/plotting/_matplotlib/tools.py#L222)
    with pytest.warns(UserWarning):
--- a/eland/tests/plotting/test_series_hist_pytest.py
+++ b/eland/tests/plotting/test_series_hist_pytest.py
@ -25,8 +25,32 @@ def test_plot_hist(fig_test, fig_ref):
    pd_flights = test_data.pd_flights()['FlightDelayMin']
    ed_flights = test_data.ed_flights()['FlightDelayMin']
-    pd_ax = fig_ref.subplots()
+    pd_flights.hist(figure=fig_ref)
-    ed_ax = fig_test.subplots()
+    ed_flights.hist(figure=fig_test)
-    pd_flights.hist(ax=pd_ax)
+
-    ed_flights.hist(ax=ed_ax)
+@check_figures_equal(extensions=['png'])
 def test_plot_multiple_hists(fig_test, fig_ref):
    """
    Overlay two ``FlightDelayMin`` histograms (``AvgTicketPrice`` below and
    above 250) on one figure, drawing with pandas on ``fig_ref`` and with
    eland on ``fig_test``.
    """
    test_data = TestData()
    pd_flights = test_data.pd_flights()
    ed_flights = test_data.ed_flights()
    pd_flights[pd_flights.AvgTicketPrice < 250]['FlightDelayMin'].hist(figure=fig_ref, alpha=0.5, density=True)
    pd_flights[pd_flights.AvgTicketPrice > 250]['FlightDelayMin'].hist(figure=fig_ref, alpha=0.5, density=True)
    ed_flights[ed_flights.AvgTicketPrice < 250]['FlightDelayMin'].hist(figure=fig_test, alpha=0.5, density=True)
    ed_flights[ed_flights.AvgTicketPrice > 250]['FlightDelayMin'].hist(figure=fig_test, alpha=0.5, density=True)
@check_figures_equal(extensions=['png'])
 def test_plot_multiple_hists_pretty(fig_test, fig_ref):
    """
    Overlay two ``FlightTimeMin`` histograms (``OriginWeather`` equal to and
    different from ``'Sunny'``) on one figure, drawing with pandas on
    ``fig_ref`` and with eland on ``fig_test``.
    """
    test_data = TestData()
    pd_flights = test_data.pd_flights()
    ed_flights = test_data.ed_flights()
    pd_flights[pd_flights.OriginWeather == 'Sunny']['FlightTimeMin'].hist(figure=fig_ref, alpha=0.5, density=True)
    pd_flights[pd_flights.OriginWeather != 'Sunny']['FlightTimeMin'].hist(figure=fig_ref, alpha=0.5, density=True)
    ed_flights[ed_flights.OriginWeather == 'Sunny']['FlightTimeMin'].hist(figure=fig_test, alpha=0.5, density=True)
    ed_flights[ed_flights.OriginWeather != 'Sunny']['FlightTimeMin'].hist(figure=fig_test, alpha=0.5, density=True)
--- a/eland/tests/series/test_hist_pytest.py
+++ b/eland/tests/series/test_hist_pytest.py
@ -16,8 +16,8 @@
 import numpy as np
 import pandas as pd
 from pandas.util.testing import assert_almost_equal
 import pytest
 from pandas.util.testing import assert_almost_equal
 from eland.tests.common import TestData
--- a/setup.py
+++ b/setup.py
@ -15,7 +15,7 @@
 from codecs import open
 from os import path
-from setuptools import setup
+from setuptools import setup, find_packages
 here = path.abspath(path.dirname(__file__))
 about = {}
@ -183,7 +183,7 @@ setup(
    license='Apache 2.0',
    classifiers=CLASSIFIERS,
    keywords='elastic eland pandas python',
-    packages=['eland'],
+    packages=find_packages(include=["eland", "eland.*"]),
    install_requires=[
        'elasticsearch>=7.0.5',
        'pandas==0.25.3',