More doc updates.

2025-07-11 00:02:14 +08:00 · 2019-11-13 18:23:43 +00:00 · 2019-11-13 18:23:43 +00:00 · dff49d01fe
commit dff49d01fe
parent d8c1e18161
27 changed files with 518 additions and 144 deletions
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -40,7 +40,10 @@ release = '0.1'
 extensions = [
    'sphinx.ext.autodoc',
    "sphinx.ext.doctest",
-    'numpydoc'
+    "sphinx.ext.extlinks",
    'numpydoc',
    "matplotlib.sphinxext.plot_directive",
    "sphinx.ext.todo",
 ]
 doctest_global_setup = '''
@ -54,7 +57,18 @@ except ImportError:
    pd = None
 '''
 extlinks = {'pandas_docs': ('https://pandas.pydata.org/pandas-docs/version/0.25.1/reference/api/%s.html', '')}
 numpydoc_attributes_as_param_list = False
 numpydoc_show_class_members = False
 # matplotlib plot directive
 plot_include_source = True
 plot_formats = [("png", 90)]
 plot_html_show_formats = False
 plot_html_show_source_link = False
 plot_pre_code = """import numpy as np
 import eland as ed"""
 # Add any paths that contain templates here, relative to this directory.
--- a/docs/source/reference/api/eland.DataFrame.agg.rst
+++ b/docs/source/reference/api/eland.DataFrame.agg.rst
@ -0,0 +1,6 @@
 eland.DataFrame.agg
 ===================
 .. currentmodule:: eland
 .. automethod:: DataFrame.agg
--- a/docs/source/reference/api/eland.DataFrame.aggregate.rst
+++ b/docs/source/reference/api/eland.DataFrame.aggregate.rst
@ -0,0 +1,6 @@
 eland.DataFrame.aggregate
 =========================
 .. currentmodule:: eland
 .. automethod:: DataFrame.aggregate
--- a/docs/source/reference/api/eland.DataFrame.count.rst
+++ b/docs/source/reference/api/eland.DataFrame.count.rst
@ -0,0 +1,6 @@
 eland.DataFrame.count
 =====================
 .. currentmodule:: eland
 .. automethod:: DataFrame.count
--- a/docs/source/reference/api/eland.DataFrame.describe.rst
+++ b/docs/source/reference/api/eland.DataFrame.describe.rst
@ -0,0 +1,6 @@
 eland.DataFrame.describe
 ========================
 .. currentmodule:: eland
 .. automethod:: DataFrame.describe
--- a/docs/source/reference/api/eland.DataFrame.drop.rst
+++ b/docs/source/reference/api/eland.DataFrame.drop.rst
@ -0,0 +1,6 @@
 eland.DataFrame.drop
 ====================
 .. currentmodule:: eland
 .. automethod:: DataFrame.drop
--- a/docs/source/reference/api/eland.DataFrame.dtypes.rst
+++ b/docs/source/reference/api/eland.DataFrame.dtypes.rst
@ -0,0 +1,6 @@
 eland.DataFrame.dtypes
 ======================
 .. currentmodule:: eland
 .. autoattribute:: DataFrame.dtypes
--- a/docs/source/reference/api/eland.DataFrame.empty.rst
+++ b/docs/source/reference/api/eland.DataFrame.empty.rst
@ -0,0 +1,6 @@
 eland.DataFrame.empty
 =====================
 .. currentmodule:: eland
 .. autoattribute:: DataFrame.empty
--- a/docs/source/reference/api/eland.DataFrame.get.rst
+++ b/docs/source/reference/api/eland.DataFrame.get.rst
@ -0,0 +1,6 @@
 eland.DataFrame.get
 ===================
 .. currentmodule:: eland
 .. automethod:: DataFrame.get
--- a/docs/source/reference/api/eland.DataFrame.hist.rst
+++ b/docs/source/reference/api/eland.DataFrame.hist.rst
@ -0,0 +1,6 @@
 eland.DataFrame.hist
 ====================
 .. currentmodule:: eland
 .. automethod:: DataFrame.hist
--- a/docs/source/reference/api/eland.DataFrame.info.rst
+++ b/docs/source/reference/api/eland.DataFrame.info.rst
@ -0,0 +1,6 @@
 eland.DataFrame.info
 ====================
 .. currentmodule:: eland
 .. automethod:: DataFrame.info
--- a/docs/source/reference/api/eland.DataFrame.select_dtypes.rst
+++ b/docs/source/reference/api/eland.DataFrame.select_dtypes.rst
@ -0,0 +1,6 @@
 eland.DataFrame.select_dtypes
 =============================
 .. currentmodule:: eland
 .. automethod:: DataFrame.select_dtypes
--- a/docs/source/reference/api/eland.Index.rst
+++ b/docs/source/reference/api/eland.Index.rst
@ -0,0 +1,6 @@
 eland.Index
 ===========
 .. currentmodule:: eland
 .. autoclass:: Index
--- a/docs/source/reference/dataframe.rst
+++ b/docs/source/reference/dataframe.rst
@ -21,6 +21,9 @@ Attributes and underlying data
   DataFrame.index
   DataFrame.columns
   DataFrame.dtypes   
   DataFrame.select_dtypes   
   DataFrame.empty   
 Indexing, iteration
 ~~~~~~~~~~~~~~~~~~~
@ -29,7 +32,45 @@ Indexing, iteration
   DataFrame.head
   DataFrame.tail
   DataFrame.get
 Function application, GroupBy & window
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
   :toctree: api/
   DataFrame.agg
   DataFrame.aggregate
 .. _api.dataframe.stats:
 Computations / descriptive stats
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
   :toctree: api/
   DataFrame.count
   DataFrame.describe
   DataFrame.info
 Reindexing / selection / label manipulation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
   :toctree: api/
   DataFrame.drop
 Plotting
 ~~~~~~~~
 .. autosummary::
   :toctree: api/
   DataFrame.hist
 Serialization / IO / conversion
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
   :toctree: api/
   DataFrame.info
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/index.rst
@ -12,3 +12,4 @@ methods. All classes and functions exposed in ``eland.*`` namespace are public.
   general_utility_functions
   dataframe
   indexing
--- a/docs/source/reference/indexing.rst
+++ b/docs/source/reference/indexing.rst
@ -0,0 +1,15 @@
 .. _api.index:
 =====
 Index
 =====
 .. currentmodule:: eland
 **Many of these methods or variants thereof are available on the objects
 that contain an index (Series/DataFrame) and those should most likely be
 used before calling these methods directly.**
 .. autosummary::
   :toctree: api/
   Index
--- a/eland/conftest.py
+++ b/eland/conftest.py
@ -0,0 +1,17 @@
 import pytest
 import numpy as np
 import pandas as pd
 import eland as ed
 # Fix console size for consistent test results
 pd.set_option('display.max_rows', 10)
 pd.set_option('display.max_columns', 5)
 pd.set_option('display.width', 100)
@pytest.fixture(autouse=True)
def add_imports(doctest_namespace):
    """Make the ``np``, ``pd`` and ``ed`` aliases available in doctests.

    Declared with ``autouse=True`` so it is applied without being requested
    explicitly; each alias is stored in ``doctest_namespace`` under its
    conventional short name.
    """
    aliases = (("np", np), ("pd", pd), ("ed", ed))
    for alias, module in aliases:
        doctest_namespace[alias] = module
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@ -8,7 +8,6 @@ import six
 from pandas.core.common import apply_if_callable, is_bool_indexer
 from pandas.core.dtypes.common import is_list_like
 from pandas.core.indexing import check_bool_indexer
 from pandas.io.common import _expand_user, _stringify_path
 from pandas.io.formats import console
 from pandas.io.formats import format as fmt
@ -19,6 +18,7 @@ from eland import NDFrame
 from eland import Series
 from eland.filter import BooleanFilter, ScriptFilter
 class DataFrame(NDFrame):
    """
    Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes
@ -39,21 +39,26 @@ class DataFrame(NDFrame):
    index_field: str, optional
        The Elasticsearch index field to use as the DataFrame index. Defaults to _id if None is used.
    See Also
    --------
    :pandas_docs:`pandas.DataFrame`
    Examples
    --------
    Constructing DataFrame from an Elasticsearch configuration arguments and an Elasticsearch index
    >>> df = ed.DataFrame('localhost:9200', 'flights')
    >>> df.head()
-       AvgTicketPrice  Cancelled           Carrier                                          Dest  ... OriginRegion        OriginWeather dayOfWeek           timestamp
+       AvgTicketPrice  Cancelled  ... dayOfWeek           timestamp
-    0      841.265642      False   Kibana Airlines  Sydney Kingsford Smith International Airport  ...        DE-HE                Sunny         0 2018-01-01 00:00:00
+    0      841.265642      False  ...         0 2018-01-01 00:00:00
-    1      882.982662      False  Logstash Airways                     Venice Marco Polo Airport  ...        SE-BD                Clear         0 2018-01-01 18:27:00
+    1      882.982662      False  ...         0 2018-01-01 18:27:00
-    2      190.636904      False  Logstash Airways                     Venice Marco Polo Airport  ...        IT-34                 Rain         0 2018-01-01 17:11:14
+    2      190.636904      False  ...         0 2018-01-01 17:11:14
-    3      181.694216       True   Kibana Airlines                   Treviso-Sant'Angelo Airport  ...        IT-72  Thunder & Lightning         0 2018-01-01 10:33:28
+    3      181.694216       True  ...         0 2018-01-01 10:33:28
-    4      730.041778      False   Kibana Airlines          Xi'an Xianyang International Airport  ...       MX-DIF        Damaging Wind         0 2018-01-01 05:13:00
+    4      730.041778      False  ...         0 2018-01-01 05:13:00
    <BLANKLINE>
    [5 rows x 27 columns]
    Constructing DataFrame from an Elasticsearch client and an Elasticsearch index
    >>> from elasticsearch import Elasticsearch
@ -82,6 +87,7 @@ class DataFrame(NDFrame):
    <BLANKLINE>
    [5 rows x 2 columns]
    """
    def __init__(self,
                 client=None,
                 index_pattern=None,
@ -115,18 +121,21 @@ class DataFrame(NDFrame):
        -------
        Elasticsearch field names as pandas.Index
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.columns`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> assert isinstance(df.columns, pd.Index)
        >>> df.columns
-        Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID',
+        Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID', 'DestCityName',
-        ...   'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion',
+        ...   'DestCountry', 'DestLocation', 'DestRegion', 'DestWeather', 'DistanceKilometers',
-        ...   'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay',
+        ...   'DistanceMiles', 'FlightDelay', 'FlightDelayMin', 'FlightDelayType', 'FlightNum',
-        ...   'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour',
+        ...   'FlightTimeHour', 'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',
-        ...   'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',
+        ...   'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather', 'dayOfWeek',
-        ...   'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather',
+        ...   'timestamp'],
        ...   'dayOfWeek', 'timestamp'],
        ...   dtype='object')
        """
        return self._query_compiler.columns
@ -137,9 +146,20 @@ class DataFrame(NDFrame):
    def empty(self):
        """Determines if the DataFrame is empty.
-        Returns:
+        Returns
-            True if the DataFrame is empty.
+        -------
-            False otherwise.
+        bool
            If DataFrame is empty, return True, if not return False.
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.empty`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> df.empty
        False
        """
        return len(self.columns) == 0 or len(self.index) == 0
@ -161,6 +181,10 @@ class DataFrame(NDFrame):
        eland.DataFrame
            eland DataFrame filtered on first n rows sorted by index field
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.head`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
@ -192,6 +216,10 @@ class DataFrame(NDFrame):
        eland.DataFrame:
            eland DataFrame filtered on last n rows sorted by index field
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.tail`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
@ -257,20 +285,45 @@ class DataFrame(NDFrame):
    def count(self):
        """
-        Count non-NA cells for each column (TODO row)
+        Count non-NA cells for each column.
-        Counts are based on exists queries against ES
+        Counts are based on exists queries against ES.
        This is inefficient, as it creates N queries (N is number of fields).
        An alternative approach is to use value_count aggregations. However, they have issues in that:
-        1. They can only be used with aggregatable fields (e.g. keyword not text)
+
-        2. For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2
+        - They can only be used with aggregatable fields (e.g. keyword not text)
-        for a single document.
+        - For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2 for a single document.
        TODO - add additional pandas.DataFrame.count features
        Returns
        -------
        pandas.Series:
            Summary of column counts
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.count`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'geoip.city_name'])
        >>> df.count()
        customer_first_name    4675
        geoip.city_name        4094
        dtype: int64
        """
        return self._query_compiler.count()
    def info_es(self):
        """
        Returns
        -------
        None
            This method prints a debug summary of the task list Elasticsearch
        """
        buf = StringIO()
        super()._info_es(buf)
@ -297,9 +350,25 @@ class DataFrame(NDFrame):
        This method prints information about a DataFrame including
        the index dtype and column dtypes, non-null values and memory usage.
        See :pandas_docs:`pandas.DataFrame.info` for full details.
        Notes
        -----
        This copies a lot of code from pandas.DataFrame.info as it is difficult
        to split out the appropriate code or creating a SparseDataFrame gives
        incorrect results on types and counts.
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'geoip.city_name'])
        >>> df.info()
        <class 'eland.dataframe.DataFrame'>
        Index: 4675 entries, 0 to 4674
        Data columns (total 2 columns):
        customer_first_name    4675 non-null object
        geoip.city_name        4094 non-null object
        dtypes: object(2)
        memory usage: 96.0 bytes
        """
        if buf is None:  # pragma: no cover
            buf = sys.stdout
@ -386,7 +455,7 @@ class DataFrame(NDFrame):
            else:
                _verbose_repr()
-        counts = self.get_dtype_counts()
+        counts = self.dtypes.value_counts()
        dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k
                  in sorted(counts.items())]
        lines.append('dtypes: {types}'.format(types=', '.join(dtypes)))
@ -623,7 +692,11 @@ class DataFrame(NDFrame):
        )
    def select_dtypes(self, include=None, exclude=None):
-        # get empty df
+        """
        Return a subset of the DataFrame's columns based on the column dtypes.
        Compatible with :pandas_docs:`pandas.DataFrame.select_dtypes`
        """
        empty_df = self._empty_pd_df()
        empty_df = empty_df.select_dtypes(include=include, exclude=exclude)
@ -649,19 +722,13 @@ class DataFrame(NDFrame):
    def keys(self):
        """
        Return ``self.columns``.

        Provided so that ``key in df.keys()`` style membership checks work
        against the column labels.
        """
        return self.columns
    def groupby(self, by=None, axis=0, *args, **kwargs):
        axis = pd.DataFrame._get_axis_number(axis)
        if axis == 1:
            raise NotImplementedError("Aggregating via index not currently implemented - needs index transform")
    def aggregate(self, func, axis=0, *args, **kwargs):
        """
        Aggregate using one or more operations over the specified axis.
        Parameters
        ----------
-        func : function, str, list or dict
+        func: function, str, list or dict
            Function to use for aggregating the data. If a function, must either
            work when passed a %(klass)s or when passed to %(klass)s.apply.
@ -671,11 +738,15 @@ class DataFrame(NDFrame):
            - string function name
            - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
            - dict of axis labels -> functions, function names or list of such.
            Currently, we only support ``['count', 'mad', 'max', 'mean', 'median', 'min', 'mode', 'quantile',
            'rank', 'sem', 'skew', 'sum', 'std', 'var']``
        axis
            Currently, we only support axis=0 (index)
        *args
-            Positional arguments to pass to `func`.
+            Positional arguments to pass to `func`
        **kwargs
-            Keyword arguments to pass to `func`.
+            Keyword arguments to pass to `func`
        Returns
        -------
@ -684,6 +755,19 @@ class DataFrame(NDFrame):
            if DataFrame.agg is called with several functions, returns a DataFrame
            if Series.agg is called with single function, returns a scalar
            if Series.agg is called with several functions, returns a Series
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.aggregate`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
             DistanceKilometers  AvgTicketPrice
        sum        9.261629e+07    8.204365e+06
        min        0.000000e+00    1.000205e+02
        std        4.578263e+03    2.663867e+02
        """
        axis = pd.DataFrame._get_axis_number(axis)
@ -722,16 +806,38 @@ class DataFrame(NDFrame):
            raise NotImplementedError(expr, type(expr))
    def get(self, key, default=None):
-        """Get item from object for given key (DataFrame column, Panel
+        """
-                slice, etc.). Returns default value if not found.
+        Get item from object for given key (ex: DataFrame column).
        Returns default value if not found.
-                Args:
+        Parameters
-                    key (DataFrame column, Panel slice) : the key for which value
+        ----------
-                    to get
+        key: object
-                Returns:
+        Returns
-                    value (type of items contained in object) : A value that is
+        -------
-                    stored at the key
+        value: same type as items contained in object
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.get`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> df.get('Carrier')
        0         Kibana Airlines
        1        Logstash Airways
        2        Logstash Airways
        3         Kibana Airlines
        4         Kibana Airlines
                       ...
        13054    Logstash Airways
        13055    Logstash Airways
        13056    Logstash Airways
        13057            JetBeats
        13058            JetBeats
        Name: Carrier, Length: 13059, dtype: object
        """
        if key in self.keys():
            return self._getitem(key)
--- a/eland/index.py
+++ b/eland/index.py
@ -1,27 +1,23 @@
 """
 class Index
 The index for an eland.DataFrame.
 Currently, the index is a field that exists in every document in an Elasticsearch index.
 For slicing and sorting operations it must be a docvalues field. By default _id is used,
 which can't be used for range queries and is inefficient for sorting:
 https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html
 (The value of the _id field is also accessible in aggregations or for sorting,
 but doing so is discouraged as it requires to load a lot of data in memory.
 In case sorting or aggregating on the _id field is required, it is advised to duplicate
 the content of the _id field in another field that has doc_values enabled.)
 """
 class Index:
    """
    The index for an eland.DataFrame.
    TODO - This currently has very different behaviour than pandas.Index
    Currently, the index is a field that exists in every document in an Elasticsearch index.
    For slicing and sorting operations it must be a docvalues field. By default _id is used,
    which can't be used for range queries and is inefficient for sorting:
    https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html
    (The value of the _id field is also accessible in aggregations or for sorting,
    but doing so is discouraged as it requires to load a lot of data in memory.
    In case sorting or aggregating on the _id field is required, it is advised to duplicate
    the content of the _id field in another field that has doc_values enabled.)
    """
    ID_INDEX_FIELD = '_id'
    ID_SORT_FIELD = '_doc'  # if index field is _id, sort by _doc
    def __init__(self, query_compiler, index_field=None):
        # Calls setter
        self.index_field = index_field
        self._query_compiler = query_compiler
--- a/eland/mappings.py
+++ b/eland/mappings.py
@ -420,13 +420,13 @@ class Mappings:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
                                                    (self._mappings_capabilities.pd_dtype == 'float64') |
-                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].loc[
+                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].reindex(
-                    columns].index.tolist()
+                    columns).index.tolist()
            else:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
-                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].loc[
+                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].reindex(
-                    columns].index.tolist()
+                    columns).index.tolist()
        else:
            if include_bool == True:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
@ -469,26 +469,6 @@ class Mappings:
        return pd.Series(self._source_field_pd_dtypes)
    def get_dtype_counts(self, columns=None):
        """
        Return counts of unique dtypes in this object.
        Returns
        -------
        get_dtype_counts : Series
            Series with the count of columns with each dtype.
        """
        if columns is not None:
            return pd.Series(self._mappings_capabilities[self._mappings_capabilities._source == True]
                             .loc[columns]
                             .groupby('pd_dtype')['_source']
                             .count().to_dict())
        return pd.Series(self._mappings_capabilities[self._mappings_capabilities._source == True]
                         .groupby('pd_dtype')['_source']
                         .count().to_dict())
    def info_es(self, buf):
        buf.write("Mappings:\n")
        buf.write("\tcapabilities: {0}\n".format(self._mappings_capabilities))
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@ -57,10 +57,23 @@ class NDFrame:
    def _get_index(self):
        """
        Return eland index referencing Elasticsearch field to index a DataFrame/Series
        Returns
        -------
        eland.Index:
            Note eland.Index has a very limited API compared to pandas.Index
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.index`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> assert isinstance(df.index, ed.Index)
        >>> df.index.index_field
        '_id'
        """
        return self._query_compiler.index
@ -68,10 +81,30 @@ class NDFrame:
    @property
    def dtypes(self):
-        return self._query_compiler.dtypes
+        """
        Return the pandas dtypes in the DataFrame. Elasticsearch types are mapped
        to pandas dtypes via Mappings._es_dtype_to_pd_dtype.__doc__
-    def get_dtype_counts(self):
+        Returns
-        return self._query_compiler.get_dtype_counts()
+        -------
        pandas.Series
            The data type of each column.
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.dtypes`
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'AvgTicketPrice', 'timestamp', 'dayOfWeek'])
        >>> df.dtypes
        Origin                    object
        AvgTicketPrice           float64
        timestamp         datetime64[ns]
        dayOfWeek                  int64
        dtype: object
        """
        return self._query_compiler.dtypes
    def _build_repr_df(self, num_rows, num_cols):
        # Overridden version of BasePandasDataset._build_repr_df
@ -134,21 +167,71 @@ class NDFrame:
            errors="raise",
    ):
        """Return new object with labels in requested axis removed.
        Args:
            labels: Index or column labels to drop.
            axis: Whether to drop labels from the index (0 / 'index') or
                columns (1 / 'columns').
            index, columns: Alternative to specifying axis (labels, axis=1 is
                equivalent to columns=labels).
            level: For MultiIndex
            inplace: If True, do operation inplace and return None.
            errors: If 'ignore', suppress error and existing labels are
                dropped.
        Returns:
            dropped : type of caller
-        (derived from modin.base.BasePandasDataset)
+        Parameters
        ----------
        labels:
            Index or column labels to drop.
        axis:
            Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns').
        index, columns:
            Alternative to specifying axis (labels, axis=1 is equivalent to columns=labels).
        level:
            For MultiIndex - not supported
        inplace:
            If True, do operation inplace and return None.
        errors:
            If 'ignore', suppress error and existing labels are dropped.
        Returns
        -------
        dropped:
            type of caller
        See Also
        --------
        :pandas_docs:`pandas.DataFrame.drop`
        Examples
        --------
        Drop a column
        >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'email', 'user'])
        >>> df.drop(columns=['user'])
             customer_first_name                       email
        0                  Eddie  eddie@underwood-family.zzz
        1                   Mary      mary@bailey-family.zzz
        2                   Gwen      gwen@butler-family.zzz
        3                  Diane   diane@chandler-family.zzz
        4                  Eddie      eddie@weber-family.zzz
        ...                  ...                         ...
        4670                Mary     mary@lambert-family.zzz
        4671                 Jim      jim@gilbert-family.zzz
        4672               Yahya     yahya@rivera-family.zzz
        4673                Mary     mary@hampton-family.zzz
        4674             Jackson  jackson@hopkins-family.zzz
        <BLANKLINE>
        [4675 rows x 2 columns]
        Drop rows by index value (axis=0)
        >>> df.drop(['1', '2'])
             customer_first_name                       email     user
        0                  Eddie  eddie@underwood-family.zzz    eddie
        3                  Diane   diane@chandler-family.zzz    diane
        4                  Eddie      eddie@weber-family.zzz    eddie
        5                  Diane    diane@goodwin-family.zzz    diane
        6                 Oliver      oliver@rios-family.zzz   oliver
        ...                  ...                         ...      ...
        4670                Mary     mary@lambert-family.zzz     mary
        4671                 Jim      jim@gilbert-family.zzz      jim
        4672               Yahya     yahya@rivera-family.zzz    yahya
        4673                Mary     mary@hampton-family.zzz     mary
        4674             Jackson  jackson@hopkins-family.zzz  jackson
        <BLANKLINE>
        [4673 rows x 3 columns]
        """
        #(derived from modin.base.BasePandasDataset)
        # Level not supported
        if level is not None:
            raise NotImplementedError("level not supported {}".format(level))
@ -242,4 +325,36 @@ class NDFrame:
        return self._query_compiler._hist(num_bins)
    def describe(self):
        """
        Generate descriptive statistics that summarize the central tendency, dispersion and shape of a
        dataset's distribution, excluding NaN values.

        pandas analyzes both numeric and object series, as well as DataFrame column sets of mixed
        data types; here, as noted in the TODO below, only numeric values are currently supported.
        The result is whatever the query compiler's ``describe()`` returns.

        TODO - add additional arguments (currently only numeric values supported)

        Returns
        -------
        pandas.DataFrame:
            Summary information

        See Also
        --------
        :pandas_docs:`pandas.DataFrame.describe`

        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['AvgTicketPrice', 'FlightDelay'])
        >>> df.describe() # ignoring percentiles as they don't generate consistent results
               AvgTicketPrice   FlightDelay
        count    13059.000000  13059.000000
        mean       628.253689      0.251168
        std        266.386661      0.433685
        min        100.020531      0.000000
        ...
        ...
        ...
        max       1199.729004      1.000000
        """
        return self._query_compiler.describe()
--- a/eland/plotting.py
+++ b/eland/plotting.py
@ -10,36 +10,42 @@ def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None,
                  xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
                  sharey=False, figsize=None, layout=None, bins=10, **kwds):
    """
-    Derived from pandas.plotting._core.hist_frame 0.24.2 - TODO update to 0.25.1
+    See :pandas_docs:`pandas.DataFrame.hist` for usage.
-    Ideally, we'd call hist_frame directly with histogram data,
+    Notes
    -----
    Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1``
    Ideally, we'd call `hist_frame` directly with histogram data,
    but weights are applied to ALL series. For example, we can
    plot a histogram of pre-binned data via:
    .. code-block:: python
        counts, bins = np.histogram(data)
        plt.hist(bins[:-1], bins, weights=counts)
    However,
    .. code-block:: python
        ax.hist(data[col].dropna().values, bins=bins, **kwds)
-    is for [col] and weights are a single array.
+    is for ``[col]`` and weights are a single array.
-    We therefore cut/paste code.
+    Examples
    --------
    .. plot::
        :context: close-figs
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10])
    """
    # Start with empty pandas data frame derived from
    ed_df_bins, ed_df_weights = ed_df._hist(num_bins=bins)
    if by is not None:
        raise NotImplementedError("TODO")
        """
        axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid,
                            figsize=figsize, sharex=sharex, sharey=sharey,
                            layout=layout, bins=bins, xlabelsize=xlabelsize,
                            xrot=xrot, ylabelsize=ylabelsize,
                            yrot=yrot, **kwds)
        """
        return axes
    if column is not None:
        if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@ -84,11 +84,6 @@ class ElandQueryCompiler:
        return self._mappings.dtypes(columns)
    def get_dtype_counts(self):
        columns = self._operations.get_columns()
        return self._mappings.get_dtype_counts(columns)
    # END Index, columns, and dtypes objects
    def _es_results_to_pandas(self, results, batch_size=None):
--- a/eland/series.py
+++ b/eland/series.py
@ -150,7 +150,7 @@ class Series(NDFrame):
        )
    def _to_pandas(self):
-        return self._query_compiler._to_pandas()[self.name]
+        return self._query_compiler.to_pandas()[self.name]
    def __gt__(self, other):
        if isinstance(other, Series):
--- a/eland/tests/dataframe/test_count_pytest.py
+++ b/eland/tests/dataframe/test_count_pytest.py
@ -4,6 +4,7 @@ from pandas.util.testing import assert_series_equal
 from eland.tests.common import TestData
 import pandas as pd
 class TestDataFrameCount(TestData):
--- a/eland/tests/mappings/test_dtypes_pytest.py
+++ b/eland/tests/mappings/test_dtypes_pytest.py
@ -24,22 +24,3 @@ class TestMappingsDtypes(TestData):
        ed_dtypes = ed_flights._query_compiler._mappings.dtypes(columns=['Carrier', 'AvgTicketPrice', 'Cancelled'])
        assert_series_equal(pd_dtypes, ed_dtypes)
    def test_flights_get_dtype_counts_all(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
        pd_dtypes = pd_flights.get_dtype_counts().sort_index()
        ed_dtypes = ed_flights._query_compiler._mappings.get_dtype_counts().sort_index()
        assert_series_equal(pd_dtypes, ed_dtypes)
    def test_flights_get_dtype_counts_columns(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]
        pd_dtypes = pd_flights.get_dtype_counts().sort_index()
        ed_dtypes = ed_flights._query_compiler._mappings. \
            get_dtype_counts(columns=['Carrier', 'AvgTicketPrice', 'Cancelled']).sort_index()
        assert_series_equal(pd_dtypes, ed_dtypes)
--- a/eland/utils.py
+++ b/eland/utils.py
@ -141,3 +141,37 @@ def ed_to_pd(ed_df):
    eland.pd_to_ed: Create an eland.Dataframe from pandas.DataFrame
    """
    return ed_df._to_pandas()
 def _inherit_docstrings(parent, excluded=[]):
    """Creates a decorator which overwrites a decorated class' __doc__
    attribute with parent's __doc__ attribute. Also overwrites __doc__ of
    methods and properties defined in the class with the __doc__ of matching
    methods and properties in parent.
    Args:
        parent (object): Class from which the decorated class inherits __doc__.
        excluded (list): List of parent objects from which the class does not
            inherit docstrings.
    Returns:
        function: decorator which replaces the decorated class' documentation
            parent's documentation.
    """
    def decorator(cls):
        if parent not in excluded:
            cls.__doc__ = parent.__doc__
        for attr, obj in cls.__dict__.items():
            parent_obj = getattr(parent, attr, None)
            if parent_obj in excluded or (
                not callable(parent_obj) and not isinstance(parent_obj, property)
            ):
                continue
            if callable(obj):
                obj.__doc__ = parent_obj.__doc__
            elif isinstance(obj, property) and obj.fget is not None:
                p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
                setattr(cls, attr, p)
        return cls
    return decorator