More doc updates.

2025-07-24 00:00:39 +08:00 · 2019-11-13 18:23:43 +00:00 · 2019-11-13 18:23:43 +00:00 · dff49d01fe
commit dff49d01fe
parent d8c1e18161
27 changed files with 518 additions and 144 deletions
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -40,7 +40,10 @@ release = '0.1'
 extensions = [
    'sphinx.ext.autodoc',
    "sphinx.ext.doctest",
-    'numpydoc'
+    "sphinx.ext.extlinks",
+    'numpydoc',
+    "matplotlib.sphinxext.plot_directive",
+    "sphinx.ext.todo",
 ]

 doctest_global_setup = '''
@ -54,7 +57,18 @@ except ImportError:
    pd = None
 '''

+extlinks = {'pandas_docs': ('https://pandas.pydata.org/pandas-docs/version/0.25.1/reference/api/%s.html', '')}
+
 numpydoc_attributes_as_param_list = False
+numpydoc_show_class_members = False
+
+# matplotlib plot directive
+plot_include_source = True
+plot_formats = [("png", 90)]
+plot_html_show_formats = False
+plot_html_show_source_link = False
+plot_pre_code = """import numpy as np
+import eland as ed"""


 # Add any paths that contain templates here, relative to this directory.
--- a/docs/source/reference/api/eland.DataFrame.agg.rst
+++ b/docs/source/reference/api/eland.DataFrame.agg.rst
@ -0,0 +1,6 @@
+eland.DataFrame.agg
+===================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.agg
--- a/docs/source/reference/api/eland.DataFrame.aggregate.rst
+++ b/docs/source/reference/api/eland.DataFrame.aggregate.rst
@ -0,0 +1,6 @@
+eland.DataFrame.aggregate
+=========================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.aggregate
--- a/docs/source/reference/api/eland.DataFrame.count.rst
+++ b/docs/source/reference/api/eland.DataFrame.count.rst
@ -0,0 +1,6 @@
+eland.DataFrame.count
+=====================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.count
--- a/docs/source/reference/api/eland.DataFrame.describe.rst
+++ b/docs/source/reference/api/eland.DataFrame.describe.rst
@ -0,0 +1,6 @@
+eland.DataFrame.describe
+========================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.describe
--- a/docs/source/reference/api/eland.DataFrame.drop.rst
+++ b/docs/source/reference/api/eland.DataFrame.drop.rst
@ -0,0 +1,6 @@
+eland.DataFrame.drop
+====================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.drop
--- a/docs/source/reference/api/eland.DataFrame.dtypes.rst
+++ b/docs/source/reference/api/eland.DataFrame.dtypes.rst
@ -0,0 +1,6 @@
+eland.DataFrame.dtypes
+======================
+
+.. currentmodule:: eland
+
+.. autoattribute:: DataFrame.dtypes
--- a/docs/source/reference/api/eland.DataFrame.empty.rst
+++ b/docs/source/reference/api/eland.DataFrame.empty.rst
@ -0,0 +1,6 @@
+eland.DataFrame.empty
+=====================
+
+.. currentmodule:: eland
+
+.. autoattribute:: DataFrame.empty
--- a/docs/source/reference/api/eland.DataFrame.get.rst
+++ b/docs/source/reference/api/eland.DataFrame.get.rst
@ -0,0 +1,6 @@
+eland.DataFrame.get
+===================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.get
--- a/docs/source/reference/api/eland.DataFrame.hist.rst
+++ b/docs/source/reference/api/eland.DataFrame.hist.rst
@ -0,0 +1,6 @@
+eland.DataFrame.hist
+====================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.hist
--- a/docs/source/reference/api/eland.DataFrame.info.rst
+++ b/docs/source/reference/api/eland.DataFrame.info.rst
@ -0,0 +1,6 @@
+eland.DataFrame.info
+====================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.info
--- a/docs/source/reference/api/eland.DataFrame.select_dtypes.rst
+++ b/docs/source/reference/api/eland.DataFrame.select_dtypes.rst
@ -0,0 +1,6 @@
+eland.DataFrame.select_dtypes
+=============================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.select_dtypes
--- a/docs/source/reference/api/eland.Index.rst
+++ b/docs/source/reference/api/eland.Index.rst
@ -0,0 +1,6 @@
+eland.Index
+===========
+
+.. currentmodule:: eland
+
+.. autoclass:: Index
--- a/docs/source/reference/dataframe.rst
+++ b/docs/source/reference/dataframe.rst
@ -21,6 +21,9 @@ Attributes and underlying data

   DataFrame.index
   DataFrame.columns
+   DataFrame.dtypes   
+   DataFrame.select_dtypes   
+   DataFrame.empty   

 Indexing, iteration
 ~~~~~~~~~~~~~~~~~~~
@ -29,7 +32,45 @@ Indexing, iteration

   DataFrame.head
   DataFrame.tail
+   DataFrame.get

+Function application, GroupBy & window
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/

+   DataFrame.agg
+   DataFrame.aggregate

+.. _api.dataframe.stats:
+
+Computations / descriptive stats
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+   DataFrame.count
+   DataFrame.describe
+   DataFrame.info
+
+Reindexing / selection / label manipulation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+   DataFrame.drop
+
+Plotting
+~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+   DataFrame.hist
+
+Serialization / IO / conversion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+   DataFrame.info

--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/index.rst
@ -12,3 +12,4 @@ methods. All classes and functions exposed in ``eland.*`` namespace are public.

   general_utility_functions
   dataframe
+   indexing
--- a/docs/source/reference/indexing.rst
+++ b/docs/source/reference/indexing.rst
@ -0,0 +1,15 @@
+.. _api.index:
+
+=====
+Index
+=====
+.. currentmodule:: eland
+
+**Many of these methods or variants thereof are available on the objects
+that contain an index (Series/DataFrame) and those should most likely be
+used before calling these methods directly.**
+
+.. autosummary::
+   :toctree: api/
+
+   Index
--- a/eland/conftest.py
+++ b/eland/conftest.py
@ -0,0 +1,17 @@
+import pytest
+
+import numpy as np
+import pandas as pd
+import eland as ed
+
+# Fix console size for consistent test results
+pd.set_option('display.max_rows', 10)
+pd.set_option('display.max_columns', 5)
+pd.set_option('display.width', 100)
+
+@pytest.fixture(autouse=True)
+def add_imports(doctest_namespace):
+    doctest_namespace["np"] = np
+    doctest_namespace["pd"] = pd
+    doctest_namespace["ed"] = ed
+
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@ -8,7 +8,6 @@ import six
 from pandas.core.common import apply_if_callable, is_bool_indexer
 from pandas.core.dtypes.common import is_list_like
 from pandas.core.indexing import check_bool_indexer
-
 from pandas.io.common import _expand_user, _stringify_path
 from pandas.io.formats import console
 from pandas.io.formats import format as fmt
@ -19,6 +18,7 @@ from eland import NDFrame
 from eland import Series
 from eland.filter import BooleanFilter, ScriptFilter

+
 class DataFrame(NDFrame):
    """
    Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes
@ -39,21 +39,26 @@ class DataFrame(NDFrame):
    index_field: str, optional
        The Elasticsearch index field to use as the DataFrame index. Defaults to _id if None is used.

+    See Also
+    --------
+    :pandas_docs:`pandas.DataFrame`
+
    Examples
    --------
    Constructing DataFrame from an Elasticsearch configuration arguments and an Elasticsearch index

    >>> df = ed.DataFrame('localhost:9200', 'flights')
    >>> df.head()
-       AvgTicketPrice  Cancelled           Carrier                                          Dest  ... OriginRegion        OriginWeather dayOfWeek           timestamp
-    0      841.265642      False   Kibana Airlines  Sydney Kingsford Smith International Airport  ...        DE-HE                Sunny         0 2018-01-01 00:00:00
-    1      882.982662      False  Logstash Airways                     Venice Marco Polo Airport  ...        SE-BD                Clear         0 2018-01-01 18:27:00
-    2      190.636904      False  Logstash Airways                     Venice Marco Polo Airport  ...        IT-34                 Rain         0 2018-01-01 17:11:14
-    3      181.694216       True   Kibana Airlines                   Treviso-Sant'Angelo Airport  ...        IT-72  Thunder & Lightning         0 2018-01-01 10:33:28
-    4      730.041778      False   Kibana Airlines          Xi'an Xianyang International Airport  ...       MX-DIF        Damaging Wind         0 2018-01-01 05:13:00
+       AvgTicketPrice  Cancelled  ... dayOfWeek           timestamp
+    0      841.265642      False  ...         0 2018-01-01 00:00:00
+    1      882.982662      False  ...         0 2018-01-01 18:27:00
+    2      190.636904      False  ...         0 2018-01-01 17:11:14
+    3      181.694216       True  ...         0 2018-01-01 10:33:28
+    4      730.041778      False  ...         0 2018-01-01 05:13:00
    <BLANKLINE>
    [5 rows x 27 columns]

+
    Constructing DataFrame from an Elasticsearch client and an Elasticsearch index

    >>> from elasticsearch import Elasticsearch
@ -82,6 +87,7 @@ class DataFrame(NDFrame):
    <BLANKLINE>
    [5 rows x 2 columns]
    """
+
    def __init__(self,
                 client=None,
                 index_pattern=None,
@ -115,18 +121,21 @@ class DataFrame(NDFrame):
        -------
        Elasticsearch field names as pandas.Index

+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.columns`
+
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> assert isinstance(df.columns, pd.Index)
        >>> df.columns
-        Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID',
-        ...   'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion',
-        ...   'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay',
-        ...   'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour',
-        ...   'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',
-        ...   'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather',
-        ...   'dayOfWeek', 'timestamp'],
+        Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID', 'DestCityName',
+        ...   'DestCountry', 'DestLocation', 'DestRegion', 'DestWeather', 'DistanceKilometers',
+        ...   'DistanceMiles', 'FlightDelay', 'FlightDelayMin', 'FlightDelayType', 'FlightNum',
+        ...   'FlightTimeHour', 'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',
+        ...   'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather', 'dayOfWeek',
+        ...   'timestamp'],
        ...   dtype='object')
        """
        return self._query_compiler.columns
@ -137,9 +146,20 @@ class DataFrame(NDFrame):
    def empty(self):
        """Determines if the DataFrame is empty.

-        Returns:
-            True if the DataFrame is empty.
-            False otherwise.
+        Returns
+        -------
+        bool
+            If DataFrame is empty, return True, if not return False.
+
+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.empty`
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights')
+        >>> df.empty
+        False
        """
        return len(self.columns) == 0 or len(self.index) == 0

@ -161,6 +181,10 @@ class DataFrame(NDFrame):
        eland.DataFrame
            eland DataFrame filtered on first n rows sorted by index field

+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.head`
+
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
@ -192,6 +216,10 @@ class DataFrame(NDFrame):
        eland.DataFrame:
            eland DataFrame filtered on last n rows sorted by index field

+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.tail`
+
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
@ -257,20 +285,45 @@ class DataFrame(NDFrame):

    def count(self):
        """
-        Count non-NA cells for each column (TODO row)
+        Count non-NA cells for each column.

-        Counts are based on exists queries against ES
+        Counts are based on exists queries against ES.

        This is inefficient, as it creates N queries (N is number of fields).
-
        An alternative approach is to use value_count aggregations. However, they have issues in that:
-        1. They can only be used with aggregatable fields (e.g. keyword not text)
-        2. For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2
-        for a single document.
+
+        - They can only be used with aggregatable fields (e.g. keyword not text)
+        - For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2 for a single document.
+
+        TODO - add additional pandas.DataFrame.count features
+
+        Returns
+        -------
+        pandas.Series:
+            Summary of column counts
+
+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.count`
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'geoip.city_name'])
+        >>> df.count()
+        customer_first_name    4675
+        geoip.city_name        4094
+        dtype: int64
        """
        return self._query_compiler.count()

    def info_es(self):
+        """
+
+        Returns
+        -------
+        None
+            This method prints a debug summary of the task list Elasticsearch
+        """
        buf = StringIO()

        super()._info_es(buf)
@ -297,9 +350,25 @@ class DataFrame(NDFrame):
        This method prints information about a DataFrame including
        the index dtype and column dtypes, non-null values and memory usage.

+        See :pandas_docs:`pandas.DataFrame.info` for full details.
+
+        Notes
+        -----
        This copies a lot of code from pandas.DataFrame.info as it is difficult
        to split out the appropriate code or creating a SparseDataFrame gives
        incorrect results on types and counts.
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'geoip.city_name'])
+        >>> df.info()
+        <class 'eland.dataframe.DataFrame'>
+        Index: 4675 entries, 0 to 4674
+        Data columns (total 2 columns):
+        customer_first_name    4675 non-null object
+        geoip.city_name        4094 non-null object
+        dtypes: object(2)
+        memory usage: 96.0 bytes
        """
        if buf is None:  # pragma: no cover
            buf = sys.stdout
@ -386,7 +455,7 @@ class DataFrame(NDFrame):
            else:
                _verbose_repr()

-        counts = self.get_dtype_counts()
+        counts = self.dtypes.value_counts()
        dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k
                  in sorted(counts.items())]
        lines.append('dtypes: {types}'.format(types=', '.join(dtypes)))
@ -623,7 +692,11 @@ class DataFrame(NDFrame):
        )

    def select_dtypes(self, include=None, exclude=None):
-        # get empty df
+        """
+        Return a subset of the DataFrame's columns based on the column dtypes.
+
+        Compatible with :pandas_docs:`pandas.DataFrame.select_dtypes`
+        """
        empty_df = self._empty_pd_df()

        empty_df = empty_df.select_dtypes(include=include, exclude=exclude)
@ -649,12 +722,6 @@ class DataFrame(NDFrame):
    def keys(self):
        return self.columns

-    def groupby(self, by=None, axis=0, *args, **kwargs):
-        axis = pd.DataFrame._get_axis_number(axis)
-
-        if axis == 1:
-            raise NotImplementedError("Aggregating via index not currently implemented - needs index transform")
-
    def aggregate(self, func, axis=0, *args, **kwargs):
        """
        Aggregate using one or more operations over the specified axis.
@ -671,11 +738,15 @@ class DataFrame(NDFrame):
            - string function name
            - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
            - dict of axis labels -> functions, function names or list of such.
+
+            Currently, we only support ``['count', 'mad', 'max', 'mean', 'median', 'min', 'mode', 'quantile',
+            'rank', 'sem', 'skew', 'sum', 'std', 'var']``
        axis
+            Currently, we only support axis=0 (index)
        *args
-            Positional arguments to pass to `func`.
+            Positional arguments to pass to `func`
        **kwargs
-            Keyword arguments to pass to `func`.
+            Keyword arguments to pass to `func`

        Returns
        -------
@ -684,6 +755,19 @@ class DataFrame(NDFrame):
            if DataFrame.agg is called with several functions, returns a DataFrame
            if Series.agg is called with single function, returns a scalar
            if Series.agg is called with several functions, returns a Series
+
+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.aggregate`
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights')
+        >>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
+             DistanceKilometers  AvgTicketPrice
+        sum        9.261629e+07    8.204365e+06
+        min        0.000000e+00    1.000205e+02
+        std        4.578263e+03    2.663867e+02
        """
        axis = pd.DataFrame._get_axis_number(axis)

@ -722,16 +806,38 @@ class DataFrame(NDFrame):
            raise NotImplementedError(expr, type(expr))

    def get(self, key, default=None):
-        """Get item from object for given key (DataFrame column, Panel
-                slice, etc.). Returns default value if not found.
+        """
+        Get item from object for given key (e.g. DataFrame column).
+        Returns default value if not found.

-                Args:
-                    key (DataFrame column, Panel slice) : the key for which value
-                    to get
+        Parameters
+        ----------
+        key: object

-                Returns:
-                    value (type of items contained in object) : A value that is
-                    stored at the key
+        Returns
+        -------
+        value: same type as items contained in object
+
+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.get`
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights')
+        >>> df.get('Carrier')
+        0         Kibana Airlines
+        1        Logstash Airways
+        2        Logstash Airways
+        3         Kibana Airlines
+        4         Kibana Airlines
+                       ...
+        13054    Logstash Airways
+        13055    Logstash Airways
+        13056    Logstash Airways
+        13057            JetBeats
+        13058            JetBeats
+        Name: Carrier, Length: 13059, dtype: object
        """
        if key in self.keys():
            return self._getitem(key)
--- a/eland/index.py
+++ b/eland/index.py
@ -1,8 +1,9 @@
+class Index:
    """
-class Index
-
    The index for an eland.DataFrame.

+    TODO - This currently has very different behaviour than pandas.Index
+
    Currently, the index is a field that exists in every document in an Elasticsearch index.
    For slicing and sorting operations it must be a docvalues field. By default _id is used,
    which can't be used for range queries and is inefficient for sorting:
@ -12,16 +13,11 @@ https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field
    but doing so is discouraged as it requires to load a lot of data in memory.
    In case sorting or aggregating on the _id field is required, it is advised to duplicate
    the content of the _id field in another field that has doc_values enabled.)
-
    """
-
-
-class Index:
    ID_INDEX_FIELD = '_id'
    ID_SORT_FIELD = '_doc'  # if index field is _id, sort by _doc

    def __init__(self, query_compiler, index_field=None):
-        # Calls setter
        self.index_field = index_field

        self._query_compiler = query_compiler
--- a/eland/mappings.py
+++ b/eland/mappings.py
@ -420,13 +420,13 @@ class Mappings:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
                                                    (self._mappings_capabilities.pd_dtype == 'float64') |
-                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].loc[
-                    columns].index.tolist()
+                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].reindex(
+                    columns).index.tolist()
            else:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
-                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].loc[
-                    columns].index.tolist()
+                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].reindex(
+                    columns).index.tolist()
        else:
            if include_bool == True:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
@ -469,26 +469,6 @@ class Mappings:

        return pd.Series(self._source_field_pd_dtypes)

-    def get_dtype_counts(self, columns=None):
-        """
-        Return counts of unique dtypes in this object.
-
-        Returns
-        -------
-        get_dtype_counts : Series
-            Series with the count of columns with each dtype.
-        """
-
-        if columns is not None:
-            return pd.Series(self._mappings_capabilities[self._mappings_capabilities._source == True]
-                             .loc[columns]
-                             .groupby('pd_dtype')['_source']
-                             .count().to_dict())
-
-        return pd.Series(self._mappings_capabilities[self._mappings_capabilities._source == True]
-                         .groupby('pd_dtype')['_source']
-                         .count().to_dict())
-
    def info_es(self, buf):
        buf.write("Mappings:\n")
        buf.write("\tcapabilities: {0}\n".format(self._mappings_capabilities))
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@ -57,10 +57,23 @@ class NDFrame:

    def _get_index(self):
        """
+        Return eland index referencing Elasticsearch field to index a DataFrame/Series

        Returns
        -------
+        eland.Index:
+            Note eland.Index has a very limited API compared to pandas.Index

+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.index`
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights')
+        >>> assert isinstance(df.index, ed.Index)
+        >>> df.index.index_field
+        '_id'
        """
        return self._query_compiler.index

@ -68,10 +81,30 @@ class NDFrame:

    @property
    def dtypes(self):
-        return self._query_compiler.dtypes
+        """
+        Return the pandas dtypes in the DataFrame. Elasticsearch types are mapped
+        to pandas dtypes as described in the docstring of Mappings._es_dtype_to_pd_dtype.

-    def get_dtype_counts(self):
-        return self._query_compiler.get_dtype_counts()
+        Returns
+        -------
+        pandas.Series
+            The data type of each column.
+
+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.dtypes`
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'AvgTicketPrice', 'timestamp', 'dayOfWeek'])
+        >>> df.dtypes
+        Origin                    object
+        AvgTicketPrice           float64
+        timestamp         datetime64[ns]
+        dayOfWeek                  int64
+        dtype: object
+        """
+        return self._query_compiler.dtypes

    def _build_repr_df(self, num_rows, num_cols):
        # Overriden version of BasePandasDataset._build_repr_df
@ -134,21 +167,71 @@ class NDFrame:
            errors="raise",
    ):
        """Return new object with labels in requested axis removed.
-        Args:
-            labels: Index or column labels to drop.
-            axis: Whether to drop labels from the index (0 / 'index') or
-                columns (1 / 'columns').
-            index, columns: Alternative to specifying axis (labels, axis=1 is
-                equivalent to columns=labels).
-            level: For MultiIndex
-            inplace: If True, do operation inplace and return None.
-            errors: If 'ignore', suppress error and existing labels are
-                dropped.
-        Returns:
-            dropped : type of caller

-        (derived from modin.base.BasePandasDataset)
+        Parameters
+        ----------
+        labels:
+            Index or column labels to drop.
+        axis:
+            Whether to drop labels from the index (0 / 'index') or columns (1 / 'columns').
+        index, columns:
+            Alternative to specifying axis (labels, axis=1 is equivalent to columns=labels).
+        level:
+            For MultiIndex - not supported
+        inplace:
+            If True, do operation inplace and return None.
+        errors:
+            If 'ignore', suppress error and existing labels are dropped.
+
+        Returns
+        -------
+        dropped:
+            type of caller
+
+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.drop`
+
+        Examples
+        --------
+        Drop a column
+
+        >>> df = ed.DataFrame('localhost', 'ecommerce', columns=['customer_first_name', 'email', 'user'])
+        >>> df.drop(columns=['user'])
+             customer_first_name                       email
+        0                  Eddie  eddie@underwood-family.zzz
+        1                   Mary      mary@bailey-family.zzz
+        2                   Gwen      gwen@butler-family.zzz
+        3                  Diane   diane@chandler-family.zzz
+        4                  Eddie      eddie@weber-family.zzz
+        ...                  ...                         ...
+        4670                Mary     mary@lambert-family.zzz
+        4671                 Jim      jim@gilbert-family.zzz
+        4672               Yahya     yahya@rivera-family.zzz
+        4673                Mary     mary@hampton-family.zzz
+        4674             Jackson  jackson@hopkins-family.zzz
+        <BLANKLINE>
+        [4675 rows x 2 columns]
+
+        Drop rows by index value (axis=0)
+
+        >>> df.drop(['1', '2'])
+             customer_first_name                       email     user
+        0                  Eddie  eddie@underwood-family.zzz    eddie
+        3                  Diane   diane@chandler-family.zzz    diane
+        4                  Eddie      eddie@weber-family.zzz    eddie
+        5                  Diane    diane@goodwin-family.zzz    diane
+        6                 Oliver      oliver@rios-family.zzz   oliver
+        ...                  ...                         ...      ...
+        4670                Mary     mary@lambert-family.zzz     mary
+        4671                 Jim      jim@gilbert-family.zzz      jim
+        4672               Yahya     yahya@rivera-family.zzz    yahya
+        4673                Mary     mary@hampton-family.zzz     mary
+        4674             Jackson  jackson@hopkins-family.zzz  jackson
+        <BLANKLINE>
+        [4673 rows x 3 columns]
        """
+        #(derived from modin.base.BasePandasDataset)
        # Level not supported
        if level is not None:
            raise NotImplementedError("level not supported {}".format(level))
@ -242,4 +325,36 @@ class NDFrame:
        return self._query_compiler._hist(num_bins)

    def describe(self):
+        """
+        Generate descriptive statistics that summarize the central tendency, dispersion and shape of a
+        dataset’s distribution, excluding NaN values.
+
+        Analyzes both numeric and object series, as well as DataFrame column sets of mixed data types.
+        The output will vary depending on what is provided. Refer to the notes below for more detail.
+
+        TODO - add additional arguments (currently only numeric values supported)
+
+        Returns
+        -------
+        pandas.DataFrame:
+            Summary information
+
+        See Also
+        --------
+        :pandas_docs:`pandas.DataFrame.describe`
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights', columns=['AvgTicketPrice', 'FlightDelay'])
+        >>> df.describe() # ignoring percentiles as they don't generate consistent results
+               AvgTicketPrice   FlightDelay
+        count    13059.000000  13059.000000
+        mean       628.253689      0.251168
+        std        266.386661      0.433685
+        min        100.020531      0.000000
+        ...
+        ...
+        ...
+        max       1199.729004      1.000000
+        """
        return self._query_compiler.describe()
--- a/eland/plotting.py
+++ b/eland/plotting.py
@ -10,36 +10,42 @@ def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None,
                  xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
                  sharey=False, figsize=None, layout=None, bins=10, **kwds):
    """
-    Derived from pandas.plotting._core.hist_frame 0.24.2 - TODO update to 0.25.1
+    See :pandas_docs:`pandas.DataFrame.hist` for usage.

-    Ideally, we'd call hist_frame directly with histogram data,
+    Notes
+    -----
+    Derived from ``pandas.plotting._core.hist_frame 0.24.2`` - TODO update to ``0.25.1``
+
+    Ideally, we'd call `hist_frame` directly with histogram data,
    but weights are applied to ALL series. For example, we can
    plot a histogram of pre-binned data via:

+    .. code-block:: python
+
        counts, bins = np.histogram(data)
        plt.hist(bins[:-1], bins, weights=counts)

    However,

+    .. code-block:: python
+
        ax.hist(data[col].dropna().values, bins=bins, **kwds)

-    is for [col] and weights are a single array.
+    is for ``[col]`` and weights are a single array.

-    We therefore cut/paste code.
+    Examples
+    --------
+    .. plot::
+        :context: close-figs
+
+        >>> df = ed.DataFrame('localhost', 'flights')
+        >>> hist = df.select_dtypes(include=[np.number]).hist(figsize=[10,10])
    """
    # Start with empty pandas data frame derived from
    ed_df_bins, ed_df_weights = ed_df._hist(num_bins=bins)

    if by is not None:
        raise NotImplementedError("TODO")
-        """
-        axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid,
-                            figsize=figsize, sharex=sharex, sharey=sharey,
-                            layout=layout, bins=bins, xlabelsize=xlabelsize,
-                            xrot=xrot, ylabelsize=ylabelsize,
-                            yrot=yrot, **kwds)
-        """
-        return axes

    if column is not None:
        if not isinstance(column, (list, np.ndarray, ABCIndexClass)):
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@ -84,11 +84,6 @@ class ElandQueryCompiler:

        return self._mappings.dtypes(columns)

-    def get_dtype_counts(self):
-        columns = self._operations.get_columns()
-
-        return self._mappings.get_dtype_counts(columns)
-
    # END Index, columns, and dtypes objects

    def _es_results_to_pandas(self, results, batch_size=None):
--- a/eland/series.py
+++ b/eland/series.py
@ -150,7 +150,7 @@ class Series(NDFrame):
        )

    def _to_pandas(self):
-        return self._query_compiler._to_pandas()[self.name]
+        return self._query_compiler.to_pandas()[self.name]

    def __gt__(self, other):
        if isinstance(other, Series):
--- a/eland/tests/dataframe/test_count_pytest.py
+++ b/eland/tests/dataframe/test_count_pytest.py
@ -4,6 +4,7 @@ from pandas.util.testing import assert_series_equal

 from eland.tests.common import TestData

+import pandas as pd

 class TestDataFrameCount(TestData):

--- a/eland/tests/mappings/test_dtypes_pytest.py
+++ b/eland/tests/mappings/test_dtypes_pytest.py
@ -24,22 +24,3 @@ class TestMappingsDtypes(TestData):
        ed_dtypes = ed_flights._query_compiler._mappings.dtypes(columns=['Carrier', 'AvgTicketPrice', 'Cancelled'])

        assert_series_equal(pd_dtypes, ed_dtypes)
-
-    def test_flights_get_dtype_counts_all(self):
-        ed_flights = self.ed_flights()
-        pd_flights = self.pd_flights()
-
-        pd_dtypes = pd_flights.get_dtype_counts().sort_index()
-        ed_dtypes = ed_flights._query_compiler._mappings.get_dtype_counts().sort_index()
-
-        assert_series_equal(pd_dtypes, ed_dtypes)
-
-    def test_flights_get_dtype_counts_columns(self):
-        ed_flights = self.ed_flights()
-        pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]
-
-        pd_dtypes = pd_flights.get_dtype_counts().sort_index()
-        ed_dtypes = ed_flights._query_compiler._mappings. \
-            get_dtype_counts(columns=['Carrier', 'AvgTicketPrice', 'Cancelled']).sort_index()
-
-        assert_series_equal(pd_dtypes, ed_dtypes)
--- a/eland/utils.py
+++ b/eland/utils.py
@ -141,3 +141,37 @@ def ed_to_pd(ed_df):
    eland.pd_to_ed: Create an eland.Dataframe from pandas.DataFrame
    """
    return ed_df._to_pandas()
+
+def _inherit_docstrings(parent, excluded=[]):
+    """Creates a decorator which overwrites a decorated class' __doc__
+    attribute with parent's __doc__ attribute. Also overwrites __doc__ of
+    methods and properties defined in the class with the __doc__ of matching
+    methods and properties in parent.
+
+    Args:
+        parent (object): Class from which the decorated class inherits __doc__.
+        excluded (list): List of parent objects from which the class does not
+            inherit docstrings.
+
+    Returns:
+        function: decorator which replaces the decorated class' documentation
+            with parent's documentation.
+    """
+
+    def decorator(cls):
+        if parent not in excluded:
+            cls.__doc__ = parent.__doc__
+        for attr, obj in cls.__dict__.items():
+            parent_obj = getattr(parent, attr, None)
+            if parent_obj in excluded or (
+                not callable(parent_obj) and not isinstance(parent_obj, property)
+            ):
+                continue
+            if callable(obj):
+                obj.__doc__ = parent_obj.__doc__
+            elif isinstance(obj, property) and obj.fget is not None:
+                p = property(obj.fget, obj.fset, obj.fdel, parent_obj.__doc__)
+                setattr(cls, attr, p)
+        return cls
+
+    return decorator