Merge pull request #31 from stevedodson/master

Creating docs framework.
2025-07-11 00:02:14 +08:00 · 2019-11-12 21:30:33 +01:00 · 2019-11-12 21:30:33 +01:00 · fd35fbd9f5
commit fd35fbd9f5
parent 862f58ec63 d8c1e18161
30 changed files with 6134 additions and 3898 deletions
--- a/NOTES.md
+++ b/NOTES.md
@ -47,7 +47,7 @@ the `pandas.DataFrame` API. This resolves some of the issues above as:
 than a new index
 * Instead of supporting the entire `pandas.DataFrame` API we can support a subset appropriate for
-Elasticsearch. If addition calls are required, we could to create a `eland.DataFrame.to_pandas()` 
+Elasticsearch. If additional calls are required, we could create an `eland.DataFrame._to_pandas()` 
 method which would explicitly export all data to a `pandas.DataFrame` 
 * Creating a new `eland.DataFrame` API gives us full flexibility in terms of implementation. However, 
--- a/docs/Makefile
+++ b/docs/Makefile
@ -0,0 +1,20 @@
 # Minimal makefile for Sphinx documentation
 #
 # You can set these variables from the command line, and also
 # from the environment for the first two.
 SPHINXOPTS    ?=
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = source
 BUILDDIR      = build
 # Put it first so that "make" without argument is like "make help".
 help:
 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
 .PHONY: help Makefile
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
 %: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/make.bat
+++ b/docs/make.bat
@ -0,0 +1,35 @@
@ECHO OFF
 pushd %~dp0
 REM Command file for Sphinx documentation
 if "%SPHINXBUILD%" == "" (
 	set SPHINXBUILD=sphinx-build
 )
 set SOURCEDIR=source
 set BUILDDIR=build
 if "%1" == "" goto help
 %SPHINXBUILD% >NUL 2>NUL
 if errorlevel 9009 (
 	echo.
 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 	echo.installed, then set the SPHINXBUILD environment variable to point
 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 	echo.may add the Sphinx directory to PATH.
 	echo.
 	echo.If you don't have Sphinx installed, grab it from
 	echo.http://sphinx-doc.org/
 	exit /b 1
 )
 %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 goto end
 :help
 %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
 :end
 popd
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -0,0 +1,79 @@
 # Configuration file for the Sphinx documentation builder.
 #
 # This file only contains a selection of the most common options. For a full
 # list see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
 # -- Path setup --------------------------------------------------------------
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import os
 import sys
 sys.path.insert(0, os.path.abspath("../sphinxext"))
 sys.path.extend(
    [
        # numpy standard doc extensions
        os.path.join(os.path.dirname(__file__), "..", "../..", "sphinxext")
    ]
 )
 # -- Project information -----------------------------------------------------
 project = 'eland'
 copyright = '2019, Stephen Dodson'
 author = 'Stephen Dodson'
 # The full version, including alpha/beta/rc tags
 release = '0.1'
 # -- General configuration ---------------------------------------------------
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
    'sphinx.ext.autodoc',
    "sphinx.ext.doctest",
    'numpydoc'
 ]
 # Global setup code for sphinx.ext.doctest: binds the `ed` and `pd` names used
 # by the docstring examples, falling back to None when a package cannot be
 # imported.
 doctest_global_setup = '''
 try:
    import eland as ed
 except ImportError:
    ed = None
 try:
    import pandas as pd
 except ImportError:
    pd = None
 '''
 numpydoc_attributes_as_param_list = False
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
 exclude_patterns = []
 # -- Options for HTML output -------------------------------------------------
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
 html_theme = 'sphinx_rtd_theme'
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@ -0,0 +1,30 @@
 .. eland documentation master file, created by
 .. module:: eland
 ****************************************************************
 eland: pandas-like data analysis toolkit backed by Elasticsearch
 ****************************************************************
 **Date**: |today| **Version**: |version|
 **Useful links**:
 `Source Repository <https://github.com/elastic/eland>`__ |
 `Issues & Ideas <https://github.com/elastic/eland/issues>`__ |
 `Q&A Support <https://discuss.elastic.co>`__ |
 :mod:`eland` is an open source, Apache2-licensed Elasticsearch Python client to analyse, explore and manipulate data that resides in Elasticsearch.
 Where possible the package uses existing Python APIs and data structures to make it easy to switch from NumPy, pandas and scikit-learn to their Elasticsearch-powered equivalents.
 In general, the data resides in Elasticsearch and not in memory, which allows eland to access large datasets stored in Elasticsearch.
 .. toctree::
   :maxdepth: 2
   :hidden:
   reference/index
 * :doc:`reference/index`
  * :doc:`reference/general_utility_functions`
  * :doc:`reference/dataframe`
--- a/docs/source/reference/api/eland.DataFrame.columns.rst
+++ b/docs/source/reference/api/eland.DataFrame.columns.rst
@ -0,0 +1,6 @@
 eland.DataFrame.columns
 =======================
 .. currentmodule:: eland
 .. autoattribute:: DataFrame.columns
--- a/docs/source/reference/api/eland.DataFrame.head.rst
+++ b/docs/source/reference/api/eland.DataFrame.head.rst
@ -0,0 +1,6 @@
 eland.DataFrame.head
 ====================
 .. currentmodule:: eland
 .. automethod:: DataFrame.head
--- a/docs/source/reference/api/eland.DataFrame.index.rst
+++ b/docs/source/reference/api/eland.DataFrame.index.rst
@ -0,0 +1,6 @@
 eland.DataFrame.index
 =====================
 .. currentmodule:: eland
 .. autoattribute:: DataFrame.index
--- a/docs/source/reference/api/eland.DataFrame.rst
+++ b/docs/source/reference/api/eland.DataFrame.rst
@ -0,0 +1,18 @@
 eland.DataFrame
 ================
 .. currentmodule:: eland
 .. autoclass:: DataFrame
 ..
   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
   .. autosummary::
      :toctree:
      DataFrame.abs
      DataFrame.add
--- a/docs/source/reference/api/eland.DataFrame.tail.rst
+++ b/docs/source/reference/api/eland.DataFrame.tail.rst
@ -0,0 +1,6 @@
 eland.DataFrame.tail
 ====================
 .. currentmodule:: eland
 .. automethod:: DataFrame.tail
--- a/docs/source/reference/api/eland.ed_to_pd.rst
+++ b/docs/source/reference/api/eland.ed_to_pd.rst
@ -0,0 +1,6 @@
 eland.ed_to_pd
 ==============
 .. currentmodule:: eland
 .. autofunction:: ed_to_pd
--- a/docs/source/reference/api/eland.pd_to_ed.rst
+++ b/docs/source/reference/api/eland.pd_to_ed.rst
@ -0,0 +1,6 @@
 eland.pd_to_ed
 ==============
 .. currentmodule:: eland
 .. autofunction:: pd_to_ed
--- a/docs/source/reference/api/eland.read_es.rst
+++ b/docs/source/reference/api/eland.read_es.rst
@ -0,0 +1,6 @@
 eland.read_es
 =============
 .. currentmodule:: eland
 .. autofunction:: read_es
--- a/docs/source/reference/dataframe.rst
+++ b/docs/source/reference/dataframe.rst
@ -0,0 +1,35 @@
 .. _api.dataframe:
 =========
 DataFrame
 =========
 .. currentmodule:: eland
 Constructor
 ~~~~~~~~~~~
 .. autosummary::
   :toctree: api/
   DataFrame
 Attributes and underlying data
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 **Axes**
 .. autosummary::
   :toctree: api/
   DataFrame.index
   DataFrame.columns
 Indexing, iteration
 ~~~~~~~~~~~~~~~~~~~
 .. autosummary::
   :toctree: api/
   DataFrame.head
   DataFrame.tail
--- a/docs/source/reference/general_utility_functions.rst
+++ b/docs/source/reference/general_utility_functions.rst
@ -0,0 +1,21 @@
 .. _api.general_utility_functions:
 =========================
 General utility functions
 =========================
 .. currentmodule:: eland
 Elasticsearch access
 ~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
   :toctree: api/
    read_es
 Pandas and Eland
 ~~~~~~~~~~~~~~~~
 .. autosummary::
   :toctree: api/
    pd_to_ed
    ed_to_pd
--- a/docs/source/reference/index.rst
+++ b/docs/source/reference/index.rst
@ -0,0 +1,14 @@
 .. _api:
 =============
 API reference
 =============
 This page gives an overview of all public eland objects, functions and
 methods. All classes and functions exposed in ``eland.*`` namespace are public.
 .. toctree::
   :maxdepth: 2
   general_utility_functions
   dataframe
--- a/eland/__init__.py
+++ b/eland/__init__.py
@ -1,14 +1,14 @@
 from __future__ import absolute_import
 from eland.client import *
 from eland.dataframe import *
 from eland.filter import *
 from eland.index import *
 from eland.mappings import *
 from eland.ndframe import *
 from eland.operations import *
 from eland.plotting import *
 from eland.query import *
 from eland.operations import *
 from eland.query_compiler import *
 from eland.plotting import *
 from eland.ndframe import *
 from eland.series import *
 from eland.dataframe import *
 from eland.utils import *
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@ -1,6 +1,5 @@
 import sys
 import warnings
 from distutils.version import LooseVersion
 from io import StringIO
 import numpy as np
@ -20,17 +19,86 @@ from eland import NDFrame
 from eland import Series
 from eland.filter import BooleanFilter, ScriptFilter
 class DataFrame(NDFrame):
-    # This is effectively 2 constructors
+    """
-    # 1. client, index_pattern, columns, index_field
+    Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes
-    # 2. query_compiler
+    (rows and columns) referencing data stored in Elasticsearch indices.
    Where possible APIs mirror pandas.DataFrame APIs.
    The underlying data is stored in Elasticsearch rather than core memory.
    Parameters
    ----------
    client: Elasticsearch client argument(s) (e.g. 'localhost:9200')
        - elasticsearch-py parameters or
        - elasticsearch-py instance or
        - eland.Client instance
    index_pattern: str
        Elasticsearch index pattern (e.g. 'flights' or 'filebeat-\*')
    columns: list of str, optional
        List of DataFrame columns. A subset of the Elasticsearch index's fields.
    index_field: str, optional
        The Elasticsearch index field to use as the DataFrame index. Defaults to _id if None is used.
    Examples
    --------
    Constructing DataFrame from an Elasticsearch configuration arguments and an Elasticsearch index
    >>> df = ed.DataFrame('localhost:9200', 'flights')
    >>> df.head()
       AvgTicketPrice  Cancelled           Carrier                                          Dest  ... OriginRegion        OriginWeather dayOfWeek           timestamp
    0      841.265642      False   Kibana Airlines  Sydney Kingsford Smith International Airport  ...        DE-HE                Sunny         0 2018-01-01 00:00:00
    1      882.982662      False  Logstash Airways                     Venice Marco Polo Airport  ...        SE-BD                Clear         0 2018-01-01 18:27:00
    2      190.636904      False  Logstash Airways                     Venice Marco Polo Airport  ...        IT-34                 Rain         0 2018-01-01 17:11:14
    3      181.694216       True   Kibana Airlines                   Treviso-Sant'Angelo Airport  ...        IT-72  Thunder & Lightning         0 2018-01-01 10:33:28
    4      730.041778      False   Kibana Airlines          Xi'an Xianyang International Airport  ...       MX-DIF        Damaging Wind         0 2018-01-01 05:13:00
    <BLANKLINE>
    [5 rows x 27 columns]
    Constructing DataFrame from an Elasticsearch client and an Elasticsearch index
    >>> from elasticsearch import Elasticsearch
    >>> es = Elasticsearch("localhost:9200")
    >>> df = ed.DataFrame(client=es, index_pattern='flights', columns=['AvgTicketPrice', 'Cancelled'])
    >>> df.head()
       AvgTicketPrice  Cancelled
    0      841.265642      False
    1      882.982662      False
    2      190.636904      False
    3      181.694216       True
    4      730.041778      False
    <BLANKLINE>
    [5 rows x 2 columns]
    Constructing DataFrame from an Elasticsearch client and an Elasticsearch index, with 'timestamp' as the  DataFrame index field
    >>> df = ed.DataFrame(client='localhost', index_pattern='flights', columns=['AvgTicketPrice', 'timestamp'], index_field='timestamp')
    >>> df.head()
                         AvgTicketPrice           timestamp
    2018-01-01T00:00:00      841.265642 2018-01-01 00:00:00
    2018-01-01T00:02:06      772.100846 2018-01-01 00:02:06
    2018-01-01T00:06:27      159.990962 2018-01-01 00:06:27
    2018-01-01T00:33:31      800.217104 2018-01-01 00:33:31
    2018-01-01T00:36:51      803.015200 2018-01-01 00:36:51
    <BLANKLINE>
    [5 rows x 2 columns]
    """
    def __init__(self,
                 client=None,
                 index_pattern=None,
                 columns=None,
                 index_field=None,
                 query_compiler=None):
        """
        There are effectively 2 constructors:
        1. client, index_pattern, columns, index_field
        2. query_compiler (eland.ElandQueryCompiler)
        The constructor with 'query_compiler' is for internal use only.
        """
        if query_compiler is None:
            if client is None or index_pattern is None:
                raise ValueError("client and index_pattern must be defined in DataFrame constructor")
        # python 3 syntax
        super().__init__(
            client=client,
@ -40,6 +108,27 @@ class DataFrame(NDFrame):
            query_compiler=query_compiler)
    def _get_columns(self):
        """
        The column labels of the DataFrame.
        Returns
        -------
        Elasticsearch field names as pandas.Index
        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> assert isinstance(df.columns, pd.Index)
        >>> df.columns
        Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID',
        ...   'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion',
        ...   'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay',
        ...   'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour',
        ...   'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',
        ...   'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather',
        ...   'dayOfWeek', 'timestamp'],
        ...  dtype='object')
        """
        return self._query_compiler.columns
    columns = property(_get_columns)
@ -52,14 +141,70 @@ class DataFrame(NDFrame):
            True if the DataFrame is empty.
            False otherwise.
        """
        # TODO - this is called on every attribute get (most methods) from modin/pandas/base.py:3337
        #  (as Index.__len__ performs an query) we may want to cache self.index.empty()
        return len(self.columns) == 0 or len(self.index) == 0
    def head(self, n=5):
        """
        Return the first n rows.

        Rows are selected by position, with the row order given by the index field.
        Handy for quickly checking that the object holds the right kind of data.

        Parameters
        ----------
        n: int, default 5
            Number of rows to select.

        Returns
        -------
        eland.DataFrame
            eland DataFrame filtered on first n rows sorted by index field

        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
        >>> df.head(3)
                                    Origin                                          Dest
        0        Frankfurt am Main Airport  Sydney Kingsford Smith International Airport
        1  Cape Town International Airport                     Venice Marco Polo Airport
        2        Venice Marco Polo Airport                     Venice Marco Polo Airport
        <BLANKLINE>
        [3 rows x 2 columns]
        """
        first_rows = self._query_compiler.head(n)
        return DataFrame(query_compiler=first_rows)
    def tail(self, n=5):
        """
        Return the last n rows.

        Rows are selected by position, with the row order given by the index field.
        Handy for quickly checking that the object holds the right kind of data.

        Parameters
        ----------
        n: int, default 5
            Number of rows to select.

        Returns
        -------
        eland.DataFrame
            eland DataFrame filtered on last n rows sorted by index field

        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
        >>> df.tail()
                                                          Origin                                      Dest
        13054                         Pisa International Airport      Xi'an Xianyang International Airport
        13055  Winnipeg / James Armstrong Richardson Internat...                            Zurich Airport
        13056     Licenciado Benito Juarez International Airport                         Ukrainka Air Base
        13057                                      Itami Airport  Ministro Pistarini International Airport
        13058                     Adelaide International Airport   Washington Dulles International Airport
        <BLANKLINE>
        [5 rows x 2 columns]
        """
        last_rows = self._query_compiler.tail(n)
        return DataFrame(query_compiler=last_rows)
    def __repr__(self):
@ -92,18 +237,8 @@ class DataFrame(NDFrame):
        """
        From pandas
        """
        try:
            import IPython
        except ImportError:
            pass
        else:
            if LooseVersion(IPython.__version__) < LooseVersion('3.0'):
                if console.in_qtconsole():
                    # 'HTML output is disabled in QtConsole'
                    return None
        if self._info_repr():
-            buf = StringIO()
+            buf = StringIO("")
            self.info(buf=buf)
            # need to escape the <class>, should be the first line.
            val = buf.getvalue().replace('<', r'&lt;', 1)
@ -138,7 +273,7 @@ class DataFrame(NDFrame):
    def info_es(self):
        buf = StringIO()
-        super().info_es(buf)
+        super()._info_es(buf)
        return buf.getvalue()
@ -470,6 +605,13 @@ class DataFrame(NDFrame):
        return self._query_compiler.to_csv(**kwargs)
    def _to_pandas(self):
        """
        Utility method to convert eland.Dataframe to pandas.Dataframe
        Returns
        -------
        pandas.DataFrame
        """
        return self._query_compiler.to_pandas()
    def _empty_pd_df(self):
@ -529,7 +671,7 @@ class DataFrame(NDFrame):
            - string function name
            - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
            - dict of axis labels -> functions, function names or list of such.
-        %(axis)s
+        axis
        *args
            Positional arguments to pass to `func`.
        **kwargs
@ -570,7 +712,7 @@ class DataFrame(NDFrame):
        """
        if isinstance(expr, BooleanFilter):
            return DataFrame(
-                query_compiler=self._query_compiler._update_query(key)
+                query_compiler=self._query_compiler._update_query(BooleanFilter(expr))
            )
        elif isinstance(expr, six.string_types):
            return DataFrame(
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@ -56,6 +56,12 @@ class NDFrame:
        self._query_compiler = query_compiler
    def _get_index(self):
        """
        Returns
        -------
        """
        return self._query_compiler.index
    index = property(_get_index)
@ -114,14 +120,7 @@ class NDFrame:
        """
        return len(self.index)
-    @property
+    def _info_es(self, buf):
    def iloc(self):
        """Purely integer-location based indexing for selection by position.
        """
        return _iLocIndexer(self)
    def info_es(self, buf):
        self._query_compiler.info_es(buf)
    def drop(
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@ -436,34 +436,6 @@ class ElandQueryCompiler:
    def _hist(self, num_bins):
        return self._operations.hist(self, num_bins)
    def apply(self, func, axis, *args, **kwargs):
        """Apply func across given axis.
        Args:
            func: The function to apply.
            axis: Target axis to apply the function along.
        Returns:
            A new QueryCompiler.
        """
        """Apply func across given axis.
                Args:
                    func: The function to apply.
                    axis: Target axis to apply the function along.
                Returns:
                    A new PandasQueryCompiler.
                """
        if callable(func):
            return self._callable_func(func, axis, *args, **kwargs)
        elif isinstance(func, dict):
            return self._dict_func(func, axis, *args, **kwargs)
        elif is_list_like(func):
            return self._list_like_func(func, axis, *args, **kwargs)
        else:
            pass
    def _update_query(self, boolean_filter):
        result = self.copy()
--- a/eland/series.py
+++ b/eland/series.py
@ -35,7 +35,7 @@ class Series(NDFrame):
    index_pattern : str
        An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*).
-    field_name : str
+    index_field : str
        The field to base the series on
    See Also
@ -91,8 +91,6 @@ class Series(NDFrame):
            True if the Series is empty.
            False otherwise.
        """
        # TODO - this is called on every attribute get (most methods) from modin/pandas/base.py:3337
        #  (as Index.__len__ performs an query) we may want to cache self.index.empty()
        return len(self.index) == 0
    def _get_name(self):
@ -152,7 +150,7 @@ class Series(NDFrame):
        )
    def _to_pandas(self):
-        return self._query_compiler.to_pandas()[self.name]
+        return self._query_compiler._to_pandas()[self.name]
    def __gt__(self, other):
        if isinstance(other, Series):
--- a/eland/tests/Eland
+++ b/eland/tests/Eland
--- a/eland/tests/dataframe/test_datetime_pytest.py
+++ b/eland/tests/dataframe/test_datetime_pytest.py
@ -37,9 +37,7 @@ class TestDataFrameDateTime(TestData):
        # Now create index
        index_name = 'eland_test_generate_es_mappings'
-        ed.pandas_to_es(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
+        ed_df = ed.pd_to_ed(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        ed_df_head = ed_df.head()
        assert_pandas_eland_frame_equal(df, ed_df_head)
--- a/eland/tests/dataframe/test_init_pytest.py
+++ b/eland/tests/dataframe/test_init_pytest.py
@ -0,0 +1,31 @@
 # File called _pytest for PyCharm compatibility
 import eland as ed
 import pytest
 from eland.tests import ELASTICSEARCH_HOST
 from eland.tests import FLIGHTS_INDEX_NAME
 class TestDataFrameInit:
    """Checks which argument combinations the eland.DataFrame constructor accepts."""

    def test_init(self):
        # Incomplete argument sets must be rejected with ValueError:
        # no arguments, client only, index pattern only.
        incomplete_kwargs = (
            {},
            {'client': ELASTICSEARCH_HOST},
            {'index_pattern': FLIGHTS_INDEX_NAME},
        )
        for kwargs in incomplete_kwargs:
            with pytest.raises(ValueError):
                ed.DataFrame(**kwargs)

        # Valid construction: positional arguments, keyword arguments,
        # and an existing query compiler.
        ed.DataFrame(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME)
        ed.DataFrame(client=ELASTICSEARCH_HOST, index_pattern=FLIGHTS_INDEX_NAME)
        compiler = ed.ElandQueryCompiler(client=ELASTICSEARCH_HOST, index_pattern=FLIGHTS_INDEX_NAME)
        ed.DataFrame(query_compiler=compiler)
--- a/eland/tests/dataframe/test_query_pytest.py
+++ b/eland/tests/dataframe/test_query_pytest.py
@ -19,8 +19,7 @@ class TestDataFrameQuery(TestData):
        # Now create index
        index_name = 'eland_test_query1'
-        ed.pandas_to_es(pd_df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
+        ed_df = ed.pd_to_ed(pd_df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        assert_pandas_eland_frame_equal(pd_df, ed_df)
--- a/eland/tests/dataframe/test_utils_pytest.py
+++ b/eland/tests/dataframe/test_utils_pytest.py
@ -4,7 +4,7 @@ import numpy as np
 import pandas as pd
 import eland as ed
-from eland.tests.common import ELASTICSEARCH_HOST
+from eland.tests.common import ELASTICSEARCH_HOST, assert_pandas_eland_frame_equal
 from eland.tests.common import TestData
@ -36,9 +36,7 @@ class TestDataFrameUtils(TestData):
        # Now create index
        index_name = 'eland_test_generate_es_mappings'
-        ed.pandas_to_es(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
+        ed_df = ed.pd_to_ed(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        ed_df_head = ed_df.head()
-        # assert_frame_equal(df, ed_df_head)
+        assert_pandas_eland_frame_equal(df, ed_df_head)
--- a/eland/tests/demo_day_20190815.ipynb
+++ b/eland/tests/demo_day_20190815.ipynb
@ -7144,7 +7144,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.7.4"
  }
 },
 "nbformat": 4,
--- a/eland/utils.py
+++ b/eland/utils.py
@ -2,44 +2,71 @@ from eland import Client
 from eland import DataFrame
 from eland import Mappings
 import pandas as pd
 def read_es(es_params, index_pattern):
    return DataFrame(client=es_params, index_pattern=index_pattern)
 def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
                 geo_points=None):
    """
-    Append a pandas DataFrame to an Elasticsearch index.
+    Utility method to create an eland.Dataframe from an Elasticsearch index_pattern.
-    Mainly used in testing.
+    (Similar to pandas.read_csv, but source data is an Elasticsearch index rather than
    a csv file)
    Parameters
    ----------
-    es_params : Elasticsearch client argument
+    es_params: Elasticsearch client argument(s)
-        elasticsearch-py parameters or
+        - elasticsearch-py parameters or
-        elasticsearch-py instance or
+        - elasticsearch-py instance or
-        eland.Client instance
+        - eland.Client instance
    index_pattern: str
        Elasticsearch index pattern
-    destination_index : str
+    Returns
-        Name of Elasticsearch index to be written
+    -------
    eland.DataFrame
-    if_exists : str, default 'fail'
+    See Also
-        Behavior when the destination index exists. Value can be one of:
+    --------
-        ``'fail'``
+    eland.pd_to_ed: Create an eland.Dataframe from pandas.DataFrame
-            If table exists, do nothing.
+    eland.ed_to_pd: Create a pandas.Dataframe from eland.DataFrame
-        ``'replace'``
+    """
-            If table exists, drop it, recreate it, and insert data.
+    return DataFrame(client=es_params, index_pattern=index_pattern)
        ``'append'``
                If table exists, insert data. Create if does not exist.
-    dropna : bool
+def pd_to_ed(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
-        ``'True'``
+             geo_points=None):
-            Remove missing values (see pandas.Series.dropna)
+    """
-        ``'False;``
+    Append a pandas DataFrame to an Elasticsearch index.
-            Include missing values - may cause bulk to fail
+    Mainly used in testing.
    Modifies the elasticsearch destination index
-    geo_points : list or None
+    Parameters
    ----------
    es_params: Elasticsearch client argument(s)
        - elasticsearch-py parameters or
        - elasticsearch-py instance or
        - eland.Client instance
    destination_index: str
        Name of Elasticsearch index to be appended to
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        How to behave if the index already exists.
        - fail: Raise a ValueError.
        - replace: Delete the index before inserting new values.
        - append: Insert new values to the existing index. Create if does not exist.
    dropna: bool, default 'False'
        * True: Remove missing values (see pandas.Series.dropna)
        * False: Include missing values - may cause bulk to fail
    geo_points: list, default None
        List of columns to map to geo_point data type
    Returns
    -------
    eland.Dataframe
        eland.DataFrame referencing data in destination_index
    See Also
    --------
    eland.read_es: Create an eland.Dataframe from an Elasticsearch index
    eland.ed_to_pd: Create a pandas.Dataframe from eland.DataFrame
    """
    client = Client(es_params)
@ -86,3 +113,31 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=
            actions = []
    client.bulk(actions, refresh=refresh)
    ed_df = DataFrame(client, destination_index)
    return ed_df
 def ed_to_pd(ed_df):
    """
    Convert an eland.DataFrame into a pandas.DataFrame.

    **Note: the entire Elasticsearch index is loaded into in-core pandas.DataFrame structures. For large
    indices this can put significant load on the Elasticsearch cluster and require significant memory**

    Parameters
    ----------
    ed_df: eland.DataFrame
        The source eland.DataFrame referencing the Elasticsearch index

    Returns
    -------
    pandas.DataFrame
        pandas.DataFrame containing all rows and columns of the eland.DataFrame

    See Also
    --------
    eland.read_es: Create an eland.DataFrame from an Elasticsearch index
    eland.pd_to_ed: Create an eland.DataFrame from a pandas.DataFrame
    """
    pandas_df = ed_df._to_pandas()
    return pandas_df
--- a/make_docs.sh
+++ b/make_docs.sh
@ -0,0 +1,9 @@
 #!/bin/sh
 # Install the package into the current Python environment, then rebuild the
 # HTML documentation from scratch using the Makefile in docs/.
 #
 # Abort on the first failing command: without this, a failed install or a
 # failed `cd docs` would still run `make clean` / `make html`, either against
 # a stale install or in the wrong directory.
 set -e
 python setup.py install
 cd docs
 make clean
 make html
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@ -1,3 +1,6 @@
 elasticsearch>=7.0.5
 pandas==0.25.1
 matplotlib
 pytest>=5.2.1
 sphinx_rtd_theme
 numpydoc==0.8