Mirror of https://github.com/elastic/eland.git (synced 2025-07-11 00:02:14 +08:00)

Merge pull request #31 from stevedodson/master

Creating docs framework.

Commit fd35fbd9f5

NOTES.md (2 lines)
@@ -47,7 +47,7 @@ the `pandas.DataFrame` API. This resolves some of the issues above as:
  than a new index
* Instead of supporting the entire `pandas.DataFrame` API we can support a subset appropriate for
  Elasticsearch. If additional calls are required, we could create a `eland.DataFrame.to_pandas()`
  Elasticsearch. If additional calls are required, we could create a `eland.DataFrame._to_pandas()`
  method which would explicitly export all data to a `pandas.DataFrame`

* Creating a new `eland.DataFrame` API gives us full flexibility in terms of implementation. However,
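The design point above (supporting a pandas-like subset, with an explicit export for anything else) can be sketched as follows. This is a minimal illustration, not part of the commit, assuming a local cluster with the Kibana `flights` sample data; `ed_to_pd` is the public export helper added in this commit's `eland/utils.py`:

    import eland as ed

    # Supported subset: operations are executed against Elasticsearch, data stays in the cluster
    df = ed.DataFrame('localhost:9200', 'flights')
    print(df.head())

    # Anything outside the subset: explicitly export to an in-memory pandas.DataFrame first
    pd_df = ed.ed_to_pd(df)
    print(pd_df.describe())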
docs/Makefile (new file, 20 lines)
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
docs/make.bat (new file, 35 lines)
@@ -0,0 +1,35 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
docs/source/conf.py (new file, 79 lines)
@@ -0,0 +1,79 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath("../sphinxext"))
sys.path.extend(
    [
        # numpy standard doc extensions
        os.path.join(os.path.dirname(__file__), "..", "../..", "sphinxext")
    ]
)


# -- Project information -----------------------------------------------------

project = 'eland'
copyright = '2019, Stephen Dodson'
author = 'Stephen Dodson'

# The full version, including alpha/beta/rc tags
release = '0.1'


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.doctest',
    'numpydoc'
]

doctest_global_setup = '''
try:
    import eland as ed
except ImportError:
    ed = None
try:
    import pandas as pd
except ImportError:
    pd = None
'''

numpydoc_attributes_as_param_list = False


# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
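The `doctest_global_setup` above is what lets the docstring examples elsewhere in this commit refer to `ed` and `pd` without importing them in every example. A small, hypothetical docstring written against that setup (illustrative only, not from the repository):

    def example():
        """
        Docstring examples can use ``ed`` and ``pd`` directly, because conf.py
        injects them via ``doctest_global_setup`` when sphinx.ext.doctest runs.

        Examples
        --------
        >>> pd_df = pd.DataFrame({'a': [1, 2, 3]})
        >>> pd_df.shape
        (3, 1)
        """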
docs/source/index.rst (new file, 30 lines)
@@ -0,0 +1,30 @@
.. eland documentation master file, created by

.. module:: eland

****************************************************************
eland: pandas-like data analysis toolkit backed by Elasticsearch
****************************************************************

**Date**: |today| **Version**: |version|

**Useful links**:
`Source Repository <https://github.com/elastic/eland>`__ |
`Issues & Ideas <https://github.com/elastic/eland/issues>`__ |
`Q&A Support <https://discuss.elastic.co>`__

:mod:`eland` is an open source, Apache2-licensed Elasticsearch Python client to analyse, explore and manipulate data that resides in Elasticsearch.
Where possible the package uses existing Python APIs and data structures to make it easy to switch between NumPy, pandas or scikit-learn and their Elasticsearch-powered equivalents.
In general, the data resides in Elasticsearch and not in memory, which allows eland to access large datasets stored in Elasticsearch.


.. toctree::
   :maxdepth: 2
   :hidden:

   reference/index

* :doc:`reference/index`

  * :doc:`reference/general_utility_functions`
  * :doc:`reference/dataframe`
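As a quick illustration of the workflow described above (a minimal sketch, not part of the commit, assuming a local Elasticsearch node with the Kibana flights sample data indexed as 'flights'):

    import eland as ed

    # The data stays in Elasticsearch; eland fetches only what is needed for display
    df = ed.read_es('localhost:9200', 'flights')
    print(df.head())
    print(df.columns)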
docs/source/reference/api/eland.DataFrame.columns.rst (new file, 6 lines)
@@ -0,0 +1,6 @@
eland.DataFrame.columns
=======================

.. currentmodule:: eland

.. autoattribute:: DataFrame.columns
docs/source/reference/api/eland.DataFrame.head.rst (new file, 6 lines)
@@ -0,0 +1,6 @@
eland.DataFrame.head
====================

.. currentmodule:: eland

.. automethod:: DataFrame.head
docs/source/reference/api/eland.DataFrame.index.rst (new file, 6 lines)
@@ -0,0 +1,6 @@
eland.DataFrame.index
=====================

.. currentmodule:: eland

.. autoattribute:: DataFrame.index
docs/source/reference/api/eland.DataFrame.rst (new file, 18 lines)
@@ -0,0 +1,18 @@
eland.DataFrame
===============

.. currentmodule:: eland

.. autoclass:: DataFrame

..
   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.

   .. autosummary::
      :toctree:

      DataFrame.abs
      DataFrame.add
docs/source/reference/api/eland.DataFrame.tail.rst (new file, 6 lines)
@@ -0,0 +1,6 @@
eland.DataFrame.tail
====================

.. currentmodule:: eland

.. automethod:: DataFrame.tail
docs/source/reference/api/eland.ed_to_pd.rst (new file, 6 lines)
@@ -0,0 +1,6 @@
eland.ed_to_pd
==============

.. currentmodule:: eland

.. autofunction:: ed_to_pd
docs/source/reference/api/eland.pd_to_ed.rst (new file, 6 lines)
@@ -0,0 +1,6 @@
eland.pd_to_ed
==============

.. currentmodule:: eland

.. autofunction:: pd_to_ed
docs/source/reference/api/eland.read_es.rst (new file, 6 lines)
@@ -0,0 +1,6 @@
eland.read_es
=============

.. currentmodule:: eland

.. autofunction:: read_es
docs/source/reference/dataframe.rst (new file, 35 lines)
@@ -0,0 +1,35 @@
.. _api.dataframe:

=========
DataFrame
=========
.. currentmodule:: eland

Constructor
~~~~~~~~~~~
.. autosummary::
   :toctree: api/

   DataFrame

Attributes and underlying data
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**Axes**

.. autosummary::
   :toctree: api/

   DataFrame.index
   DataFrame.columns

Indexing, iteration
~~~~~~~~~~~~~~~~~~~
.. autosummary::
   :toctree: api/

   DataFrame.head
   DataFrame.tail
docs/source/reference/general_utility_functions.rst (new file, 21 lines)
@@ -0,0 +1,21 @@
.. _api.general_utility_functions:

=========================
General utility functions
=========================
.. currentmodule:: eland

Elasticsearch access
~~~~~~~~~~~~~~~~~~~~
.. autosummary::
   :toctree: api/

   read_es

Pandas and Eland
~~~~~~~~~~~~~~~~
.. autosummary::
   :toctree: api/

   pd_to_ed
   ed_to_pd
docs/source/reference/index.rst (new file, 14 lines)
@@ -0,0 +1,14 @@
.. _api:

=============
API reference
=============

This page gives an overview of all public eland objects, functions and
methods. All classes and functions exposed in the ``eland.*`` namespace are public.

.. toctree::
   :maxdepth: 2

   general_utility_functions
   dataframe
@@ -1,14 +1,14 @@
from __future__ import absolute_import

from eland.client import *
from eland.dataframe import *
from eland.filter import *
from eland.index import *
from eland.mappings import *
from eland.ndframe import *
from eland.operations import *
from eland.plotting import *
from eland.query import *
from eland.operations import *
from eland.query_compiler import *
from eland.plotting import *
from eland.ndframe import *
from eland.series import *
from eland.dataframe import *
from eland.utils import *
@@ -1,6 +1,5 @@
import sys
import warnings
from distutils.version import LooseVersion
from io import StringIO

import numpy as np
@@ -20,17 +19,86 @@ from eland import NDFrame
from eland import Series
from eland.filter import BooleanFilter, ScriptFilter


class DataFrame(NDFrame):
    # This is effectively 2 constructors
    # 1. client, index_pattern, columns, index_field
    # 2. query_compiler
    """
    Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes
    (rows and columns) referencing data stored in Elasticsearch indices.
    Where possible APIs mirror pandas.DataFrame APIs.
    The underlying data is stored in Elasticsearch rather than core memory.

    Parameters
    ----------
    client: Elasticsearch client argument(s) (e.g. 'localhost:9200')
        - elasticsearch-py parameters or
        - elasticsearch-py instance or
        - eland.Client instance
    index_pattern: str
        Elasticsearch index pattern (e.g. 'flights' or 'filebeat-\*')
    columns: list of str, optional
        List of DataFrame columns. A subset of the Elasticsearch index's fields.
    index_field: str, optional
        The Elasticsearch index field to use as the DataFrame index. Defaults to _id if None is used.

    Examples
    --------
    Constructing a DataFrame from Elasticsearch configuration arguments and an Elasticsearch index

    >>> df = ed.DataFrame('localhost:9200', 'flights')
    >>> df.head()
       AvgTicketPrice  Cancelled           Carrier                                           Dest  ...  OriginRegion        OriginWeather  dayOfWeek            timestamp
    0      841.265642      False   Kibana Airlines  Sydney Kingsford Smith International Airport   ...         DE-HE                Sunny          0  2018-01-01 00:00:00
    1      882.982662      False  Logstash Airways                      Venice Marco Polo Airport  ...         SE-BD                Clear          0  2018-01-01 18:27:00
    2      190.636904      False  Logstash Airways                      Venice Marco Polo Airport  ...         IT-34                 Rain          0  2018-01-01 17:11:14
    3      181.694216       True   Kibana Airlines                    Treviso-Sant'Angelo Airport  ...         IT-72  Thunder & Lightning          0  2018-01-01 10:33:28
    4      730.041778      False   Kibana Airlines           Xi'an Xianyang International Airport  ...        MX-DIF        Damaging Wind          0  2018-01-01 05:13:00
    <BLANKLINE>
    [5 rows x 27 columns]

    Constructing a DataFrame from an Elasticsearch client and an Elasticsearch index

    >>> from elasticsearch import Elasticsearch
    >>> es = Elasticsearch("localhost:9200")
    >>> df = ed.DataFrame(client=es, index_pattern='flights', columns=['AvgTicketPrice', 'Cancelled'])
    >>> df.head()
       AvgTicketPrice  Cancelled
    0      841.265642      False
    1      882.982662      False
    2      190.636904      False
    3      181.694216       True
    4      730.041778      False
    <BLANKLINE>
    [5 rows x 2 columns]

    Constructing a DataFrame from an Elasticsearch client and an Elasticsearch index, with 'timestamp' as the DataFrame index field

    >>> df = ed.DataFrame(client='localhost', index_pattern='flights', columns=['AvgTicketPrice', 'timestamp'], index_field='timestamp')
    >>> df.head()
                         AvgTicketPrice           timestamp
    2018-01-01T00:00:00      841.265642 2018-01-01 00:00:00
    2018-01-01T00:02:06      772.100846 2018-01-01 00:02:06
    2018-01-01T00:06:27      159.990962 2018-01-01 00:06:27
    2018-01-01T00:33:31      800.217104 2018-01-01 00:33:31
    2018-01-01T00:36:51      803.015200 2018-01-01 00:36:51
    <BLANKLINE>
    [5 rows x 2 columns]
    """
    def __init__(self,
                 client=None,
                 index_pattern=None,
                 columns=None,
                 index_field=None,
                 query_compiler=None):
        """
        There are effectively 2 constructors:

        1. client, index_pattern, columns, index_field
        2. query_compiler (eland.ElandQueryCompiler)

        The constructor with 'query_compiler' is for internal use only.
        """
        if query_compiler is None:
            if client is None or index_pattern is None:
                raise ValueError("client and index_pattern must be defined in DataFrame constructor")
        # python 3 syntax
        super().__init__(
            client=client,
@@ -40,6 +108,27 @@ class DataFrame(NDFrame):
            query_compiler=query_compiler)

    def _get_columns(self):
        """
        The column labels of the DataFrame.

        Returns
        -------
        Elasticsearch field names as pandas.Index

        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights')
        >>> assert isinstance(df.columns, pd.Index)
        >>> df.columns
        Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID',
        ...    'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion',
        ...    'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay',
        ...    'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour',
        ...    'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',
        ...    'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather',
        ...    'dayOfWeek', 'timestamp'],
        ...   dtype='object')
        """
        return self._query_compiler.columns

    columns = property(_get_columns)
@@ -52,14 +141,70 @@ class DataFrame(NDFrame):
        True if the DataFrame is empty.
        False otherwise.
        """
        # TODO - this is called on every attribute get (most methods) from modin/pandas/base.py:3337
        # (as Index.__len__ performs a query) we may want to cache self.index.empty()
        return len(self.columns) == 0 or len(self.index) == 0

    def head(self, n=5):
        """
        Return the first n rows.

        This function returns the first n rows for the object based on position.
        The row order is sorted by index field.
        It is useful for quickly testing if your object has the right type of data in it.

        Parameters
        ----------
        n: int, default 5
            Number of rows to select.

        Returns
        -------
        eland.DataFrame
            eland DataFrame filtered on first n rows sorted by index field

        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
        >>> df.head(3)
                                    Origin                                           Dest
        0        Frankfurt am Main Airport  Sydney Kingsford Smith International Airport
        1  Cape Town International Airport                      Venice Marco Polo Airport
        2        Venice Marco Polo Airport                      Venice Marco Polo Airport
        <BLANKLINE>
        [3 rows x 2 columns]
        """
        return DataFrame(query_compiler=self._query_compiler.head(n))

    def tail(self, n=5):
        """
        Return the last n rows.

        This function returns the last n rows for the object based on position.
        The row order is sorted by index field.
        It is useful for quickly testing if your object has the right type of data in it.

        Parameters
        ----------
        n: int, default 5
            Number of rows to select.

        Returns
        -------
        eland.DataFrame:
            eland DataFrame filtered on last n rows sorted by index field

        Examples
        --------
        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
        >>> df.tail()
                                                          Origin                                      Dest
        13054                         Pisa International Airport      Xi'an Xianyang International Airport
        13055  Winnipeg / James Armstrong Richardson Internat...                            Zurich Airport
        13056     Licenciado Benito Juarez International Airport                         Ukrainka Air Base
        13057                                      Itami Airport  Ministro Pistarini International Airport
        13058                     Adelaide International Airport   Washington Dulles International Airport
        <BLANKLINE>
        [5 rows x 2 columns]
        """
        return DataFrame(query_compiler=self._query_compiler.tail(n))

    def __repr__(self):
@@ -92,18 +237,8 @@ class DataFrame(NDFrame):
        """
        From pandas
        """
        try:
            import IPython
        except ImportError:
            pass
        else:
            if LooseVersion(IPython.__version__) < LooseVersion('3.0'):
                if console.in_qtconsole():
                    # 'HTML output is disabled in QtConsole'
                    return None

        if self._info_repr():
            buf = StringIO()
            buf = StringIO("")
            self.info(buf=buf)
            # need to escape the <class>, should be the first line.
            val = buf.getvalue().replace('<', r'&lt;', 1)
@@ -138,7 +273,7 @@ class DataFrame(NDFrame):
    def info_es(self):
        buf = StringIO()

        super().info_es(buf)
        super()._info_es(buf)

        return buf.getvalue()
@@ -470,6 +605,13 @@ class DataFrame(NDFrame):
        return self._query_compiler.to_csv(**kwargs)

    def _to_pandas(self):
        """
        Utility method to convert an eland.DataFrame to a pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
        """
        return self._query_compiler.to_pandas()

    def _empty_pd_df(self):
@@ -529,7 +671,7 @@ class DataFrame(NDFrame):
            - string function name
            - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
            - dict of axis labels -> functions, function names or list of such.
        %(axis)s
        axis
        *args
            Positional arguments to pass to `func`.
        **kwargs
@@ -570,7 +712,7 @@ class DataFrame(NDFrame):
        """
        if isinstance(expr, BooleanFilter):
            return DataFrame(
                query_compiler=self._query_compiler._update_query(key)
                query_compiler=self._query_compiler._update_query(BooleanFilter(expr))
            )
        elif isinstance(expr, six.string_types):
            return DataFrame(
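The change above routes a `BooleanFilter` passed to `DataFrame.query` through the query compiler. A minimal sketch of the string form, assuming the flights demo index and assuming string expressions follow pandas `DataFrame.query` syntax (neither is confirmed by this diff):

    import eland as ed

    df = ed.DataFrame('localhost:9200', 'flights', columns=['AvgTicketPrice', 'FlightDelayMin'])

    # A string expression takes the six.string_types branch shown above
    delayed = df.query('FlightDelayMin > 60')
    print(delayed.head())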
@@ -56,6 +56,12 @@ class NDFrame:
        self._query_compiler = query_compiler

    def _get_index(self):
        """

        Returns
        -------

        """
        return self._query_compiler.index

    index = property(_get_index)
@@ -114,14 +120,7 @@ class NDFrame:
        """
        return len(self.index)

    @property
    def iloc(self):
        """Purely integer-location based indexing for selection by position.

        """
        return _iLocIndexer(self)

    def info_es(self, buf):
    def _info_es(self, buf):
        self._query_compiler.info_es(buf)

    def drop(
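For orientation, the public `DataFrame.info_es()` shown earlier now delegates to the renamed `_info_es`, and `__len__` delegates to `len(self.index)`. A minimal sketch of using both, assuming the flights demo index; the exact `info_es` output format is not specified by this diff:

    import eland as ed

    df = ed.DataFrame('localhost:9200', 'flights')
    print(len(df))       # row count derived from the index (a query against Elasticsearch)
    print(df.info_es())  # textual summary of how the frame maps onto the Elasticsearch index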
@@ -436,34 +436,6 @@ class ElandQueryCompiler:
    def _hist(self, num_bins):
        return self._operations.hist(self, num_bins)

    def apply(self, func, axis, *args, **kwargs):
        """Apply func across given axis.

        Args:
            func: The function to apply.
            axis: Target axis to apply the function along.

        Returns:
            A new QueryCompiler.
        """
        """Apply func across given axis.

        Args:
            func: The function to apply.
            axis: Target axis to apply the function along.

        Returns:
            A new PandasQueryCompiler.
        """
        if callable(func):
            return self._callable_func(func, axis, *args, **kwargs)
        elif isinstance(func, dict):
            return self._dict_func(func, axis, *args, **kwargs)
        elif is_list_like(func):
            return self._list_like_func(func, axis, *args, **kwargs)
        else:
            pass

    def _update_query(self, boolean_filter):
        result = self.copy()
@@ -35,7 +35,7 @@ class Series(NDFrame):
    index_pattern : str
        An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*).

    field_name : str
    index_field : str
        The field to base the series on

    See Also
@@ -91,8 +91,6 @@ class Series(NDFrame):
        True if the Series is empty.
        False otherwise.
        """
        # TODO - this is called on every attribute get (most methods) from modin/pandas/base.py:3337
        # (as Index.__len__ performs a query) we may want to cache self.index.empty()
        return len(self.index) == 0

    def _get_name(self):
@@ -152,7 +150,7 @@ class Series(NDFrame):
        )

    def _to_pandas(self):
        return self._query_compiler.to_pandas()[self.name]
        return self._query_compiler._to_pandas()[self.name]

    def __gt__(self, other):
        if isinstance(other, Series):
File diff suppressed because one or more lines are too long
@@ -37,9 +37,7 @@ class TestDataFrameDateTime(TestData):
        # Now create index
        index_name = 'eland_test_generate_es_mappings'

        ed.pandas_to_es(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)

        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        ed_df = ed.pd_to_ed(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
        ed_df_head = ed_df.head()

        assert_pandas_eland_frame_equal(df, ed_df_head)
eland/tests/dataframe/test_init_pytest.py (new file, 31 lines)
@@ -0,0 +1,31 @@
# File called _pytest for PyCharm compatibility

import eland as ed

import pytest

from eland.tests import ELASTICSEARCH_HOST
from eland.tests import FLIGHTS_INDEX_NAME


class TestDataFrameInit:

    def test_init(self):
        # Construct empty DataFrame (throws)
        with pytest.raises(ValueError):
            df = ed.DataFrame()

        # Construct invalid DataFrame (throws)
        with pytest.raises(ValueError):
            df = ed.DataFrame(client=ELASTICSEARCH_HOST)

        # Construct invalid DataFrame (throws)
        with pytest.raises(ValueError):
            df = ed.DataFrame(index_pattern=FLIGHTS_INDEX_NAME)

        # Good constructors
        df0 = ed.DataFrame(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME)
        df1 = ed.DataFrame(client=ELASTICSEARCH_HOST, index_pattern=FLIGHTS_INDEX_NAME)

        qc = ed.ElandQueryCompiler(client=ELASTICSEARCH_HOST, index_pattern=FLIGHTS_INDEX_NAME)
        df2 = ed.DataFrame(query_compiler=qc)
@@ -19,8 +19,7 @@ class TestDataFrameQuery(TestData):
        # Now create index
        index_name = 'eland_test_query1'

        ed.pandas_to_es(pd_df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        ed_df = ed.pd_to_ed(pd_df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)

        assert_pandas_eland_frame_equal(pd_df, ed_df)
@@ -4,7 +4,7 @@ import numpy as np
import pandas as pd

import eland as ed
from eland.tests.common import ELASTICSEARCH_HOST
from eland.tests.common import ELASTICSEARCH_HOST, assert_pandas_eland_frame_equal
from eland.tests.common import TestData


@@ -36,9 +36,7 @@ class TestDataFrameUtils(TestData):
        # Now create index
        index_name = 'eland_test_generate_es_mappings'

        ed.pandas_to_es(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)

        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        ed_df = ed.pd_to_ed(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
        ed_df_head = ed_df.head()

        # assert_frame_equal(df, ed_df_head)
        assert_pandas_eland_frame_equal(df, ed_df_head)
@@ -7144,7 +7144,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
eland/utils.py (105 lines)
@@ -2,44 +2,71 @@ from eland import Client
from eland import DataFrame
from eland import Mappings

import pandas as pd


def read_es(es_params, index_pattern):
    """
    Utility method to create an eland.DataFrame from an Elasticsearch index_pattern.
    (Similar to pandas.read_csv, but the source data is an Elasticsearch index rather than
    a csv file)

    Parameters
    ----------
    es_params: Elasticsearch client argument(s)
        - elasticsearch-py parameters or
        - elasticsearch-py instance or
        - eland.Client instance
    index_pattern: str
        Elasticsearch index pattern

    Returns
    -------
    eland.DataFrame

    See Also
    --------
    eland.pd_to_ed: Create an eland.DataFrame from a pandas.DataFrame
    eland.ed_to_pd: Create a pandas.DataFrame from an eland.DataFrame
    """
    return DataFrame(client=es_params, index_pattern=index_pattern)


def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
def pd_to_ed(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
             geo_points=None):
    """
    Append a pandas DataFrame to an Elasticsearch index.
    Mainly used in testing.
    Modifies the Elasticsearch destination index

    Parameters
    ----------
    es_params : Elasticsearch client argument
        elasticsearch-py parameters or
        elasticsearch-py instance or
        eland.Client instance
    es_params: Elasticsearch client argument(s)
        - elasticsearch-py parameters or
        - elasticsearch-py instance or
        - eland.Client instance
    destination_index: str
        Name of Elasticsearch index to be appended to
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        How to behave if the index already exists.

    destination_index : str
        Name of Elasticsearch index to be written

    if_exists : str, default 'fail'
        Behavior when the destination index exists. Value can be one of:
        ``'fail'``
            If table exists, do nothing.
        ``'replace'``
            If table exists, drop it, recreate it, and insert data.
        ``'append'``
            If table exists, insert data. Create if does not exist.

    dropna : bool
        ``'True'``
            Remove missing values (see pandas.Series.dropna)
        ``'False'``
            Include missing values - may cause bulk to fail

    geo_points : list or None
        - fail: Raise a ValueError.
        - replace: Delete the index before inserting new values.
        - append: Insert new values to the existing index. Create if does not exist.
    dropna: bool, default 'False'
        * True: Remove missing values (see pandas.Series.dropna)
        * False: Include missing values - may cause bulk to fail
    geo_points: list, default None
        List of columns to map to geo_point data type

    Returns
    -------
    eland.DataFrame
        eland.DataFrame referencing data in destination_index

    See Also
    --------
    eland.read_es: Create an eland.DataFrame from an Elasticsearch index
    eland.ed_to_pd: Create a pandas.DataFrame from an eland.DataFrame
    """
    client = Client(es_params)

@@ -86,3 +113,31 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=
            actions = []

    client.bulk(actions, refresh=refresh)

    ed_df = DataFrame(client, destination_index)

    return ed_df


def ed_to_pd(ed_df):
    """
    Convert an eland.DataFrame to a pandas.DataFrame

    **Note: this loads the entire Elasticsearch index into in-core pandas.DataFrame structures. For large
    indices this can create significant load on the Elasticsearch cluster and require significant memory**

    Parameters
    ----------
    ed_df: eland.DataFrame
        The source eland.DataFrame referencing the Elasticsearch index

    Returns
    -------
    pandas.DataFrame
        pandas.DataFrame containing all rows and columns in the eland.DataFrame

    See Also
    --------
    eland.read_es: Create an eland.DataFrame from an Elasticsearch index
    eland.pd_to_ed: Create an eland.DataFrame from a pandas.DataFrame
    """
    return ed_df._to_pandas()
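Taken together, `read_es`, `pd_to_ed` and `ed_to_pd` give a simple round trip between pandas and Elasticsearch. A minimal sketch, not part of the commit, assuming a local cluster at localhost:9200; the index name 'eland_example_roundtrip' is made up for illustration:

    import pandas as pd
    import eland as ed

    pd_df = pd.DataFrame({'name': ['a', 'b', 'c'], 'value': [1.0, 2.0, 3.0]})

    # pandas -> Elasticsearch; returns an eland.DataFrame referencing the new index
    ed_df = ed.pd_to_ed(pd_df, 'localhost:9200', 'eland_example_roundtrip', if_exists='replace', refresh=True)

    # Work against Elasticsearch using the pandas-like subset
    print(ed_df.head())

    # Elasticsearch -> pandas; pulls the whole index into memory
    round_tripped = ed.ed_to_pd(ed_df)
    print(round_tripped)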
make_docs.sh (new file, 9 lines)
@@ -0,0 +1,9 @@
#!/bin/sh

python setup.py install

cd docs

make clean
make html
@@ -1,3 +1,6 @@
elasticsearch>=7.0.5
pandas==0.25.1
matplotlib
pytest>=5.2.1
sphinx_rtd_theme
numpydoc==0.8