From e181476dfe25a2abbe3dcf16ea663d2cbf09eab2 Mon Sep 17 00:00:00 2001
From: Stephen Dodson <steve.dodson@elastic.co>
Date: Tue, 12 Nov 2019 20:26:59 +0000
Subject: [PATCH] First effort at tidying up docs. Still work-in-progress.

---
 NOTES.md                                      |   2 +-
 docs/Makefile                                 |  20 +++
 docs/make.bat                                 |  35 ++++
 docs/source/conf.py                           |  79 +++++++++
 docs/source/index.rst                         |  30 ++++
 .../reference/api/eland.DataFrame.columns.rst |   6 +
 .../reference/api/eland.DataFrame.head.rst    |   6 +
 .../reference/api/eland.DataFrame.index.rst   |   6 +
 docs/source/reference/api/eland.DataFrame.rst |  18 ++
 .../reference/api/eland.DataFrame.tail.rst    |   6 +
 docs/source/reference/api/eland.ed_to_pd.rst  |   6 +
 docs/source/reference/api/eland.pd_to_ed.rst  |   6 +
 docs/source/reference/api/eland.read_es.rst   |   6 +
 docs/source/reference/dataframe.rst           |  35 ++++
 .../reference/general_utility_functions.rst   |  21 +++
 docs/source/reference/index.rst               |  14 ++
 eland/dataframe.py                            | 165 +++++++++++++++++-
 eland/ndframe.py                              |   6 +
 eland/series.py                               |   4 +-
 eland/tests/dataframe/test_datetime_pytest.py |   4 +-
 eland/tests/dataframe/test_init_pytest.py     |  31 ++++
 eland/tests/dataframe/test_query_pytest.py    |   3 +-
 eland/tests/dataframe/test_utils_pytest.py    |   8 +-
 eland/utils.py                                | 109 +++++++++---
 make_docs.sh                                  |   9 +
 requirements-dev.txt                          |   3 +
 26 files changed, 591 insertions(+), 47 deletions(-)
 create mode 100644 docs/Makefile
 create mode 100644 docs/make.bat
 create mode 100644 docs/source/conf.py
 create mode 100644 docs/source/index.rst
 create mode 100644 docs/source/reference/api/eland.DataFrame.columns.rst
 create mode 100644 docs/source/reference/api/eland.DataFrame.head.rst
 create mode 100644 docs/source/reference/api/eland.DataFrame.index.rst
 create mode 100644 docs/source/reference/api/eland.DataFrame.rst
 create mode 100644 docs/source/reference/api/eland.DataFrame.tail.rst
 create mode 100644 docs/source/reference/api/eland.ed_to_pd.rst
 create mode 100644 docs/source/reference/api/eland.pd_to_ed.rst
 create mode 100644 docs/source/reference/api/eland.read_es.rst
 create mode 100644 docs/source/reference/dataframe.rst
 create mode 100644 docs/source/reference/general_utility_functions.rst
 create mode 100644 docs/source/reference/index.rst
 create mode 100644 eland/tests/dataframe/test_init_pytest.py
 create mode 100644 make_docs.sh

diff --git a/NOTES.md b/NOTES.md
index 6b71e1a..7fa3635 100644
--- a/NOTES.md
+++ b/NOTES.md
@@ -47,7 +47,7 @@ the `pandas.DataFrame` API. This resolves some of the issues above as:
 than a new index
 
 * Instead of supporting the enitre `pandas.DataFrame` API we can support a subset appropriate for
-Elasticsearch. If addition calls are required, we could to create a `eland.DataFrame.to_pandas()` 
+Elasticsearch. If additional calls are required, we could create a `eland.DataFrame._to_pandas()`
 method which would explicitly export all data to a `pandas.DataFrame` 
 
 * Creating a new `eland.DataFrame` API gives us full flexibility in terms of implementation. However, 
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..d0c3cbf
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..6247f7e
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..f37c4d8
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,79 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath("../sphinxext"))
+sys.path.extend(
+    [
+        # numpy standard doc extensions
+        os.path.join(os.path.dirname(__file__), "..", "../..", "sphinxext")
+    ]
+)
+
+
+
+# -- Project information -----------------------------------------------------
+
+project = 'eland'
+copyright = '2019, Stephen Dodson'
+author = 'Stephen Dodson'
+
+# Short X.Y version (fills |version|) and full version incl. alpha/beta/rc tags (fills |release|)
+version = '0.1'
+release = '0.1'
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+    'sphinx.ext.autosummary',  # the reference/*.rst pages use ".. autosummary::"
+    "sphinx.ext.doctest",
+    'numpydoc'
+]
+
+doctest_global_setup = '''
+try:
+    import eland as ed
+except ImportError:
+    ed = None
+try:
+    import pandas as pd
+except ImportError:
+    pd = None
+'''
+
+numpydoc_attributes_as_param_list = False
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..871dd61
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,30 @@
+.. eland documentation master file, created by
+
+.. module:: eland
+
+****************************************************************
+eland: pandas-like data analysis toolkit backed by Elasticsearch
+****************************************************************
+
+**Date**: |today| **Version**: |version|
+
+**Useful links**:
+`Source Repository <https://github.com/elastic/eland>`__ |
+`Issues & Ideas <https://github.com/elastic/eland/issues>`__ |
+`Q&A Support <https://discuss.elastic.co>`__
+
+:mod:`eland` is an open source, Apache2-licensed Elasticsearch Python client to analyse, explore and manipulate data that resides in Elasticsearch.
+Where possible the package uses existing Python APIs and data structures to make it easy to switch between NumPy, pandas and scikit-learn and their Elasticsearch-powered equivalents.
+In general, the data resides in Elasticsearch and not in memory, which allows eland to access large datasets stored in Elasticsearch.
+
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+
+   reference/index
+
+* :doc:`reference/index`
+
+  * :doc:`reference/general_utility_functions`
+  * :doc:`reference/dataframe`
diff --git a/docs/source/reference/api/eland.DataFrame.columns.rst b/docs/source/reference/api/eland.DataFrame.columns.rst
new file mode 100644
index 0000000..8bcdf83
--- /dev/null
+++ b/docs/source/reference/api/eland.DataFrame.columns.rst
@@ -0,0 +1,6 @@
+eland.DataFrame.columns
+=======================
+
+.. currentmodule:: eland
+
+.. autoattribute:: DataFrame.columns
diff --git a/docs/source/reference/api/eland.DataFrame.head.rst b/docs/source/reference/api/eland.DataFrame.head.rst
new file mode 100644
index 0000000..16d4173
--- /dev/null
+++ b/docs/source/reference/api/eland.DataFrame.head.rst
@@ -0,0 +1,6 @@
+eland.DataFrame.head
+====================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.head
diff --git a/docs/source/reference/api/eland.DataFrame.index.rst b/docs/source/reference/api/eland.DataFrame.index.rst
new file mode 100644
index 0000000..c3d0ab0
--- /dev/null
+++ b/docs/source/reference/api/eland.DataFrame.index.rst
@@ -0,0 +1,6 @@
+eland.DataFrame.index
+=====================
+
+.. currentmodule:: eland
+
+.. autoattribute:: DataFrame.index
diff --git a/docs/source/reference/api/eland.DataFrame.rst b/docs/source/reference/api/eland.DataFrame.rst
new file mode 100644
index 0000000..8929d81
--- /dev/null
+++ b/docs/source/reference/api/eland.DataFrame.rst
@@ -0,0 +1,18 @@
+eland.DataFrame
+================
+
+.. currentmodule:: eland
+
+.. autoclass:: DataFrame
+
+   
+
+
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+      
+      DataFrame.abs
+      DataFrame.add
+
diff --git a/docs/source/reference/api/eland.DataFrame.tail.rst b/docs/source/reference/api/eland.DataFrame.tail.rst
new file mode 100644
index 0000000..b4ec087
--- /dev/null
+++ b/docs/source/reference/api/eland.DataFrame.tail.rst
@@ -0,0 +1,6 @@
+eland.DataFrame.tail
+====================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.tail
diff --git a/docs/source/reference/api/eland.ed_to_pd.rst b/docs/source/reference/api/eland.ed_to_pd.rst
new file mode 100644
index 0000000..55dcf64
--- /dev/null
+++ b/docs/source/reference/api/eland.ed_to_pd.rst
@@ -0,0 +1,6 @@
+eland.ed_to_pd
+==============
+
+.. currentmodule:: eland
+
+.. autofunction:: ed_to_pd
diff --git a/docs/source/reference/api/eland.pd_to_ed.rst b/docs/source/reference/api/eland.pd_to_ed.rst
new file mode 100644
index 0000000..615c987
--- /dev/null
+++ b/docs/source/reference/api/eland.pd_to_ed.rst
@@ -0,0 +1,6 @@
+eland.pd_to_ed
+==============
+
+.. currentmodule:: eland
+
+.. autofunction:: pd_to_ed
diff --git a/docs/source/reference/api/eland.read_es.rst b/docs/source/reference/api/eland.read_es.rst
new file mode 100644
index 0000000..e31751e
--- /dev/null
+++ b/docs/source/reference/api/eland.read_es.rst
@@ -0,0 +1,6 @@
+eland.read_es
+=============
+
+.. currentmodule:: eland
+
+.. autofunction:: read_es
diff --git a/docs/source/reference/dataframe.rst b/docs/source/reference/dataframe.rst
new file mode 100644
index 0000000..f4510b3
--- /dev/null
+++ b/docs/source/reference/dataframe.rst
@@ -0,0 +1,35 @@
+.. _api.dataframe:
+
+=========
+DataFrame
+=========
+.. currentmodule:: eland
+
+Constructor
+~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+   DataFrame
+
+Attributes and underlying data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+**Axes**
+
+.. autosummary::
+   :toctree: api/
+
+   DataFrame.index
+   DataFrame.columns
+
+Indexing, iteration
+~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+   DataFrame.head
+   DataFrame.tail
+
+
+
+
diff --git a/docs/source/reference/general_utility_functions.rst b/docs/source/reference/general_utility_functions.rst
new file mode 100644
index 0000000..63e1865
--- /dev/null
+++ b/docs/source/reference/general_utility_functions.rst
@@ -0,0 +1,21 @@
+.. _api.general_utility_functions:
+
+=========================
+General utility functions
+=========================
+.. currentmodule:: eland
+
+Elasticsearch access
+~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+    read_es
+
+Pandas and Eland
+~~~~~~~~~~~~~~~~
+.. autosummary::
+   :toctree: api/
+
+    pd_to_ed
+    ed_to_pd
diff --git a/docs/source/reference/index.rst b/docs/source/reference/index.rst
new file mode 100644
index 0000000..8f79abe
--- /dev/null
+++ b/docs/source/reference/index.rst
@@ -0,0 +1,14 @@
+.. _api:
+
+=============
+API reference
+=============
+
+This page gives an overview of all public eland objects, functions and
+methods. All classes and functions exposed in the ``eland.*`` namespace are public.
+
+.. toctree::
+   :maxdepth: 2
+
+   general_utility_functions
+   dataframe
diff --git a/eland/dataframe.py b/eland/dataframe.py
index 9e58759..516391a 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -19,17 +19,86 @@ from eland import NDFrame
 from eland import Series
 from eland.filter import BooleanFilter, ScriptFilter
 
-
 class DataFrame(NDFrame):
-    # This is effectively 2 constructors
-    # 1. client, index_pattern, columns, index_field
-    # 2. query_compiler
+    """
+    Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes
+    (rows and columns) referencing data stored in Elasticsearch indices.
+    Where possible APIs mirror pandas.DataFrame APIs.
+    The underlying data is stored in Elasticsearch rather than core memory.
+
+    Parameters
+    ----------
+    client: Elasticsearch client argument(s) (e.g. 'localhost:9200')
+        - elasticsearch-py parameters or
+        - elasticsearch-py instance or
+        - eland.Client instance
+    index_pattern: str
+        Elasticsearch index pattern (e.g. 'flights' or 'filebeat-\\*')
+    columns: list of str, optional
+        List of DataFrame columns. A subset of the Elasticsearch index's fields.
+    index_field: str, optional
+        The Elasticsearch index field to use as the DataFrame index. Defaults to _id if None is used.
+
+    Examples
+    --------
+    Constructing DataFrame from Elasticsearch configuration arguments and an Elasticsearch index
+
+    >>> df = ed.DataFrame('localhost:9200', 'flights')
+    >>> df.head()
+       AvgTicketPrice  Cancelled           Carrier                                          Dest  ... OriginRegion        OriginWeather dayOfWeek           timestamp
+    0      841.265642      False   Kibana Airlines  Sydney Kingsford Smith International Airport  ...        DE-HE                Sunny         0 2018-01-01 00:00:00
+    1      882.982662      False  Logstash Airways                     Venice Marco Polo Airport  ...        SE-BD                Clear         0 2018-01-01 18:27:00
+    2      190.636904      False  Logstash Airways                     Venice Marco Polo Airport  ...        IT-34                 Rain         0 2018-01-01 17:11:14
+    3      181.694216       True   Kibana Airlines                   Treviso-Sant'Angelo Airport  ...        IT-72  Thunder & Lightning         0 2018-01-01 10:33:28
+    4      730.041778      False   Kibana Airlines          Xi'an Xianyang International Airport  ...       MX-DIF        Damaging Wind         0 2018-01-01 05:13:00
+    <BLANKLINE>
+    [5 rows x 27 columns]
+
+    Constructing DataFrame from an Elasticsearch client and an Elasticsearch index
+
+    >>> from elasticsearch import Elasticsearch
+    >>> es = Elasticsearch("localhost:9200")
+    >>> df = ed.DataFrame(client=es, index_pattern='flights', columns=['AvgTicketPrice', 'Cancelled'])
+    >>> df.head()
+       AvgTicketPrice  Cancelled
+    0      841.265642      False
+    1      882.982662      False
+    2      190.636904      False
+    3      181.694216       True
+    4      730.041778      False
+    <BLANKLINE>
+    [5 rows x 2 columns]
+
+    Constructing DataFrame from an Elasticsearch client and an Elasticsearch index, with 'timestamp' as the DataFrame index field
+
+    >>> df = ed.DataFrame(client='localhost', index_pattern='flights', columns=['AvgTicketPrice', 'timestamp'], index_field='timestamp')
+    >>> df.head()
+                         AvgTicketPrice           timestamp
+    2018-01-01T00:00:00      841.265642 2018-01-01 00:00:00
+    2018-01-01T00:02:06      772.100846 2018-01-01 00:02:06
+    2018-01-01T00:06:27      159.990962 2018-01-01 00:06:27
+    2018-01-01T00:33:31      800.217104 2018-01-01 00:33:31
+    2018-01-01T00:36:51      803.015200 2018-01-01 00:36:51
+    <BLANKLINE>
+    [5 rows x 2 columns]
+    """
     def __init__(self,
                  client=None,
                  index_pattern=None,
                  columns=None,
                  index_field=None,
                  query_compiler=None):
+        """
+        There are effectively 2 constructors:
+
+        1. client, index_pattern, columns, index_field
+        2. query_compiler (eland.ElandQueryCompiler)
+
+        The constructor with 'query_compiler' is for internal use only.
+        """
+        if query_compiler is None:
+            if client is None or index_pattern is None:
+                raise ValueError("client and index_pattern must be defined in DataFrame constructor")
         # python 3 syntax
         super().__init__(
             client=client,
@@ -39,6 +108,27 @@ class DataFrame(NDFrame):
             query_compiler=query_compiler)
 
     def _get_columns(self):
+        """
+        The column labels of the DataFrame.
+
+        Returns
+        -------
+        Elasticsearch field names as pandas.Index
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights')
+        >>> assert isinstance(df.columns, pd.Index)
+        >>> df.columns
+        Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID',
+        ...   'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion',
+        ...   'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay',
+        ...   'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour',
+        ...   'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',
+        ...   'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather',
+        ...   'dayOfWeek', 'timestamp'],
+        ...  dtype='object')
+        """
         return self._query_compiler.columns
 
     columns = property(_get_columns)
@@ -51,14 +141,70 @@ class DataFrame(NDFrame):
             True if the DataFrame is empty.
             False otherwise.
         """
-        # TODO - this is called on every attribute get (most methods) from modin/pandas/base.py:3337
-        #  (as Index.__len__ performs an query) we may want to cache self.index.empty()
         return len(self.columns) == 0 or len(self.index) == 0
 
     def head(self, n=5):
+        """
+        Return the first n rows.
+
+        This function returns the first n rows for the object based on position.
+        The row order is sorted by index field.
+        It is useful for quickly testing if your object has the right type of data in it.
+
+        Parameters
+        ----------
+        n: int, default 5
+            Number of rows to select.
+
+        Returns
+        -------
+        eland.DataFrame
+            eland DataFrame filtered on first n rows sorted by index field
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
+        >>> df.head(3)
+                                    Origin                                          Dest
+        0        Frankfurt am Main Airport  Sydney Kingsford Smith International Airport
+        1  Cape Town International Airport                     Venice Marco Polo Airport
+        2        Venice Marco Polo Airport                     Venice Marco Polo Airport
+        <BLANKLINE>
+        [3 rows x 2 columns]
+        """
         return DataFrame(query_compiler=self._query_compiler.head(n))
 
     def tail(self, n=5):
+        """
+        Return the last n rows.
+
+        This function returns the last n rows for the object based on position.
+        The row order is sorted by index field.
+        It is useful for quickly testing if your object has the right type of data in it.
+
+        Parameters
+        ----------
+        n: int, default 5
+            Number of rows to select.
+
+        Returns
+        -------
+        eland.DataFrame
+            eland DataFrame filtered on last n rows sorted by index field
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost', 'flights', columns=['Origin', 'Dest'])
+        >>> df.tail()
+                                                          Origin                                      Dest
+        13054                         Pisa International Airport      Xi'an Xianyang International Airport
+        13055  Winnipeg / James Armstrong Richardson Internat...                            Zurich Airport
+        13056     Licenciado Benito Juarez International Airport                         Ukrainka Air Base
+        13057                                      Itami Airport  Ministro Pistarini International Airport
+        13058                     Adelaide International Airport   Washington Dulles International Airport
+        <BLANKLINE>
+        [5 rows x 2 columns]
+        """
         return DataFrame(query_compiler=self._query_compiler.tail(n))
 
     def __repr__(self):
@@ -459,6 +605,13 @@ class DataFrame(NDFrame):
         return self._query_compiler.to_csv(**kwargs)
 
     def _to_pandas(self):
+        """
+        Utility method to convert eland.DataFrame to pandas.DataFrame
+
+        Returns
+        -------
+        pandas.DataFrame
+        """
         return self._query_compiler.to_pandas()
 
     def _empty_pd_df(self):
diff --git a/eland/ndframe.py b/eland/ndframe.py
index 4b46e63..3c8f53b 100644
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@@ -56,6 +56,12 @@ class NDFrame:
         self._query_compiler = query_compiler
 
     def _get_index(self):
+        """Return the index object held by the underlying query compiler.
+
+        Returns
+        -------
+        The ``index`` attribute of ``self._query_compiler``
+        """
         return self._query_compiler.index
 
     index = property(_get_index)
diff --git a/eland/series.py b/eland/series.py
index 198f5b5..66f27e3 100644
--- a/eland/series.py
+++ b/eland/series.py
@@ -91,8 +91,6 @@ class Series(NDFrame):
             True if the Series is empty.
             False otherwise.
         """
-        # TODO - this is called on every attribute get (most methods) from modin/pandas/base.py:3337
-        #  (as Index.__len__ performs an query) we may want to cache self.index.empty()
         return len(self.index) == 0
 
     def _get_name(self):
@@ -152,7 +150,7 @@ class Series(NDFrame):
         )
 
     def _to_pandas(self):
-        return self._query_compiler.to_pandas()[self.name]
+        return self._query_compiler.to_pandas()[self.name]
 
     def __gt__(self, other):
         if isinstance(other, Series):
diff --git a/eland/tests/dataframe/test_datetime_pytest.py b/eland/tests/dataframe/test_datetime_pytest.py
index 5f4d580..ae7fe8a 100644
--- a/eland/tests/dataframe/test_datetime_pytest.py
+++ b/eland/tests/dataframe/test_datetime_pytest.py
@@ -37,9 +37,7 @@ class TestDataFrameDateTime(TestData):
         # Now create index
         index_name = 'eland_test_generate_es_mappings'
 
-        ed.pandas_to_es(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
-
-        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
+        ed_df = ed.pd_to_ed(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
         ed_df_head = ed_df.head()
 
         assert_pandas_eland_frame_equal(df, ed_df_head)
diff --git a/eland/tests/dataframe/test_init_pytest.py b/eland/tests/dataframe/test_init_pytest.py
new file mode 100644
index 0000000..9754b0f
--- /dev/null
+++ b/eland/tests/dataframe/test_init_pytest.py
@@ -0,0 +1,31 @@
+# File called _pytest for PyCharm compatibility
+
+import eland as ed
+
+import pytest
+
+from eland.tests import ELASTICSEARCH_HOST
+from eland.tests import FLIGHTS_INDEX_NAME
+
+class TestDataFrameInit:
+    """Checks which argument combinations the eland.DataFrame constructor accepts."""
+    def test_init(self):
+        # No arguments at all: neither client nor index_pattern (throws)
+        with pytest.raises(ValueError):
+            ed.DataFrame()
+
+        # client without index_pattern (throws)
+        with pytest.raises(ValueError):
+            ed.DataFrame(client=ELASTICSEARCH_HOST)
+
+        # index_pattern without client (throws)
+        with pytest.raises(ValueError):
+            ed.DataFrame(index_pattern=FLIGHTS_INDEX_NAME)
+
+        # Good constructors: positional and keyword forms must not raise
+        ed.DataFrame(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME)
+        ed.DataFrame(client=ELASTICSEARCH_HOST, index_pattern=FLIGHTS_INDEX_NAME)
+
+        # query_compiler on its own is accepted (the internal constructor path)
+        qc = ed.ElandQueryCompiler(client=ELASTICSEARCH_HOST, index_pattern=FLIGHTS_INDEX_NAME)
+        ed.DataFrame(query_compiler=qc)
diff --git a/eland/tests/dataframe/test_query_pytest.py b/eland/tests/dataframe/test_query_pytest.py
index 7cab24b..cabac07 100644
--- a/eland/tests/dataframe/test_query_pytest.py
+++ b/eland/tests/dataframe/test_query_pytest.py
@@ -19,8 +19,7 @@ class TestDataFrameQuery(TestData):
         # Now create index
         index_name = 'eland_test_query1'
 
-        ed.pandas_to_es(pd_df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
-        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
+        ed_df = ed.pd_to_ed(pd_df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
 
         assert_pandas_eland_frame_equal(pd_df, ed_df)
 
diff --git a/eland/tests/dataframe/test_utils_pytest.py b/eland/tests/dataframe/test_utils_pytest.py
index a2ce298..021f32e 100644
--- a/eland/tests/dataframe/test_utils_pytest.py
+++ b/eland/tests/dataframe/test_utils_pytest.py
@@ -4,7 +4,7 @@ import numpy as np
 import pandas as pd
 
 import eland as ed
-from eland.tests.common import ELASTICSEARCH_HOST
+from eland.tests.common import ELASTICSEARCH_HOST, assert_pandas_eland_frame_equal
 from eland.tests.common import TestData
 
 
@@ -36,9 +36,7 @@ class TestDataFrameUtils(TestData):
         # Now create index
         index_name = 'eland_test_generate_es_mappings'
 
-        ed.pandas_to_es(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
-
-        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
+        ed_df = ed.pd_to_ed(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True)
         ed_df_head = ed_df.head()
 
-        # assert_frame_equal(df, ed_df_head)
+        assert_pandas_eland_frame_equal(df, ed_df_head)
diff --git a/eland/utils.py b/eland/utils.py
index 98774c3..1299f6c 100644
--- a/eland/utils.py
+++ b/eland/utils.py
@@ -2,44 +2,71 @@ from eland import Client
 from eland import DataFrame
 from eland import Mappings
 
+import pandas as pd
+
 
 def read_es(es_params, index_pattern):
-    return DataFrame(client=es_params, index_pattern=index_pattern)
-
-
-def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
-                 geo_points=None):
     """
-    Append a pandas DataFrame to an Elasticsearch index.
-    Mainly used in testing.
+    Utility method to create an eland.DataFrame from an Elasticsearch index_pattern.
+    (Similar to pandas.read_csv, but source data is an Elasticsearch index rather than
+    a csv file)
 
     Parameters
     ----------
-    es_params : Elasticsearch client argument
-        elasticsearch-py parameters or
-        elasticsearch-py instance or
-        eland.Client instance
+    es_params: Elasticsearch client argument(s)
+        - elasticsearch-py parameters or
+        - elasticsearch-py instance or
+        - eland.Client instance
+    index_pattern: str
+        Elasticsearch index pattern
 
-    destination_index : str
-        Name of Elasticsearch index to be written
+    Returns
+    -------
+    eland.DataFrame
 
-    if_exists : str, default 'fail'
-        Behavior when the destination index exists. Value can be one of:
-        ``'fail'``
-            If table exists, do nothing.
-        ``'replace'``
-            If table exists, drop it, recreate it, and insert data.
-        ``'append'``
-                If table exists, insert data. Create if does not exist.
+    See Also
+    --------
+    eland.pd_to_ed: Create an eland.DataFrame from pandas.DataFrame
+    eland.ed_to_pd: Create a pandas.DataFrame from eland.DataFrame
+    """
+    return DataFrame(client=es_params, index_pattern=index_pattern)
 
-    dropna : bool
-        ``'True'``
-            Remove missing values (see pandas.Series.dropna)
-        ``'False;``
-            Include missing values - may cause bulk to fail
+def pd_to_ed(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
+             geo_points=None):
+    """
+    Append a pandas DataFrame to an Elasticsearch index.
+    Mainly used in testing.
+    Modifies the elasticsearch destination index
 
-    geo_points : list or None
+    Parameters
+    ----------
+    es_params: Elasticsearch client argument(s)
+        - elasticsearch-py parameters or
+        - elasticsearch-py instance or
+        - eland.Client instance
+    destination_index: str
+        Name of Elasticsearch index to be appended to
+    if_exists : {'fail', 'replace', 'append'}, default 'fail'
+        How to behave if the index already exists.
+
+        - fail: Raise a ValueError.
+        - replace: Delete the index before inserting new values.
+        - append: Insert new values to the existing index. Create if does not exist.
+    dropna: bool, default False
+        * True: Remove missing values (see pandas.Series.dropna)
+        * False: Include missing values - may cause bulk to fail
+    geo_points: list, default None
         List of columns to map to geo_point data type
+
+    Returns
+    -------
+    eland.DataFrame
+        eland.DataFrame referencing data in destination_index
+
+    See Also
+    --------
+    eland.read_es: Create an eland.DataFrame from an Elasticsearch index
+    eland.ed_to_pd: Create a pandas.DataFrame from eland.DataFrame
     """
     client = Client(es_params)
 
@@ -86,3 +113,31 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=
             actions = []
 
     client.bulk(actions, refresh=refresh)
+
+    ed_df = DataFrame(client, destination_index)
+
+    return ed_df
+
+def ed_to_pd(ed_df):
+    """
+    Convert an eland.DataFrame to a pandas.DataFrame
+
+    **Note: this loads the entire Elasticsearch index into in-core pandas.DataFrame structures. For large
+    indices this can create significant load on the Elasticsearch cluster and require significant memory**
+
+    Parameters
+    ----------
+    ed_df: eland.DataFrame
+        The source eland.DataFrame referencing the Elasticsearch index
+
+    Returns
+    -------
+    pandas.DataFrame
+        pandas.DataFrame containing all rows and columns in eland.DataFrame
+
+    See Also
+    --------
+    eland.read_es: Create an eland.DataFrame from an Elasticsearch index
+    eland.pd_to_ed: Create an eland.DataFrame from pandas.DataFrame
+    """
+    return ed_df._to_pandas()
diff --git a/make_docs.sh b/make_docs.sh
new file mode 100644
index 0000000..5134e70
--- /dev/null
+++ b/make_docs.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# Install eland into the current environment, then rebuild the HTML docs.
+set -e  # stop at the first failing step instead of carrying on
+
+python setup.py install
+
+cd docs
+make clean
+make html
diff --git a/requirements-dev.txt b/requirements-dev.txt
index d18de0e..f6a7ec9 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,6 @@
 elasticsearch>=7.0.5
 pandas==0.25.1
+matplotlib
 pytest>=5.2.1
+sphinx_rtd_theme
+numpydoc==0.8