Merge pull request #30 from stevedodson/master

Major cleanup - removed modin
2025-07-11 00:02:14 +08:00 · 2019-11-04 14:21:44 +01:00 · 2019-11-04 14:21:44 +01:00 · 90ea637707
commit 90ea637707
parent 8cb9682086 5ee6228a95
49 changed files with 8629 additions and 2333 deletions
--- a/eland/init.py
+++ b/eland/init.py
@ -1,19 +1,14 @@
 from __future__ import absolute_import
-import os
-
-# Set modin to pandas to avoid starting ray or other
-os.environ["MODIN_ENGINE"] = 'python'
-os.environ["MODIN_BACKEND"] = 'pandas'

 from eland.client import *
+from eland.dataframe import *
+from eland.filter import *
 from eland.index import *
 from eland.mappings import *
-from eland.filter import *
-from eland.query import *
-from eland.operations import *
-from eland.query_compiler import *
-from eland.plotting import *
 from eland.ndframe import *
+from eland.operations import *
+from eland.plotting import *
+from eland.query import *
+from eland.query_compiler import *
 from eland.series import *
-from eland.dataframe import *
 from eland.utils import *
--- a/eland/client.py
+++ b/eland/client.py
@ -1,10 +1,12 @@
 from elasticsearch import Elasticsearch
 from elasticsearch import helpers

+
 class Client:
    """
    eland client - implemented as facade to control access to Elasticsearch methods
    """
+
    def __init__(self, es=None):
        if isinstance(es, Elasticsearch):
            self._es = es
@ -40,4 +42,3 @@ class Client:
    def count(self, **kwargs):
        count_json = self._es.count(**kwargs)
        return count_json['count']
-
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@ -1,16 +1,15 @@
 import sys
 import warnings
 from distutils.version import LooseVersion
+from io import StringIO

 import numpy as np
 import pandas as pd
-import pandas.compat as compat
 import six
-from io import StringIO
 from pandas.core.common import apply_if_callable, is_bool_indexer
-from pandas.core.dtypes.common import (
-    is_list_like
-)
+from pandas.core.dtypes.common import is_list_like
+from pandas.core.indexing import check_bool_indexer
+
 from pandas.io.common import _expand_user, _stringify_path
 from pandas.io.formats import console
 from pandas.io.formats import format as fmt
@ -58,10 +57,10 @@ class DataFrame(NDFrame):
        return len(self.columns) == 0 or len(self.index) == 0

    def head(self, n=5):
-        return super().head(n)
+        return DataFrame(query_compiler=self._query_compiler.head(n))

    def tail(self, n=5):
-        return super().tail(n)
+        return DataFrame(query_compiler=self._query_compiler.tail(n))

    def __repr__(self):
        """
@ -104,7 +103,7 @@ class DataFrame(NDFrame):
                    return None

        if self._info_repr():
-            buf = StringIO(u(""))
+            buf = StringIO()
            self.info(buf=buf)
            # need to escape the <class>, should be the first line.
            val = buf.getvalue().replace('<', r'&lt;', 1)
@ -463,7 +462,6 @@ class DataFrame(NDFrame):
            "quotechar": quotechar,
            "line_terminator": line_terminator,
            "chunksize": chunksize,
-            "tupleize_cols": tupleize_cols,
            "date_format": date_format,
            "doublequote": doublequote,
            "escapechar": escapechar,
@ -510,7 +508,7 @@ class DataFrame(NDFrame):
        return self.columns

    def groupby(self, by=None, axis=0, *args, **kwargs):
-        axis = self._get_axis_number(axis)
+        axis = pd.DataFrame._get_axis_number(axis)

        if axis == 1:
            raise NotImplementedError("Aggregating via index not currently implemented - needs index transform")
@ -545,14 +543,14 @@ class DataFrame(NDFrame):
            if Series.agg is called with single function, returns a scalar
            if Series.agg is called with several functions, returns a Series
        """
-        axis = self._get_axis_number(axis)
+        axis = pd.DataFrame._get_axis_number(axis)

        if axis == 1:
            raise NotImplementedError("Aggregating via index not currently implemented - needs index transform")

        # currently we only support a subset of functions that aggregate columns.
        # ['count', 'mad', 'max', 'mean', 'median', 'min', 'mode', 'quantile', 'rank', 'sem', 'skew', 'sum', 'std', 'var', 'nunique']
-        if isinstance(func, compat.string_types):
+        if isinstance(func, str):
            # wrap in list
            func = [func]
            return self._query_compiler.aggs(func)
@ -580,3 +578,20 @@ class DataFrame(NDFrame):
            )
        else:
            raise NotImplementedError(expr, type(expr))
+
+    def get(self, key, default=None):
+        """Get item from object for given key (DataFrame column, Panel
+                slice, etc.). Returns default value if not found.
+
+                Args:
+                    key (DataFrame column, Panel slice) : the key for which value
+                    to get
+
+                Returns:
+                    value (type of items contained in object) : A value that is
+                    stored at the key
+                """
+        if key in self.keys():
+            return self._getitem(key)
+        else:
+            return default
--- a/eland/filter.py
+++ b/eland/filter.py
@ -1,7 +1,7 @@
 # Derived from pandasticsearch filters

 # Es filter builder for BooleanCond
-class BooleanFilter(object):
+class BooleanFilter:
    def __init__(self, *args):
        self._filter = None

--- a/eland/index.py
+++ b/eland/index.py
@ -14,9 +14,11 @@ In case sorting or aggregating on the _id field is required, it is advised to du
 the content of the _id field in another field that has doc_values enabled.)

 """
+
+
 class Index:
    ID_INDEX_FIELD = '_id'
-    ID_SORT_FIELD = '_doc' # if index field is _id, sort by _doc
+    ID_SORT_FIELD = '_doc'  # if index field is _id, sort by _doc

    def __init__(self, query_compiler, index_field=None):
        # Calls setter
--- a/eland/mappings.py
+++ b/eland/mappings.py
@ -75,6 +75,7 @@ class Mappings:
            pd_dtype = self._mappings_capabilities.loc[field_name]['pd_dtype']
            self._source_field_pd_dtypes[field_name] = pd_dtype

+    @staticmethod
    def _extract_fields_from_mapping(mappings, source_only=False):
        """
        Extract all field names and types from a mapping.
@ -151,6 +152,7 @@ class Mappings:

        return fields

+    @staticmethod
    def _create_capability_matrix(all_fields, source_fields, all_fields_caps):
        """
        {
@ -290,7 +292,7 @@ class Mappings:
        return es_dtype

    @staticmethod
-    def _generate_es_mappings(dataframe):
+    def _generate_es_mappings(dataframe, geo_points=None):
        """Given a pandas dataframe, generate the associated Elasticsearch mapping

        Parameters
@ -325,7 +327,10 @@ class Mappings:
        mappings = {}
        mappings['properties'] = {}
        for column_name, dtype in dataframe.dtypes.iteritems():
-            es_dtype = Mappings._pd_dtype_to_es_dtype(dtype)
+            if geo_points is not None and column_name in geo_points:
+                es_dtype = 'geo_point'
+            else:
+                es_dtype = Mappings._pd_dtype_to_es_dtype(dtype)

            mappings['properties'][column_name] = {}
            mappings['properties'][column_name]['type'] = es_dtype
@ -411,15 +416,27 @@ class Mappings:
            List of source fields where pd_dtype == (int64 or float64 or bool)
        """
        if columns is not None:
-            return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
-                                               ((self._mappings_capabilities.pd_dtype == 'int64') |
-                                                (self._mappings_capabilities.pd_dtype == 'float64') |
-                                                (self._mappings_capabilities.pd_dtype == 'bool'))].loc[columns].index.tolist()
+            if include_bool == True:
+                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
+                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
+                                                    (self._mappings_capabilities.pd_dtype == 'float64') |
+                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].loc[
+                    columns].index.tolist()
+            else:
+                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
+                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
+                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].loc[
+                    columns].index.tolist()
        else:
-            return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
-                                               ((self._mappings_capabilities.pd_dtype == 'int64') |
-                                                (self._mappings_capabilities.pd_dtype == 'float64') |
-                                                (self._mappings_capabilities.pd_dtype == 'bool'))].index.tolist()
+            if include_bool == True:
+                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
+                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
+                                                    (self._mappings_capabilities.pd_dtype == 'float64') |
+                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].index.tolist()
+            else:
+                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
+                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
+                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].index.tolist()

    def source_fields(self):
        """
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@ -26,15 +26,13 @@ only Elasticsearch aggregatable fields can be aggregated or grouped.
 import sys

 import pandas as pd
-from modin.pandas.base import BasePandasDataset
-from modin.pandas.indexing import _iLocIndexer
-from pandas.util._validators import validate_bool_kwarg
 from pandas.core.dtypes.common import is_list_like
+from pandas.util._validators import validate_bool_kwarg

 from eland import ElandQueryCompiler


-class NDFrame(BasePandasDataset):
+class NDFrame:

    def __init__(self,
                 client=None,
@ -85,6 +83,9 @@ class NDFrame(BasePandasDataset):

        return head.append(tail)

+    def __getitem__(self, key):
+        return self._getitem(key)
+
    def __getattr__(self, key):
        """After regular attribute access, looks up the name in the columns

@ -105,6 +106,14 @@ class NDFrame(BasePandasDataset):
        # Don't default to pandas, just return approximation TODO - make this more accurate
        return sys.getsizeof(self._query_compiler)

+    def __len__(self):
+        """Gets the length of the DataFrame.
+
+        Returns:
+            Returns an integer length of the DataFrame object.
+        """
+        return len(self.index)
+
    @property
    def iloc(self):
        """Purely integer-location based indexing for selection by position.
@ -235,21 +244,3 @@ class NDFrame(BasePandasDataset):

    def describe(self):
        return self._query_compiler.describe()
-
-    def get(self, key, default=None):
-        """Get item from object for given key (DataFrame column, Panel
-                slice, etc.). Returns default value if not found.
-
-                Args:
-                    key (DataFrame column, Panel slice) : the key for which value
-                    to get
-
-                Returns:
-                    value (type of items contained in object) : A value that is
-                    stored at the key
-                """
-        if key in self.keys():
-            return self.__getitem__(key)
-        else:
-            return default
-
--- a/eland/operations.py
+++ b/eland/operations.py
@ -1,9 +1,7 @@
 import copy
 from enum import Enum
-from io import StringIO

 import pandas as pd
-import numpy as np

 from eland import Index
 from eland import Query
@ -170,7 +168,7 @@ class Operations:
            results[field] = response['aggregations'][field]['value']

        # Return single value if this is a series
-        #if len(numeric_source_fields) == 1:
+        # if len(numeric_source_fields) == 1:
        #    return np.float64(results[numeric_source_fields[0]])

        s = pd.Series(data=results, index=numeric_source_fields)
@ -391,7 +389,7 @@ class Operations:
            values = list()
            for es_agg in es_aggs:
                if isinstance(es_agg, tuple):
-                        values.append(response['aggregations'][es_agg[0] + '_' + field][es_agg[1]])
+                    values.append(response['aggregations'][es_agg[0] + '_' + field][es_agg[1]])
                else:
                    values.append(response['aggregations'][es_agg + '_' + field]['value'])

@ -410,7 +408,7 @@ class Operations:

        columns = self.get_columns()

-        numeric_source_fields = query_compiler._mappings.numeric_source_fields(columns)
+        numeric_source_fields = query_compiler._mappings.numeric_source_fields(columns, include_bool=False)

        # for each field we compute:
        # count, mean, std, min, 25%, 50%, 75%, max
@ -450,6 +448,7 @@ class Operations:
        class PandasDataFrameCollector:
            def collect(self, df):
                self.df = df
+
            def batch_size(self):
                return None

@ -465,6 +464,7 @@ class Operations:
                self.kwargs = kwargs
                self.ret = None
                self.first_time = True
+
            def collect(self, df):
                # If this is the first time we collect results, then write header, otherwise don't write header
                # and append results
--- a/eland/plotting.py
+++ b/eland/plotting.py
@ -3,13 +3,14 @@ import numpy as np
 import pandas.core.common as com
 from pandas.core.dtypes.generic import (
    ABCIndexClass)
+from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots


 def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None,
-               xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
-               sharey=False, figsize=None, layout=None, bins=10, **kwds):
+                  xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False,
+                  sharey=False, figsize=None, layout=None, bins=10, **kwds):
    """
-    Derived from pandas.plotting._core.hist_frame 0.24.2
+    Derived from pandas.plotting._core.hist_frame 0.24.2 - TODO update to 0.25.1

    Ideally, we'd call hist_frame directly with histogram data,
    but weights are applied to ALL series. For example, we can
@ -29,8 +30,6 @@ def ed_hist_frame(ed_df, column=None, by=None, grid=True, xlabelsize=None,
    # Start with empty pandas data frame derived from
    ed_df_bins, ed_df_weights = ed_df._hist(num_bins=bins)

-    _raise_if_no_mpl()
-    _converter._WARN = False
    if by is not None:
        raise NotImplementedError("TODO")
        """
--- a/eland/query.py
+++ b/eland/query.py
@ -3,6 +3,7 @@ from copy import deepcopy

 from eland.filter import BooleanFilter, NotNull, IsNull, IsIn

+
 class Query:
    """
    Simple class to manage building Elasticsearch queries.
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@ -1,20 +1,15 @@
 import pandas as pd
-from modin.backends.base.query_compiler import BaseQueryCompiler
+from pandas.core.dtypes.common import (
+    is_list_like
+)

 from eland import Client
 from eland import Index
 from eland import Mappings
 from eland import Operations

-from pandas.core.dtypes.common import (
-    is_list_like
-)

-from pandas.core.indexes.numeric import Int64Index
-from pandas.core.indexes.range import RangeIndex
-
-
-class ElandQueryCompiler(BaseQueryCompiler):
+class ElandQueryCompiler:
    """
    Some notes on what can and can not be mapped:

@ -318,10 +313,10 @@ class ElandQueryCompiler(BaseQueryCompiler):
        return df

    def copy(self):
-        return self.__constructor__(
+        return ElandQueryCompiler(
            client=self._client,
            index_pattern=self._index_pattern,
-            columns=None,   # columns are embedded in operations
+            columns=None,  # columns are embedded in operations
            index_field=self._index.index_field,
            operations=self._operations.copy()
        )
@ -412,14 +407,19 @@ class ElandQueryCompiler(BaseQueryCompiler):

    def count(self):
        return self._operations.count(self)
+
    def mean(self):
        return self._operations.mean(self)
+
    def sum(self):
        return self._operations.sum(self)
+
    def min(self):
        return self._operations.min(self)
+
    def max(self):
        return self._operations.max(self)
+
    def nunique(self):
        return self._operations.nunique(self)

@ -471,6 +471,4 @@ class ElandQueryCompiler(BaseQueryCompiler):

        return result

-    #def isna(self):
-
-
+    # def isna(self):
--- a/eland/series.py
+++ b/eland/series.py
@ -101,10 +101,10 @@ class Series(NDFrame):
    name = property(_get_name)

    def head(self, n=5):
-        return super().head(n)
+        return Series(query_compiler=self._query_compiler.head(n))

    def tail(self, n=5):
-        return super().tail(n)
+        return Series(query_compiler=self._query_compiler.tail(n))

    # ----------------------------------------------------------------------
    # Rendering Methods
@ -194,7 +194,6 @@ class Series(NDFrame):
        else:
            raise NotImplementedError(other, type(other))

-
    def __eq__(self, other):
        if isinstance(other, Series):
            # Need to use scripted query to compare to values
--- a/eland/tests/Eland
+++ b/eland/tests/Eland
--- a/eland/tests/init.py
+++ b/eland/tests/init.py
@ -1,101 +1,98 @@
 import os
+
 import pandas as pd

 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

-# Set modin to pandas to avoid starting ray or other
-os.environ["MODIN_ENGINE"] = 'python'
-os.environ["MODIN_BACKEND"] = 'pandas'
-
 # Define test files and indices
-ELASTICSEARCH_HOST = 'localhost' # TODO externalise this
+ELASTICSEARCH_HOST = 'localhost'  # TODO externalise this

 FLIGHTS_INDEX_NAME = 'flights'
-FLIGHTS_MAPPING = { "mappings" : {
-      "properties" : {
-        "AvgTicketPrice" : {
-          "type" : "float"
+FLIGHTS_MAPPING = {"mappings": {
+    "properties": {
+        "AvgTicketPrice": {
+            "type": "float"
        },
-        "Cancelled" : {
-          "type" : "boolean"
+        "Cancelled": {
+            "type": "boolean"
        },
-        "Carrier" : {
-          "type" : "keyword"
+        "Carrier": {
+            "type": "keyword"
        },
-        "Dest" : {
-          "type" : "keyword"
+        "Dest": {
+            "type": "keyword"
        },
-        "DestAirportID" : {
-          "type" : "keyword"
+        "DestAirportID": {
+            "type": "keyword"
        },
-        "DestCityName" : {
-          "type" : "keyword"
+        "DestCityName": {
+            "type": "keyword"
        },
-        "DestCountry" : {
-          "type" : "keyword"
+        "DestCountry": {
+            "type": "keyword"
        },
-        "DestLocation" : {
-          "type" : "geo_point"
+        "DestLocation": {
+            "type": "geo_point"
        },
-        "DestRegion" : {
-          "type" : "keyword"
+        "DestRegion": {
+            "type": "keyword"
        },
-        "DestWeather" : {
-          "type" : "keyword"
+        "DestWeather": {
+            "type": "keyword"
        },
-        "DistanceKilometers" : {
-          "type" : "float"
+        "DistanceKilometers": {
+            "type": "float"
        },
-        "DistanceMiles" : {
-          "type" : "float"
+        "DistanceMiles": {
+            "type": "float"
        },
-        "FlightDelay" : {
-          "type" : "boolean"
+        "FlightDelay": {
+            "type": "boolean"
        },
-        "FlightDelayMin" : {
-          "type" : "integer"
+        "FlightDelayMin": {
+            "type": "integer"
        },
-        "FlightDelayType" : {
-          "type" : "keyword"
+        "FlightDelayType": {
+            "type": "keyword"
        },
-        "FlightNum" : {
-          "type" : "keyword"
+        "FlightNum": {
+            "type": "keyword"
        },
-        "FlightTimeHour" : {
-          "type" : "float"
+        "FlightTimeHour": {
+            "type": "float"
        },
-        "FlightTimeMin" : {
-          "type" : "float"
+        "FlightTimeMin": {
+            "type": "float"
        },
-        "Origin" : {
-          "type" : "keyword"
+        "Origin": {
+            "type": "keyword"
        },
-        "OriginAirportID" : {
-          "type" : "keyword"
+        "OriginAirportID": {
+            "type": "keyword"
        },
-        "OriginCityName" : {
-          "type" : "keyword"
+        "OriginCityName": {
+            "type": "keyword"
        },
-        "OriginCountry" : {
-          "type" : "keyword"
+        "OriginCountry": {
+            "type": "keyword"
        },
-        "OriginLocation" : {
-          "type" : "geo_point"
+        "OriginLocation": {
+            "type": "geo_point"
        },
-        "OriginRegion" : {
-          "type" : "keyword"
+        "OriginRegion": {
+            "type": "keyword"
        },
-        "OriginWeather" : {
-          "type" : "keyword"
+        "OriginWeather": {
+            "type": "keyword"
        },
-        "dayOfWeek" : {
-          "type" : "integer"
+        "dayOfWeek": {
+            "type": "integer"
        },
-        "timestamp" : {
-          "type" : "date"
+        "timestamp": {
+            "type": "date"
        }
-      }
-    } }
+    }
+}}
 FLIGHTS_FILE_NAME = ROOT_DIR + '/flights.json.gz'
 FLIGHTS_DF_FILE_NAME = ROOT_DIR + '/flights_df.json.gz'

@ -104,319 +101,319 @@ FLIGHTS_SMALL_MAPPING = FLIGHTS_MAPPING
 FLIGHTS_SMALL_FILE_NAME = ROOT_DIR + '/flights_small.json.gz'

 ECOMMERCE_INDEX_NAME = 'ecommerce'
-ECOMMERCE_MAPPING = { "mappings" : {
-      "properties" : {
-        "category" : {
-          "type" : "text",
-          "fields" : {
-            "keyword" : {
-              "type" : "keyword"
-            }
-          }
-        },
-        "currency" : {
-          "type" : "keyword"
-        },
-        "customer_birth_date" : {
-          "type" : "date"
-        },
-        "customer_first_name" : {
-          "type" : "text",
-          "fields" : {
-            "keyword" : {
-              "type" : "keyword",
-              "ignore_above" : 256
-            }
-          }
-        },
-        "customer_full_name" : {
-          "type" : "text",
-          "fields" : {
-            "keyword" : {
-              "type" : "keyword",
-              "ignore_above" : 256
-            }
-          }
-        },
-        "customer_gender" : {
-          "type" : "keyword"
-        },
-        "customer_id" : {
-          "type" : "keyword"
-        },
-        "customer_last_name" : {
-          "type" : "text",
-          "fields" : {
-            "keyword" : {
-              "type" : "keyword",
-              "ignore_above" : 256
-            }
-          }
-        },
-        "customer_phone" : {
-          "type" : "keyword"
-        },
-        "day_of_week" : {
-          "type" : "keyword"
-        },
-        "day_of_week_i" : {
-          "type" : "integer"
-        },
-        "email" : {
-          "type" : "keyword"
-        },
-        "geoip" : {
-          "properties" : {
-            "city_name" : {
-              "type" : "keyword"
-            },
-            "continent_name" : {
-              "type" : "keyword"
-            },
-            "country_iso_code" : {
-              "type" : "keyword"
-            },
-            "location" : {
-              "type" : "geo_point"
-            },
-            "region_name" : {
-              "type" : "keyword"
-            }
-          }
-        },
-        "manufacturer" : {
-          "type" : "text",
-          "fields" : {
-            "keyword" : {
-              "type" : "keyword"
-            }
-          }
-        },
-        "order_date" : {
-          "type" : "date"
-        },
-        "order_id" : {
-          "type" : "keyword"
-        },
-        "products" : {
-          "properties" : {
-            "_id" : {
-              "type" : "text",
-              "fields" : {
-                "keyword" : {
-                  "type" : "keyword",
-                  "ignore_above" : 256
+ECOMMERCE_MAPPING = {"mappings": {
+    "properties": {
+        "category": {
+            "type": "text",
+            "fields": {
+                "keyword": {
+                    "type": "keyword"
                }
-              }
-            },
-            "base_price" : {
-              "type" : "half_float"
-            },
-            "base_unit_price" : {
-              "type" : "half_float"
-            },
-            "category" : {
-              "type" : "text",
-              "fields" : {
-                "keyword" : {
-                  "type" : "keyword"
-                }
-              }
-            },
-            "created_on" : {
-              "type" : "date"
-            },
-            "discount_amount" : {
-              "type" : "half_float"
-            },
-            "discount_percentage" : {
-              "type" : "half_float"
-            },
-            "manufacturer" : {
-              "type" : "text",
-              "fields" : {
-                "keyword" : {
-                  "type" : "keyword"
-                }
-              }
-            },
-            "min_price" : {
-              "type" : "half_float"
-            },
-            "price" : {
-              "type" : "half_float"
-            },
-            "product_id" : {
-              "type" : "long"
-            },
-            "product_name" : {
-              "type" : "text",
-              "fields" : {
-                "keyword" : {
-                  "type" : "keyword"
-                }
-              },
-              "analyzer" : "english"
-            },
-            "quantity" : {
-              "type" : "integer"
-            },
-            "sku" : {
-              "type" : "keyword"
-            },
-            "tax_amount" : {
-              "type" : "half_float"
-            },
-            "taxful_price" : {
-              "type" : "half_float"
-            },
-            "taxless_price" : {
-              "type" : "half_float"
-            },
-            "unit_discount_amount" : {
-              "type" : "half_float"
            }
-          }
        },
-        "sku" : {
-          "type" : "keyword"
+        "currency": {
+            "type": "keyword"
        },
-        "taxful_total_price" : {
-          "type" : "half_float"
+        "customer_birth_date": {
+            "type": "date"
        },
-        "taxless_total_price" : {
-          "type" : "half_float"
+        "customer_first_name": {
+            "type": "text",
+            "fields": {
+                "keyword": {
+                    "type": "keyword",
+                    "ignore_above": 256
+                }
+            }
        },
-        "total_quantity" : {
-          "type" : "integer"
+        "customer_full_name": {
+            "type": "text",
+            "fields": {
+                "keyword": {
+                    "type": "keyword",
+                    "ignore_above": 256
+                }
+            }
        },
-        "total_unique_products" : {
-          "type" : "integer"
+        "customer_gender": {
+            "type": "keyword"
        },
-        "type" : {
-          "type" : "keyword"
+        "customer_id": {
+            "type": "keyword"
        },
-        "user" : {
-          "type" : "keyword"
+        "customer_last_name": {
+            "type": "text",
+            "fields": {
+                "keyword": {
+                    "type": "keyword",
+                    "ignore_above": 256
+                }
+            }
+        },
+        "customer_phone": {
+            "type": "keyword"
+        },
+        "day_of_week": {
+            "type": "keyword"
+        },
+        "day_of_week_i": {
+            "type": "integer"
+        },
+        "email": {
+            "type": "keyword"
+        },
+        "geoip": {
+            "properties": {
+                "city_name": {
+                    "type": "keyword"
+                },
+                "continent_name": {
+                    "type": "keyword"
+                },
+                "country_iso_code": {
+                    "type": "keyword"
+                },
+                "location": {
+                    "type": "geo_point"
+                },
+                "region_name": {
+                    "type": "keyword"
+                }
+            }
+        },
+        "manufacturer": {
+            "type": "text",
+            "fields": {
+                "keyword": {
+                    "type": "keyword"
+                }
+            }
+        },
+        "order_date": {
+            "type": "date"
+        },
+        "order_id": {
+            "type": "keyword"
+        },
+        "products": {
+            "properties": {
+                "_id": {
+                    "type": "text",
+                    "fields": {
+                        "keyword": {
+                            "type": "keyword",
+                            "ignore_above": 256
+                        }
+                    }
+                },
+                "base_price": {
+                    "type": "half_float"
+                },
+                "base_unit_price": {
+                    "type": "half_float"
+                },
+                "category": {
+                    "type": "text",
+                    "fields": {
+                        "keyword": {
+                            "type": "keyword"
+                        }
+                    }
+                },
+                "created_on": {
+                    "type": "date"
+                },
+                "discount_amount": {
+                    "type": "half_float"
+                },
+                "discount_percentage": {
+                    "type": "half_float"
+                },
+                "manufacturer": {
+                    "type": "text",
+                    "fields": {
+                        "keyword": {
+                            "type": "keyword"
+                        }
+                    }
+                },
+                "min_price": {
+                    "type": "half_float"
+                },
+                "price": {
+                    "type": "half_float"
+                },
+                "product_id": {
+                    "type": "long"
+                },
+                "product_name": {
+                    "type": "text",
+                    "fields": {
+                        "keyword": {
+                            "type": "keyword"
+                        }
+                    },
+                    "analyzer": "english"
+                },
+                "quantity": {
+                    "type": "integer"
+                },
+                "sku": {
+                    "type": "keyword"
+                },
+                "tax_amount": {
+                    "type": "half_float"
+                },
+                "taxful_price": {
+                    "type": "half_float"
+                },
+                "taxless_price": {
+                    "type": "half_float"
+                },
+                "unit_discount_amount": {
+                    "type": "half_float"
+                }
+            }
+        },
+        "sku": {
+            "type": "keyword"
+        },
+        "taxful_total_price": {
+            "type": "half_float"
+        },
+        "taxless_total_price": {
+            "type": "half_float"
+        },
+        "total_quantity": {
+            "type": "integer"
+        },
+        "total_unique_products": {
+            "type": "integer"
+        },
+        "type": {
+            "type": "keyword"
+        },
+        "user": {
+            "type": "keyword"
        }
-      }
-    } }
+    }
+}}
 ECOMMERCE_FILE_NAME = ROOT_DIR + '/ecommerce.json.gz'
 ECOMMERCE_DF_FILE_NAME = ROOT_DIR + '/ecommerce_df.json.gz'

 TEST_MAPPING1 = {
-      'mappings': {
+    'mappings': {
        'properties': {
-          'city': {
-            'type': 'text',
-            'fields': {
-              'raw': {
-                'type': 'keyword'
-              }
-            }
-          },
-          'text': {
-            'type': 'text',
-            'fields': {
-              'english': {
-                'type': 'text',
-                'analyzer': 'english'
-              }
-            }
-          },
-          'origin_location': {
-            'properties': {
-              'lat': {
-                'type': 'text',
-                'index_prefixes': {},
-                'fields': {
-                  'keyword': {
-                    'type': 'keyword',
-                    'ignore_above': 256
-                  }
-                }
-              },
-              'lon': {
+            'city': {
                'type': 'text',
                'fields': {
-                  'keyword': {
-                    'type': 'keyword',
-                    'ignore_above': 256
-                  }
+                    'raw': {
+                        'type': 'keyword'
+                    }
                }
-              }
-            }
-          },
-          'maps-telemetry': {
-            'properties': {
-              'attributesPerMap': {
+            },
+            'text': {
+                'type': 'text',
+                'fields': {
+                    'english': {
+                        'type': 'text',
+                        'analyzer': 'english'
+                    }
+                }
+            },
+            'origin_location': {
                'properties': {
-                  'dataSourcesCount': {
-                    'properties': {
-                      'avg': {
-                        'type': 'long'
-                      },
-                      'max': {
-                        'type': 'long'
-                      },
-                      'min': {
-                        'type': 'long'
-                      }
-                    }
-                  },
-                  'emsVectorLayersCount': {
-                    'dynamic': 'true',
-                    'properties': {
-                      'france_departments': {
-                        'properties': {
-                          'avg': {
-                            'type': 'float'
-                          },
-                          'max': {
-                            'type': 'long'
-                          },
-                          'min': {
-                            'type': 'long'
-                          }
+                    'lat': {
+                        'type': 'text',
+                        'index_prefixes': {},
+                        'fields': {
+                            'keyword': {
+                                'type': 'keyword',
+                                'ignore_above': 256
+                            }
+                        }
+                    },
+                    'lon': {
+                        'type': 'text',
+                        'fields': {
+                            'keyword': {
+                                'type': 'keyword',
+                                'ignore_above': 256
+                            }
                        }
-                      }
                    }
-                  }
                }
-              }
+            },
+            'maps-telemetry': {
+                'properties': {
+                    'attributesPerMap': {
+                        'properties': {
+                            'dataSourcesCount': {
+                                'properties': {
+                                    'avg': {
+                                        'type': 'long'
+                                    },
+                                    'max': {
+                                        'type': 'long'
+                                    },
+                                    'min': {
+                                        'type': 'long'
+                                    }
+                                }
+                            },
+                            'emsVectorLayersCount': {
+                                'dynamic': 'true',
+                                'properties': {
+                                    'france_departments': {
+                                        'properties': {
+                                            'avg': {
+                                                'type': 'float'
+                                            },
+                                            'max': {
+                                                'type': 'long'
+                                            },
+                                            'min': {
+                                                'type': 'long'
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            },
+            'type': {
+                'type': 'keyword'
+            },
+            'name': {
+                'type': 'text'
+            },
+            'user_name': {
+                'type': 'keyword'
+            },
+            'email': {
+                'type': 'keyword'
+            },
+            'content': {
+                'type': 'text'
+            },
+            'tweeted_at': {
+                'type': 'date'
+            },
+            'dest_location': {
+                'type': 'geo_point'
+            },
+            'my_join_field': {
+                'type': 'join',
+                'relations': {
+                    'question': ['answer', 'comment'],
+                    'answer': 'vote'
+                }
            }
-          },
-          'type': {
-            'type': 'keyword'
-          },
-          'name': {
-            'type': 'text'
-          },
-          'user_name': {
-            'type': 'keyword'
-          },
-          'email': {
-            'type': 'keyword'
-          },
-          'content': {
-            'type': 'text'
-          },
-          'tweeted_at': {
-            'type': 'date'
-          },
-          'dest_location': {
-            'type': 'geo_point'
-          },
-          'my_join_field': {
-            'type': 'join',
-            'relations': {
-              'question': ['answer', 'comment'],
-              'answer': 'vote'
-            }
-          }
        }
-      }
    }
+}

 TEST_MAPPING1_INDEX_NAME = 'mapping1'

@ -447,48 +444,47 @@ TEST_MAPPING1_EXPECTED = {

 TEST_MAPPING1_EXPECTED_DF = pd.DataFrame.from_dict(data=TEST_MAPPING1_EXPECTED, orient='index', columns=['es_dtype'])
 TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF = TEST_MAPPING1_EXPECTED_DF.drop(index=['city.raw',
-                                                   'origin_location.lat.keyword',
-                                                   'origin_location.lon.keyword',
-                                                   'text.english'])
+                                                                               'origin_location.lat.keyword',
+                                                                               'origin_location.lon.keyword',
+                                                                               'text.english'])
 TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT = len(TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF.index)

 TEST_NESTED_USER_GROUP_INDEX_NAME = 'nested_user_group'
 TEST_NESTED_USER_GROUP_MAPPING = {
-  'mappings': {
-    'properties': {
-      'group': {
-        'type': 'keyword'
-      },
-      'user': {
+    'mappings': {
        'properties': {
-            'first': {
-              'type': 'keyword'
+            'group': {
+                'type': 'keyword'
            },
-            'last': {
-              'type': 'keyword'
-            },
-            'address' : {
-              'type' : 'keyword'
+            'user': {
+                'properties': {
+                    'first': {
+                        'type': 'keyword'
+                    },
+                    'last': {
+                        'type': 'keyword'
+                    },
+                    'address': {
+                        'type': 'keyword'
+                    }
+                }
            }
        }
-      }
    }
 }
-}

 TEST_NESTED_USER_GROUP_DOCS = [
-{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME,
-'_source':
-    {'group':'amsterdam','user':[
-        {'first':'Manke','last':'Nelis','address':['Elandsgracht', 'Amsterdam']},
-        {'first':'Johnny','last':'Jordaan','address':['Elandsstraat', 'Amsterdam']}]}},
-{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME,
-'_source':
-    {'group':'london','user':[
-        {'first':'Alice','last':'Monkton'},
-        {'first':'Jimmy','last':'White','address':['London']}]}},
-{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME,
-'_source':{'group':'new york','user':[
-    {'first':'Bill','last':'Jones'}]}}
+    {'_index': TEST_NESTED_USER_GROUP_INDEX_NAME,
+     '_source':
+         {'group': 'amsterdam', 'user': [
+             {'first': 'Manke', 'last': 'Nelis', 'address': ['Elandsgracht', 'Amsterdam']},
+             {'first': 'Johnny', 'last': 'Jordaan', 'address': ['Elandsstraat', 'Amsterdam']}]}},
+    {'_index': TEST_NESTED_USER_GROUP_INDEX_NAME,
+     '_source':
+         {'group': 'london', 'user': [
+             {'first': 'Alice', 'last': 'Monkton'},
+             {'first': 'Jimmy', 'last': 'White', 'address': ['London']}]}},
+    {'_index': TEST_NESTED_USER_GROUP_INDEX_NAME,
+     '_source': {'group': 'new york', 'user': [
+         {'first': 'Bill', 'last': 'Jones'}]}}
 ]
-
--- a/eland/tests/common.py
+++ b/eland/tests/common.py
@ -1,24 +1,22 @@
-import pytest
+import os

-import eland as ed
 import pandas as pd
-
 from pandas.util.testing import (assert_frame_equal, assert_series_equal)

-import os
+import eland as ed

 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

 # Create pandas and eland data frames
 from eland.tests import ELASTICSEARCH_HOST
-from eland.tests import FLIGHTS_DF_FILE_NAME, FLIGHTS_INDEX_NAME,\
-    FLIGHTS_SMALL_INDEX_NAME,\
+from eland.tests import FLIGHTS_DF_FILE_NAME, FLIGHTS_INDEX_NAME, \
+    FLIGHTS_SMALL_INDEX_NAME, \
    ECOMMERCE_DF_FILE_NAME, ECOMMERCE_INDEX_NAME

 _pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index()
 _pd_flights['timestamp'] = \
    pd.to_datetime(_pd_flights['timestamp'])
-_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int
+_pd_flights.index = _pd_flights.index.map(str)  # make index 'object' not int
 _ed_flights = ed.read_es(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME)

 _pd_flights_small = _pd_flights.head(48)
@ -30,10 +28,11 @@ _pd_ecommerce['order_date'] = \
 _pd_ecommerce['products.created_on'] = \
    _pd_ecommerce['products.created_on'].apply(lambda x: pd.to_datetime(x))
 _pd_ecommerce.insert(2, 'customer_birth_date', None)
-_pd_ecommerce.index = _pd_ecommerce.index.map(str) # make index 'object' not int
+_pd_ecommerce.index = _pd_ecommerce.index.map(str)  # make index 'object' not int
 _pd_ecommerce['customer_birth_date'].astype('datetime64')
 _ed_ecommerce = ed.read_es(ELASTICSEARCH_HOST, ECOMMERCE_INDEX_NAME)

+
 class TestData:

    def pd_flights(self):
@ -48,25 +47,26 @@ class TestData:
    def ed_flights_small(self):
        return _ed_flights_small

-
    def pd_ecommerce(self):
        return _pd_ecommerce

    def ed_ecommerce(self):
        return _ed_ecommerce

+
 def assert_pandas_eland_frame_equal(left, right):
    if not isinstance(left, pd.DataFrame):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
-                             exp_type='pd.DataFrame', act_type=type(left)))
+            exp_type='pd.DataFrame', act_type=type(left)))

    if not isinstance(right, ed.DataFrame):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
-                             exp_type='ed.DataFrame', act_type=type(right)))
+            exp_type='ed.DataFrame', act_type=type(right)))

    # Use pandas tests to check similarity
    assert_frame_equal(left, right._to_pandas())

+
 def assert_eland_frame_equal(left, right):
    if not isinstance(left, ed.DataFrame):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
@ -83,12 +83,11 @@ def assert_eland_frame_equal(left, right):
 def assert_pandas_eland_series_equal(left, right):
    if not isinstance(left, pd.Series):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
-                             exp_type='pd.Series', act_type=type(left)))
+            exp_type='pd.Series', act_type=type(left)))

    if not isinstance(right, ed.Series):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
-                             exp_type='ed.Series', act_type=type(right)))
+            exp_type='ed.Series', act_type=type(right)))

    # Use pandas tests to check similarity
    assert_series_equal(left, right._to_pandas())
-
--- a/eland/tests/dataframe/test_aggs_pytest.py
+++ b/eland/tests/dataframe/test_aggs_pytest.py
@ -1,15 +1,14 @@
 # File called _pytest for PyCharm compatability

 import numpy as np
-import pandas as pd
-from pandas.util.testing import (assert_almost_equal)
+from pandas.util.testing import assert_almost_equal

 from eland.tests.common import TestData


 class TestDataFrameAggs(TestData):

-    def test_to_aggs1(self):
+    def test_basic_aggs(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()

--- a/eland/tests/dataframe/test_count_pytest.py
+++ b/eland/tests/dataframe/test_count_pytest.py
@ -1,19 +1,17 @@
 # File called _pytest for PyCharm compatability

+from pandas.util.testing import assert_series_equal
+
 from eland.tests.common import TestData


 class TestDataFrameCount(TestData):

-    def test_to_count1(self):
+    def test_ecommerce_count(self):
        pd_ecommerce = self.pd_ecommerce()
        ed_ecommerce = self.ed_ecommerce()

        pd_count = pd_ecommerce.count()
        ed_count = ed_ecommerce.count()

-        print(pd_count)
-        print(ed_count)
-
-
-
+        assert_series_equal(pd_count, ed_count)
--- a/eland/tests/dataframe/test_datetime_pytest.py
+++ b/eland/tests/dataframe/test_datetime_pytest.py
@ -6,6 +6,7 @@ import pandas as pd
 import eland as ed
 from eland.tests.common import ELASTICSEARCH_HOST
 from eland.tests.common import TestData
+from eland.tests.common import assert_pandas_eland_frame_equal


 class TestDataFrameDateTime(TestData):
@ -41,4 +42,4 @@ class TestDataFrameDateTime(TestData):
        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        ed_df_head = ed_df.head()

-        # assert_frame_equal(df, ed_df_head)
+        assert_pandas_eland_frame_equal(df, ed_df_head)
--- a/eland/tests/dataframe/test_describe_pytest.py
+++ b/eland/tests/dataframe/test_describe_pytest.py
@ -1,35 +1,34 @@
 # File called _pytest for PyCharm compatability
-from io import StringIO
+
+from pandas.util.testing import assert_almost_equal

 from eland.tests.common import TestData


 class TestDataFrameDescribe(TestData):

-    def test_to_describe1(self):
+    def test_flights_describe(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()

        pd_describe = pd_flights.describe()
        ed_describe = ed_flights.describe()

-        print(pd_describe)
-        print(ed_describe)
+        assert_almost_equal(pd_describe[['AvgTicketPrice']],
+                            ed_describe[['AvgTicketPrice']],
+                            check_less_precise=True)

-        # TODO - this fails now as ES aggregations are approximate
+        # TODO - this fails for all fields now as ES aggregations are approximate
        #        if ES percentile agg uses
        #        "hdr": {
        #           "number_of_significant_value_digits": 3
        #         }
        #        this works
-        # assert_almost_equal(pd_flights_describe, ed_flights_describe)
-
-        pd_ecommerce_describe = self.pd_ecommerce().describe()
-        ed_ecommerce_describe = self.ed_ecommerce().describe()

+        # pd_ecommerce_describe = self.pd_ecommerce().describe()
+        # ed_ecommerce_describe = self.ed_ecommerce().describe()
        # We don't compare ecommerce here as the default dtypes in pandas from read_json
        # don't match the mapping types. This is mainly because the products field is
        # nested and so can be treated as a multi-field in ES, but not in pandas

        # We can not also run 'describe' on a truncate ed dataframe
-
--- a/eland/tests/dataframe/test_drop_pytest.py
+++ b/eland/tests/dataframe/test_drop_pytest.py
@ -1,19 +1,14 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
-import eland as ed

 from eland.tests.common import TestData
 from eland.tests.common import (
-    assert_eland_frame_equal,
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal
+    assert_pandas_eland_frame_equal
 )

-import numpy as np

 class TestDataFrameDrop(TestData):

-    def test_drop1(self):
+    def test_flights_small_drop(self):
        ed_flights_small = self.ed_flights_small()
        pd_flights_small = self.pd_flights_small()

--- a/eland/tests/dataframe/test_dtypes_pytest.py
+++ b/eland/tests/dataframe/test_dtypes_pytest.py
@ -0,0 +1,14 @@
+# File called _pytest for PyCharm compatability
+
+from pandas.util.testing import assert_series_equal
+
+from eland.tests.common import TestData
+
+
+class TestDataFrameDtypes(TestData):
+
+    def test_flights_dtypes(self):
+        ed_flights = self.ed_flights()
+        pd_flights = self.pd_flights()
+
+        assert_series_equal(pd_flights.dtypes, ed_flights.dtypes)
--- a/eland/tests/dataframe/test_get_pytest.py
+++ b/eland/tests/dataframe/test_get_pytest.py
@ -1,18 +1,11 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
-import eland as ed

 from eland.tests.common import TestData
-from eland.tests.common import (
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal
-)

-import numpy as np

 class TestDataFrameGet(TestData):

-    def test_get1(self):
+    def test_get_one_attribute(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

--- a/eland/tests/dataframe/test_getitem_pytest.py
+++ b/eland/tests/dataframe/test_getitem_pytest.py
@ -1,5 +1,4 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd

 from eland.tests.common import TestData
 from eland.tests.common import (
@ -8,10 +7,9 @@ from eland.tests.common import (
 )


-
 class TestDataFrameGetItem(TestData):

-    def test_getitem1(self):
+    def test_getitem_one_attribute(self):
        ed_flights = self.ed_flights().head(103)
        pd_flights = self.pd_flights().head(103)

@ -20,7 +18,7 @@ class TestDataFrameGetItem(TestData):

        assert_pandas_eland_series_equal(pd_flights_OriginAirportID, ed_flights_OriginAirportID)

-    def test_getitem2(self):
+    def test_getitem_attribute_list(self):
        ed_flights = self.ed_flights().head(42)
        pd_flights = self.pd_flights().head(42)

@ -29,7 +27,7 @@ class TestDataFrameGetItem(TestData):

        assert_pandas_eland_frame_equal(pd_flights_slice, ed_flights_slice)

-    def test_getitem3(self):
+    def test_getitem_one_argument(self):
        ed_flights = self.ed_flights().head(89)
        pd_flights = self.pd_flights().head(89)

@ -38,7 +36,7 @@ class TestDataFrameGetItem(TestData):

        assert_pandas_eland_series_equal(pd_flights_OriginAirportID, ed_flights_OriginAirportID)

-    def test_getitem4(self):
+    def test_getitem_multiple_calls(self):
        ed_flights = self.ed_flights().head(89)
        pd_flights = self.pd_flights().head(89)

@ -52,4 +50,3 @@ class TestDataFrameGetItem(TestData):
        ed_col1 = ed_col0['DestCountry']

        assert_pandas_eland_series_equal(pd_col1, ed_col1)
-
--- a/eland/tests/dataframe/test_head_tail_pytest.py
+++ b/eland/tests/dataframe/test_head_tail_pytest.py
@ -1,11 +1,9 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd

 from eland.tests.common import TestData
 from eland.tests.common import assert_pandas_eland_frame_equal


-
 class TestDataFrameHeadTail(TestData):

    def test_head(self):
--- a/eland/tests/dataframe/test_hist_pytest.py
+++ b/eland/tests/dataframe/test_hist_pytest.py
@ -1,6 +1,5 @@
 # File called _pytest for PyCharm compatability

-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from pandas.util.testing import assert_almost_equal
@ -10,7 +9,7 @@ from eland.tests.common import TestData

 class TestDataFrameHist(TestData):

-    def test_hist1(self):
+    def test_flights_hist(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()

@ -30,15 +29,3 @@ class TestDataFrameHist(TestData):
        # Numbers are slightly different
        assert_almost_equal(pd_bins, ed_bins)
        assert_almost_equal(pd_weights, ed_weights)
-
-    def test_hist2(self):
-        pd_df = self.pd_flights()[['DistanceKilometers', 'DistanceMiles', 'FlightDelayMin', 'FlightTimeHour']]
-        ed_df = self.ed_flights()[['DistanceKilometers', 'DistanceMiles', 'FlightDelayMin', 'FlightTimeHour']]
-
-        num_bins = 10
-
-        ed_bins, ed_weights = ed_df._hist(num_bins=num_bins)
-
-        print(ed_bins)
-
-
--- a/eland/tests/dataframe/test_iloc_pytest.py
+++ b/eland/tests/dataframe/test_iloc_pytest.py
@ -1,54 +0,0 @@
-# File called _pytest for PyCharm compatability
-import pandas as pd
-import eland as ed
-
-from eland.tests.common import TestData
-from eland.tests.common import (
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal
-)
-
-import numpy as np
-
-class TestDataFrameiLoc(TestData):
-
-    def test_iloc1(self):
-        ed_flights = self.ed_flights()
-        pd_flights = self.pd_flights()
-
-        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html#pandas.DataFrame.iloc
-
-        #pd_flights.info()
-
-        pd_iloc0 = pd_flights.iloc[0]
-        pd_iloc1= pd_flights.iloc[[0]]
-        pd_iloc2= pd_flights.iloc[[0, 1]]
-        pd_iloc3 = pd_flights.iloc[:3]
-        pd_iloc4 = pd_flights.iloc[[True, False, True]]
-        pd_iloc5 = pd_flights.iloc[0, 1]
-        pd_iloc6 = pd_flights.iloc[[0, 2], [1, 3]]
-        pd_iloc7 = pd_flights.iloc[1:3, 0:3]
-        pd_iloc8 = pd_flights.iloc[:, [True, False, True, False]]
-        pd_iloc9 = pd_flights.iloc[[True, False, True, False]]
-
-        ed_iloc0 = ed_flights.iloc[0]
-        ed_iloc1 = ed_flights.iloc[[0]]
-        ed_iloc2 = ed_flights.iloc[[0, 1]]
-        ed_iloc3 = ed_flights.iloc[:3]
-        ed_iloc4 = ed_flights.iloc[[True, False, True]]
-        ed_iloc5 = ed_flights.iloc[0, 1]
-        ed_iloc6 = ed_flights.iloc[[0, 2], [1, 3]]
-        ed_iloc7 = ed_flights.iloc[1:3, 0:3]
-        ed_iloc8 = ed_flights.iloc[:, [True, False, True, False]]
-        ed_iloc9 = ed_flights.iloc[[True, False, True, False]]
-
-        #assert_pandas_eland_frame_equal(pd_iloc0, ed_iloc0) # pd_iloc0 is Series
-        assert_pandas_eland_frame_equal(pd_iloc1, ed_iloc1)
-        assert_pandas_eland_frame_equal(pd_iloc2, ed_iloc2)
-        assert_pandas_eland_frame_equal(pd_iloc3, ed_iloc3)
-        assert_pandas_eland_frame_equal(pd_iloc4, ed_iloc4)
-        #assert_pandas_eland_frame_equal(pd_iloc5, ed_iloc5) # pd_iloc5 is numpy_bool
-        assert_pandas_eland_frame_equal(pd_iloc6, ed_iloc6)
-        assert_pandas_eland_frame_equal(pd_iloc7, ed_iloc7)
-        assert_pandas_eland_frame_equal(pd_iloc8, ed_iloc8)
-        assert_pandas_eland_frame_equal(pd_iloc9, ed_iloc9)
--- a/eland/tests/dataframe/test_info_es_pytest.py
+++ b/eland/tests/dataframe/test_info_es_pytest.py
@ -1,15 +0,0 @@
-# File called _pytest for PyCharm compatability
-
-from eland.tests.common import TestData
-
-
-class TestDataFrameInfoEs(TestData):
-
-    def test_to_info1(self):
-        ed_flights = self.ed_flights()
-
-        head = ed_flights.head(103)
-        slice = head[['timestamp', 'OriginRegion', 'Carrier']]
-        iloc = slice.iloc[10:92, [0,2]]
-        print(iloc.info_es())
-        print(iloc)
--- a/eland/tests/dataframe/test_info_pytest.py
+++ b/eland/tests/dataframe/test_info_pytest.py
@ -6,7 +6,7 @@ from eland.tests.common import TestData

 class TestDataFrameInfo(TestData):

-    def test_to_info1(self):
+    def test_flights_info(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

--- a/eland/tests/dataframe/test_metrics_pytest.py
+++ b/eland/tests/dataframe/test_metrics_pytest.py
@ -1,10 +1,9 @@
 # File called _pytest for PyCharm compatability

-from eland.tests.common import TestData
-
-
 from pandas.util.testing import assert_series_equal

+from eland.tests.common import TestData
+

 class TestDataFrameMetrics(TestData):

@ -43,4 +42,3 @@ class TestDataFrameMetrics(TestData):
        ed_max = ed_flights.max(numeric_only=True)

        assert_series_equal(pd_max, ed_max)
-
--- a/eland/tests/dataframe/test_nunique_pytest.py
+++ b/eland/tests/dataframe/test_nunique_pytest.py
@ -1,22 +0,0 @@
-# File called _pytest for PyCharm compatability
-import pandas as pd
-import eland as ed
-
-from eland.tests.common import TestData
-from eland.tests.common import (
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal
-)
-
-import numpy as np
-
-class TestDataFrameNUnique(TestData):
-
-    def test_nunique1(self):
-        ed_flights = self.ed_flights()
-        pd_flights = self.pd_flights()
-
-        print(pd_flights.dtypes)
-        print(ed_flights.dtypes)
-        print(ed_flights.nunique())
-
--- a/eland/tests/dataframe/test_query_pytest.py
+++ b/eland/tests/dataframe/test_query_pytest.py
@ -10,7 +10,7 @@ from eland.tests.common import assert_pandas_eland_frame_equal

 class TestDataFrameQuery(TestData):

-    def test_query1(self):
+    def test_query(self):
        # Examples from:
        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html
        pd_df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2), 'C': range(10, 5, -1)},
@ -43,4 +43,3 @@ class TestDataFrameQuery(TestData):
        ed_q4 = ed_df[(ed_df.A > 2) & (ed_df.B > 3)]

        assert_pandas_eland_frame_equal(pd_q4, ed_q4)
-
--- a/eland/tests/dataframe/test_repr_pytest.py
+++ b/eland/tests/dataframe/test_repr_pytest.py
@ -3,9 +3,9 @@
 from eland.tests.common import TestData


-class TestDataFrameHeadTail(TestData):
+class TestDataFrameRepr(TestData):

-    def test_to_string1(self):
+    def test_head_101_to_string(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -18,7 +18,7 @@ class TestDataFrameHeadTail(TestData):

        assert pd_head_101_str == ed_head_101_str

-    def test_to_string2(self):
+    def test_head_11_to_string2(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -30,7 +30,7 @@ class TestDataFrameHeadTail(TestData):

        assert pd_head_11_str == ed_head_11_str

-    def test_to_repr(self):
+    def test_repr(self):
        ed_ecommerce = self.ed_ecommerce()
        pd_ecommerce = self.pd_ecommerce()

--- a/eland/tests/dataframe/test_reviews_pytest.py
+++ b/eland/tests/dataframe/test_reviews_pytest.py
@ -1,31 +0,0 @@
-# File called _pytest for PyCharm compatability
-
-import gzip
-
-import pandas as pd
-
-import eland as ed
-from eland.tests.common import TestData
-
-
-class TestDataFrameReviews(TestData):
-
-    def test_explore(self):
-        ed_reviews = ed.DataFrame('localhost', 'anonreviews')
-
-        print(ed_reviews.head())
-        print(ed_reviews.describe())
-        print(ed_reviews.info())
-        print(ed_reviews.hist(column="rating", bins=5))
-        # print(ed_reviews.head().info_es())
-
-    def test_review(self):
-        csv_handle = gzip.open('../anonreviews.csv.gz')
-
-        reviews = pd.read_csv(csv_handle)
-
-        reviews['date'] = pd.to_datetime(reviews['date'])
-
-        g = reviews.groupby('reviewerId')
-
-        print(g.describe())
--- a/eland/tests/dataframe/test_select_dtypes_pytest.py
+++ b/eland/tests/dataframe/test_select_dtypes_pytest.py
@ -1,5 +1,4 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
 import numpy as np

 from eland.tests.common import TestData
@ -8,10 +7,9 @@ from eland.tests.common import (
 )


-
 class TestDataFrameSelectDTypes(TestData):

-    def test_select_dtypes1(self):
+    def test_select_dtypes_include_number(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -20,7 +18,7 @@ class TestDataFrameSelectDTypes(TestData):

        assert_pandas_eland_frame_equal(pd_flights_numeric.head(103), ed_flights_numeric.head(103))

-    def test_select_dtypes2(self):
+    def test_select_dtypes_exclude_number(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -28,4 +26,3 @@ class TestDataFrameSelectDTypes(TestData):
        pd_flights_non_numeric = pd_flights.select_dtypes(exclude=[np.number])

        assert_pandas_eland_frame_equal(pd_flights_non_numeric.head(103), ed_flights_non_numeric.head(103))
-
--- a/eland/tests/dataframe/test_shape_pytest.py
+++ b/eland/tests/dataframe/test_shape_pytest.py
@ -22,5 +22,3 @@ class TestDataFrameShape(TestData):
        ed_shape = ed_flights.shape

        assert pd_shape == ed_shape
-
-
--- a/eland/tests/dataframe/test_to_csv_pytest.py
+++ b/eland/tests/dataframe/test_to_csv_pytest.py
@ -1,22 +1,24 @@
 # File called _pytest for PyCharm compatability

-import pandas as pd
+import ast

+import pandas as pd
+from pandas.util.testing import (assert_frame_equal)
+
+from eland.tests.common import ROOT_DIR
 from eland.tests.common import TestData

-from pandas.util.testing import (assert_equal, assert_frame_equal)
-
-import ast

 class TestDataFrameToCSV(TestData):

    def test_to_csv_head(self):
+        results_file = ROOT_DIR + '/dataframe/results/test_to_csv_head.csv'
+
        ed_flights = self.ed_flights().head()
        pd_flights = self.pd_flights().head()
-
-        ed_flights.to_csv('results/test_to_csv_head.csv')
+        ed_flights.to_csv(results_file)
        # Converting back from csv is messy as pd_flights is created from a json file
-        pd_from_csv = pd.read_csv('results/test_to_csv_head.csv', index_col=0, converters={
+        pd_from_csv = pd.read_csv(results_file, index_col=0, converters={
            'DestLocation': lambda x: ast.literal_eval(x),
            'OriginLocation': lambda x: ast.literal_eval(x)})
        pd_from_csv.index = pd_from_csv.index.map(str)
@ -25,19 +27,18 @@ class TestDataFrameToCSV(TestData):
        assert_frame_equal(pd_flights, pd_from_csv)

    def test_to_csv_full(self):
+        results_file = ROOT_DIR + '/dataframe/results/test_to_csv_full.csv'
+
        # Test is slow as it's for the full dataset, but it is useful as it goes over 10000 docs
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

-        ed_flights.to_csv('results/test_to_csv_full.csv')
+        ed_flights.to_csv(results_file)
        # Converting back from csv is messy as pd_flights is created from a json file
-        pd_from_csv = pd.read_csv('results/test_to_csv_full.csv', index_col=0, converters={
+        pd_from_csv = pd.read_csv(results_file, index_col=0, converters={
            'DestLocation': lambda x: ast.literal_eval(x),
            'OriginLocation': lambda x: ast.literal_eval(x)})
        pd_from_csv.index = pd_from_csv.index.map(str)
        pd_from_csv.timestamp = pd.to_datetime(pd_from_csv.timestamp)

        assert_frame_equal(pd_flights, pd_from_csv)
-
-
-
--- a/eland/tests/demo_day_20190815.ipynb
+++ b/eland/tests/demo_day_20190815.ipynb
@ -7144,7 +7144,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
+   "version": "3.6.8"
  }
 },
 "nbformat": 4,
--- a/eland/tests/mappings/test_dtypes_pytest.py
+++ b/eland/tests/mappings/test_dtypes_pytest.py
@ -1,12 +1,13 @@
 # File called _pytest for PyCharm compatability

-from eland.tests.common import TestData
-
 from pandas.util.testing import assert_series_equal

+from eland.tests.common import TestData
+
+
 class TestMappingsDtypes(TestData):

-    def test_dtypes1(self):
+    def test_flights_dtypes_all(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -15,7 +16,7 @@ class TestMappingsDtypes(TestData):

        assert_series_equal(pd_dtypes, ed_dtypes)

-    def test_dtypes2(self):
+    def test_flights_dtypes_columns(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]

@ -24,7 +25,7 @@ class TestMappingsDtypes(TestData):

        assert_series_equal(pd_dtypes, ed_dtypes)

-    def test_get_dtype_counts1(self):
+    def test_flights_get_dtype_counts_all(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -33,12 +34,12 @@ class TestMappingsDtypes(TestData):

        assert_series_equal(pd_dtypes, ed_dtypes)

-    def test_get_dtype_counts2(self):
+    def test_flights_get_dtype_counts_columns(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]

        pd_dtypes = pd_flights.get_dtype_counts().sort_index()
-        ed_dtypes = ed_flights._query_compiler._mappings.\
+        ed_dtypes = ed_flights._query_compiler._mappings. \
            get_dtype_counts(columns=['Carrier', 'AvgTicketPrice', 'Cancelled']).sort_index()

        assert_series_equal(pd_dtypes, ed_dtypes)
--- a/eland/tests/operators/test_operators_pytest.py
+++ b/eland/tests/operators/test_operators_pytest.py
@ -1,5 +1,5 @@
 # -*- coding: UTF-8 -*-
-from eland.operators import *
+from eland.filter import *


 class TestOperators():
@ -21,11 +21,6 @@ class TestOperators():
            'script': {'script': {'inline': 'doc["num1"].value > params.param1', 'params': {'param1': 5}}}}
        assert IsIn('ids', [1, 2, 3]).build() == {'ids': {'values': [1, 2, 3]}}

-    def test_and_none(self):
-        exp = None
-        exp = exp & Less('b', 3)
-        print(exp.build())
-
    def test_and_filter1(self):
        exp = GreaterEqual('a', 2) & Less('b', 3)
        assert exp.build() == {'bool': {'must': [{'range': {'a': {'gte': 2}}}, {'range': {'b': {'lt': 3}}}]}}
@ -33,145 +28,145 @@ class TestOperators():
    def test_and_filter2(self):
        exp = GreaterEqual('a', 2) & Less('b', 3) & Equal('c', 4)
        assert exp.build() == \
-        {
-            'bool': {
-                'must': [
-                    {'range': {'a': {'gte': 2}}},
-                    {'range': {'b': {'lt': 3}}},
-                    {'term': {'c': 4}}
-                ]
-            }
-        }
+               {
+                   'bool': {
+                       'must': [
+                           {'range': {'a': {'gte': 2}}},
+                           {'range': {'b': {'lt': 3}}},
+                           {'term': {'c': 4}}
+                       ]
+                   }
+               }

    def test_and_filter3(self):
        exp = GreaterEqual('a', 2) & (Less('b', 3) & Equal('c', 4))
        assert exp.build() == \
-        {
-            'bool': {
-                'must': [
-                    {'range': {'b': {'lt': 3}}},
-                    {'term': {'c': 4}},
-                    {'range': {'a': {'gte': 2}}}
-                ]
-            }
-        }
+               {
+                   'bool': {
+                       'must': [
+                           {'range': {'b': {'lt': 3}}},
+                           {'term': {'c': 4}},
+                           {'range': {'a': {'gte': 2}}}
+                       ]
+                   }
+               }

    def test_or_filter1(self):
        exp = GreaterEqual('a', 2) | Less('b', 3)
        assert exp.build() == \
-        {
-            'bool': {
-                'should': [
-                    {'range': {'a': {'gte': 2}}},
-                    {'range': {'b': {'lt': 3}}}
-                ]
-            }
-        }
+               {
+                   'bool': {
+                       'should': [
+                           {'range': {'a': {'gte': 2}}},
+                           {'range': {'b': {'lt': 3}}}
+                       ]
+                   }
+               }

    def test_or_filter2(self):
        exp = GreaterEqual('a', 2) | Less('b', 3) | Equal('c', 4)
        assert exp.build() == \
-        {
-            'bool': {
-                'should': [
-                    {'range': {'a': {'gte': 2}}},
-                    {'range': {'b': {'lt': 3}}},
-                    {'term': {'c': 4}}
-                ]
-            }
-        }
+               {
+                   'bool': {
+                       'should': [
+                           {'range': {'a': {'gte': 2}}},
+                           {'range': {'b': {'lt': 3}}},
+                           {'term': {'c': 4}}
+                       ]
+                   }
+               }

    def test_or_filter3(self):
        exp = GreaterEqual('a', 2) | (Less('b', 3) | Equal('c', 4))
        assert exp.build() == \
-        {
-            'bool': {
-                'should': [
-                    {'range': {'b': {'lt': 3}}},
-                    {'term': {'c': 4}},
-                    {'range': {'a': {'gte': 2}}}
-                ]
-            }
-        }
+               {
+                   'bool': {
+                       'should': [
+                           {'range': {'b': {'lt': 3}}},
+                           {'term': {'c': 4}},
+                           {'range': {'a': {'gte': 2}}}
+                       ]
+                   }
+               }

    def test_not_filter(self):
        exp = ~GreaterEqual('a', 2)
        assert exp.build() == \
-        {
-            'bool': {
-                'must_not': {'range': {'a': {'gte': 2}}}
-            }
-        }
+               {
+                   'bool': {
+                       'must_not': {'range': {'a': {'gte': 2}}}
+                   }
+               }

    def test_not_not_filter(self):
        exp = ~~GreaterEqual('a', 2)

        assert exp.build() == \
-        {
-            'bool': {
-                'must_not': {
-                    'bool': {
-                        'must_not': {'range': {'a': {'gte': 2}}}
-                    }
-                }
-            }
-        }
+               {
+                   'bool': {
+                       'must_not': {
+                           'bool': {
+                               'must_not': {'range': {'a': {'gte': 2}}}
+                           }
+                       }
+                   }
+               }

    def test_not_and_filter(self):
        exp = ~(GreaterEqual('a', 2) & Less('b', 3))
        assert exp.build() == \
-        {
-            'bool': {
-                'must_not': {
-                    'bool': {
-                        'must': [
-                            {'range': {'a': {'gte': 2}}},
-                            {'range': {'b': {'lt': 3}}}
-                        ]
-                    }
-                }
-            }
-        }
+               {
+                   'bool': {
+                       'must_not': {
+                           'bool': {
+                               'must': [
+                                   {'range': {'a': {'gte': 2}}},
+                                   {'range': {'b': {'lt': 3}}}
+                               ]
+                           }
+                       }
+                   }
+               }

    def test_and_or_filter(self):
        exp = GreaterEqual('a', 2) & (Less('b', 3) | Equal('c', 4))
        assert exp.build() == \
-        {
-            'bool': {
-                'must': [
-                    {'range': {'a': {'gte': 2}}},
-                    {
-                        'bool': {
-                            'should': [
-                                {'range': {'b': {'lt': 3}}},
-                                {'term': {'c': 4}}
-                            ]
-                        }
-                    }
-                ]
-            }
-        }
+               {
+                   'bool': {
+                       'must': [
+                           {'range': {'a': {'gte': 2}}},
+                           {
+                               'bool': {
+                                   'should': [
+                                       {'range': {'b': {'lt': 3}}},
+                                       {'term': {'c': 4}}
+                                   ]
+                               }
+                           }
+                       ]
+                   }
+               }

    def test_and_not_or_filter(self):
        exp = GreaterEqual('a', 2) & ~(Less('b', 3) | Equal('c', 4))
        assert exp.build() == \
-        {
-            'bool': {
-                'must': [
-                    {'range': {'a': {'gte': 2}}},
-                    {
-                        'bool': {
-                            'must_not': {
-                                'bool': {
-                                    'should': [
-                                        {'range': {'b': {'lt': 3}}},
-                                        {'term': {'c': 4}}
-                                    ]
-                                }
+               {
+                   'bool': {
+                       'must': [
+                           {'range': {'a': {'gte': 2}}},
+                           {
+                               'bool': {
+                                   'must_not': {
+                                       'bool': {
+                                           'should': [
+                                               {'range': {'b': {'lt': 3}}},
+                                               {'term': {'c': 4}}
+                                           ]
+                                       }

-                            }
-                        }
-                    }
-                ]
-            }
-        }
+                                   }
+                               }
+                           }
+                       ]
+                   }
+               }
--- a/eland/tests/pivot_review_data_pandas.ipynb
+++ b/eland/tests/pivot_review_data_pandas.ipynb
--- a/eland/tests/plotting/test_dataframe_hist_pytest.py
+++ b/eland/tests/plotting/test_dataframe_hist_pytest.py
@ -1,8 +1,9 @@
 # File called _pytest for PyCharm compatability

+from matplotlib.testing.decorators import check_figures_equal
+
 from eland.tests.common import TestData

-from matplotlib.testing.decorators import check_figures_equal

@check_figures_equal(extensions=['png'])
 def test_plot_hist(fig_test, fig_ref):
--- a/eland/tests/query/test_count_pytest.py
+++ b/eland/tests/query/test_count_pytest.py
@ -1,8 +1,7 @@
 # File called _pytest for PyCharm compatability

-from eland.tests.common import TestData
-
 from eland import Query
+from eland.tests.common import TestData


 class TestQueryCopy(TestData):
@ -22,6 +21,3 @@ class TestQueryCopy(TestData):

        print(q.to_search_body())
        print(q1.to_search_body())
-
-
-
--- a/eland/tests/series/test_head_tail_pytest.py
+++ b/eland/tests/series/test_head_tail_pytest.py
@ -1,15 +1,9 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
 import eland as ed
-
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_series_equal
-
 from eland.tests import ELASTICSEARCH_HOST
 from eland.tests import FLIGHTS_INDEX_NAME
-
-from pandas.util.testing import assert_series_equal
-
+from eland.tests.common import TestData
+from eland.tests.common import assert_pandas_eland_series_equal


 class TestSeriesHeadTail(TestData):
--- a/eland/tests/series/test_repr_pytest.py
+++ b/eland/tests/series/test_repr_pytest.py
@ -1,15 +1,8 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
 import eland as ed
-
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_frame_equal
-
 from eland.tests import ELASTICSEARCH_HOST
 from eland.tests import FLIGHTS_INDEX_NAME
-
-from pandas.util.testing import assert_series_equal
-
+from eland.tests.common import TestData


 class TestSeriesRepr(TestData):
--- a/eland/tests/setup_tests.py
+++ b/eland/tests/setup_tests.py
@ -1,4 +1,3 @@
-import pandas as pd
 from elasticsearch import Elasticsearch
 from elasticsearch import helpers

@ -10,6 +9,7 @@ DATA_LIST = [
    (ECOMMERCE_FILE_NAME, ECOMMERCE_INDEX_NAME, ECOMMERCE_MAPPING)
 ]

+
 def _setup_data(es):
    # Read json file and index records into Elasticsearch
    for data in DATA_LIST:
@ -32,7 +32,7 @@ def _setup_data(es):
        for index, row in df.iterrows():
            values = row.to_dict()
            # make timestamp datetime 2018-01-01T12:09:35
-            #values['timestamp'] = datetime.strptime(values['timestamp'], '%Y-%m-%dT%H:%M:%S')
+            # values['timestamp'] = datetime.strptime(values['timestamp'], '%Y-%m-%dT%H:%M:%S')

            # Use integer as id field for repeatable results
            action = {'_index': index_name, '_source': values, '_id': str(n)}
@ -50,17 +50,20 @@ def _setup_data(es):

        print("Done", index_name)

+
 def _setup_test_mappings(es):
    # Create a complex mapping containing many Elasticsearch features
    es.indices.delete(index=TEST_MAPPING1_INDEX_NAME, ignore=[400, 404])
    es.indices.create(index=TEST_MAPPING1_INDEX_NAME, body=TEST_MAPPING1)

+
 def _setup_test_nested(es):
    es.indices.delete(index=TEST_NESTED_USER_GROUP_INDEX_NAME, ignore=[400, 404])
    es.indices.create(index=TEST_NESTED_USER_GROUP_INDEX_NAME, body=TEST_NESTED_USER_GROUP_MAPPING)

    helpers.bulk(es, TEST_NESTED_USER_GROUP_DOCS)

+
 if __name__ == '__main__':
    # Create connection to Elasticsearch - use defaults
    es = Elasticsearch(ELASTICSEARCH_HOST)
--- a/eland/utils.py
+++ b/eland/utils.py
@ -7,7 +7,8 @@ def read_es(es_params, index_pattern):
    return DataFrame(client=es_params, index_pattern=index_pattern)


-def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False):
+def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
+                 geo_points=None):
    """
    Append a pandas DataFrame to an Elasticsearch index.
    Mainly used in testing.
@ -30,10 +31,19 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=
            If table exists, drop it, recreate it, and insert data.
        ``'append'``
                If table exists, insert data. Create if does not exist.
+
+    dropna : bool
+        ``'True'``
+            Remove missing values (see pandas.Series.dropna)
+        ``'False;``
+            Include missing values - may cause bulk to fail
+
+    geo_points : list or None
+        List of columns to map to geo_point data type
    """
    client = Client(es_params)

-    mapping = Mappings._generate_es_mappings(df)
+    mapping = Mappings._generate_es_mappings(df, geo_points)

    # If table exists, check if_exists parameter
    if client.index_exists(index=destination_index):
@ -58,7 +68,11 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=
    for row in df.iterrows():
        # Use index as _id
        id = row[0]
-        values = row[1].to_dict()
+
+        if dropna:
+            values = row[1].dropna().to_dict()
+        else:
+            values = row[1].to_dict()

        # Use integer as id field for repeatable results
        action = {'_index': destination_index, '_source': values, '_id': str(id)}
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@ -0,0 +1,3 @@
+elasticsearch>=7.0.5
+pandas==0.25.1
+pytest>=5.2.1
--- a/requirements.txt
+++ b/requirements.txt
@ -1,8 +1,3 @@
-elasticsearch==7.0.2
-elasticsearch-dsl==7.0.0
-numpy==1.16.4
-pandas==0.24.2
-python-dateutil==2.8.0
-pytz==2019.1
-six==1.12.0
-urllib3==1.25.3
+elasticsearch>=7.0.5
+pandas==0.25.1
+matplotlib
--- a/setup.py
+++ b/setup.py
@ -1,9 +1,11 @@
 from setuptools import setup

+
 def readme():
    with open('README.rst') as f:
        return f.read()

+
 setup(name='eland',
      version='0.1',
      description='Python elasticsearch client to analyse, explore and manipulate data that resides in elasticsearch',
@ -13,8 +15,7 @@ setup(name='eland',
      license='ELASTIC LICENSE',
      packages=['eland'],
      install_requires=[
-          'elasticsearch',
-          'elasticsearch_dsl',
-          'pandas'
+          'elasticsearch>=7.0.5',
+          'pandas==0.25.1'
      ],
      zip_safe=False)