Major cleanup - removed modin as dependency

Removed modin as a dependency. The iloc feature has also been removed for now - TODO: add it back in.
2025-07-11 00:02:14 +08:00 · 2019-11-04 13:13:42 +00:00 · 2019-11-04 13:13:42 +00:00 · c1ee409a33
commit c1ee409a33
parent 9dad8613d3
46 changed files with 8593 additions and 882 deletions
--- a/eland/init.py
+++ b/eland/init.py
@ -1,19 +1,14 @@
 from __future__ import absolute_import
-import os
-
-# Set modin to pandas to avoid starting ray or other
-os.environ["MODIN_ENGINE"] = 'python'
-os.environ["MODIN_BACKEND"] = 'pandas'

 from eland.client import *
+from eland.dataframe import *
+from eland.filter import *
 from eland.index import *
 from eland.mappings import *
-from eland.filter import *
-from eland.query import *
-from eland.operations import *
-from eland.query_compiler import *
-from eland.plotting import *
 from eland.ndframe import *
+from eland.operations import *
+from eland.plotting import *
+from eland.query import *
+from eland.query_compiler import *
 from eland.series import *
-from eland.dataframe import *
 from eland.utils import *
--- a/eland/client.py
+++ b/eland/client.py
@ -1,10 +1,12 @@
 from elasticsearch import Elasticsearch
 from elasticsearch import helpers

+
 class Client:
    """
    eland client - implemented as facade to control access to Elasticsearch methods
    """
+
    def __init__(self, es=None):
        if isinstance(es, Elasticsearch):
            self._es = es
@ -40,4 +42,3 @@ class Client:
    def count(self, **kwargs):
        count_json = self._es.count(**kwargs)
        return count_json['count']
-
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@ -1,16 +1,15 @@
 import sys
 import warnings
 from distutils.version import LooseVersion
+from io import StringIO

 import numpy as np
 import pandas as pd
-import pandas.compat as compat
 import six
-from io import StringIO
 from pandas.core.common import apply_if_callable, is_bool_indexer
-from pandas.core.dtypes.common import (
-    is_list_like
-)
+from pandas.core.dtypes.common import is_list_like
+from pandas.core.indexing import check_bool_indexer
+
 from pandas.io.common import _expand_user, _stringify_path
 from pandas.io.formats import console
 from pandas.io.formats import format as fmt
@ -58,10 +57,10 @@ class DataFrame(NDFrame):
        return len(self.columns) == 0 or len(self.index) == 0

    def head(self, n=5):
-        return super().head(n)
+        return DataFrame(query_compiler=self._query_compiler.head(n))

    def tail(self, n=5):
-        return super().tail(n)
+        return DataFrame(query_compiler=self._query_compiler.tail(n))

    def __repr__(self):
        """
@ -104,7 +103,7 @@ class DataFrame(NDFrame):
                    return None

        if self._info_repr():
-            buf = StringIO(u(""))
+            buf = StringIO()
            self.info(buf=buf)
            # need to escape the <class>, should be the first line.
            val = buf.getvalue().replace('<', r'&lt;', 1)
@ -509,7 +508,7 @@ class DataFrame(NDFrame):
        return self.columns

    def groupby(self, by=None, axis=0, *args, **kwargs):
-        axis = self._get_axis_number(axis)
+        axis = pd.DataFrame._get_axis_number(axis)

        if axis == 1:
            raise NotImplementedError("Aggregating via index not currently implemented - needs index transform")
@ -544,7 +543,7 @@ class DataFrame(NDFrame):
            if Series.agg is called with single function, returns a scalar
            if Series.agg is called with several functions, returns a Series
        """
-        axis = self._get_axis_number(axis)
+        axis = pd.DataFrame._get_axis_number(axis)

        if axis == 1:
            raise NotImplementedError("Aggregating via index not currently implemented - needs index transform")
@ -579,3 +578,20 @@ class DataFrame(NDFrame):
            )
        else:
            raise NotImplementedError(expr, type(expr))
+
+    def get(self, key, default=None):
+        """Get item from object for given key (DataFrame column, Panel
+                slice, etc.). Returns default value if not found.
+
+                Args:
+                    key (DataFrame column, Panel slice) : the key for which value
+                    to get
+
+                Returns:
+                    value (type of items contained in object) : A value that is
+                    stored at the key
+                """
+        if key in self.keys():
+            return self._getitem(key)
+        else:
+            return default
--- a/eland/filter.py
+++ b/eland/filter.py
@ -1,7 +1,7 @@
 # Derived from pandasticsearch filters

 # Es filter builder for BooleanCond
-class BooleanFilter(object):
+class BooleanFilter:
    def __init__(self, *args):
        self._filter = None

--- a/eland/index.py
+++ b/eland/index.py
@ -14,6 +14,8 @@ In case sorting or aggregating on the _id field is required, it is advised to du
 the content of the _id field in another field that has doc_values enabled.)

 """
+
+
 class Index:
    ID_INDEX_FIELD = '_id'
    ID_SORT_FIELD = '_doc'  # if index field is _id, sort by _doc
--- a/eland/mappings.py
+++ b/eland/mappings.py
@ -75,6 +75,7 @@ class Mappings:
            pd_dtype = self._mappings_capabilities.loc[field_name]['pd_dtype']
            self._source_field_pd_dtypes[field_name] = pd_dtype

+    @staticmethod
    def _extract_fields_from_mapping(mappings, source_only=False):
        """
        Extract all field names and types from a mapping.
@ -151,6 +152,7 @@ class Mappings:

        return fields

+    @staticmethod
    def _create_capability_matrix(all_fields, source_fields, all_fields_caps):
        """
        {
@ -414,15 +416,27 @@ class Mappings:
            List of source fields where pd_dtype == (int64 or float64 or bool)
        """
        if columns is not None:
+            if include_bool == True:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
                                                    (self._mappings_capabilities.pd_dtype == 'float64') |
-                                                (self._mappings_capabilities.pd_dtype == 'bool'))].loc[columns].index.tolist()
+                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].loc[
+                    columns].index.tolist()
            else:
+                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
+                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
+                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].loc[
+                    columns].index.tolist()
+        else:
+            if include_bool == True:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
                                                    (self._mappings_capabilities.pd_dtype == 'float64') |
                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].index.tolist()
+            else:
+                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
+                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
+                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].index.tolist()

    def source_fields(self):
        """
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@ -26,15 +26,13 @@ only Elasticsearch aggregatable fields can be aggregated or grouped.
 import sys

 import pandas as pd
-from modin.pandas.base import BasePandasDataset
-from modin.pandas.indexing import _iLocIndexer
-from pandas.util._validators import validate_bool_kwarg
 from pandas.core.dtypes.common import is_list_like
+from pandas.util._validators import validate_bool_kwarg

 from eland import ElandQueryCompiler


-class NDFrame(BasePandasDataset):
+class NDFrame:

    def __init__(self,
                 client=None,
@ -85,6 +83,9 @@ class NDFrame(BasePandasDataset):

        return head.append(tail)

+    def __getitem__(self, key):
+        return self._getitem(key)
+
    def __getattr__(self, key):
        """After regular attribute access, looks up the name in the columns

@ -105,6 +106,14 @@ class NDFrame(BasePandasDataset):
        # Don't default to pandas, just return approximation TODO - make this more accurate
        return sys.getsizeof(self._query_compiler)

+    def __len__(self):
+        """Gets the length of the DataFrame.
+
+        Returns:
+            Returns an integer length of the DataFrame object.
+        """
+        return len(self.index)
+
    @property
    def iloc(self):
        """Purely integer-location based indexing for selection by position.
@ -235,21 +244,3 @@ class NDFrame(BasePandasDataset):

    def describe(self):
        return self._query_compiler.describe()
-
-    def get(self, key, default=None):
-        """Get item from object for given key (DataFrame column, Panel
-                slice, etc.). Returns default value if not found.
-
-                Args:
-                    key (DataFrame column, Panel slice) : the key for which value
-                    to get
-
-                Returns:
-                    value (type of items contained in object) : A value that is
-                    stored at the key
-                """
-        if key in self.keys():
-            return self.__getitem__(key)
-        else:
-            return default
-
--- a/eland/operations.py
+++ b/eland/operations.py
@ -1,9 +1,7 @@
 import copy
 from enum import Enum
-from io import StringIO

 import pandas as pd
-import numpy as np

 from eland import Index
 from eland import Query
@ -410,7 +408,7 @@ class Operations:

        columns = self.get_columns()

-        numeric_source_fields = query_compiler._mappings.numeric_source_fields(columns)
+        numeric_source_fields = query_compiler._mappings.numeric_source_fields(columns, include_bool=False)

        # for each field we compute:
        # count, mean, std, min, 25%, 50%, 75%, max
@ -450,6 +448,7 @@ class Operations:
        class PandasDataFrameCollector:
            def collect(self, df):
                self.df = df
+
            def batch_size(self):
                return None

@ -465,6 +464,7 @@ class Operations:
                self.kwargs = kwargs
                self.ret = None
                self.first_time = True
+
            def collect(self, df):
                # If this is the first time we collect results, then write header, otherwise don't write header
                # and append results
--- a/eland/query.py
+++ b/eland/query.py
@ -3,6 +3,7 @@ from copy import deepcopy

 from eland.filter import BooleanFilter, NotNull, IsNull, IsIn

+
 class Query:
    """
    Simple class to manage building Elasticsearch queries.
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@ -1,20 +1,15 @@
 import pandas as pd
-from modin.backends.base.query_compiler import BaseQueryCompiler
+from pandas.core.dtypes.common import (
+    is_list_like
+)

 from eland import Client
 from eland import Index
 from eland import Mappings
 from eland import Operations

-from pandas.core.dtypes.common import (
-    is_list_like
-)

-from pandas.core.indexes.numeric import Int64Index
-from pandas.core.indexes.range import RangeIndex
-
-
-class ElandQueryCompiler(BaseQueryCompiler):
+class ElandQueryCompiler:
    """
    Some notes on what can and can not be mapped:

@ -318,7 +313,7 @@ class ElandQueryCompiler(BaseQueryCompiler):
        return df

    def copy(self):
-        return self.__constructor__(
+        return ElandQueryCompiler(
            client=self._client,
            index_pattern=self._index_pattern,
            columns=None,  # columns are embedded in operations
@ -412,14 +407,19 @@ class ElandQueryCompiler(BaseQueryCompiler):

    def count(self):
        return self._operations.count(self)
+
    def mean(self):
        return self._operations.mean(self)
+
    def sum(self):
        return self._operations.sum(self)
+
    def min(self):
        return self._operations.min(self)
+
    def max(self):
        return self._operations.max(self)
+
    def nunique(self):
        return self._operations.nunique(self)

@ -472,5 +472,3 @@ class ElandQueryCompiler(BaseQueryCompiler):
        return result

    # def isna(self):
-
-
--- a/eland/series.py
+++ b/eland/series.py
@ -101,10 +101,10 @@ class Series(NDFrame):
    name = property(_get_name)

    def head(self, n=5):
-        return super().head(n)
+        return Series(query_compiler=self._query_compiler.head(n))

    def tail(self, n=5):
-        return super().tail(n)
+        return Series(query_compiler=self._query_compiler.tail(n))

    # ----------------------------------------------------------------------
    # Rendering Methods
@ -194,7 +194,6 @@ class Series(NDFrame):
        else:
            raise NotImplementedError(other, type(other))

-
    def __eq__(self, other):
        if isinstance(other, Series):
            # Need to use scripted query to compare to values
--- a/eland/tests/Eland
+++ b/eland/tests/Eland
--- a/eland/tests/init.py
+++ b/eland/tests/init.py
@ -1,12 +1,9 @@
 import os
+
 import pandas as pd

 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

-# Set modin to pandas to avoid starting ray or other
-os.environ["MODIN_ENGINE"] = 'python'
-os.environ["MODIN_BACKEND"] = 'pandas'
-
 # Define test files and indices
 ELASTICSEARCH_HOST = 'localhost'  # TODO externalise this

@ -491,4 +488,3 @@ TEST_NESTED_USER_GROUP_DOCS = [
     '_source': {'group': 'new york', 'user': [
         {'first': 'Bill', 'last': 'Jones'}]}}
 ]
-
--- a/eland/tests/common.py
+++ b/eland/tests/common.py
@ -1,11 +1,9 @@
-import pytest
+import os

-import eland as ed
 import pandas as pd
-
 from pandas.util.testing import (assert_frame_equal, assert_series_equal)

-import os
+import eland as ed

 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

@ -34,6 +32,7 @@ _pd_ecommerce.index = _pd_ecommerce.index.map(str) # make index 'object' not int
 _pd_ecommerce['customer_birth_date'].astype('datetime64')
 _ed_ecommerce = ed.read_es(ELASTICSEARCH_HOST, ECOMMERCE_INDEX_NAME)

+
 class TestData:

    def pd_flights(self):
@ -48,13 +47,13 @@ class TestData:
    def ed_flights_small(self):
        return _ed_flights_small

-
    def pd_ecommerce(self):
        return _pd_ecommerce

    def ed_ecommerce(self):
        return _ed_ecommerce

+
 def assert_pandas_eland_frame_equal(left, right):
    if not isinstance(left, pd.DataFrame):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
@ -67,6 +66,7 @@ def assert_pandas_eland_frame_equal(left, right):
    # Use pandas tests to check similarity
    assert_frame_equal(left, right._to_pandas())

+
 def assert_eland_frame_equal(left, right):
    if not isinstance(left, ed.DataFrame):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
@ -91,4 +91,3 @@ def assert_pandas_eland_series_equal(left, right):

    # Use pandas tests to check similarity
    assert_series_equal(left, right._to_pandas())
-
--- a/eland/tests/dataframe/test_aggs_pytest.py
+++ b/eland/tests/dataframe/test_aggs_pytest.py
@ -1,15 +1,14 @@
 # File called _pytest for PyCharm compatability

 import numpy as np
-import pandas as pd
-from pandas.util.testing import (assert_almost_equal)
+from pandas.util.testing import assert_almost_equal

 from eland.tests.common import TestData


 class TestDataFrameAggs(TestData):

-    def test_to_aggs1(self):
+    def test_basic_aggs(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()

--- a/eland/tests/dataframe/test_count_pytest.py
+++ b/eland/tests/dataframe/test_count_pytest.py
@ -1,19 +1,17 @@
 # File called _pytest for PyCharm compatability

+from pandas.util.testing import assert_series_equal
+
 from eland.tests.common import TestData


 class TestDataFrameCount(TestData):

-    def test_to_count1(self):
+    def test_ecommerce_count(self):
        pd_ecommerce = self.pd_ecommerce()
        ed_ecommerce = self.ed_ecommerce()

        pd_count = pd_ecommerce.count()
        ed_count = ed_ecommerce.count()

-        print(pd_count)
-        print(ed_count)
-
-
-
+        assert_series_equal(pd_count, ed_count)
--- a/eland/tests/dataframe/test_datetime_pytest.py
+++ b/eland/tests/dataframe/test_datetime_pytest.py
@ -6,6 +6,7 @@ import pandas as pd
 import eland as ed
 from eland.tests.common import ELASTICSEARCH_HOST
 from eland.tests.common import TestData
+from eland.tests.common import assert_pandas_eland_frame_equal


 class TestDataFrameDateTime(TestData):
@ -41,4 +42,4 @@ class TestDataFrameDateTime(TestData):
        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        ed_df_head = ed_df.head()

-        # assert_frame_equal(df, ed_df_head)
+        assert_pandas_eland_frame_equal(df, ed_df_head)
--- a/eland/tests/dataframe/test_describe_pytest.py
+++ b/eland/tests/dataframe/test_describe_pytest.py
@ -1,35 +1,34 @@
 # File called _pytest for PyCharm compatability
-from io import StringIO
+
+from pandas.util.testing import assert_almost_equal

 from eland.tests.common import TestData


 class TestDataFrameDescribe(TestData):

-    def test_to_describe1(self):
+    def test_flights_describe(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()

        pd_describe = pd_flights.describe()
        ed_describe = ed_flights.describe()

-        print(pd_describe)
-        print(ed_describe)
+        assert_almost_equal(pd_describe[['AvgTicketPrice']],
+                            ed_describe[['AvgTicketPrice']],
+                            check_less_precise=True)

-        # TODO - this fails now as ES aggregations are approximate
+        # TODO - this fails for all fields now as ES aggregations are approximate
        #        if ES percentile agg uses
        #        "hdr": {
        #           "number_of_significant_value_digits": 3
        #         }
        #        this works
-        # assert_almost_equal(pd_flights_describe, ed_flights_describe)
-
-        pd_ecommerce_describe = self.pd_ecommerce().describe()
-        ed_ecommerce_describe = self.ed_ecommerce().describe()

+        # pd_ecommerce_describe = self.pd_ecommerce().describe()
+        # ed_ecommerce_describe = self.ed_ecommerce().describe()
        # We don't compare ecommerce here as the default dtypes in pandas from read_json
        # don't match the mapping types. This is mainly because the products field is
        # nested and so can be treated as a multi-field in ES, but not in pandas

        # We can not also run 'describe' on a truncate ed dataframe
-
--- a/eland/tests/dataframe/test_drop_pytest.py
+++ b/eland/tests/dataframe/test_drop_pytest.py
@ -1,19 +1,14 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
-import eland as ed

 from eland.tests.common import TestData
 from eland.tests.common import (
-    assert_eland_frame_equal,
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal
+    assert_pandas_eland_frame_equal
 )

-import numpy as np

 class TestDataFrameDrop(TestData):

-    def test_drop1(self):
+    def test_flights_small_drop(self):
        ed_flights_small = self.ed_flights_small()
        pd_flights_small = self.pd_flights_small()

--- a/eland/tests/dataframe/test_dtypes_pytest.py
+++ b/eland/tests/dataframe/test_dtypes_pytest.py
@ -0,0 +1,14 @@
+# File called _pytest for PyCharm compatability
+
+from pandas.util.testing import assert_series_equal
+
+from eland.tests.common import TestData
+
+
+class TestDataFrameDtypes(TestData):
+
+    def test_flights_dtypes(self):
+        ed_flights = self.ed_flights()
+        pd_flights = self.pd_flights()
+
+        assert_series_equal(pd_flights.dtypes, ed_flights.dtypes)
--- a/eland/tests/dataframe/test_get_pytest.py
+++ b/eland/tests/dataframe/test_get_pytest.py
@ -1,18 +1,11 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
-import eland as ed

 from eland.tests.common import TestData
-from eland.tests.common import (
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal
-)

-import numpy as np

 class TestDataFrameGet(TestData):

-    def test_get1(self):
+    def test_get_one_attribute(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

--- a/eland/tests/dataframe/test_getitem_pytest.py
+++ b/eland/tests/dataframe/test_getitem_pytest.py
@ -1,5 +1,4 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd

 from eland.tests.common import TestData
 from eland.tests.common import (
@ -8,10 +7,9 @@ from eland.tests.common import (
 )


-
 class TestDataFrameGetItem(TestData):

-    def test_getitem1(self):
+    def test_getitem_one_attribute(self):
        ed_flights = self.ed_flights().head(103)
        pd_flights = self.pd_flights().head(103)

@ -20,7 +18,7 @@ class TestDataFrameGetItem(TestData):

        assert_pandas_eland_series_equal(pd_flights_OriginAirportID, ed_flights_OriginAirportID)

-    def test_getitem2(self):
+    def test_getitem_attribute_list(self):
        ed_flights = self.ed_flights().head(42)
        pd_flights = self.pd_flights().head(42)

@ -29,7 +27,7 @@ class TestDataFrameGetItem(TestData):

        assert_pandas_eland_frame_equal(pd_flights_slice, ed_flights_slice)

-    def test_getitem3(self):
+    def test_getitem_one_argument(self):
        ed_flights = self.ed_flights().head(89)
        pd_flights = self.pd_flights().head(89)

@ -38,7 +36,7 @@ class TestDataFrameGetItem(TestData):

        assert_pandas_eland_series_equal(pd_flights_OriginAirportID, ed_flights_OriginAirportID)

-    def test_getitem4(self):
+    def test_getitem_multiple_calls(self):
        ed_flights = self.ed_flights().head(89)
        pd_flights = self.pd_flights().head(89)

@ -52,4 +50,3 @@ class TestDataFrameGetItem(TestData):
        ed_col1 = ed_col0['DestCountry']

        assert_pandas_eland_series_equal(pd_col1, ed_col1)
-
--- a/eland/tests/dataframe/test_head_tail_pytest.py
+++ b/eland/tests/dataframe/test_head_tail_pytest.py
@ -1,11 +1,9 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd

 from eland.tests.common import TestData
 from eland.tests.common import assert_pandas_eland_frame_equal


-
 class TestDataFrameHeadTail(TestData):

    def test_head(self):
--- a/eland/tests/dataframe/test_hist_pytest.py
+++ b/eland/tests/dataframe/test_hist_pytest.py
@ -1,6 +1,5 @@
 # File called _pytest for PyCharm compatability

-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from pandas.util.testing import assert_almost_equal
@ -10,7 +9,7 @@ from eland.tests.common import TestData

 class TestDataFrameHist(TestData):

-    def test_hist1(self):
+    def test_flights_hist(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()

@ -30,15 +29,3 @@ class TestDataFrameHist(TestData):
        # Numbers are slightly different
        assert_almost_equal(pd_bins, ed_bins)
        assert_almost_equal(pd_weights, ed_weights)
-
-    def test_hist2(self):
-        pd_df = self.pd_flights()[['DistanceKilometers', 'DistanceMiles', 'FlightDelayMin', 'FlightTimeHour']]
-        ed_df = self.ed_flights()[['DistanceKilometers', 'DistanceMiles', 'FlightDelayMin', 'FlightTimeHour']]
-
-        num_bins = 10
-
-        ed_bins, ed_weights = ed_df._hist(num_bins=num_bins)
-
-        print(ed_bins)
-
-
--- a/eland/tests/dataframe/test_iloc_pytest.py
+++ b/eland/tests/dataframe/test_iloc_pytest.py
@ -1,45 +0,0 @@
-# File called _pytest for PyCharm compatability
-import pandas as pd
-import eland as ed
-
-from eland.tests.common import TestData
-from eland.tests.common import (
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal
-)
-
-import numpy as np
-
-class TestDataFrameiLoc(TestData):
-
-    def test_iloc1(self):
-        ed_flights = self.ed_flights()
-        pd_flights = self.pd_flights()
-
-        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html#pandas.DataFrame.iloc
-
-        #pd_flights.info()
-
-        pd_iloc0 = pd_flights.iloc[0]
-        pd_iloc1= pd_flights.iloc[[0]]
-        pd_iloc2= pd_flights.iloc[[0, 1]]
-        pd_iloc3 = pd_flights.iloc[:3]
-        pd_iloc5 = pd_flights.iloc[0, 1]
-        pd_iloc6 = pd_flights.iloc[[0, 2], [1, 3]]
-        pd_iloc7 = pd_flights.iloc[1:3, 0:3]
-
-        ed_iloc0 = ed_flights.iloc[0]
-        ed_iloc1 = ed_flights.iloc[[0]]
-        ed_iloc2 = ed_flights.iloc[[0, 1]]
-        ed_iloc3 = ed_flights.iloc[:3]
-        ed_iloc5 = ed_flights.iloc[0, 1]
-        ed_iloc6 = ed_flights.iloc[[0, 2], [1, 3]]
-        ed_iloc7 = ed_flights.iloc[1:3, 0:3]
-
-        #assert_pandas_eland_frame_equal(pd_iloc0, ed_iloc0) # pd_iloc0 is Series
-        assert_pandas_eland_frame_equal(pd_iloc1, ed_iloc1)
-        assert_pandas_eland_frame_equal(pd_iloc2, ed_iloc2)
-        assert_pandas_eland_frame_equal(pd_iloc3, ed_iloc3)
-        #assert_pandas_eland_frame_equal(pd_iloc5, ed_iloc5) # pd_iloc5 is numpy_bool
-        assert_pandas_eland_frame_equal(pd_iloc6, ed_iloc6)
-        assert_pandas_eland_frame_equal(pd_iloc7, ed_iloc7)
--- a/eland/tests/dataframe/test_info_es_pytest.py
+++ b/eland/tests/dataframe/test_info_es_pytest.py
@ -1,15 +0,0 @@
-# File called _pytest for PyCharm compatability
-
-from eland.tests.common import TestData
-
-
-class TestDataFrameInfoEs(TestData):
-
-    def test_to_info1(self):
-        ed_flights = self.ed_flights()
-
-        head = ed_flights.head(103)
-        slice = head[['timestamp', 'OriginRegion', 'Carrier']]
-        iloc = slice.iloc[10:92, [0,2]]
-        print(iloc.info_es())
-        print(iloc)
--- a/eland/tests/dataframe/test_info_pytest.py
+++ b/eland/tests/dataframe/test_info_pytest.py
@ -6,7 +6,7 @@ from eland.tests.common import TestData

 class TestDataFrameInfo(TestData):

-    def test_to_info1(self):
+    def test_flights_info(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

--- a/eland/tests/dataframe/test_metrics_pytest.py
+++ b/eland/tests/dataframe/test_metrics_pytest.py
@ -1,10 +1,9 @@
 # File called _pytest for PyCharm compatability

-from eland.tests.common import TestData
-
-
 from pandas.util.testing import assert_series_equal

+from eland.tests.common import TestData
+

 class TestDataFrameMetrics(TestData):

@ -43,4 +42,3 @@ class TestDataFrameMetrics(TestData):
        ed_max = ed_flights.max(numeric_only=True)

        assert_series_equal(pd_max, ed_max)
-
--- a/eland/tests/dataframe/test_nunique_pytest.py
+++ b/eland/tests/dataframe/test_nunique_pytest.py
@ -1,22 +0,0 @@
-# File called _pytest for PyCharm compatability
-import pandas as pd
-import eland as ed
-
-from eland.tests.common import TestData
-from eland.tests.common import (
-    assert_pandas_eland_frame_equal,
-    assert_pandas_eland_series_equal
-)
-
-import numpy as np
-
-class TestDataFrameNUnique(TestData):
-
-    def test_nunique1(self):
-        ed_flights = self.ed_flights()
-        pd_flights = self.pd_flights()
-
-        print(pd_flights.dtypes)
-        print(ed_flights.dtypes)
-        print(ed_flights.nunique())
-
--- a/eland/tests/dataframe/test_query_pytest.py
+++ b/eland/tests/dataframe/test_query_pytest.py
@ -10,7 +10,7 @@ from eland.tests.common import assert_pandas_eland_frame_equal

 class TestDataFrameQuery(TestData):

-    def test_query1(self):
+    def test_query(self):
        # Examples from:
        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html
        pd_df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2), 'C': range(10, 5, -1)},
@ -43,4 +43,3 @@ class TestDataFrameQuery(TestData):
        ed_q4 = ed_df[(ed_df.A > 2) & (ed_df.B > 3)]

        assert_pandas_eland_frame_equal(pd_q4, ed_q4)
-
--- a/eland/tests/dataframe/test_repr_pytest.py
+++ b/eland/tests/dataframe/test_repr_pytest.py
@ -3,9 +3,9 @@
 from eland.tests.common import TestData


-class TestDataFrameHeadTail(TestData):
+class TestDataFrameRepr(TestData):

-    def test_to_string1(self):
+    def test_head_101_to_string(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -18,7 +18,7 @@ class TestDataFrameHeadTail(TestData):

        assert pd_head_101_str == ed_head_101_str

-    def test_to_string2(self):
+    def test_head_11_to_string2(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -30,7 +30,7 @@ class TestDataFrameHeadTail(TestData):

        assert pd_head_11_str == ed_head_11_str

-    def test_to_repr(self):
+    def test_repr(self):
        ed_ecommerce = self.ed_ecommerce()
        pd_ecommerce = self.pd_ecommerce()

--- a/eland/tests/dataframe/test_select_dtypes_pytest.py
+++ b/eland/tests/dataframe/test_select_dtypes_pytest.py
@ -1,5 +1,4 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
 import numpy as np

 from eland.tests.common import TestData
@ -8,10 +7,9 @@ from eland.tests.common import (
 )


-
 class TestDataFrameSelectDTypes(TestData):

-    def test_select_dtypes1(self):
+    def test_select_dtypes_include_number(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -20,7 +18,7 @@ class TestDataFrameSelectDTypes(TestData):

        assert_pandas_eland_frame_equal(pd_flights_numeric.head(103), ed_flights_numeric.head(103))

-    def test_select_dtypes2(self):
+    def test_select_dtypes_exclude_number(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -28,4 +26,3 @@ class TestDataFrameSelectDTypes(TestData):
        pd_flights_non_numeric = pd_flights.select_dtypes(exclude=[np.number])

        assert_pandas_eland_frame_equal(pd_flights_non_numeric.head(103), ed_flights_non_numeric.head(103))
-
--- a/eland/tests/dataframe/test_shape_pytest.py
+++ b/eland/tests/dataframe/test_shape_pytest.py
@ -22,5 +22,3 @@ class TestDataFrameShape(TestData):
        ed_shape = ed_flights.shape

        assert pd_shape == ed_shape
-
-
--- a/eland/tests/dataframe/test_to_csv_pytest.py
+++ b/eland/tests/dataframe/test_to_csv_pytest.py
@ -1,14 +1,13 @@
 # File called _pytest for PyCharm compatability

-import pandas as pd
-
-from eland.tests.common import TestData
-from eland.tests.common import ROOT_DIR
-
-from pandas.util.testing import (assert_equal, assert_frame_equal)
-
 import ast

+import pandas as pd
+from pandas.util.testing import (assert_frame_equal)
+
+from eland.tests.common import ROOT_DIR
+from eland.tests.common import TestData
+

 class TestDataFrameToCSV(TestData):

@ -43,6 +42,3 @@ class TestDataFrameToCSV(TestData):
        pd_from_csv.timestamp = pd.to_datetime(pd_from_csv.timestamp)

        assert_frame_equal(pd_flights, pd_from_csv)
-
-
-
--- a/eland/tests/mappings/test_dtypes_pytest.py
+++ b/eland/tests/mappings/test_dtypes_pytest.py
@ -1,12 +1,13 @@
 # File called _pytest for PyCharm compatability

-from eland.tests.common import TestData
-
 from pandas.util.testing import assert_series_equal

+from eland.tests.common import TestData
+
+
 class TestMappingsDtypes(TestData):

-    def test_dtypes1(self):
+    def test_flights_dtypes_all(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -15,7 +16,7 @@ class TestMappingsDtypes(TestData):

        assert_series_equal(pd_dtypes, ed_dtypes)

-    def test_dtypes2(self):
+    def test_flights_dtypes_columns(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]

@ -24,7 +25,7 @@ class TestMappingsDtypes(TestData):

        assert_series_equal(pd_dtypes, ed_dtypes)

-    def test_get_dtype_counts1(self):
+    def test_flights_get_dtype_counts_all(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()

@ -33,7 +34,7 @@ class TestMappingsDtypes(TestData):

        assert_series_equal(pd_dtypes, ed_dtypes)

-    def test_get_dtype_counts2(self):
+    def test_flights_get_dtype_counts_columns(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]

--- a/eland/tests/plotting/test_dataframe_hist_pytest.py
+++ b/eland/tests/plotting/test_dataframe_hist_pytest.py
@ -1,8 +1,9 @@
 # File called _pytest for PyCharm compatability

+from matplotlib.testing.decorators import check_figures_equal
+
 from eland.tests.common import TestData

-from matplotlib.testing.decorators import check_figures_equal

@check_figures_equal(extensions=['png'])
 def test_plot_hist(fig_test, fig_ref):
--- a/eland/tests/query/test_count_pytest.py
+++ b/eland/tests/query/test_count_pytest.py
@ -1,8 +1,7 @@
 # File called _pytest for PyCharm compatability

-from eland.tests.common import TestData
-
 from eland import Query
+from eland.tests.common import TestData


 class TestQueryCopy(TestData):
@ -22,6 +21,3 @@ class TestQueryCopy(TestData):

        print(q.to_search_body())
        print(q1.to_search_body())
-
-
-
--- a/eland/tests/series/test_head_tail_pytest.py
+++ b/eland/tests/series/test_head_tail_pytest.py
@ -1,15 +1,9 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
 import eland as ed
-
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_series_equal
-
 from eland.tests import ELASTICSEARCH_HOST
 from eland.tests import FLIGHTS_INDEX_NAME
-
-from pandas.util.testing import assert_series_equal
-
+from eland.tests.common import TestData
+from eland.tests.common import assert_pandas_eland_series_equal


 class TestSeriesHeadTail(TestData):
--- a/eland/tests/series/test_repr_pytest.py
+++ b/eland/tests/series/test_repr_pytest.py
@ -1,15 +1,8 @@
 # File called _pytest for PyCharm compatability
-import pandas as pd
 import eland as ed
-
-from eland.tests.common import TestData
-from eland.tests.common import assert_pandas_eland_frame_equal
-
 from eland.tests import ELASTICSEARCH_HOST
 from eland.tests import FLIGHTS_INDEX_NAME
-
-from pandas.util.testing import assert_series_equal
-
+from eland.tests.common import TestData


 class TestSeriesRepr(TestData):
--- a/eland/tests/setup_tests.py
+++ b/eland/tests/setup_tests.py
@ -1,4 +1,3 @@
-import pandas as pd
 from elasticsearch import Elasticsearch
 from elasticsearch import helpers

@ -10,6 +9,7 @@ DATA_LIST = [
    (ECOMMERCE_FILE_NAME, ECOMMERCE_INDEX_NAME, ECOMMERCE_MAPPING)
 ]

+
 def _setup_data(es):
    # Read json file and index records into Elasticsearch
    for data in DATA_LIST:
@ -50,17 +50,20 @@ def _setup_data(es):

        print("Done", index_name)

+
 def _setup_test_mappings(es):
    # Create a complex mapping containing many Elasticsearch features
    es.indices.delete(index=TEST_MAPPING1_INDEX_NAME, ignore=[400, 404])
    es.indices.create(index=TEST_MAPPING1_INDEX_NAME, body=TEST_MAPPING1)

+
 def _setup_test_nested(es):
    es.indices.delete(index=TEST_NESTED_USER_GROUP_INDEX_NAME, ignore=[400, 404])
    es.indices.create(index=TEST_NESTED_USER_GROUP_INDEX_NAME, body=TEST_NESTED_USER_GROUP_MAPPING)

    helpers.bulk(es, TEST_NESTED_USER_GROUP_DOCS)

+
 if __name__ == '__main__':
    # Create connection to Elasticsearch - use defaults
    es = Elasticsearch(ELASTICSEARCH_HOST)
--- a/eland/utils.py
+++ b/eland/utils.py
@ -7,7 +7,8 @@ def read_es(es_params, index_pattern):
    return DataFrame(client=es_params, index_pattern=index_pattern)


-def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False, geo_points=None):
+def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
+                 geo_points=None):
    """
    Append a pandas DataFrame to an Elasticsearch index.
    Mainly used in testing.
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@ -0,0 +1,3 @@
+elasticsearch>=7.0.5
+pandas==0.25.1
+pytest>=5.2.1
--- a/requirements.txt
+++ b/requirements.txt
@ -1,2 +1,3 @@
 elasticsearch>=7.0.5
 pandas==0.25.1
+matplotlib
--- a/setup.py
+++ b/setup.py
@ -1,9 +1,11 @@
 from setuptools import setup

+
 def readme():
    with open('README.rst') as f:
        return f.read()

+
 setup(name='eland',
      version='0.1',
      description='Python elasticsearch client to analyse, explore and manipulate data that resides in elasticsearch',