Major cleanup - removed modin as dependency

Removed modin as a dependency. The iloc feature has also been removed for now (TODO: add iloc back in).
2025-07-11 00:02:14 +08:00 · 2019-11-04 13:13:42 +00:00 · 2019-11-04 13:13:42 +00:00 · c1ee409a33
commit c1ee409a33
parent 9dad8613d3
46 changed files with 8593 additions and 882 deletions
--- a/eland/init.py
+++ b/eland/init.py
@ -1,19 +1,14 @@
 from __future__ import absolute_import
 import os
 # Set modin to pandas to avoid starting ray or other
 os.environ["MODIN_ENGINE"] = 'python'
 os.environ["MODIN_BACKEND"] = 'pandas'
 from eland.client import *
 from eland.dataframe import *
 from eland.filter import *
 from eland.index import *
 from eland.mappings import *
 from eland.filter import *
 from eland.query import *
 from eland.operations import *
 from eland.query_compiler import *
 from eland.plotting import *
 from eland.ndframe import *
 from eland.operations import *
 from eland.plotting import *
 from eland.query import *
 from eland.query_compiler import *
 from eland.series import *
 from eland.dataframe import *
 from eland.utils import *
--- a/eland/client.py
+++ b/eland/client.py
@ -1,10 +1,12 @@
 from elasticsearch import Elasticsearch
 from elasticsearch import helpers
 class Client:
    """
    eland client - implemented as facade to control access to Elasticsearch methods
    """
    def __init__(self, es=None):
        if isinstance(es, Elasticsearch):
            self._es = es
@ -40,4 +42,3 @@ class Client:
    def count(self, **kwargs):
        count_json = self._es.count(**kwargs)
        return count_json['count']
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@ -1,16 +1,15 @@
 import sys
 import warnings
 from distutils.version import LooseVersion
 from io import StringIO
 import numpy as np
 import pandas as pd
 import pandas.compat as compat
 import six
 from io import StringIO
 from pandas.core.common import apply_if_callable, is_bool_indexer
-from pandas.core.dtypes.common import (
+from pandas.core.dtypes.common import is_list_like
-    is_list_like
+from pandas.core.indexing import check_bool_indexer
-)
+
 from pandas.io.common import _expand_user, _stringify_path
 from pandas.io.formats import console
 from pandas.io.formats import format as fmt
@ -58,10 +57,10 @@ class DataFrame(NDFrame):
        return len(self.columns) == 0 or len(self.index) == 0
    def head(self, n=5):
-        return super().head(n)
+        return DataFrame(query_compiler=self._query_compiler.head(n))
    def tail(self, n=5):
-        return super().tail(n)
+        return DataFrame(query_compiler=self._query_compiler.tail(n))
    def __repr__(self):
        """
@ -104,7 +103,7 @@ class DataFrame(NDFrame):
                    return None
        if self._info_repr():
-            buf = StringIO(u(""))
+            buf = StringIO()
            self.info(buf=buf)
            # need to escape the <class>, should be the first line.
            val = buf.getvalue().replace('<', r'&lt;', 1)
@ -509,7 +508,7 @@ class DataFrame(NDFrame):
        return self.columns
    def groupby(self, by=None, axis=0, *args, **kwargs):
-        axis = self._get_axis_number(axis)
+        axis = pd.DataFrame._get_axis_number(axis)
        if axis == 1:
            raise NotImplementedError("Aggregating via index not currently implemented - needs index transform")
@ -544,7 +543,7 @@ class DataFrame(NDFrame):
            if Series.agg is called with single function, returns a scalar
            if Series.agg is called with several functions, returns a Series
        """
-        axis = self._get_axis_number(axis)
+        axis = pd.DataFrame._get_axis_number(axis)
        if axis == 1:
            raise NotImplementedError("Aggregating via index not currently implemented - needs index transform")
@ -579,3 +578,20 @@ class DataFrame(NDFrame):
            )
        else:
            raise NotImplementedError(expr, type(expr))
    def get(self, key, default=None):
        """Get item from object for given key (DataFrame column, Panel
                slice, etc.). Returns default value if not found.
                Args:
                    key (DataFrame column, Panel slice) : the key for which value
                    to get
                Returns:
                    value (type of items contained in object) : A value that is
                    stored at the key
                """
        if key in self.keys():
            return self._getitem(key)
        else:
            return default
--- a/eland/filter.py
+++ b/eland/filter.py
@ -1,7 +1,7 @@
 # Derived from pandasticsearch filters
 # Es filter builder for BooleanCond
-class BooleanFilter(object):
+class BooleanFilter:
    def __init__(self, *args):
        self._filter = None
--- a/eland/index.py
+++ b/eland/index.py
@ -14,6 +14,8 @@ In case sorting or aggregating on the _id field is required, it is advised to du
 the content of the _id field in another field that has doc_values enabled.)
 """
 class Index:
    ID_INDEX_FIELD = '_id'
    ID_SORT_FIELD = '_doc'  # if index field is _id, sort by _doc
--- a/eland/mappings.py
+++ b/eland/mappings.py
@ -75,6 +75,7 @@ class Mappings:
            pd_dtype = self._mappings_capabilities.loc[field_name]['pd_dtype']
            self._source_field_pd_dtypes[field_name] = pd_dtype
    @staticmethod
    def _extract_fields_from_mapping(mappings, source_only=False):
        """
        Extract all field names and types from a mapping.
@ -151,6 +152,7 @@ class Mappings:
        return fields
    @staticmethod
    def _create_capability_matrix(all_fields, source_fields, all_fields_caps):
        """
        {
@ -414,15 +416,27 @@ class Mappings:
            List of source fields where pd_dtype == (int64 or float64 or bool)
        """
        if columns is not None:
            if include_bool == True:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
                                                    (self._mappings_capabilities.pd_dtype == 'float64') |
-                                                (self._mappings_capabilities.pd_dtype == 'bool'))].loc[columns].index.tolist()
+                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].loc[
                    columns].index.tolist()
            else:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].loc[
                    columns].index.tolist()
        else:
            if include_bool == True:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
                                                    (self._mappings_capabilities.pd_dtype == 'float64') |
                                                    (self._mappings_capabilities.pd_dtype == 'bool'))].index.tolist()
            else:
                return self._mappings_capabilities[(self._mappings_capabilities._source == True) &
                                                   ((self._mappings_capabilities.pd_dtype == 'int64') |
                                                    (self._mappings_capabilities.pd_dtype == 'float64'))].index.tolist()
    def source_fields(self):
        """
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@ -26,15 +26,13 @@ only Elasticsearch aggregatable fields can be aggregated or grouped.
 import sys
 import pandas as pd
 from modin.pandas.base import BasePandasDataset
 from modin.pandas.indexing import _iLocIndexer
 from pandas.util._validators import validate_bool_kwarg
 from pandas.core.dtypes.common import is_list_like
 from pandas.util._validators import validate_bool_kwarg
 from eland import ElandQueryCompiler
-class NDFrame(BasePandasDataset):
+class NDFrame:
    def __init__(self,
                 client=None,
@ -85,6 +83,9 @@ class NDFrame(BasePandasDataset):
        return head.append(tail)
    def __getitem__(self, key):
        return self._getitem(key)
    def __getattr__(self, key):
        """After regular attribute access, looks up the name in the columns
@ -105,6 +106,14 @@ class NDFrame(BasePandasDataset):
        # Don't default to pandas, just return approximation TODO - make this more accurate
        return sys.getsizeof(self._query_compiler)
    def __len__(self):
        """Gets the length of the DataFrame.
        Returns:
            Returns an integer length of the DataFrame object.
        """
        return len(self.index)
    @property
    def iloc(self):
        """Purely integer-location based indexing for selection by position.
@ -235,21 +244,3 @@ class NDFrame(BasePandasDataset):
    def describe(self):
        return self._query_compiler.describe()
    def get(self, key, default=None):
        """Get item from object for given key (DataFrame column, Panel
                slice, etc.). Returns default value if not found.
                Args:
                    key (DataFrame column, Panel slice) : the key for which value
                    to get
                Returns:
                    value (type of items contained in object) : A value that is
                    stored at the key
                """
        if key in self.keys():
            return self.__getitem__(key)
        else:
            return default
--- a/eland/operations.py
+++ b/eland/operations.py
@ -1,9 +1,7 @@
 import copy
 from enum import Enum
 from io import StringIO
 import pandas as pd
 import numpy as np
 from eland import Index
 from eland import Query
@ -410,7 +408,7 @@ class Operations:
        columns = self.get_columns()
-        numeric_source_fields = query_compiler._mappings.numeric_source_fields(columns)
+        numeric_source_fields = query_compiler._mappings.numeric_source_fields(columns, include_bool=False)
        # for each field we compute:
        # count, mean, std, min, 25%, 50%, 75%, max
@ -450,6 +448,7 @@ class Operations:
        class PandasDataFrameCollector:
            def collect(self, df):
                self.df = df
            def batch_size(self):
                return None
@ -465,6 +464,7 @@ class Operations:
                self.kwargs = kwargs
                self.ret = None
                self.first_time = True
            def collect(self, df):
                # If this is the first time we collect results, then write header, otherwise don't write header
                # and append results
--- a/eland/query.py
+++ b/eland/query.py
@ -3,6 +3,7 @@ from copy import deepcopy
 from eland.filter import BooleanFilter, NotNull, IsNull, IsIn
 class Query:
    """
    Simple class to manage building Elasticsearch queries.
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@ -1,20 +1,15 @@
 import pandas as pd
-from modin.backends.base.query_compiler import BaseQueryCompiler
+from pandas.core.dtypes.common import (
    is_list_like
 )
 from eland import Client
 from eland import Index
 from eland import Mappings
 from eland import Operations
 from pandas.core.dtypes.common import (
    is_list_like
 )
-from pandas.core.indexes.numeric import Int64Index
+class ElandQueryCompiler:
 from pandas.core.indexes.range import RangeIndex
 class ElandQueryCompiler(BaseQueryCompiler):
    """
    Some notes on what can and can not be mapped:
@ -318,7 +313,7 @@ class ElandQueryCompiler(BaseQueryCompiler):
        return df
    def copy(self):
-        return self.__constructor__(
+        return ElandQueryCompiler(
            client=self._client,
            index_pattern=self._index_pattern,
            columns=None,  # columns are embedded in operations
@ -412,14 +407,19 @@ class ElandQueryCompiler(BaseQueryCompiler):
    def count(self):
        return self._operations.count(self)
    def mean(self):
        return self._operations.mean(self)
    def sum(self):
        return self._operations.sum(self)
    def min(self):
        return self._operations.min(self)
    def max(self):
        return self._operations.max(self)
    def nunique(self):
        return self._operations.nunique(self)
@ -472,5 +472,3 @@ class ElandQueryCompiler(BaseQueryCompiler):
        return result
    # def isna(self):
--- a/eland/series.py
+++ b/eland/series.py
@ -101,10 +101,10 @@ class Series(NDFrame):
    name = property(_get_name)
    def head(self, n=5):
-        return super().head(n)
+        return Series(query_compiler=self._query_compiler.head(n))
    def tail(self, n=5):
-        return super().tail(n)
+        return Series(query_compiler=self._query_compiler.tail(n))
    # ----------------------------------------------------------------------
    # Rendering Methods
@ -194,7 +194,6 @@ class Series(NDFrame):
        else:
            raise NotImplementedError(other, type(other))
    def __eq__(self, other):
        if isinstance(other, Series):
            # Need to use scripted query to compare to values
--- a/eland/tests/Eland
+++ b/eland/tests/Eland
--- a/eland/tests/init.py
+++ b/eland/tests/init.py
@ -1,12 +1,9 @@
 import os
 import pandas as pd
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 # Set modin to pandas to avoid starting ray or other
 os.environ["MODIN_ENGINE"] = 'python'
 os.environ["MODIN_BACKEND"] = 'pandas'
 # Define test files and indices
 ELASTICSEARCH_HOST = 'localhost'  # TODO externalise this
@ -491,4 +488,3 @@ TEST_NESTED_USER_GROUP_DOCS = [
     '_source': {'group': 'new york', 'user': [
         {'first': 'Bill', 'last': 'Jones'}]}}
 ]
--- a/eland/tests/common.py
+++ b/eland/tests/common.py
@ -1,11 +1,9 @@
-import pytest
+import os
 import eland as ed
 import pandas as pd
 from pandas.util.testing import (assert_frame_equal, assert_series_equal)
-import os
+import eland as ed
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
@ -34,6 +32,7 @@ _pd_ecommerce.index = _pd_ecommerce.index.map(str) # make index 'object' not int
 _pd_ecommerce['customer_birth_date'].astype('datetime64')
 _ed_ecommerce = ed.read_es(ELASTICSEARCH_HOST, ECOMMERCE_INDEX_NAME)
 class TestData:
    def pd_flights(self):
@ -48,13 +47,13 @@ class TestData:
    def ed_flights_small(self):
        return _ed_flights_small
    def pd_ecommerce(self):
        return _pd_ecommerce
    def ed_ecommerce(self):
        return _ed_ecommerce
 def assert_pandas_eland_frame_equal(left, right):
    if not isinstance(left, pd.DataFrame):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
@ -67,6 +66,7 @@ def assert_pandas_eland_frame_equal(left, right):
    # Use pandas tests to check similarity
    assert_frame_equal(left, right._to_pandas())
 def assert_eland_frame_equal(left, right):
    if not isinstance(left, ed.DataFrame):
        raise AssertionError("Expected type {exp_type}, found {act_type} instead".format(
@ -91,4 +91,3 @@ def assert_pandas_eland_series_equal(left, right):
    # Use pandas tests to check similarity
    assert_series_equal(left, right._to_pandas())
--- a/eland/tests/dataframe/test_aggs_pytest.py
+++ b/eland/tests/dataframe/test_aggs_pytest.py
@ -1,15 +1,14 @@
 # File called _pytest for PyCharm compatability
 import numpy as np
-import pandas as pd
+from pandas.util.testing import assert_almost_equal
 from pandas.util.testing import (assert_almost_equal)
 from eland.tests.common import TestData
 class TestDataFrameAggs(TestData):
-    def test_to_aggs1(self):
+    def test_basic_aggs(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()
--- a/eland/tests/dataframe/test_count_pytest.py
+++ b/eland/tests/dataframe/test_count_pytest.py
@ -1,19 +1,17 @@
 # File called _pytest for PyCharm compatability
 from pandas.util.testing import assert_series_equal
 from eland.tests.common import TestData
 class TestDataFrameCount(TestData):
-    def test_to_count1(self):
+    def test_ecommerce_count(self):
        pd_ecommerce = self.pd_ecommerce()
        ed_ecommerce = self.ed_ecommerce()
        pd_count = pd_ecommerce.count()
        ed_count = ed_ecommerce.count()
-        print(pd_count)
+        assert_series_equal(pd_count, ed_count)
        print(ed_count)
--- a/eland/tests/dataframe/test_datetime_pytest.py
+++ b/eland/tests/dataframe/test_datetime_pytest.py
@ -6,6 +6,7 @@ import pandas as pd
 import eland as ed
 from eland.tests.common import ELASTICSEARCH_HOST
 from eland.tests.common import TestData
 from eland.tests.common import assert_pandas_eland_frame_equal
 class TestDataFrameDateTime(TestData):
@ -41,4 +42,4 @@ class TestDataFrameDateTime(TestData):
        ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name)
        ed_df_head = ed_df.head()
-        # assert_frame_equal(df, ed_df_head)
+        assert_pandas_eland_frame_equal(df, ed_df_head)
--- a/eland/tests/dataframe/test_describe_pytest.py
+++ b/eland/tests/dataframe/test_describe_pytest.py
@ -1,35 +1,34 @@
 # File called _pytest for PyCharm compatability
-from io import StringIO
+
 from pandas.util.testing import assert_almost_equal
 from eland.tests.common import TestData
 class TestDataFrameDescribe(TestData):
-    def test_to_describe1(self):
+    def test_flights_describe(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()
        pd_describe = pd_flights.describe()
        ed_describe = ed_flights.describe()
-        print(pd_describe)
+        assert_almost_equal(pd_describe[['AvgTicketPrice']],
-        print(ed_describe)
+                            ed_describe[['AvgTicketPrice']],
                            check_less_precise=True)
-        # TODO - this fails now as ES aggregations are approximate
+        # TODO - this fails for all fields now as ES aggregations are approximate
        #        if ES percentile agg uses
        #        "hdr": {
        #           "number_of_significant_value_digits": 3
        #         }
        #        this works
        # assert_almost_equal(pd_flights_describe, ed_flights_describe)
        pd_ecommerce_describe = self.pd_ecommerce().describe()
        ed_ecommerce_describe = self.ed_ecommerce().describe()
        # pd_ecommerce_describe = self.pd_ecommerce().describe()
        # ed_ecommerce_describe = self.ed_ecommerce().describe()
        # We don't compare ecommerce here as the default dtypes in pandas from read_json
        # don't match the mapping types. This is mainly because the products field is
        # nested and so can be treated as a multi-field in ES, but not in pandas
        # We can not also run 'describe' on a truncate ed dataframe
--- a/eland/tests/dataframe/test_drop_pytest.py
+++ b/eland/tests/dataframe/test_drop_pytest.py
@ -1,19 +1,14 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 import eland as ed
 from eland.tests.common import TestData
 from eland.tests.common import (
-    assert_eland_frame_equal,
+    assert_pandas_eland_frame_equal
    assert_pandas_eland_frame_equal,
    assert_pandas_eland_series_equal
 )
 import numpy as np
 class TestDataFrameDrop(TestData):
-    def test_drop1(self):
+    def test_flights_small_drop(self):
        ed_flights_small = self.ed_flights_small()
        pd_flights_small = self.pd_flights_small()
--- a/eland/tests/dataframe/test_dtypes_pytest.py
+++ b/eland/tests/dataframe/test_dtypes_pytest.py
@ -0,0 +1,14 @@
 # File called _pytest for PyCharm compatability
 from pandas.util.testing import assert_series_equal
 from eland.tests.common import TestData
 class TestDataFrameDtypes(TestData):
    def test_flights_dtypes(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
        assert_series_equal(pd_flights.dtypes, ed_flights.dtypes)
--- a/eland/tests/dataframe/test_get_pytest.py
+++ b/eland/tests/dataframe/test_get_pytest.py
@ -1,18 +1,11 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 import eland as ed
 from eland.tests.common import TestData
 from eland.tests.common import (
    assert_pandas_eland_frame_equal,
    assert_pandas_eland_series_equal
 )
 import numpy as np
 class TestDataFrameGet(TestData):
-    def test_get1(self):
+    def test_get_one_attribute(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
--- a/eland/tests/dataframe/test_getitem_pytest.py
+++ b/eland/tests/dataframe/test_getitem_pytest.py
@ -1,5 +1,4 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 from eland.tests.common import TestData
 from eland.tests.common import (
@ -8,10 +7,9 @@ from eland.tests.common import (
 )
 class TestDataFrameGetItem(TestData):
-    def test_getitem1(self):
+    def test_getitem_one_attribute(self):
        ed_flights = self.ed_flights().head(103)
        pd_flights = self.pd_flights().head(103)
@ -20,7 +18,7 @@ class TestDataFrameGetItem(TestData):
        assert_pandas_eland_series_equal(pd_flights_OriginAirportID, ed_flights_OriginAirportID)
-    def test_getitem2(self):
+    def test_getitem_attribute_list(self):
        ed_flights = self.ed_flights().head(42)
        pd_flights = self.pd_flights().head(42)
@ -29,7 +27,7 @@ class TestDataFrameGetItem(TestData):
        assert_pandas_eland_frame_equal(pd_flights_slice, ed_flights_slice)
-    def test_getitem3(self):
+    def test_getitem_one_argument(self):
        ed_flights = self.ed_flights().head(89)
        pd_flights = self.pd_flights().head(89)
@ -38,7 +36,7 @@ class TestDataFrameGetItem(TestData):
        assert_pandas_eland_series_equal(pd_flights_OriginAirportID, ed_flights_OriginAirportID)
-    def test_getitem4(self):
+    def test_getitem_multiple_calls(self):
        ed_flights = self.ed_flights().head(89)
        pd_flights = self.pd_flights().head(89)
@ -52,4 +50,3 @@ class TestDataFrameGetItem(TestData):
        ed_col1 = ed_col0['DestCountry']
        assert_pandas_eland_series_equal(pd_col1, ed_col1)
--- a/eland/tests/dataframe/test_head_tail_pytest.py
+++ b/eland/tests/dataframe/test_head_tail_pytest.py
@ -1,11 +1,9 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 from eland.tests.common import TestData
 from eland.tests.common import assert_pandas_eland_frame_equal
 class TestDataFrameHeadTail(TestData):
    def test_head(self):
--- a/eland/tests/dataframe/test_hist_pytest.py
+++ b/eland/tests/dataframe/test_hist_pytest.py
@ -1,6 +1,5 @@
 # File called _pytest for PyCharm compatability
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from pandas.util.testing import assert_almost_equal
@ -10,7 +9,7 @@ from eland.tests.common import TestData
 class TestDataFrameHist(TestData):
-    def test_hist1(self):
+    def test_flights_hist(self):
        pd_flights = self.pd_flights()
        ed_flights = self.ed_flights()
@ -30,15 +29,3 @@ class TestDataFrameHist(TestData):
        # Numbers are slightly different
        assert_almost_equal(pd_bins, ed_bins)
        assert_almost_equal(pd_weights, ed_weights)
    def test_hist2(self):
        pd_df = self.pd_flights()[['DistanceKilometers', 'DistanceMiles', 'FlightDelayMin', 'FlightTimeHour']]
        ed_df = self.ed_flights()[['DistanceKilometers', 'DistanceMiles', 'FlightDelayMin', 'FlightTimeHour']]
        num_bins = 10
        ed_bins, ed_weights = ed_df._hist(num_bins=num_bins)
        print(ed_bins)
--- a/eland/tests/dataframe/test_iloc_pytest.py
+++ b/eland/tests/dataframe/test_iloc_pytest.py
@ -1,45 +0,0 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 import eland as ed
 from eland.tests.common import TestData
 from eland.tests.common import (
    assert_pandas_eland_frame_equal,
    assert_pandas_eland_series_equal
 )
 import numpy as np
 class TestDataFrameiLoc(TestData):
    def test_iloc1(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.iloc.html#pandas.DataFrame.iloc
        #pd_flights.info()
        pd_iloc0 = pd_flights.iloc[0]
        pd_iloc1= pd_flights.iloc[[0]]
        pd_iloc2= pd_flights.iloc[[0, 1]]
        pd_iloc3 = pd_flights.iloc[:3]
        pd_iloc5 = pd_flights.iloc[0, 1]
        pd_iloc6 = pd_flights.iloc[[0, 2], [1, 3]]
        pd_iloc7 = pd_flights.iloc[1:3, 0:3]
        ed_iloc0 = ed_flights.iloc[0]
        ed_iloc1 = ed_flights.iloc[[0]]
        ed_iloc2 = ed_flights.iloc[[0, 1]]
        ed_iloc3 = ed_flights.iloc[:3]
        ed_iloc5 = ed_flights.iloc[0, 1]
        ed_iloc6 = ed_flights.iloc[[0, 2], [1, 3]]
        ed_iloc7 = ed_flights.iloc[1:3, 0:3]
        #assert_pandas_eland_frame_equal(pd_iloc0, ed_iloc0) # pd_iloc0 is Series
        assert_pandas_eland_frame_equal(pd_iloc1, ed_iloc1)
        assert_pandas_eland_frame_equal(pd_iloc2, ed_iloc2)
        assert_pandas_eland_frame_equal(pd_iloc3, ed_iloc3)
        #assert_pandas_eland_frame_equal(pd_iloc5, ed_iloc5) # pd_iloc5 is numpy_bool
        assert_pandas_eland_frame_equal(pd_iloc6, ed_iloc6)
        assert_pandas_eland_frame_equal(pd_iloc7, ed_iloc7)
--- a/eland/tests/dataframe/test_info_es_pytest.py
+++ b/eland/tests/dataframe/test_info_es_pytest.py
@ -1,15 +0,0 @@
 # File called _pytest for PyCharm compatability
 from eland.tests.common import TestData
 class TestDataFrameInfoEs(TestData):
    def test_to_info1(self):
        ed_flights = self.ed_flights()
        head = ed_flights.head(103)
        slice = head[['timestamp', 'OriginRegion', 'Carrier']]
        iloc = slice.iloc[10:92, [0,2]]
        print(iloc.info_es())
        print(iloc)
--- a/eland/tests/dataframe/test_info_pytest.py
+++ b/eland/tests/dataframe/test_info_pytest.py
@ -6,7 +6,7 @@ from eland.tests.common import TestData
 class TestDataFrameInfo(TestData):
-    def test_to_info1(self):
+    def test_flights_info(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
--- a/eland/tests/dataframe/test_metrics_pytest.py
+++ b/eland/tests/dataframe/test_metrics_pytest.py
@ -1,10 +1,9 @@
 # File called _pytest for PyCharm compatability
 from eland.tests.common import TestData
 from pandas.util.testing import assert_series_equal
 from eland.tests.common import TestData
 class TestDataFrameMetrics(TestData):
@ -43,4 +42,3 @@ class TestDataFrameMetrics(TestData):
        ed_max = ed_flights.max(numeric_only=True)
        assert_series_equal(pd_max, ed_max)
--- a/eland/tests/dataframe/test_nunique_pytest.py
+++ b/eland/tests/dataframe/test_nunique_pytest.py
@ -1,22 +0,0 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 import eland as ed
 from eland.tests.common import TestData
 from eland.tests.common import (
    assert_pandas_eland_frame_equal,
    assert_pandas_eland_series_equal
 )
 import numpy as np
 class TestDataFrameNUnique(TestData):
    def test_nunique1(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
        print(pd_flights.dtypes)
        print(ed_flights.dtypes)
        print(ed_flights.nunique())
--- a/eland/tests/dataframe/test_query_pytest.py
+++ b/eland/tests/dataframe/test_query_pytest.py
@ -10,7 +10,7 @@ from eland.tests.common import assert_pandas_eland_frame_equal
 class TestDataFrameQuery(TestData):
-    def test_query1(self):
+    def test_query(self):
        # Examples from:
        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html
        pd_df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2), 'C': range(10, 5, -1)},
@ -43,4 +43,3 @@ class TestDataFrameQuery(TestData):
        ed_q4 = ed_df[(ed_df.A > 2) & (ed_df.B > 3)]
        assert_pandas_eland_frame_equal(pd_q4, ed_q4)
--- a/eland/tests/dataframe/test_repr_pytest.py
+++ b/eland/tests/dataframe/test_repr_pytest.py
@ -3,9 +3,9 @@
 from eland.tests.common import TestData
-class TestDataFrameHeadTail(TestData):
+class TestDataFrameRepr(TestData):
-    def test_to_string1(self):
+    def test_head_101_to_string(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
@ -18,7 +18,7 @@ class TestDataFrameHeadTail(TestData):
        assert pd_head_101_str == ed_head_101_str
-    def test_to_string2(self):
+    def test_head_11_to_string2(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
@ -30,7 +30,7 @@ class TestDataFrameHeadTail(TestData):
        assert pd_head_11_str == ed_head_11_str
-    def test_to_repr(self):
+    def test_repr(self):
        ed_ecommerce = self.ed_ecommerce()
        pd_ecommerce = self.pd_ecommerce()
--- a/eland/tests/dataframe/test_select_dtypes_pytest.py
+++ b/eland/tests/dataframe/test_select_dtypes_pytest.py
@ -1,5 +1,4 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 import numpy as np
 from eland.tests.common import TestData
@ -8,10 +7,9 @@ from eland.tests.common import (
 )
 class TestDataFrameSelectDTypes(TestData):
-    def test_select_dtypes1(self):
+    def test_select_dtypes_include_number(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
@ -20,7 +18,7 @@ class TestDataFrameSelectDTypes(TestData):
        assert_pandas_eland_frame_equal(pd_flights_numeric.head(103), ed_flights_numeric.head(103))
-    def test_select_dtypes2(self):
+    def test_select_dtypes_exclude_number(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
@ -28,4 +26,3 @@ class TestDataFrameSelectDTypes(TestData):
        pd_flights_non_numeric = pd_flights.select_dtypes(exclude=[np.number])
        assert_pandas_eland_frame_equal(pd_flights_non_numeric.head(103), ed_flights_non_numeric.head(103))
--- a/eland/tests/dataframe/test_shape_pytest.py
+++ b/eland/tests/dataframe/test_shape_pytest.py
@ -22,5 +22,3 @@ class TestDataFrameShape(TestData):
        ed_shape = ed_flights.shape
        assert pd_shape == ed_shape
--- a/eland/tests/dataframe/test_to_csv_pytest.py
+++ b/eland/tests/dataframe/test_to_csv_pytest.py
@ -1,14 +1,13 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 from eland.tests.common import TestData
 from eland.tests.common import ROOT_DIR
 from pandas.util.testing import (assert_equal, assert_frame_equal)
 import ast
 import pandas as pd
 from pandas.util.testing import (assert_frame_equal)
 from eland.tests.common import ROOT_DIR
 from eland.tests.common import TestData
 class TestDataFrameToCSV(TestData):
@ -43,6 +42,3 @@ class TestDataFrameToCSV(TestData):
        pd_from_csv.timestamp = pd.to_datetime(pd_from_csv.timestamp)
        assert_frame_equal(pd_flights, pd_from_csv)
--- a/eland/tests/mappings/test_dtypes_pytest.py
+++ b/eland/tests/mappings/test_dtypes_pytest.py
@ -1,12 +1,13 @@
 # File called _pytest for PyCharm compatability
 from eland.tests.common import TestData
 from pandas.util.testing import assert_series_equal
 from eland.tests.common import TestData
 class TestMappingsDtypes(TestData):
-    def test_dtypes1(self):
+    def test_flights_dtypes_all(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
@ -15,7 +16,7 @@ class TestMappingsDtypes(TestData):
        assert_series_equal(pd_dtypes, ed_dtypes)
-    def test_dtypes2(self):
+    def test_flights_dtypes_columns(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]
@ -24,7 +25,7 @@ class TestMappingsDtypes(TestData):
        assert_series_equal(pd_dtypes, ed_dtypes)
-    def test_get_dtype_counts1(self):
+    def test_flights_get_dtype_counts_all(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()
@ -33,7 +34,7 @@ class TestMappingsDtypes(TestData):
        assert_series_equal(pd_dtypes, ed_dtypes)
-    def test_get_dtype_counts2(self):
+    def test_flights_get_dtype_counts_columns(self):
        ed_flights = self.ed_flights()
        pd_flights = self.pd_flights()[['Carrier', 'AvgTicketPrice', 'Cancelled']]
--- a/eland/tests/plotting/test_dataframe_hist_pytest.py
+++ b/eland/tests/plotting/test_dataframe_hist_pytest.py
@ -1,8 +1,9 @@
 # File called _pytest for PyCharm compatability
 from matplotlib.testing.decorators import check_figures_equal
 from eland.tests.common import TestData
 from matplotlib.testing.decorators import check_figures_equal
@check_figures_equal(extensions=['png'])
 def test_plot_hist(fig_test, fig_ref):
--- a/eland/tests/query/test_count_pytest.py
+++ b/eland/tests/query/test_count_pytest.py
@ -1,8 +1,7 @@
 # File called _pytest for PyCharm compatability
 from eland.tests.common import TestData
 from eland import Query
 from eland.tests.common import TestData
 class TestQueryCopy(TestData):
@ -22,6 +21,3 @@ class TestQueryCopy(TestData):
        print(q.to_search_body())
        print(q1.to_search_body())
--- a/eland/tests/series/test_head_tail_pytest.py
+++ b/eland/tests/series/test_head_tail_pytest.py
@ -1,15 +1,9 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 import eland as ed
 from eland.tests.common import TestData
 from eland.tests.common import assert_pandas_eland_series_equal
 from eland.tests import ELASTICSEARCH_HOST
 from eland.tests import FLIGHTS_INDEX_NAME
-
+from eland.tests.common import TestData
-from pandas.util.testing import assert_series_equal
+from eland.tests.common import assert_pandas_eland_series_equal
 class TestSeriesHeadTail(TestData):
--- a/eland/tests/series/test_repr_pytest.py
+++ b/eland/tests/series/test_repr_pytest.py
@ -1,15 +1,8 @@
 # File called _pytest for PyCharm compatability
 import pandas as pd
 import eland as ed
 from eland.tests.common import TestData
 from eland.tests.common import assert_pandas_eland_frame_equal
 from eland.tests import ELASTICSEARCH_HOST
 from eland.tests import FLIGHTS_INDEX_NAME
-
+from eland.tests.common import TestData
 from pandas.util.testing import assert_series_equal
 class TestSeriesRepr(TestData):
--- a/eland/tests/setup_tests.py
+++ b/eland/tests/setup_tests.py
@ -1,4 +1,3 @@
 import pandas as pd
 from elasticsearch import Elasticsearch
 from elasticsearch import helpers
@ -10,6 +9,7 @@ DATA_LIST = [
    (ECOMMERCE_FILE_NAME, ECOMMERCE_INDEX_NAME, ECOMMERCE_MAPPING)
 ]
 def _setup_data(es):
    # Read json file and index records into Elasticsearch
    for data in DATA_LIST:
@ -50,17 +50,20 @@ def _setup_data(es):
        print("Done", index_name)
 def _setup_test_mappings(es):
    # Create a complex mapping containing many Elasticsearch features
    es.indices.delete(index=TEST_MAPPING1_INDEX_NAME, ignore=[400, 404])
    es.indices.create(index=TEST_MAPPING1_INDEX_NAME, body=TEST_MAPPING1)
 def _setup_test_nested(es):
    es.indices.delete(index=TEST_NESTED_USER_GROUP_INDEX_NAME, ignore=[400, 404])
    es.indices.create(index=TEST_NESTED_USER_GROUP_INDEX_NAME, body=TEST_NESTED_USER_GROUP_MAPPING)
    helpers.bulk(es, TEST_NESTED_USER_GROUP_DOCS)
 if __name__ == '__main__':
    # Create connection to Elasticsearch - use defaults
    es = Elasticsearch(ELASTICSEARCH_HOST)
--- a/eland/utils.py
+++ b/eland/utils.py
@ -7,7 +7,8 @@ def read_es(es_params, index_pattern):
    return DataFrame(client=es_params, index_pattern=index_pattern)
-def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False, geo_points=None):
+def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False, dropna=False,
                 geo_points=None):
    """
    Append a pandas DataFrame to an Elasticsearch index.
    Mainly used in testing.
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@ -0,0 +1,3 @@
 elasticsearch>=7.0.5
 pandas==0.25.1
 pytest>=5.2.1
--- a/requirements.txt
+++ b/requirements.txt
@ -1,2 +1,3 @@
 elasticsearch>=7.0.5
 pandas==0.25.1
 matplotlib
--- a/setup.py
+++ b/setup.py
@ -1,9 +1,11 @@
 from setuptools import setup
 def readme():
    with open('README.rst') as f:
        return f.read()
 setup(name='eland',
      version='0.1',
      description='Python elasticsearch client to analyse, explore and manipulate data that resides in elasticsearch',
`@ -22,5 +22,3 @@ class TestDataFrameShape(TestData):`
	`ed_shape = ed_flights.shape`	`ed_shape = ed_flights.shape`

	`assert pd_shape == ed_shape`	`assert pd_shape == ed_shape`