From 140623283a3f5a734b7a76be216f140411304be3 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson
Date: Tue, 14 Jul 2020 11:39:52 -0500
Subject: [PATCH] Support Series/collections in Series.isin(), add type hints

---
 docs/source/reference/supported_apis.rst   |  4 +-
 eland/arithmetics.py                       |  2 +-
 eland/dataframe.py                         | 38 ++++++---
 eland/field_mappings.py                    | 32 ++++---
 eland/ndframe.py                           | 41 ++++-----
 eland/series.py                            | 99 +++++++++++++---
 eland/tests/dataframe/test_query_pytest.py |  9 +-
 eland/utils.py                             | 29 +++++--
 noxfile.py                                 |  2 +
 9 files changed, 157 insertions(+), 99 deletions(-)

diff --git a/docs/source/reference/supported_apis.rst b/docs/source/reference/supported_apis.rst
index 8a1f3f2..94d9778 100644
--- a/docs/source/reference/supported_apis.rst
+++ b/docs/source/reference/supported_apis.rst
@@ -1040,7 +1040,7 @@ script instead of being modified manually.
 +---------------------------------------+------------+
 | ``ed.Series.__dir__()``               | **Yes**    |
 +---------------------------------------+------------+
-| ``ed.Series.__div__()``               | No         |
+| ``ed.Series.__div__()``               | **Yes**    |
 +---------------------------------------+------------+
 | ``ed.Series.__divmod__()``            | No         |
 +---------------------------------------+------------+
@@ -1134,7 +1134,7 @@ script instead of being modified manually.
 +---------------------------------------+------------+
 | ``ed.Series.__rand__()``              | No         |
 +---------------------------------------+------------+
-| ``ed.Series.__rdiv__()``              | No         |
+| ``ed.Series.__rdiv__()``              | **Yes**    |
 +---------------------------------------+------------+
 | ``ed.Series.__rdivmod__()``           | No         |
 +---------------------------------------+------------+
diff --git a/eland/arithmetics.py b/eland/arithmetics.py
index 927392c..584fb0b 100644
--- a/eland/arithmetics.py
+++ b/eland/arithmetics.py
@@ -138,7 +138,7 @@ class ArithmeticSeries(ArithmeticObject):
         for task in self._tasks:
             if task.op_name == "__add__":
                 value = f"({value} + {task.object.resolve()})"
-            elif task.op_name == "__truediv__":
+            elif task.op_name in ("__truediv__", "__div__"):
                 value = f"({value} / {task.object.resolve()})"
             elif task.op_name == "__floordiv__":
                 value = f"Math.floor({value} / {task.object.resolve()})"
diff --git a/eland/dataframe.py b/eland/dataframe.py
index ed79ed2..875cdb1 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -19,7 +19,7 @@ import sys
 import warnings
 from io import StringIO
 import re
-from typing import Optional, Sequence, Union
+from typing import Optional, Sequence, Union, Tuple
 
 import numpy as np
 import pandas as pd
@@ -631,7 +631,7 @@ class DataFrame(NDFrame):
     def info_es(self):
         return self.es_info()
 
-    def es_query(self, query):
+    def es_query(self, query) -> "DataFrame":
         """Applies an Elasticsearch DSL query to the current DataFrame.
 
         Parameters
@@ -705,7 +705,7 @@ class DataFrame(NDFrame):
 
     def info(
         self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None
-    ):
+    ) -> None:
         """
         Print a concise summary of a DataFrame.
 
@@ -822,7 +822,7 @@ class DataFrame(NDFrame):
             dtype = dtypes.iloc[i]
             col = pprint_thing(col)
 
-            line_no = _put_str(" {num}".format(num=i), space_num)
+            line_no = _put_str(f" {i}", space_num)
 
             count = ""
             if show_counts:
@@ -1223,7 +1223,7 @@ class DataFrame(NDFrame):
         }
         return self._query_compiler.to_csv(**kwargs)
 
-    def to_pandas(self, show_progress: bool = False) -> "DataFrame":
+    def to_pandas(self, show_progress: bool = False) -> pd.DataFrame:
         """
         Utility method to convert eland.Dataframe to pandas.Dataframe
 
@@ -1233,10 +1233,10 @@ class DataFrame(NDFrame):
         """
         return self._query_compiler.to_pandas(show_progress=show_progress)
 
-    def _empty_pd_df(self):
+    def _empty_pd_df(self) -> pd.DataFrame:
         return self._query_compiler._empty_pd_ef()
 
-    def select_dtypes(self, include=None, exclude=None):
+    def select_dtypes(self, include=None, exclude=None) -> "DataFrame":
         """
         Return a subset of the DataFrame's columns based on the column dtypes.
 
@@ -1272,7 +1272,7 @@ class DataFrame(NDFrame):
         return self._getitem_array(empty_df.columns)
 
     @property
-    def shape(self):
+    def shape(self) -> Tuple[int, int]:
         """
         Return a tuple representing the dimensionality of the DataFrame.
 
@@ -1299,7 +1299,23 @@ class DataFrame(NDFrame):
 
         return num_rows, num_columns
 
-    def keys(self):
+    @property
+    def ndim(self) -> int:
+        """
+        Returns 2 by definition of a DataFrame
+
+        Returns
+        -------
+        int
+            By definition 2
+
+        See Also
+        --------
+        :pandas_api_docs:`pandas.DataFrame.ndim`
+        """
+        return 2
+
+    def keys(self) -> pd.Index:
         """
         Return columns
 
@@ -1381,7 +1397,7 @@ class DataFrame(NDFrame):
 
     hist = gfx.ed_hist_frame
 
-    def query(self, expr):
+    def query(self, expr) -> "DataFrame":
         """
         Query the columns of a DataFrame with a boolean expression.
 
@@ -1474,7 +1490,7 @@ class DataFrame(NDFrame):
         like: Optional[str] = None,
         regex: Optional[str] = None,
         axis: Optional[Union[int, str]] = None,
-    ):
+    ) -> "DataFrame":
         """
         Subset the dataframe rows or columns according to the specified index labels.
         Note that this routine does not filter a dataframe on its
diff --git a/eland/field_mappings.py b/eland/field_mappings.py
index 33dbf1c..aa58bac 100644
--- a/eland/field_mappings.py
+++ b/eland/field_mappings.py
@@ -27,15 +27,16 @@ from pandas.core.dtypes.common import (
     is_string_dtype,
 )
 from pandas.core.dtypes.inference import is_list_like
-from typing import NamedTuple, Optional, Mapping, Dict, Any, TYPE_CHECKING
+from typing import NamedTuple, Optional, Mapping, Dict, Any, TYPE_CHECKING, List, Set
 
 if TYPE_CHECKING:
+    from elasticsearch import Elasticsearch
     from eland import DataFrame
 
 
-ES_FLOAT_TYPES = {"double", "float", "half_float", "scaled_float"}
-ES_INTEGER_TYPES = {"long", "integer", "short", "byte"}
-ES_COMPATIBLE_TYPES = {
+ES_FLOAT_TYPES: Set[str] = {"double", "float", "half_float", "scaled_float"}
+ES_INTEGER_TYPES: Set[str] = {"long", "integer", "short", "byte"}
+ES_COMPATIBLE_TYPES: Dict[str, Set[str]] = {
     "double": ES_FLOAT_TYPES,
     "scaled_float": ES_FLOAT_TYPES,
     "float": ES_FLOAT_TYPES,
@@ -80,7 +81,7 @@ class Field(NamedTuple):
     def np_dtype(self):
         return np.dtype(self.pd_dtype)
 
-    def is_es_agg_compatible(self, es_agg):
+    def is_es_agg_compatible(self, es_agg) -> bool:
         # Cardinality works for all types
         # Numerics and bools work for all aggs
         if es_agg == "cardinality" or self.is_numeric or self.is_bool:
@@ -115,7 +116,7 @@ class FieldMappings:
         or es_field_name.keyword (if exists) or None
     """
 
-    ES_DTYPE_TO_PD_DTYPE = {
+    ES_DTYPE_TO_PD_DTYPE: Dict[str, str] = {
         "text": "object",
         "keyword": "object",
         "long": "int64",
@@ -133,7 +134,7 @@ class FieldMappings:
     }
 
     # the labels for each column (display_name is index)
-    column_labels = [
+    column_labels: List[str] = [
         "es_field_name",
         "is_source",
         "es_dtype",
@@ -145,7 +146,12 @@ class FieldMappings:
         "aggregatable_es_field_name",
     ]
 
-    def __init__(self, client=None, index_pattern=None, display_names=None):
+    def __init__(
+        self,
+        client: "Elasticsearch",
+        index_pattern: str,
+        display_names: Optional[List[str]] = None,
+    ):
         """
         Parameters
         ----------
@@ -184,7 +190,9 @@ class FieldMappings:
         self.display_names = display_names
 
     @staticmethod
-    def _extract_fields_from_mapping(mappings, source_only=False, date_format=None):
+    def _extract_fields_from_mapping(
+        mappings: Dict[str, Any], source_only: bool = False
+    ) -> Dict[str, str]:
         """
         Extract all field names and types from a mapping.
         ```
@@ -256,10 +264,10 @@ class FieldMappings:
 
         # Recurse until we get a 'type: xxx'
         def flatten(x, name=""):
-            if type(x) is dict:
+            if isinstance(x, dict):
                 for a in x:
-                    if (
-                        a == "type" and type(x[a]) is str
+                    if a == "type" and isinstance(
+                        x[a], str
                     ):  # 'type' can be a name of a field
                         field_name = name[:-1]
                         field_type = x[a]
diff --git a/eland/ndframe.py b/eland/ndframe.py
index b3c2a96..16a01df 100644
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@@ -17,10 +17,14 @@ import sys
 from abc import ABC, abstractmethod
-from typing import Tuple
-
+from typing import TYPE_CHECKING, Tuple
+import pandas as pd
 from eland.query_compiler import QueryCompiler
+
+if TYPE_CHECKING:
+    from eland.index import Index
+
 """
 NDFrame
 ---------
@@ -73,7 +77,8 @@ class NDFrame(ABC):
             )
         self._query_compiler = _query_compiler
 
-    def _get_index(self):
+    @property
+    def index(self) -> "Index":
         """
         Return eland index referencing Elasticsearch field to index a DataFrame/Series
 
@@ -100,10 +105,8 @@ class NDFrame(ABC):
         """
         return self._query_compiler.index
 
-    index = property(_get_index)
-
     @property
-    def dtypes(self):
+    def dtypes(self) -> pd.Series:
         """
         Return the pandas dtypes in the DataFrame. Elasticsearch types are mapped
         to pandas dtypes via Mappings._es_dtype_to_pd_dtype.__doc__
@@ -129,7 +132,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.dtypes
 
-    def _build_repr(self, num_rows):
+    def _build_repr(self, num_rows) -> pd.DataFrame:
         # self could be Series or DataFrame
         if len(self.index) <= num_rows:
             return self.to_pandas()
@@ -144,11 +147,11 @@ class NDFrame(ABC):
 
         return head.append(tail)
 
-    def __sizeof__(self):
+    def __sizeof__(self) -> int:
         # Don't default to pandas, just return approximation TODO - make this more accurate
         return sys.getsizeof(self._query_compiler)
 
-    def __len__(self):
+    def __len__(self) -> int:
         """Gets the length of the DataFrame.
 
         Returns:
@@ -159,7 +162,7 @@ class NDFrame(ABC):
     def _es_info(self, buf):
         self._query_compiler.es_info(buf)
 
-    def mean(self, numeric_only=True):
+    def mean(self, numeric_only: bool = True) -> pd.Series:
         """
         Return mean value for each numeric column
 
@@ -191,7 +194,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.mean(numeric_only=numeric_only)
 
-    def sum(self, numeric_only=True):
+    def sum(self, numeric_only: bool = True) -> pd.Series:
         """
         Return sum for each numeric column
 
@@ -223,7 +226,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.sum(numeric_only=numeric_only)
 
-    def min(self, numeric_only=True):
+    def min(self, numeric_only: bool = True) -> pd.Series:
         """
         Return the minimum value for each numeric column
 
@@ -255,7 +258,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.min(numeric_only=numeric_only)
 
-    def var(self, numeric_only=True):
+    def var(self, numeric_only: bool = True) -> pd.Series:
         """
         Return variance for each numeric column
 
@@ -285,7 +288,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.var(numeric_only=numeric_only)
 
-    def std(self, numeric_only=True):
+    def std(self, numeric_only: bool = True) -> pd.Series:
         """
         Return standard deviation for each numeric column
 
@@ -315,7 +318,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.std(numeric_only=numeric_only)
 
-    def median(self, numeric_only=True):
+    def median(self, numeric_only: bool = True) -> pd.Series:
         """
         Return the median value for each numeric column
 
@@ -345,7 +348,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.median(numeric_only=numeric_only)
 
-    def max(self, numeric_only=True):
+    def max(self, numeric_only: bool = True) -> pd.Series:
         """
         Return the maximum value for each numeric column
 
@@ -377,7 +380,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.max(numeric_only=numeric_only)
 
-    def nunique(self):
+    def nunique(self) -> pd.Series:
         """
         Return cardinality of each field.
 
@@ -423,7 +426,7 @@ class NDFrame(ABC):
         """
         return self._query_compiler.nunique()
 
-    def mad(self, numeric_only=True):
+    def mad(self, numeric_only: bool = True) -> pd.Series:
         """
         Return standard deviation for each numeric column
 
@@ -456,7 +459,7 @@ class NDFrame(ABC):
     def _hist(self, num_bins):
         return self._query_compiler._hist(num_bins)
 
-    def describe(self):
+    def describe(self) -> pd.DataFrame:
         """
         Generate descriptive statistics that summarize the central tendency, dispersion and shape of a
         dataset’s distribution, excluding NaN values.
diff --git a/eland/series.py b/eland/series.py
index b52345c..011bd0c 100644
--- a/eland/series.py
+++ b/eland/series.py
@@ -33,8 +33,9 @@ Based on NDFrame which underpins eland.DataFrame
 
 import sys
 import warnings
+from collections.abc import Collection
 from io import StringIO
-from typing import Optional, Union, Sequence
+from typing import Optional, Union, Sequence, Any, Tuple, TYPE_CHECKING
 
 import numpy as np
 import pandas as pd
@@ -45,6 +46,7 @@ from eland import NDFrame
 from eland.arithmetics import ArithmeticSeries, ArithmeticString, ArithmeticNumber
 from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
 from eland.filter import (
+    BooleanFilter,
     NotFilter,
     Equal,
     Greater,
@@ -56,10 +58,14 @@ from eland.filter import (
     IsNull,
     NotNull,
 )
-from eland.utils import deprecated_api
+from eland.utils import deprecated_api, to_list
+
+if TYPE_CHECKING:  # type: ignore
+    from elasticsearch import Elasticsearch  # noqa: F401
+    from eland.query_compiler import QueryCompiler  # noqa: F401
 
 
-def _get_method_name():
+def _get_method_name() -> str:
     return sys._getframe(1).f_code.co_name
 
 
@@ -106,12 +112,12 @@ class Series(NDFrame):
 
     def __init__(
         self,
-        es_client=None,
-        es_index_pattern=None,
-        name=None,
-        es_index_field=None,
-        _query_compiler=None,
-    ):
+        es_client: Optional["Elasticsearch"] = None,
+        es_index_pattern: Optional[str] = None,
+        name: Optional[str] = None,
+        es_index_field: Optional[str] = None,
+        _query_compiler: Optional["QueryCompiler"] = None,
+    ) -> None:
         # Series has 1 column
         if name is None:
             columns = None
@@ -129,7 +135,7 @@ class Series(NDFrame):
     hist = eland.plotting.ed_hist_series
 
     @property
-    def empty(self):
+    def empty(self) -> bool:
         """Determines if the Series is empty.
 
         Returns:
@@ -139,7 +145,7 @@ class Series(NDFrame):
         return len(self.index) == 0
 
     @property
-    def shape(self):
+    def shape(self) -> Tuple[int, int]:
         """
         Return a tuple representing the dimensionality of the Series.
 
@@ -167,7 +173,7 @@ class Series(NDFrame):
         return num_rows, num_columns
 
     @property
-    def es_field_name(self):
+    def es_field_name(self) -> str:
         """
         Returns
         -------
@@ -176,15 +182,15 @@ class Series(NDFrame):
         """
         return self._query_compiler.get_field_names(include_scripted_fields=True)[0]
 
-    def _get_name(self):
+    @property
+    def name(self) -> str:
         return self._query_compiler.columns[0]
 
-    def _set_name(self, name):
+    @name.setter
+    def name(self, name: str) -> None:
         self._query_compiler.rename({self.name: name}, inplace=True)
 
-    name = property(_get_name, _set_name)
-
-    def rename(self, new_name):
+    def rename(self, new_name: str) -> "Series":
         """
         Rename name of series. Only column rename is supported. This does not change the underlying
         Elasticsearch index, but adds a symbolic link from the new name (column) to the Elasticsearch field name.
@@ -238,18 +244,23 @@ class Series(NDFrame):
             _query_compiler=self._query_compiler.rename({self.name: new_name})
         )
 
-    def head(self, n=5):
+    def head(self, n: int = 5) -> "Series":
         return Series(_query_compiler=self._query_compiler.head(n))
 
-    def tail(self, n=5):
+    def tail(self, n: int = 5) -> "Series":
         return Series(_query_compiler=self._query_compiler.tail(n))
 
-    def sample(self, n: int = None, frac: float = None, random_state: int = None):
+    def sample(
+        self,
+        n: Optional[int] = None,
+        frac: Optional[float] = None,
+        random_state: Optional[int] = None,
+    ) -> "Series":
         return Series(
             _query_compiler=self._query_compiler.sample(n, frac, random_state)
         )
 
-    def value_counts(self, es_size=10):
+    def value_counts(self, es_size: int = 10) -> pd.Series:
         """
         Return the value counts for the specified field.
 
@@ -287,9 +298,8 @@ class Series(NDFrame):
         """
         if not isinstance(es_size, int):
             raise TypeError("es_size must be a positive integer.")
-        if not es_size > 0:
+        elif es_size <= 0:
             raise ValueError("es_size must be a positive integer.")
-
         return self._query_compiler.value_counts(es_size)
 
     # dtype not implemented for Series as causes query to fail
@@ -297,7 +307,7 @@ class Series(NDFrame):
 
     # ----------------------------------------------------------------------
     # Rendering Methods
-    def __repr__(self):
+    def __repr__(self) -> str:
         """
         Return a string representation for a particular Series.
         """
@@ -339,7 +349,7 @@ class Series(NDFrame):
         name=False,
         max_rows=None,
         min_rows=None,
-    ):
+    ) -> Optional[str]:
         """
         Render a string representation of the Series.
 
@@ -411,15 +421,15 @@ class Series(NDFrame):
             result = _buf.getvalue()
             return result
 
-    def to_pandas(self, show_progress=False):
+    def to_pandas(self, show_progress: bool = False) -> pd.Series:
         return self._query_compiler.to_pandas(show_progress=show_progress)[self.name]
 
     @property
-    def _dtype(self):
+    def _dtype(self) -> np.dtype:
         # DO NOT MAKE PUBLIC (i.e. def dtype) as this breaks query eval implementation
         return self._query_compiler.dtypes[0]
 
-    def __gt__(self, other):
+    def __gt__(self, other: Union[int, float, "Series"]) -> BooleanFilter:
         if isinstance(other, Series):
             # Need to use scripted query to compare to values
             painless = f"doc['{self.name}'].value > doc['{other.name}'].value"
@@ -429,7 +439,7 @@ class Series(NDFrame):
         else:
             raise NotImplementedError(other, type(other))
 
-    def __lt__(self, other):
+    def __lt__(self, other: Union[int, float, "Series"]) -> BooleanFilter:
         if isinstance(other, Series):
             # Need to use scripted query to compare to values
             painless = f"doc['{self.name}'].value < doc['{other.name}'].value"
@@ -439,7 +449,7 @@ class Series(NDFrame):
         else:
             raise NotImplementedError(other, type(other))
 
-    def __ge__(self, other):
+    def __ge__(self, other: Union[int, float, "Series"]) -> BooleanFilter:
         if isinstance(other, Series):
             # Need to use scripted query to compare to values
             painless = f"doc['{self.name}'].value >= doc['{other.name}'].value"
@@ -449,7 +459,7 @@ class Series(NDFrame):
         else:
             raise NotImplementedError(other, type(other))
 
-    def __le__(self, other):
+    def __le__(self, other: Union[int, float, "Series"]) -> BooleanFilter:
         if isinstance(other, Series):
             # Need to use scripted query to compare to values
             painless = f"doc['{self.name}'].value <= doc['{other.name}'].value"
@@ -459,7 +469,7 @@ class Series(NDFrame):
         else:
             raise NotImplementedError(other, type(other))
 
-    def __eq__(self, other):
+    def __eq__(self, other: Union[int, float, str, "Series"]) -> BooleanFilter:
         if isinstance(other, Series):
             # Need to use scripted query to compare to values
             painless = f"doc['{self.name}'].value == doc['{other.name}'].value"
@@ -471,7 +481,7 @@ class Series(NDFrame):
         else:
             raise NotImplementedError(other, type(other))
 
-    def __ne__(self, other):
+    def __ne__(self, other: Union[int, float, str, "Series"]) -> BooleanFilter:
         if isinstance(other, Series):
             # Need to use scripted query to compare to values
             painless = f"doc['{self.name}'].value != doc['{other.name}'].value"
@@ -483,13 +493,13 @@ class Series(NDFrame):
         else:
             raise NotImplementedError(other, type(other))
 
-    def isin(self, other):
-        if isinstance(other, list):
-            return IsIn(field=self.name, value=other)
+    def isin(self, other: Union[Collection, pd.Series]) -> BooleanFilter:
+        if isinstance(other, (Collection, pd.Series)):
+            return IsIn(field=self.name, value=to_list(other))
         else:
             raise NotImplementedError(other, type(other))
 
-    def isna(self):
+    def isna(self) -> BooleanFilter:
         """
         Detect missing values.
 
@@ -506,7 +516,7 @@ class Series(NDFrame):
 
     isnull = isna
 
-    def notna(self):
+    def notna(self) -> BooleanFilter:
         """
         Detect existing (non-missing) values.
 
@@ -525,7 +535,7 @@ class Series(NDFrame):
     notnull = notna
 
     @property
-    def ndim(self):
+    def ndim(self) -> int:
         """
         Returns 1 by definition of a Series
 
@@ -596,7 +606,7 @@ class Series(NDFrame):
         )
         return Series(_query_compiler=new_query_compiler)
 
-    def es_info(self):
+    def es_info(self) -> str:
         buf = StringIO()
 
         super()._es_info(buf)
@@ -604,7 +614,7 @@ class Series(NDFrame):
         return buf.getvalue()
 
     @deprecated_api("eland.Series.es_info()")
-    def info_es(self):
+    def info_es(self) -> str:
         return self.es_info()
 
     def __add__(self, right):
@@ -1149,7 +1159,12 @@ class Series(NDFrame):
     rsubtract = __rsub__
     rtruediv = __rtruediv__
 
-    def _numeric_op(self, right, method_name):
+    # __div__ is technically Python 2.x only
+    # but pandas has it so we do too.
+    __div__ = __truediv__
+    __rdiv__ = __rtruediv__
+
+    def _numeric_op(self, right: Any, method_name: str) -> "Series":
         """
         return a op b
 
diff --git a/eland/tests/dataframe/test_query_pytest.py b/eland/tests/dataframe/test_query_pytest.py
index 20a35e6..97a9eeb 100644
--- a/eland/tests/dataframe/test_query_pytest.py
+++ b/eland/tests/dataframe/test_query_pytest.py
@@ -86,10 +86,11 @@ class TestDataFrameQuery(TestData):
         ed_flights = self.ed_flights()
         pd_flights = self.pd_flights()
 
-        assert (
-            pd_flights[pd_flights.OriginAirportID.isin(["LHR", "SYD"])].shape
-            == ed_flights[ed_flights.OriginAirportID.isin(["LHR", "SYD"])].shape
-        )
+        for obj in (["LHR", "SYD"], ("LHR", "SYD"), pd.Series(data=["LHR", "SYD"])):
+            assert (
+                pd_flights[pd_flights.OriginAirportID.isin(obj)].shape
+                == ed_flights[ed_flights.OriginAirportID.isin(obj)].shape
+            )
 
     def test_multiitem_query(self):
         # Examples from:
diff --git a/eland/utils.py b/eland/utils.py
index 8e302e1..4530fcb 100644
--- a/eland/utils.py
+++ b/eland/utils.py
@@ -18,16 +18,21 @@ import re
 import functools
 import warnings
-from typing import Callable, TypeVar
+from typing import Callable, TypeVar, Any, Union, List, cast, Collection
+from collections.abc import Collection as ABCCollection
+import pandas as pd  # type: ignore
 
-F = TypeVar("F")
+RT = TypeVar("RT")
+Item = TypeVar("Item")
 
 
-def deprecated_api(replace_with: str) -> Callable[[F], F]:
-    def wrapper(f: F) -> F:
+def deprecated_api(
+    replace_with: str,
+) -> Callable[[Callable[..., RT]], Callable[..., RT]]:
+    def wrapper(f: Callable[..., RT]) -> Callable[..., RT]:
         @functools.wraps(f)
-        def wrapped(*args, **kwargs):
+        def wrapped(*args: Any, **kwargs: Any) -> RT:
             warnings.warn(
                 f"{f.__name__} is deprecated, use {replace_with} instead",
                 DeprecationWarning,
@@ -39,10 +44,18 @@
     return wrapper
 
 
-def is_valid_attr_name(s):
+def is_valid_attr_name(s: str) -> bool:
     """
     Ensure the given string can be used as attribute on an object instance.
     """
-    return isinstance(s, str) and re.search(
-        string=s, pattern=r"^[a-zA-Z_][a-zA-Z0-9_]*$"
+    return bool(
+        isinstance(s, str) and re.search(string=s, pattern=r"^[a-zA-Z_][a-zA-Z0-9_]*$")
     )
+
+
+def to_list(x: Union[Collection[Any], pd.Series]) -> List[Any]:
+    if isinstance(x, ABCCollection):
+        return list(x)
+    elif isinstance(x, pd.Series):
+        return cast(List[Any], x.to_list())
+    raise NotImplementedError(f"Could not convert {type(x).__name__} into a list")
diff --git a/noxfile.py b/noxfile.py
index db6c28e..a8c4731 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -43,6 +43,7 @@ TYPED_FILES = {
     "eland/index.py",
     "eland/query.py",
     "eland/tasks.py",
+    "eland/utils.py",
     "eland/ml/__init__.py",
     "eland/ml/_model_serializer.py",
     "eland/ml/imported_ml_model.py",
@@ -75,6 +76,7 @@ def lint(session):
             session.error(f"The file {typed_file!r} couldn't be found")
         popen = subprocess.Popen(
             f"mypy --strict {typed_file}",
+            env=session.env,
             shell=True,
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
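
A minimal usage sketch of the behaviour this patch adds (illustrative only, not part of the
patch itself). It assumes a local Elasticsearch cluster containing the example "flights"
index used by the eland test suite:

    import eland as ed
    import pandas as pd

    # Connect an eland DataFrame to the (assumed) local "flights" index.
    ed_flights = ed.DataFrame("localhost", "flights")

    # Series.isin() now accepts any collection or a pandas.Series, not just a
    # Python list; each iteration below builds the same Elasticsearch terms filter.
    for values in (["LHR", "SYD"], ("LHR", "SYD"), pd.Series(["LHR", "SYD"])):
        print(ed_flights[ed_flights.OriginAirportID.isin(values)].shape)

Internally the new eland.utils.to_list() helper normalizes the accepted collection or
pandas.Series into a plain Python list before it is passed to the IsIn filter.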