[Backport 8.x] Fixes for Pandas 2 support (#758)

* Support Pandas 2 (#742) * Fix test setup to match pandas 2.0 demands * Use the now deprecated _append method (Better solution might exist) * Deal with numeric_only being removed in metrics test * Skip mad metric for other pandas versions * Account for differences between pandas versions in describe methods * Run black * Check Pandas version first * Mirror behaviour of installed Pandas version when running value_counts * Allow passing arguments to the individual asserters * Fix for method _construct_axes_from_arguments no longer existing * Skip mad metric if it does not exist * Account for pandas 2.0 timestamp default behaviour * Deal with empty vs other inferred data types * Account for default datetime precision change * Run Black * Solution for differences in inferred_type only * Fix csv and json issues * Skip two doctests * Passing a set as indexer is no longer allowed * Don't validate output where it differs between Pandas versions in the environment * Update test matrix and packaging metadata * Update version of Python in the docs * Update Python version in demo notebook * Match noxfile * Symmetry * Fix trailing comma in JSON * Revert some changes in setup.py to fix building the documentation * Revert "Revert some changes in setup.py to fix building the documentation" This reverts commit ea9879753129d8d8390b3cbbce57155a8b4fb346. * Use PANDAS_VERSION from eland.common * Still skip the doctest, but make the output pandas 2 instead of 1 * Still skip doctest, but switch to pandas 2 output * Prepare for pandas 3 * Reference the right column * Ignore output in tests but switch to pandas 2 output * Add line comment about NBVAL_IGNORE_OUTPUT * Restore missing line and add stderr cell * Use non-private method instead * Fix indentation and parameter issues * If index is not specified, and pandas 1 is present, set it to True From pandas 2 and upwards, index is set to None by default * Run black * Newer version of black might have different opinions? 
* Add line comment * Remove unused import * Add reason for ignore statement * Add reason for skip --------- Co-authored-by: Quentin Pradet <quentin.pradet@elastic.co> (cherry picked from commit 75c57b077532c459a9490613cbf7b37215c27fae) * Return input_field_names as list as required by Pandas 2 --------- Co-authored-by: Bart Broere <mail@bartbroere.eu> Co-authored-by: Quentin Pradet <quentin.pradet@elastic.co>
2025-07-11 00:02:14 +08:00 · 2025-02-13 14:16:49 +04:00 · 2025-02-13 14:16:49 +04:00 · af20ef9063
commit af20ef9063
parent d50436b01c
19 changed files with 161 additions and 70 deletions
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@ -29,11 +29,16 @@ steps:
      machineType: "n2-standard-4"
    env:
      PYTHON_VERSION: "{{ matrix.python }}"
-      PANDAS_VERSION: '1.5.0'
+      PANDAS_VERSION: "{{ matrix.pandas }}"
      TEST_SUITE: "xpack"
      ELASTICSEARCH_VERSION: "{{ matrix.stack }}"
    matrix:
      setup:
+        # Python and pandas versions need to be added to the nox configuration too
+        # (in the decorators of the test method in noxfile.py)
+        pandas:
+          - '1.5.0'
+          - '2.2.3'
        python:
          - '3.12'
          - '3.11'
--- a/docs/sphinx/examples/demo_notebook.ipynb
+++ b/docs/sphinx/examples/demo_notebook.ipynb
@ -24,7 +24,7 @@
        "\n",
        "For this example, you will need:\n",
        "\n",
-        "- Python 3.8 or later\n",
+        "- Python 3.9 or later\n",
        "- An Elastic deployment\n",
        "  - We'll be using [Elastic Cloud](https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html) for this example (available with a [free trial](https://cloud.elastic.co/registration))\n",
        "\n",
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@ -34,7 +34,7 @@ from pandas.io.formats.printing import pprint_thing  # type: ignore
 from pandas.util._validators import validate_bool_kwarg  # type: ignore

 import eland.plotting as gfx
-from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
+from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, PANDAS_VERSION, docstring_parameter
 from eland.filter import BooleanFilter
 from eland.groupby import DataFrameGroupBy
 from eland.ndframe import NDFrame
@ -411,9 +411,7 @@ class DataFrame(NDFrame):
            axis = pd.DataFrame._get_axis_name(axis)
            axes = {axis: labels}
        elif index is not None or columns is not None:
-            axes, _ = pd.DataFrame()._construct_axes_from_arguments(
-                (index, columns), {}
-            )
+            axes = {"columns": columns, "index": index}
        else:
            raise ValueError(
                "Need to specify at least one of 'labels', 'index' or 'columns'"
@ -1361,7 +1359,7 @@ class DataFrame(NDFrame):
        default_handler=None,
        lines=False,
        compression="infer",
-        index=True,
+        index=None,
        indent=None,
        storage_options=None,
    ):
@ -1376,6 +1374,8 @@ class DataFrame(NDFrame):
        --------
        :pandas_api_docs:`pandas.DataFrame.to_json`
        """
+        if index is None and PANDAS_VERSION[0] == 1:
+            index = True  # switch to the pandas 1 default
        kwargs = {
            "path_or_buf": path_or_buf,
            "orient": orient,
--- a/eland/etl.py
+++ b/eland/etl.py
@ -16,6 +16,7 @@
 #  under the License.

 import csv
+import warnings
 from collections import deque
 from typing import Any, Dict, Generator, List, Mapping, Optional, Tuple, Union

@ -110,15 +111,15 @@ def pandas_to_eland(
    2  3.141  1  ...  3  Long text - to be indexed as es type text
    <BLANKLINE>
    [3 rows x 8 columns]
-    >>> pd_df.dtypes
-    A           float64
-    B             int64
-    C            object
-    D    datetime64[ns]
-    E           float64
-    F              bool
-    G             int64
-    H            object
+    >>> pd_df.dtypes  # doctest skip required for pandas < 2  # doctest: +SKIP
+    A          float64
+    B            int64
+    C           object
+    D    datetime64[s]
+    E          float64
+    F             bool
+    G            int64
+    H           object
    dtype: object

    Convert `pandas.DataFrame` to `eland.DataFrame` - this creates an Elasticsearch index called `pandas_to_eland`.
@ -307,9 +308,9 @@ def csv_to_eland(  # type: ignore
    names=None,
    index_col=None,
    usecols=None,
-    squeeze=False,
+    squeeze=None,
    prefix=None,
-    mangle_dupe_cols=True,
+    mangle_dupe_cols=None,
    # General Parsing Configuration
    dtype=None,
    engine=None,
@ -357,6 +358,7 @@ def csv_to_eland(  # type: ignore
    low_memory: bool = _DEFAULT_LOW_MEMORY,
    memory_map=False,
    float_precision=None,
+    **extra_kwargs,
 ) -> "DataFrame":
    """
    Read a comma-separated values (csv) file into eland.DataFrame (i.e. an Elasticsearch index).
@ -485,7 +487,6 @@ def csv_to_eland(  # type: ignore
        "usecols": usecols,
        "verbose": verbose,
        "encoding": encoding,
-        "squeeze": squeeze,
        "memory_map": memory_map,
        "float_precision": float_precision,
        "na_filter": na_filter,
@ -494,9 +495,9 @@ def csv_to_eland(  # type: ignore
        "error_bad_lines": error_bad_lines,
        "on_bad_lines": on_bad_lines,
        "low_memory": low_memory,
-        "mangle_dupe_cols": mangle_dupe_cols,
        "infer_datetime_format": infer_datetime_format,
        "skip_blank_lines": skip_blank_lines,
+        **extra_kwargs,
    }

    if chunksize is None:
@ -525,6 +526,18 @@ def csv_to_eland(  # type: ignore

        kwargs.pop("on_bad_lines")

+    if "squeeze" in kwargs:
+        kwargs.pop("squeeze")
+        warnings.warn(
+            "This argument no longer works, use .squeeze('columns') on your DataFrame instead"
+        )
+
+    if "mangle_dupe_cols" in kwargs:
+        kwargs.pop("mangle_dupe_cols")
+        warnings.warn(
+            "The mangle_dupe_cols argument no longer works. Furthermore, "
+            "duplicate columns will automatically get a number suffix."
+        )
    # read csv in chunks to pandas DataFrame and dump to eland DataFrame (and Elasticsearch)
    reader = pd.read_csv(filepath_or_buffer, **kwargs)

--- a/eland/field_mappings.py
+++ b/eland/field_mappings.py
@ -712,8 +712,11 @@ class FieldMappings:
            capabilities, orient="index", columns=FieldMappings.column_labels
        )

-        self._mappings_capabilities = self._mappings_capabilities.append(
-            capability_matrix_row
+        self._mappings_capabilities = pd.concat(
+            [
+                self._mappings_capabilities,
+                capability_matrix_row,
+            ]
        )

    def numeric_source_fields(self) -> List[str]:
--- a/eland/ml/exporters/es_gb_models.py
+++ b/eland/ml/exporters/es_gb_models.py
@ -187,7 +187,7 @@ class ESGradientBoostingModel(ABC):
            if field_name in feature_names and field_name not in input_field_names:
                input_field_names.add(field_name)

-        return feature_names, input_field_names
+        return feature_names, list(input_field_names)

    @property
    def preprocessors(self) -> List[Any]:
--- a/eland/series.py
+++ b/eland/series.py
@ -40,11 +40,12 @@ from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple, Union

 import numpy as np
 import pandas as pd  # type: ignore
+from pandas.core.indexes.frozen import FrozenList
 from pandas.io.common import _expand_user, stringify_path  # type: ignore

 import eland.plotting
 from eland.arithmetics import ArithmeticNumber, ArithmeticSeries, ArithmeticString
-from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
+from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, PANDAS_VERSION, docstring_parameter
 from eland.filter import (
    BooleanFilter,
    Equal,
@ -292,18 +293,26 @@ class Series(NDFrame):
        Examples
        --------
        >>> df = ed.DataFrame('http://localhost:9200', 'flights')
-        >>> df['Carrier'].value_counts()
+        >>> df['Carrier'].value_counts()  # doctest: +SKIP
+        Carrier
        Logstash Airways    3331
        JetBeats            3274
        Kibana Airlines     3234
        ES-Air              3220
-        Name: Carrier, dtype: int64
+        Name: count, dtype: int64
        """
        if not isinstance(es_size, int):
            raise TypeError("es_size must be a positive integer.")
        elif es_size <= 0:
            raise ValueError("es_size must be a positive integer.")
-        return self._query_compiler.value_counts(es_size)
+        value_counts = self._query_compiler.value_counts(es_size)
+        # https://pandas.pydata.org/docs/whatsnew/v2.0.0.html#value-counts-sets-the-resulting-name-to-count
+        if PANDAS_VERSION[0] == 2:
+            value_counts.name = "count"
+            value_counts.index.names = FrozenList([self.es_field_name])
+            value_counts.index.name = self.es_field_name
+
+        return value_counts

    # dtype not implemented for Series as causes query to fail
    # in pandas.core.computation.ops.Term.type
--- a/noxfile.py
+++ b/noxfile.py
@ -96,7 +96,7 @@ def lint(session):


@nox.session(python=["3.9", "3.10", "3.11", "3.12"])
-@nox.parametrize("pandas_version", ["1.5.0"])
+@nox.parametrize("pandas_version", ["1.5.0", "2.2.3"])
 def test(session, pandas_version: str):
    session.install("-r", "requirements-dev.txt")
    session.install(".")
--- a/setup.py
+++ b/setup.py
@ -87,7 +87,7 @@ setup(
    packages=find_packages(include=["eland", "eland.*"]),
    install_requires=[
        "elasticsearch>=8.3,<9",
-        "pandas>=1.5,<2",
+        "pandas>=1.5,<3",
        "matplotlib>=3.6",
        "numpy>=1.2.0,<2",
        "packaging",
--- a/tests/common.py
+++ b/tests/common.py
@ -24,6 +24,7 @@ import pandas as pd
 from pandas.testing import assert_frame_equal, assert_series_equal

 import eland as ed
+from eland.common import PANDAS_VERSION

 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

@ -45,7 +46,10 @@ with gzip.open(FLIGHTS_FILE_NAME) as f:
 _pd_flights = pd.DataFrame.from_records(flight_records).reindex(
    _ed_flights.columns, axis=1
 )
-_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
+if PANDAS_VERSION[0] >= 2:
+    _pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"], format="mixed")
+else:
+    _pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
 # Mimic what copy_to in an Elasticsearch mapping would do, combining the two fields in a list
 _pd_flights["Cities"] = _pd_flights.apply(
    lambda x: list(sorted([x["OriginCityName"], x["DestCityName"]])), axis=1
@ -62,7 +66,7 @@ _pd_ecommerce["products.created_on"] = _pd_ecommerce["products.created_on"].appl
 )
 _pd_ecommerce.insert(2, "customer_birth_date", None)
 _pd_ecommerce.index = _pd_ecommerce.index.map(str)  # make index 'object' not int
-_pd_ecommerce["customer_birth_date"].astype("datetime64")
+_pd_ecommerce["customer_birth_date"].astype("datetime64[ns]")
 _ed_ecommerce = ed.DataFrame(ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME)


--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -77,7 +77,16 @@ class SymmetricAPIChecker:
                pd_exc = e

            self.check_exception(ed_exc, pd_exc)
-            self.check_values(ed_obj, pd_obj)
+            try:
+                self.check_values(ed_obj, pd_obj)
+            except AssertionError as e:
+                # This is an attribute we allow to differ when comparing zero-length objects
+                if (
+                    'Attribute "inferred_type" are different' in repr(e)
+                    and len(ed_obj) == 0
+                    and len(pd_obj) == 0
+                ):
+                    self.check_values(ed_obj, pd_obj, check_index_type=False)

            if isinstance(ed_obj, (ed.DataFrame, ed.Series)):
                return SymmetricAPIChecker(ed_obj, pd_obj)
@ -85,16 +94,16 @@ class SymmetricAPIChecker:

        return f

-    def check_values(self, ed_obj, pd_obj):
+    def check_values(self, ed_obj, pd_obj, **kwargs):
        """Checks that any two values coming from eland and pandas are equal"""
        if isinstance(ed_obj, ed.DataFrame):
-            assert_pandas_eland_frame_equal(pd_obj, ed_obj)
+            assert_pandas_eland_frame_equal(pd_obj, ed_obj, **kwargs)
        elif isinstance(ed_obj, ed.Series):
-            assert_pandas_eland_series_equal(pd_obj, ed_obj)
+            assert_pandas_eland_series_equal(pd_obj, ed_obj, **kwargs)
        elif isinstance(ed_obj, pd.DataFrame):
-            assert_frame_equal(ed_obj, pd_obj)
+            assert_frame_equal(ed_obj, pd_obj, **kwargs)
        elif isinstance(ed_obj, pd.Series):
-            assert_series_equal(ed_obj, pd_obj)
+            assert_series_equal(ed_obj, pd_obj, **kwargs)
        elif isinstance(ed_obj, pd.Index):
            assert ed_obj.equals(pd_obj)
        else:
--- a/tests/dataframe/test_datetime_pytest.py
+++ b/tests/dataframe/test_datetime_pytest.py
@ -87,6 +87,8 @@ class TestDataFrameDateTime(TestData):
            },
            index=["0", "1", "2"],
        )
+        # https://pandas.pydata.org/docs/whatsnew/v2.0.0.html#construction-with-datetime64-or-timedelta64-dtype-with-unsupported-resolution
+        df["D"] = df["D"].astype("datetime64[ns]")

        expected_mappings = {
            "mappings": {
--- a/tests/dataframe/test_describe_pytest.py
+++ b/tests/dataframe/test_describe_pytest.py
@ -33,9 +33,17 @@ class TestDataFrameDescribe(TestData):
            ["Cancelled", "FlightDelay"], axis="columns"
        )

+        # Pandas >= 2 calculates aggregations such as min and max for timestamps too
+        # This could be implemented in eland, but as of yet this is not the case
+        # We therefore remove it before the comparison
+        if "timestamp" in pd_describe.columns:
+            pd_describe = pd_describe.drop(["timestamp"], axis="columns")
+
+        # Pandas >= 2 orders the aggregations differently than Pandas < 2
+        # A sort_index is applied so tests will succeed in both environments
        assert_frame_equal(
-            pd_describe.drop(["25%", "50%", "75%"], axis="index"),
-            ed_describe.drop(["25%", "50%", "75%"], axis="index"),
+            pd_describe.drop(["25%", "50%", "75%"], axis="index").sort_index(),
+            ed_describe.drop(["25%", "50%", "75%"], axis="index").sort_index(),
            check_exact=False,
            rtol=True,
        )
--- a/tests/dataframe/test_head_tail_pytest.py
+++ b/tests/dataframe/test_head_tail_pytest.py
@ -99,7 +99,7 @@ class TestDataFrameHeadTail(TestData):

        ed_head_0 = ed_flights.head(0)
        pd_head_0 = pd_flights.head(0)
-        assert_pandas_eland_frame_equal(pd_head_0, ed_head_0)
+        assert_pandas_eland_frame_equal(pd_head_0, ed_head_0, check_index_type=False)

    def test_doc_test_tail(self):
        df = self.ed_flights()
--- a/tests/dataframe/test_metrics_pytest.py
+++ b/tests/dataframe/test_metrics_pytest.py
@ -22,6 +22,7 @@ import pandas as pd
 import pytest
 from pandas.testing import assert_frame_equal, assert_series_equal

+from eland.common import PANDAS_VERSION
 from tests.common import TestData, assert_almost_equal


@ -74,6 +75,8 @@ class TestDataFrameMetrics(TestData):
        logger.setLevel(logging.DEBUG)

        for func in self.extended_funcs:
+            if PANDAS_VERSION[0] >= 2 and func == "mad":
+                continue
            pd_metric = getattr(pd_flights, func)(
                **({"numeric_only": True} if func != "mad" else {})
            )
@ -92,6 +95,8 @@ class TestDataFrameMetrics(TestData):
        ed_flights_1 = ed_flights[ed_flights.FlightNum == "9HY9SWR"][["AvgTicketPrice"]]

        for func in self.extended_funcs:
+            if PANDAS_VERSION[0] >= 2 and func == "mad":
+                continue
            pd_metric = getattr(pd_flights_1, func)()
            ed_metric = getattr(ed_flights_1, func)(numeric_only=False)

@ -102,6 +107,8 @@ class TestDataFrameMetrics(TestData):
        ed_flights_0 = ed_flights[ed_flights.FlightNum == "XXX"][["AvgTicketPrice"]]

        for func in self.extended_funcs:
+            if PANDAS_VERSION[0] >= 2 and func == "mad":
+                continue
            pd_metric = getattr(pd_flights_0, func)()
            ed_metric = getattr(ed_flights_0, func)(numeric_only=False)

@ -491,8 +498,13 @@ class TestDataFrameMetrics(TestData):
            ["AvgTicketPrice", "FlightDelayMin", "dayOfWeek"]
        )

-        pd_quantile = pd_flights.agg(["quantile", "min"], numeric_only=numeric_only)
-        ed_quantile = ed_flights.agg(["quantile", "min"], numeric_only=numeric_only)
+        if PANDAS_VERSION[0] == 1:
+            pd_quantile = pd_flights.agg(["quantile", "min"], numeric_only=numeric_only)
+            ed_quantile = ed_flights.agg(["quantile", "min"], numeric_only=numeric_only)
+
+        else:  # numeric_only is no longer available for pandas >= 2
+            pd_quantile = pd_flights.agg(["quantile", "min"])
+            ed_quantile = ed_flights.agg(["quantile", "min"])

        assert_frame_equal(
            pd_quantile, ed_quantile, check_exact=False, rtol=4, check_dtype=False
--- a/tests/dataframe/test_utils_pytest.py
+++ b/tests/dataframe/test_utils_pytest.py
@ -69,6 +69,12 @@ class TestDataFrameUtils(TestData):
        )
        ed_df_head = ed_df.head()

+        # https://pandas.pydata.org/docs/whatsnew/v2.0.0.html#construction-with-datetime64-or-timedelta64-dtype-with-unsupported-resolution
+        df["D"] = df["D"].astype("datetime64[ns]")
+        df["H"] = (
+            df["H"].dt.tz_localize(None).astype("datetime64[ns]").dt.tz_localize("UTC")
+        )
+
        assert_pandas_eland_frame_equal(df, ed_df_head)

        ES_TEST_CLIENT.indices.delete(index=index_name)
--- a/tests/notebook/test_demo_notebook.ipynb
+++ b/tests/notebook/test_demo_notebook.ipynb
@ -1647,6 +1647,14 @@
   "execution_count": 32,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+       "/code/eland/.nox/test-3-12-pandas_version-2-2-3/lib/python3.12/site-packages/eland/series.py:464: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
+       "  return self._query_compiler.dtypes[0]\n"
+     ]
+    },
    {
     "data": {
      "text/html": [
@ -1792,6 +1800,9 @@
    }
   ],
   "source": [
+    "# NBVAL_IGNORE_OUTPUT\n",
+    "# The ignore statement above is because of output difference between Pandas 1 and 2\n",
+    "# and can be removed once Pandas 1 support is dropped\n",
    "ed_flights.query('Carrier == \"Kibana Airlines\" & AvgTicketPrice > 900.0 & Cancelled == True')"
   ]
  },
@ -2377,8 +2388,8 @@
       "      <th>AvgTicketPrice</th>\n",
       "      <th>DistanceKilometers</th>\n",
       "      <th>...</th>\n",
-       "      <th>FlightTimeMin</th>\n",
       "      <th>dayOfWeek</th>\n",
+       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
@ -2388,23 +2399,15 @@
       "      <td>13059.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>13059.000000</td>\n",
-       "      <td>13059.000000</td>\n",
+       "      <td>13059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>628.253689</td>\n",
       "      <td>7092.142455</td>\n",
       "      <td>...</td>\n",
-       "      <td>511.127842</td>\n",
       "      <td>2.835975</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>std</th>\n",
-       "      <td>266.396861</td>\n",
-       "      <td>4578.438497</td>\n",
-       "      <td>...</td>\n",
-       "      <td>334.753952</td>\n",
-       "      <td>1.939439</td>\n",
+       "      <td>2018-01-21 19:20:45.564438016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
@ -2412,57 +2415,65 @@
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
-       "      <td>0.000000</td>\n",
+       "      <td>2018-01-01 00:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>409.893816</td>\n",
       "      <td>2459.705673</td>\n",
       "      <td>...</td>\n",
-       "      <td>252.333192</td>\n",
       "      <td>1.000000</td>\n",
+       "      <td>2018-01-11 05:16:25.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>640.556668</td>\n",
       "      <td>7610.330866</td>\n",
       "      <td>...</td>\n",
-       "      <td>503.045170</td>\n",
       "      <td>3.000000</td>\n",
+       "      <td>2018-01-22 00:32:11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>842.185470</td>\n",
       "      <td>9736.637600</td>\n",
       "      <td>...</td>\n",
-       "      <td>720.416036</td>\n",
       "      <td>4.000000</td>\n",
+       "      <td>2018-02-01 04:51:18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1199.729053</td>\n",
       "      <td>19881.482315</td>\n",
       "      <td>...</td>\n",
-       "      <td>1902.902032</td>\n",
       "      <td>6.000000</td>\n",
+       "      <td>2018-02-11 23:50:12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>266.396861</td>\n",
+       "      <td>4578.438497</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1.939439</td>\n",
+       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
-       "<p>8 rows × 7 columns</p>\n",
+       "<p>8 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
-       "       AvgTicketPrice  DistanceKilometers  ...  FlightTimeMin     dayOfWeek\n",
-       "count    13059.000000        13059.000000  ...   13059.000000  13059.000000\n",
-       "mean       628.253689         7092.142455  ...     511.127842      2.835975\n",
-       "std        266.396861         4578.438497  ...     334.753952      1.939439\n",
-       "min        100.020528            0.000000  ...       0.000000      0.000000\n",
-       "25%        409.893816         2459.705673  ...     252.333192      1.000000\n",
-       "50%        640.556668         7610.330866  ...     503.045170      3.000000\n",
-       "75%        842.185470         9736.637600  ...     720.416036      4.000000\n",
-       "max       1199.729053        19881.482315  ...    1902.902032      6.000000\n",
+       "       AvgTicketPrice  DistanceKilometers  ...     dayOfWeek                      timestamp\n",
+       "count    13059.000000        13059.000000  ...  13059.000000                          13059\n",
+       "mean       628.253689         7092.142455  ...      2.835975  2018-01-21 19:20:45.564438016\n",
+       "min        100.020528            0.000000  ...      0.000000            2018-01-01 00:00:00\n",
+       "25%        409.893816         2459.705673  ...      1.000000     2018-01-11 05:16:25.500000\n",
+       "50%        640.556668         7610.330866  ...      3.000000            2018-01-22 00:32:11\n",
+       "75%        842.185470         9736.637600  ...      4.000000            2018-02-01 04:51:18\n",
+       "max       1199.729053        19881.482315  ...      6.000000            2018-02-11 23:50:12\n",
+       "std        266.396861         4578.438497  ...      1.939439                            NaN\n",
       "\n",
-       "[8 rows x 7 columns]"
+       "[8 rows x 8 columns]"
      ]
     },
     "execution_count": 39,
@ -2471,6 +2482,8 @@
    }
   ],
   "source": [
+    "# NBVAL_IGNORE_OUTPUT\n",
+    "# Once support for pandas <2 is dropped, this and the line above can be removed\n",
    "pd_flights.describe()"
   ]
  },
--- a/tests/series/test_filter_pytest.py
+++ b/tests/series/test_filter_pytest.py
@ -58,7 +58,9 @@ class TestSeriesFilter(TestData):
        ed_ser = ed_flights_small.filter(items=items, axis=0)
        pd_ser = pd_flights_small.filter(items=items, axis=0)

-        assert_pandas_eland_series_equal(pd_ser, ed_ser)
+        # For an empty Series, eland will say the datatype it knows from the Elastic index
+        # Pandas however will state empty as the datatype
+        assert_pandas_eland_series_equal(pd_ser, ed_ser, check_index_type=False)

    def test_flights_filter_index_like_and_regex(self):
        ed_flights_small = self.ed_flights_small()["FlightDelayType"]
--- a/tests/series/test_metrics_pytest.py
+++ b/tests/series/test_metrics_pytest.py
@ -24,6 +24,7 @@ import pandas as pd
 import pytest
 from pandas.testing import assert_series_equal

+from eland.common import PANDAS_VERSION
 from tests.common import TestData, assert_almost_equal


@ -42,6 +43,8 @@ class TestSeriesMetrics(TestData):
        ed_flights = self.ed_flights()["AvgTicketPrice"]

        for func in self.all_funcs:
+            if PANDAS_VERSION[0] >= 2 and func == "mad":
+                continue
            pd_metric = getattr(pd_flights, func)()
            ed_metric = getattr(ed_flights, func)()

@ -87,6 +90,8 @@ class TestSeriesMetrics(TestData):
            ed_ecommerce = self.ed_ecommerce()[column]

            for func in self.all_funcs:
+                if PANDAS_VERSION[0] >= 2 and func == "mad":
+                    continue
                pd_metric = getattr(pd_ecommerce, func)()
                ed_metric = getattr(ed_ecommerce, func)(
                    **({"numeric_only": True} if (func != "nunique") else {})