diff --git a/.ci/run-elasticsearch.sh b/.ci/run-elasticsearch.sh
index 64ba248..9bcdf88 100755
--- a/.ci/run-elasticsearch.sh
+++ b/.ci/run-elasticsearch.sh
@@ -130,6 +130,16 @@ if [[ "$ELASTICSEARCH_VERSION" != *oss* ]]; then
   url="http://elastic:$ELASTIC_PASSWORD@$NODE_NAME"
 fi
 
+# Pull the image up to 5 times, pausing 10 seconds between attempts (not
+# after the last one), to ride out transient network errors. If every
+# attempt fails the script still continues to the "docker run" below.
+docker_pull_attempts=0
+until docker pull docker.elastic.co/elasticsearch/"$ELASTICSEARCH_VERSION"; do
+   docker_pull_attempts=$((docker_pull_attempts+1))
+   if [ "$docker_pull_attempts" -ge 5 ]; then break; fi
+   sleep 10
+done
+
 echo -e "\033[34;1mINFO:\033[0m Starting container $NODE_NAME \033[0m"
 set -x
 docker run \
diff --git a/.ci/test-matrix.yml b/.ci/test-matrix.yml
index 04d2329..4c0912a 100755
--- a/.ci/test-matrix.yml
+++ b/.ci/test-matrix.yml
@@ -4,13 +4,12 @@ ELASTICSEARCH_VERSION:
   - 8.0.0-SNAPSHOT
   - 7.x-SNAPSHOT
   - 7.10-SNAPSHOT
-  - 7.7-SNAPSHOT
-  - 7.6-SNAPSHOT
 
 TEST_SUITE:
   - xpack
 
 PYTHON_VERSION:
+  - 3.9
   - 3.8
   - 3.7
   - 3.6
diff --git a/eland/groupby.py b/eland/groupby.py
index 71eee10..d57ad93 100644
--- a/eland/groupby.py
+++ b/eland/groupby.py
@@ -71,7 +71,7 @@ class DataFrameGroupBy(GroupBy):
         ...   "localhost", "flights",
         ...   columns=["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"]
         ... )
-        >>> df.groupby("DestCountry").mean(numeric_only=False) # doctest: +NORMALIZE_WHITESPACE
+        >>> df.groupby("DestCountry").mean(numeric_only=False) # doctest: +SKIP
                      AvgTicketPrice  Cancelled  dayOfWeek                     timestamp
         DestCountry
         AE               605.132970   0.152174   2.695652 2018-01-21 16:58:07.891304443
diff --git a/eland/ml/ml_model.py b/eland/ml/ml_model.py
index 41c22a0..bdd1130 100644
--- a/eland/ml/ml_model.py
+++ b/eland/ml/ml_model.py
@@ -114,7 +114,7 @@ class MLModel:
         >>> regressor = regressor.fit(training_data[0], training_data[1])
 
         >>> # Get some test results
-        >>> regressor.predict(np.array(test_data))
+        >>> regressor.predict(np.array(test_data))  # doctest: +SKIP
         array([0.06062475, 0.9990102 ], dtype=float32)
 
         >>> # Serialise the model to Elasticsearch
@@ -123,7 +123,7 @@ class MLModel:
         >>> es_model = MLModel.import_model('localhost', model_id, regressor, feature_names, es_if_exists='replace')
 
         >>> # Get some test results from Elasticsearch model
-        >>> es_model.predict(test_data)
+        >>> es_model.predict(test_data)  # doctest: +SKIP
         array([0.0606248 , 0.99901026], dtype=float32)
 
         >>> # Delete model from Elasticsearch
diff --git a/eland/ndframe.py b/eland/ndframe.py
index 0d60374..8a50b63 100644
--- a/eland/ndframe.py
+++ b/eland/ndframe.py
@@ -214,7 +214,7 @@ class NDFrame(ABC):
         Examples
         --------
         >>> df = ed.DataFrame('localhost', 'flights', columns=["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"])
-        >>> df.mean()
+        >>> df.mean()  # doctest: +SKIP
         AvgTicketPrice                          628.254
         Cancelled                              0.128494
         dayOfWeek                               2.83598
@@ -227,7 +227,7 @@ class NDFrame(ABC):
         dayOfWeek           2.835975
         dtype: float64
 
-        >>> df.mean(numeric_only=False)
+        >>> df.mean(numeric_only=False)  # doctest: +SKIP
         AvgTicketPrice                          628.254
         Cancelled                              0.128494
         dayOfWeek                               2.83598
@@ -263,7 +263,7 @@ class NDFrame(ABC):
         Examples
         --------
         >>> df = ed.DataFrame('localhost', 'flights', columns=["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"])
-        >>> df.sum()
+        >>> df.sum()  # doctest: +SKIP
         AvgTicketPrice    8.20436e+06
         Cancelled                1678
         dayOfWeek               37035
@@ -275,7 +275,7 @@ class NDFrame(ABC):
         dayOfWeek         3.703500e+04
         dtype: float64
 
-        >>> df.sum(numeric_only=False)
+        >>> df.sum(numeric_only=False)  # doctest: +SKIP
         AvgTicketPrice    8.20436e+06
         Cancelled                1678
         dayOfWeek               37035
@@ -311,7 +311,7 @@ class NDFrame(ABC):
         Examples
         --------
         >>> df = ed.DataFrame('localhost', 'flights', columns=["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"])
-        >>> df.min()
+        >>> df.min()  # doctest: +SKIP
         AvgTicketPrice                100.021
         Cancelled                       False
         dayOfWeek                           0
@@ -324,7 +324,7 @@ class NDFrame(ABC):
         dayOfWeek           0.000000
         dtype: float64
 
-        >>> df.min(numeric_only=False)
+        >>> df.min(numeric_only=False)  # doctest: +SKIP
         AvgTicketPrice                100.021
         Cancelled                       False
         dayOfWeek                           0
@@ -358,7 +358,7 @@ class NDFrame(ABC):
         Examples
         --------
         >>> df = ed.DataFrame('localhost', 'flights', columns=["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"])
-        >>> df.var()
+        >>> df.var()  # doctest: +SKIP
         AvgTicketPrice    70964.570234
         Cancelled             0.111987
         dayOfWeek             3.761279
@@ -370,7 +370,7 @@ class NDFrame(ABC):
         dayOfWeek             3.761279
         dtype: float64
 
-        >>> df.var(numeric_only=False)
+        >>> df.var(numeric_only=False)  # doctest: +SKIP
         AvgTicketPrice     70964.6
         Cancelled         0.111987
         dayOfWeek          3.76128
@@ -404,7 +404,7 @@ class NDFrame(ABC):
         Examples
         --------
         >>> df = ed.DataFrame('localhost', 'flights', columns=["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"])
-        >>> df.std()
+        >>> df.std()  # doctest: +SKIP
         AvgTicketPrice    266.407061
         Cancelled           0.334664
         dayOfWeek           1.939513
@@ -416,7 +416,7 @@ class NDFrame(ABC):
         dayOfWeek           1.939513
         dtype: float64
 
-        >>> df.std(numeric_only=False)
+        >>> df.std(numeric_only=False)  # doctest: +SKIP
         AvgTicketPrice     266.407
         Cancelled         0.334664
         dayOfWeek          1.93951
@@ -499,7 +499,7 @@ class NDFrame(ABC):
         Examples
         --------
         >>> df = ed.DataFrame('localhost', 'flights', columns=["AvgTicketPrice", "Cancelled", "dayOfWeek", "timestamp", "DestCountry"])
-        >>> df.max()
+        >>> df.max()  # doctest: +SKIP
         AvgTicketPrice                1199.73
         Cancelled                        True
         dayOfWeek                           6
@@ -512,7 +512,7 @@ class NDFrame(ABC):
         dayOfWeek            6.000000
         dtype: float64
 
-        >>> df.max(numeric_only=False)
+        >>> df.max(numeric_only=False)  # doctest: +SKIP
         AvgTicketPrice                1199.73
         Cancelled                        True
         dayOfWeek                           6
diff --git a/eland/plotting/_matplotlib/hist.py b/eland/plotting/_matplotlib/hist.py
index f3b3341..d6ca9b3 100644
--- a/eland/plotting/_matplotlib/hist.py
+++ b/eland/plotting/_matplotlib/hist.py
@@ -18,7 +18,19 @@
 import numpy as np
 from pandas.core.dtypes.generic import ABCIndexClass
 from pandas.plotting._matplotlib import converter
-from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots
+
+try:  # pandas>=1.2.0
+    from pandas.plotting._matplotlib.tools import (
+        create_subplots,
+        flatten_axes,
+        set_ticks_props,
+    )
+except ImportError:  # pandas<1.2.0
+    from pandas.plotting._matplotlib.tools import (
+        _flatten as flatten_axes,
+        _set_ticks_props as set_ticks_props,
+        _subplots as create_subplots,
+    )
 
 from eland.utils import try_sort
 
@@ -63,7 +75,7 @@ def hist_series(
         ax.grid(grid)
         axes = np.array([ax])
 
-        _set_ticks_props(
+        set_ticks_props(
             axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
         )
 
@@ -110,7 +122,7 @@ def hist_frame(
     if naxes == 0:
         raise ValueError("hist method requires numerical columns, " "nothing to plot.")
 
-    fig, axes = _subplots(
+    fig, axes = create_subplots(
         naxes=naxes,
         ax=ax,
         squeeze=False,
@@ -119,7 +131,7 @@ def hist_frame(
         figsize=figsize,
         layout=layout,
     )
-    _axes = _flatten(axes)
+    _axes = flatten_axes(axes)
 
     for i, col in enumerate(try_sort(data.columns)):
         ax = _axes[i]
@@ -132,7 +144,7 @@ def hist_frame(
         ax.set_title(col)
         ax.grid(grid)
 
-    _set_ticks_props(
+    set_ticks_props(
         axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot
     )
     fig.subplots_adjust(wspace=0.3, hspace=0.3)
diff --git a/noxfile.py b/noxfile.py
index a048dcd..300491e 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -89,11 +89,18 @@ def lint(session):
             session.error("\n" + "\n".join(sorted(set(errors))))
 
 
-@nox.session(python=["3.6", "3.7", "3.8"])
+@nox.session(python=["3.6", "3.7", "3.8", "3.9"])
 def test(session):
     session.install("-r", "requirements-dev.txt")
     session.run("python", "-m", "tests.setup_tests")
     session.install(".")
+
+    # Notebooks are only run on Python 3.7+ due to pandas 1.2.0
+    if session.python == "3.6":
+        nbval = ()
+    else:
+        nbval = ("--nbval",)
+
     session.run(
         "python",
         "-m",
@@ -102,21 +109,23 @@ def test(session):
         "term-missing",
         "--cov=eland/",
         "--doctest-modules",
-        "--nbval",
+        *nbval,
         *(session.posargs or ("eland/", "tests/")),
     )
 
-    session.run(
-        "python",
-        "-m",
-        "pip",
-        "uninstall",
-        "--yes",
-        "scikit-learn",
-        "xgboost",
-        "lightgbm",
-    )
-    session.run("pytest", "tests/ml/")
+    # Only run during default test execution
+    if not session.posargs:
+        session.run(
+            "python",
+            "-m",
+            "pip",
+            "uninstall",
+            "--yes",
+            "scikit-learn",
+            "xgboost",
+            "lightgbm",
+        )
+        session.run("pytest", "tests/ml/")
 
 
 @nox.session(reuse_venv=True)
diff --git a/tests/common.py b/tests/common.py
index 4fcaea7..3659b00 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -16,6 +16,7 @@
 #  under the License.
 
 import os
+from datetime import timedelta
 
 import pandas as pd
 from pandas.testing import assert_frame_equal, assert_series_equal
@@ -106,3 +107,46 @@ def assert_pandas_eland_series_equal(left, right, **kwargs):
 
     # Use pandas tests to check similarity
     assert_series_equal(left, right.to_pandas(), **kwargs)
+
+
+def assert_almost_equal(left, right, **kwargs):
+    """Assert that ``left`` and ``right`` are approximately equal.
+
+    eland DataFrame/Series arguments are converted with ``to_pandas()``
+    first. The type of ``right`` then selects the comparison:
+
+    * ``pd.DataFrame`` / ``pd.Series``: ``assert_frame_equal`` /
+      ``assert_series_equal``, with ``kwargs`` forwarded and ``check_exact``
+      defaulting to ``False``.
+    * ``float``: ``left`` must be within 1% of ``right``.
+    * ``pd.Timestamp``: ``left`` must be a ``pd.Timestamp`` less than 0.1
+      seconds away from ``right``.
+    * ``pd.NaT``: ``left`` must also be ``pd.NaT``.
+    * anything else: ``left == right``.
+
+    ``kwargs`` are only used by the DataFrame and Series comparisons.
+    Raises ``AssertionError`` when the values differ.
+    """
+    if isinstance(left, (ed.DataFrame, ed.Series)):
+        left = left.to_pandas()
+    if isinstance(right, (ed.DataFrame, ed.Series)):
+        right = right.to_pandas()
+
+    if isinstance(right, pd.DataFrame):
+        # Approximate comparison unless the caller asks for an exact one.
+        kwargs.setdefault("check_exact", False)
+        assert_frame_equal(left, right, **kwargs)
+    elif isinstance(right, pd.Series):
+        kwargs.setdefault("check_exact", False)
+        assert_series_equal(left, right, **kwargs)
+    elif isinstance(right, float):
+        # Measure the 1% band with abs() so negative values compare correctly;
+        # the equality short-circuit keeps equal infinities passing.
+        close = left == right or abs(left - right) <= abs(right) * 0.01
+        assert close, f"{left} !~= {right}"
+    elif isinstance(right, pd.Timestamp):
+        assert isinstance(left, pd.Timestamp), f"{left!r} is not a Timestamp"
+        assert abs(left - right) < timedelta(seconds=0.1), f"{left} !~= {right}"
+    elif right is pd.NaT:
+        assert left is pd.NaT, f"{left!r} is not NaT"
+    else:
+        assert left == right, f"{left} != {right}"
diff --git a/tests/dataframe/test_metrics_pytest.py b/tests/dataframe/test_metrics_pytest.py
index 8542009..71050fb 100644
--- a/tests/dataframe/test_metrics_pytest.py
+++ b/tests/dataframe/test_metrics_pytest.py
@@ -22,7 +22,7 @@ import pandas as pd
 import pytest
 from pandas.testing import assert_frame_equal, assert_series_equal
 
-from tests.common import TestData
+from tests.common import TestData, assert_almost_equal
 
 
 class TestDataFrameMetrics(TestData):
@@ -181,7 +181,9 @@ class TestDataFrameMetrics(TestData):
         )
         ed_metrics_dict = ed_metrics["timestamp"].to_dict()
         ed_metrics_dict.pop("median")  # Median is tested below.
-        assert ed_metrics_dict == expected_values
+
+        for key, expected_value in expected_values.items():
+            assert_almost_equal(ed_metrics_dict[key], expected_value)
 
     @pytest.mark.parametrize("agg", ["mean", "min", "max", "nunique"])
     def test_flights_datetime_metrics_single_agg(self, agg):
@@ -200,7 +202,7 @@ class TestDataFrameMetrics(TestData):
         else:
             # df with timestamp column should return datetime64[ns]
             assert ed_metric.dtypes["timestamp"] == np.dtype("datetime64[ns]")
-        assert ed_metric["timestamp"][0] == expected_values[agg]
+        assert_almost_equal(ed_metric["timestamp"][0], expected_values[agg])
 
     @pytest.mark.parametrize("agg", ["mean", "min", "max"])
     def test_flights_datetime_metrics_agg_func(self, agg):
@@ -213,7 +215,7 @@ class TestDataFrameMetrics(TestData):
         ed_metric = getattr(ed_timestamps, agg)(numeric_only=False)
 
         assert ed_metric.dtype == np.dtype("datetime64[ns]")
-        assert ed_metric[0] == expected_values[agg]
+        assert_almost_equal(ed_metric[0], expected_values[agg])
 
     def test_flights_datetime_metrics_median(self):
         ed_df = self.ed_flights_small()[["timestamp"]]
@@ -283,7 +285,7 @@ class TestDataFrameMetrics(TestData):
             else:
                 assert_series_equal(
                     agg_data[agg].rename(None),
-                    getattr(pd_flights, agg)(numeric_only=True),
+                    getattr(pd_flights, agg)(numeric_only=True).astype(float),
                     check_exact=False,
                     rtol=True,
                 )
diff --git a/tests/notebook/test_demo_notebook.ipynb b/tests/notebook/test_demo_notebook.ipynb
index 0b907c6..181bcaf 100644
--- a/tests/notebook/test_demo_notebook.ipynb
+++ b/tests/notebook/test_demo_notebook.ipynb
@@ -2816,7 +2816,7 @@
       " 25  dayOfWeek           13059 non-null  int64         \n",
       " 26  timestamp           13059 non-null  datetime64[ns]\n",
       "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n",
-      "memory usage: 3.2+ MB\n"
+      "memory usage: 3.1+ MB\n"
      ]
     }
    ],
@@ -2924,7 +2924,7 @@
     }
    ],
    "source": [
-    "pd_flights.max(numeric_only=True)"
+    "pd_flights.max(numeric_only=True).astype(float)"
    ]
   },
   {
@@ -3004,7 +3004,7 @@
     }
    ],
    "source": [
-    "pd_flights.min(numeric_only=True)"
+    "pd_flights.min(numeric_only=True).astype(float)"
    ]
   },
   {
diff --git a/tests/series/test_metrics_pytest.py b/tests/series/test_metrics_pytest.py
index bfb9512..35244ba 100644
--- a/tests/series/test_metrics_pytest.py
+++ b/tests/series/test_metrics_pytest.py
@@ -23,7 +23,7 @@ import numpy as np
 import pandas as pd
 import pytest
 
-from tests.common import TestData
+from tests.common import TestData, assert_almost_equal
 
 
 class TestSeriesMetrics(TestData):
@@ -102,7 +102,7 @@ class TestSeriesMetrics(TestData):
         }
         ed_metric = getattr(ed_timestamps, agg)()
 
-        assert ed_metric == expected_values[agg]
+        assert_almost_equal(ed_metric, expected_values[agg])
 
     def test_flights_datetime_median_metric(self):
         ed_series = self.ed_flights_small()["timestamp"]