Compare commits

main..v8.15.1

No commits in common. "main" and "v8.15.1" have entirely different histories.

62 changed files with 1911 additions and 1135 deletions

@@ -1,8 +1,6 @@
ARG PYTHON_VERSION=3.9
FROM python:${PYTHON_VERSION}
ENV FORCE_COLOR=1
WORKDIR /code/eland
RUN python -m pip install nox

@@ -1,11 +0,0 @@
#!/usr/bin/env bash
set -eo pipefail
export LC_ALL=en_US.UTF-8
echo "--- Building the Wolfi image"
# Building the linux/arm64 image takes about one hour on Buildkite, which is too slow
docker build --file Dockerfile.wolfi .
echo "--- Building the public image"
docker build .

@@ -1,8 +1,15 @@
#!/usr/bin/env bash
sudo apt-get update
sudo apt-get install -y pandoc python3 python3-pip
python3 -m pip install nox
/opt/buildkite-agent/.local/bin/nox -s docs
docker build --file .buildkite/Dockerfile --tag elastic/eland --build-arg PYTHON_VERSION=${PYTHON_VERSION} .
docker run \
--name doc_build \
--rm \
elastic/eland \
bash -c "apt-get update && apt-get install --yes pandoc && nox -s docs"
# I couldn't make this work; for some reason pandoc is not found in the Docker container repository:
# docker build --file .buildkite/Dockerfile --tag elastic/eland --build-arg PYTHON_VERSION=${PYTHON_VERSION} .
# docker run \
# --name doc_build \
# --rm \
# elastic/eland \
# apt-get update && \
# sudo apt-get install --yes pandoc && \
# nox -s docs

@@ -15,36 +15,24 @@ steps:
machineType: "n2-standard-2"
commands:
- ./.buildkite/build-documentation.sh
- label: ":docker: Build Wolfi image"
env:
PYTHON_VERSION: 3.11-bookworm
agents:
provider: "gcp"
machineType: "n2-standard-2"
commands:
- ./.buildkite/build-docker-images.sh
- label: ":python: {{ matrix.python }} :elasticsearch: {{ matrix.stack }} :pandas: {{ matrix.pandas }}"
- label: "Eland :python: {{ matrix.python }} :elasticsearch: {{ matrix.stack }}"
agents:
provider: "gcp"
machineType: "n2-standard-4"
env:
PYTHON_VERSION: "{{ matrix.python }}"
PANDAS_VERSION: "{{ matrix.pandas }}"
PANDAS_VERSION: '1.5.0'
TEST_SUITE: "xpack"
ELASTICSEARCH_VERSION: "{{ matrix.stack }}"
matrix:
setup:
# Python and pandas versions need to be added to the nox configuration too
# (in the decorators of the test method in noxfile.py)
pandas:
- '1.5.0'
- '2.2.3'
python:
- '3.12'
- '3.11'
- '3.10'
- '3.9'
- '3.8'
stack:
- '9.0.0'
- '9.1.0-SNAPSHOT'
- '8.16.0-SNAPSHOT'
- '8.15.2'
- '8.14.1'
command: ./.buildkite/run-tests
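The comment in this matrix notes that Python and pandas versions must also be declared in `noxfile.py`. A minimal sketch of what that parametrization looks like there, with decorator values assumed to mirror the matrix above:

```python
# sketch of the corresponding noxfile.py declaration (version lists assumed)
import nox

@nox.session(python=["3.9", "3.10", "3.11", "3.12"])
@nox.parametrize("pandas_version", ["1.5.0", "2.2.3"])
def test(session, pandas_version):
    session.install(f"pandas=={pandas_version}", ".", "pytest")
    session.run("pytest")
```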

@@ -1,4 +1,5 @@
# docs and example
docs/*
example/*
# Git
@@ -17,6 +18,9 @@ dist/
# Build folder
build/
# docs
docs/*
# pytest results
tests/dataframe/results/*csv
result_images/

@@ -1,26 +0,0 @@
name: Backport
on:
pull_request_target:
types:
- closed
- labeled
jobs:
backport:
name: Backport
runs-on: ubuntu-latest
# Only react to merged PRs for security reasons.
# See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target.
if: >
github.event.pull_request.merged
&& (
github.event.action == 'closed'
|| (
github.event.action == 'labeled'
&& contains(github.event.label.name, 'backport')
)
)
steps:
- uses: tibdex/backport@9565281eda0731b1d20c4025c43339fb0a23812e # v2.0.4
with:
github_token: ${{ secrets.GITHUB_TOKEN }}

@@ -1,19 +0,0 @@
name: docs-build
on:
push:
branches:
- main
pull_request_target: ~
merge_group: ~
jobs:
docs-preview:
uses: elastic/docs-builder/.github/workflows/preview-build.yml@main
with:
path-pattern: docs/**
permissions:
deployments: write
id-token: write
contents: read
pull-requests: write

@@ -1,14 +0,0 @@
name: docs-cleanup
on:
pull_request_target:
types:
- closed
jobs:
docs-preview:
uses: elastic/docs-builder/.github/workflows/preview-cleanup.yml@main
permissions:
contents: none
id-token: write
deployments: write

@@ -9,6 +9,3 @@ python:
install:
- path: .
- requirements: docs/requirements-docs.txt
sphinx:
configuration: docs/sphinx/conf.py

@@ -2,56 +2,11 @@
Changelog
=========
9.0.1 (2025-04-30)
------------------
* Forbid Elasticsearch 8 client or server (`#780 <https://github.com/elastic/eland/pull/780>`_)
* Fix DeBERTa tokenization (`#769 <https://github.com/elastic/eland/pull/769>`_)
* Upgrade PyTorch to 2.5.1 (`#785 <https://github.com/elastic/eland/pull/785>`_)
* Upgrade LightGBM to 4.6.0 (`#782 <https://github.com/elastic/eland/pull/782>`_)
9.0.0 (2025-04-15)
------------------
* Drop Python 3.8, Support Python 3.12 (`#743 <https://github.com/elastic/eland/pull/743>`_)
* Support Pandas 2 (`#742 <https://github.com/elastic/eland/pull/742>`_)
* Upgrade transformers to 4.47 (`#752 <https://github.com/elastic/eland/pull/752>`_)
* Remove ML model export as sklearn Pipeline (`#744 <https://github.com/elastic/eland/pull/744>`_)
* Allow scikit-learn 1.5 (`#729 <https://github.com/elastic/eland/pull/729>`_)
* Migrate docs from AsciiDoc to Markdown (`#762 <https://github.com/elastic/eland/pull/762>`_)
8.17.0 (2025-01-07)
-------------------
* Support sparse embedding models such as SPLADE-v3-DistilBERT (`#740 <https://github.com/elastic/eland/pull/740>`_)
8.16.0 (2024-11-13)
-------------------
* Add deprecation warning for ESGradientBoostingModel subclasses (`#738 <https://github.com/elastic/eland/pull/738>`_)
8.15.4 (2024-10-17)
-------------------
* Revert "Allow reading Elasticsearch certs in Wolfi image" (`#734 <https://github.com/elastic/eland/pull/734>`_)
8.15.3 (2024-10-09)
-------------------
* Added support for DeBERTa-V2 tokenizer (`#717 <https://github.com/elastic/eland/pull/717>`_)
* Fixed ``--ca-cert`` with a shared Elasticsearch Docker volume (`#732 <https://github.com/elastic/eland/pull/732>`_)
8.15.2 (2024-10-02)
-------------------
* Fixed Docker image build (`#728 <https://github.com/elastic/eland/pull/728>`_)
8.15.1 (2024-10-01)
-------------------
* Upgraded PyTorch to version 2.3.1, which is compatible with Elasticsearch 8.15.2 or above (`#718 <https://github.com/elastic/eland/pull/718>`_)
* Migrated to distroless Wolfi base Docker image (`#720 <https://github.com/elastic/eland/pull/720>`_)
* Upgrade PyTorch to version 2.3.1, which is compatible with Elasticsearch 8.15.2 or above
* Migrate to distroless Wolfi base Docker image
8.15.0 (2024-08-12)
-------------------

@@ -78,15 +78,9 @@ Once your changes and tests are ready to submit for review:
# Run Auto-format, lint, mypy type checker for your changes
$ nox -s format
# Launch Elasticsearch with a trial license and ML enabled
$ docker run --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "xpack.license.self_generated.type=trial" docker.elastic.co/elasticsearch/elasticsearch:9.0.0
# See all test suites
$ nox -l
# Run a specific test suite
$ nox -rs "test-3.12(pandas_version='2.2.3')"
# Run a specific test
$ nox -rs "test-3.12(pandas_version='2.2.3')" -- -k test_learning_to_rank
# Run the test suite
$ pytest --doctest-modules eland/ tests/
$ pytest --nbval tests/notebook/
```
@@ -175,7 +169,7 @@ currently using a minimum version of PyCharm 2019.2.4.
* Setup Elasticsearch instance with docker
``` bash
> ELASTICSEARCH_VERSION=elasticsearch:8.17.0 BUILDKITE=false .buildkite/run-elasticsearch.sh
> ELASTICSEARCH_VERSION=elasticsearch:8.x-SNAPSHOT BUILDKITE=false .buildkite/run-elasticsearch.sh
```
* Now check `http://localhost:9200`
@@ -209,7 +203,7 @@ currently using a minimum version of PyCharm 2019.2.4.
* To test specific versions of Python run
``` bash
> nox -s test-3.12
> nox -s test-3.8
```
### Documentation

@@ -18,7 +18,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
python3 -m pip install \
--no-cache-dir --disable-pip-version-check --extra-index-url https://download.pytorch.org/whl/cpu \
torch==2.5.1+cpu .[all]; \
torch==2.1.2+cpu .[all]; \
else \
python3 -m pip install \
--no-cache-dir --disable-pip-version-check \

@@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
python3 -m pip install \
--no-cache-dir --disable-pip-version-check --extra-index-url https://download.pytorch.org/whl/cpu \
torch==2.5.1+cpu .[all]; \
torch==2.1.2+cpu .[all]; \
else \
python3 -m pip install \
--no-cache-dir --disable-pip-version-check \

@@ -53,9 +53,8 @@ $ conda install -c conda-forge eland
### Compatibility
- Supports Python 3.9, 3.10, 3.11 and 3.12.
- Supports Pandas 1.5 and 2.
- Supports Elasticsearch 8+ clusters, recommended 8.16 or later for all features to work.
- Supports Python 3.8, 3.9, 3.10, 3.11 and Pandas 1.5
- Supports Elasticsearch clusters that are 7.11+, recommended 8.13 or later for all features to work.
If you are using the NLP with PyTorch feature make sure your Eland minor version matches the minor
version of your Elasticsearch cluster. For all other features it is sufficient for the major versions
to match.
@@ -278,3 +277,15 @@ Downloading: 100%|██████████| 249M/249M [00:23<00:00, 11.2MB
>>> ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config)
100%|██████████| 63/63 [00:12<00:00, 5.02it/s]
```
## Feedback 🗣️
The engineering team here at Elastic is looking for developers to participate in
research and feedback sessions to learn more about how you use Eland and what
improvements we can make to its design and your workflow. If you're interested
in sharing your insights into developer experience and language client design,
please fill out this [short form](https://forms.gle/bYZwDQXijfhfwshn9).
Depending on the number of responses we get, we may either contact you for a 1:1
conversation or a focus group with other developers who use the same client.
Thank you in advance - your feedback is crucial to improving the user experience
for all Elasticsearch developers!

@@ -1,8 +0,0 @@
project: 'Eland Python client'
cross_links:
- docs-content
toc:
- toc: reference
subs:
es: "Elasticsearch"
ml: "machine learning"

@@ -1,16 +1,16 @@
---
mapped_pages:
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/dataframes.html
---
[[dataframes]]
== Data Frames
# Data Frames [dataframes]
`eland.DataFrame` wraps an Elasticsearch index in a Pandas-like API
and defers all processing and filtering of data to Elasticsearch
instead of your local machine. This means you can process large
amounts of data within Elasticsearch from a Jupyter Notebook
without overloading your machine.
`eland.DataFrame` wraps an Elasticsearch index in a Pandas-like API and defers all processing and filtering of data to Elasticsearch instead of your local machine. This means you can process large amounts of data within Elasticsearch from a Jupyter Notebook without overloading your machine.
```python
[source,python]
-------------------------------------
>>> import eland as ed
>>>
# Connect to 'flights' index via localhost Elasticsearch node
>>> # Connect to 'flights' index via localhost Elasticsearch node
>>> df = ed.DataFrame('http://localhost:9200', 'flights')
# eland.DataFrame instance has the same API as pandas.DataFrame
@@ -29,14 +29,14 @@ mapped_pages:
<class 'eland.dataframe.DataFrame'>
Index: 13059 entries, 0 to 13058
Data columns (total 27 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 AvgTicketPrice 13059 non-null float64
1 Cancelled 13059 non-null bool
2 Carrier 13059 non-null object
...
24 OriginWeather 13059 non-null object
25 dayOfWeek 13059 non-null int64
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 AvgTicketPrice 13059 non-null float64
1 Cancelled 13059 non-null bool
2 Carrier 13059 non-null object
...
24 OriginWeather 13059 non-null object
25 dayOfWeek 13059 non-null int64
26 timestamp 13059 non-null datetime64[ns]
dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)
memory usage: 80.0 bytes
@@ -59,5 +59,4 @@ Elasticsearch storage usage: 5.043 MB
sum 9.261629e+07 8.204365e+06
min 0.000000e+00 1.000205e+02
std 4.578263e+03 2.663867e+02
```
-------------------------------------

docs/guide/index.asciidoc
@@ -0,0 +1,14 @@
= Eland Python Client
:doctype: book
include::{asciidoc-dir}/../../shared/versions/stack/{source_branch}.asciidoc[]
include::{asciidoc-dir}/../../shared/attributes.asciidoc[]
include::overview.asciidoc[]
include::installation.asciidoc[]
include::dataframes.asciidoc[]
include::machine-learning.asciidoc[]

@@ -0,0 +1,16 @@
[[installation]]
== Installation
Eland can be installed with https://pip.pypa.io[pip] from https://pypi.org/project/eland[PyPI]. We recommend https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/[using a virtual environment] when installing with pip:
[source,sh]
-----------------------------
$ python -m pip install eland
-----------------------------
Alternatively, Eland can be installed with https://docs.conda.io[Conda] from https://anaconda.org/conda-forge/eland[Conda Forge]:
[source,sh]
------------------------------------
$ conda install -c conda-forge eland
------------------------------------

@@ -0,0 +1,242 @@
[[machine-learning]]
== Machine Learning
[discrete]
[[ml-trained-models]]
=== Trained models
Eland allows transforming trained models from scikit-learn, XGBoost,
and LightGBM libraries to be serialized and used as an inference
model in {es}.
[source,python]
------------------------
>>> from xgboost import XGBClassifier
>>> from eland.ml import MLModel
# Train and exercise an XGBoost ML model locally
>>> xgb_model = XGBClassifier(booster="gbtree")
>>> xgb_model.fit(training_data[0], training_data[1])
>>> xgb_model.predict(training_data[0])
[0 1 1 0 1 0 0 0 1 0]
# Import the model into Elasticsearch
>>> es_model = MLModel.import_model(
es_client="http://localhost:9200",
model_id="xgb-classifier",
model=xgb_model,
feature_names=["f0", "f1", "f2", "f3", "f4"],
)
# Exercise the ML model in Elasticsearch with the training data
>>> es_model.predict(training_data[0])
[0 1 1 0 1 0 0 0 1 0]
------------------------
[discrete]
[[ml-nlp-pytorch]]
=== Natural language processing (NLP) with PyTorch
IMPORTANT: You need to install the appropriate version of PyTorch to import an
NLP model. Run `python -m pip install 'eland[pytorch]'` to install that version.
For NLP tasks, Eland enables you to import PyTorch models into {es}. Use the
`eland_import_hub_model` script to download and install supported
https://huggingface.co/transformers[transformer models] from the
https://huggingface.co/models[Hugging Face model hub]. For example:
[source,bash]
------------------------
$ eland_import_hub_model <authentication> \ <1>
--url http://localhost:9200/ \ <2>
--hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \ <3>
--task-type ner \ <4>
--start
------------------------
<1> Use an authentication method to access your cluster. Refer to <<ml-nlp-pytorch-auth>>.
<2> The cluster URL. Alternatively, use `--cloud-id`.
<3> Specify the identifier for the model in the Hugging Face model hub.
<4> Specify the type of NLP task. Supported values are `fill_mask`, `ner`,
`question_answering`, `text_classification`, `text_embedding`, `text_expansion`,
`text_similarity` and `zero_shot_classification`.
For more information about the available options, run `eland_import_hub_model` with the `--help` option.
[source,bash]
------------------------
$ eland_import_hub_model --help
------------------------
[discrete]
[[ml-nlp-pytorch-docker]]
==== Import model with Docker
IMPORTANT: To use the Docker container, you need to clone the Eland repository: https://github.com/elastic/eland
If you want to use Eland without installing it, you can use the Docker image:
You can use the container interactively:
```bash
$ docker run -it --rm --network host docker.elastic.co/eland/eland
```
Running installed scripts is also possible without an interactive shell, for example:
```bash
docker run -it --rm docker.elastic.co/eland/eland \
eland_import_hub_model \
--url $ELASTICSEARCH_URL \
--hub-model-id elastic/distilbert-base-uncased-finetuned-conll03-english \
--start
```
Replace the `$ELASTICSEARCH_URL` with the URL for your Elasticsearch cluster. For authentication purposes, include an administrator username and password in the URL in the following format: `https://username:password@host:port`.
[discrete]
[[ml-nlp-pytorch-air-gapped]]
==== Install models in an air-gapped environment
You can install models in a restricted or closed network by pointing the
`eland_import_hub_model` script to local files.
For an offline install of a Hugging Face model, the model first needs to be
cloned locally; Git and https://git-lfs.com/[Git Large File Storage] must
be installed on your system.
1. Select a model you want to use from Hugging Face. Refer to the
{ml-docs}/ml-nlp-model-ref.html[compatible third party model] list for more
information on the supported architectures.
2. Clone the selected model from Hugging Face by using the model URL. For
example:
+
--
[source,bash]
----
git clone https://huggingface.co/dslim/bert-base-NER
----
This command results in a local copy
of the model in the directory `bert-base-NER`.
--
3. Use the `eland_import_hub_model` script with the `--hub-model-id` set to the
directory of the cloned model to install it:
+
--
[source,bash]
----
eland_import_hub_model \
--url 'XXXX' \
--hub-model-id /PATH/TO/MODEL \
--task-type ner \
--es-username elastic --es-password XXX \
--es-model-id bert-base-ner
----
If you use the Docker image to run `eland_import_hub_model` you must bind mount
the model directory, so the container can read the files:
[source,bash]
----
docker run --mount type=bind,source=/PATH/TO/MODEL,destination=/model,readonly -it --rm docker.elastic.co/eland/eland \
eland_import_hub_model \
--url 'XXXX' \
--hub-model-id /model \
--task-type ner \
--es-username elastic --es-password XXX \
--es-model-id bert-base-ner
----
Once it's uploaded to {es}, the model will have the ID specified by
`--es-model-id`. If it is not set, the model ID is derived from
`--hub-model-id`; spaces and path delimiters are converted to double
underscores `__`.
--
[discrete]
[[ml-nlp-pytorch-proxy]]
==== Connect to Elasticsearch through a proxy
Behind the scenes, Eland uses the `requests` Python library, which
https://requests.readthedocs.io/en/latest/user/advanced/#proxies[allows configuring
proxies through an environment variable]. For example, to use an HTTP proxy to connect to
an HTTPS Elasticsearch cluster, you need to set the `HTTPS_PROXY` environment variable
when invoking Eland:
[source,bash]
--------------------------------------------------
HTTPS_PROXY=http://proxy-host:proxy-port eland_import_hub_model ...
--------------------------------------------------
If you disabled security on your Elasticsearch cluster, you should use `HTTP_PROXY`
instead.
[discrete]
[[ml-nlp-pytorch-auth]]
==== Authentication methods
The following authentication options are available when using the import script:
* Elasticsearch username and password authentication (specified with the `-u` and `-p` options):
+
--
[source,bash]
--------------------------------------------------
eland_import_hub_model -u <username> -p <password> --cloud-id <cloud-id> ...
--------------------------------------------------
These `-u` and `-p` options also work when you use `--url`.
--
* Elasticsearch username and password authentication (embedded in the URL):
+
--
[source,bash]
--------------------------------------------------
eland_import_hub_model --url https://<user>:<password>@<hostname>:<port> ...
--------------------------------------------------
--
* Elasticsearch API key authentication:
+
--
[source,bash]
--------------------------------------------------
eland_import_hub_model --es-api-key <api-key> --url https://<hostname>:<port> ...
--------------------------------------------------
--
* HuggingFace Hub access token (for private models):
+
--
[source,bash]
--------------------------------------------------
eland_import_hub_model --hub-access-token <access-token> ...
--------------------------------------------------
--
[discrete]
[[ml-nlp-pytorch-tls]]
==== TLS/SSL
The following TLS/SSL options for Elasticsearch are available when using the import script:
* Specify alternate CA bundle to verify the cluster certificate:
+
--
[source,bash]
--------------------------------------------------
eland_import_hub_model --ca-certs CA_CERTS ...
--------------------------------------------------
--
* Disable TLS/SSL verification altogether (strongly discouraged):
+
--
[source,bash]
--------------------------------------------------
eland_import_hub_model --insecure ...
--------------------------------------------------
--

@@ -1,36 +1,33 @@
---
mapped_pages:
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/index.html
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/overview.html
navigation_title: Eland
---
[[overview]]
== Overview
# Eland Python client [overview]
Eland is a Python client and toolkit for DataFrames and {ml} in {es}.
Full documentation is available on https://eland.readthedocs.io[Read the Docs].
Source code is available on https://github.com/elastic/eland[GitHub].
Eland is a Python client and toolkit for DataFrames and {{ml}} in {{es}}. Full documentation is available on [Read the Docs](https://eland.readthedocs.io). Source code is available on [GitHub](https://github.com/elastic/eland).
[discrete]
=== Compatibility
- Supports Python 3.8+ and Pandas 1.5
- Supports {es} clusters that are 7.11+, recommended 7.14 or later for all features to work.
Make sure your Eland major version matches the major version of your Elasticsearch cluster.
## Compatibility [_compatibility]
The recommended way to set your requirements in your `setup.py` or
`requirements.txt` is::
* Supports Python 3.9+ and Pandas 1.5
* Supports {{es}} 8+ clusters, recommended 8.16 or later for all features to work. Make sure your Eland major version matches the major version of your Elasticsearch cluster.
# Elasticsearch 8.x
eland>=8,<9
The recommended way to set your requirements in your `setup.py` or `requirements.txt` is::
# Elasticsearch 7.x
eland>=7,<8
```
# Elasticsearch 8.x
eland>=8,<9
```
```
# Elasticsearch 7.x
eland>=7,<8
```
[discrete]
=== Getting Started
## Getting Started [_getting_started]
Create a `DataFrame` object connected to an {es} cluster running on `http://localhost:9200`:
Create a `DataFrame` object connected to an {{es}} cluster running on `http://localhost:9200`:
```python
[source,python]
------------------------------------
>>> import eland as ed
>>> df = ed.DataFrame(
... es_client="http://localhost:9200",
@@ -51,14 +48,15 @@ Create a `DataFrame` object connected to an {{es}} cluster running on `http://lo
13058 858.144337 False ... 6 2018-02-11 14:54:34
[13059 rows x 27 columns]
```
------------------------------------
### Elastic Cloud [_elastic_cloud]
[discrete]
==== Elastic Cloud
You can also connect Eland to an Elasticsearch instance in Elastic Cloud:
```python
[source,python]
------------------------------------
>>> import eland as ed
>>> from elasticsearch import Elasticsearch
@@ -75,16 +73,16 @@ You can also connect Eland to an Elasticsearch instance in Elastic Cloud:
3 181.694216 True ... 0 2018-01-01 10:33:28
4 730.041778 False ... 0 2018-01-01 05:13:00
[5 rows x 27 columns]
```
------------------------------------
Eland can be used for complex queries and aggregations:
```python
[source,python]
------------------------------------
>>> df[df.Carrier != "Kibana Airlines"].groupby("Carrier").mean(numeric_only=False)
AvgTicketPrice Cancelled timestamp
Carrier
Carrier
ES-Air 630.235816 0.129814 2018-01-21 20:45:00.200000000
JetBeats 627.457373 0.134698 2018-01-21 14:43:18.112400635
Logstash Airways 624.581974 0.125188 2018-01-21 16:14:50.711798340
```
------------------------------------

@@ -1,19 +0,0 @@
---
mapped_pages:
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/installation.html
---
# Installation [installation]
Eland can be installed with [pip](https://pip.pypa.io) from [PyPI](https://pypi.org/project/eland). We recommend [using a virtual environment](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/) when installing with pip:
```sh
$ python -m pip install eland
```
Alternatively, Eland can be installed with [Conda](https://docs.conda.io) from [Conda Forge](https://anaconda.org/conda-forge/eland):
```sh
$ conda install -c conda-forge eland
```

@@ -1,199 +0,0 @@
---
mapped_pages:
- https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html
---
# Machine Learning [machine-learning]
## Trained models [ml-trained-models]
Eland allows transforming *some*
[trained models](https://eland.readthedocs.io/en/latest/reference/api/eland.ml.MLModel.import_model.html#parameters) from scikit-learn, XGBoost,
and LightGBM libraries to be serialized and used as an inference model in {{es}}.
```python
>>> from xgboost import XGBClassifier
>>> from eland.ml import MLModel
# Train and exercise an XGBoost ML model locally
>>> xgb_model = XGBClassifier(booster="gbtree")
>>> xgb_model.fit(training_data[0], training_data[1])
>>> xgb_model.predict(training_data[0])
[0 1 1 0 1 0 0 0 1 0]
# Import the model into Elasticsearch
>>> es_model = MLModel.import_model(
es_client="http://localhost:9200",
model_id="xgb-classifier",
model=xgb_model,
feature_names=["f0", "f1", "f2", "f3", "f4"],
)
# Exercise the ML model in Elasticsearch with the training data
>>> es_model.predict(training_data[0])
[0 1 1 0 1 0 0 0 1 0]
```
## Natural language processing (NLP) with PyTorch [ml-nlp-pytorch]
::::{important}
You need to install the appropriate version of PyTorch to import an NLP model. Run `python -m pip install 'eland[pytorch]'` to install that version.
::::
For NLP tasks, Eland enables you to import PyTorch models into {{es}}. Use the `eland_import_hub_model` script to download and install supported [transformer models](https://huggingface.co/transformers) from the [Hugging Face model hub](https://huggingface.co/models). For example:
```bash
eland_import_hub_model <authentication> \ <1>
--url http://localhost:9200/ \ <2>
--hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \ <3>
--task-type ner \ <4>
--start
```
1. Use an authentication method to access your cluster. Refer to [Authentication methods](machine-learning.md#ml-nlp-pytorch-auth).
2. The cluster URL. Alternatively, use `--cloud-id`.
3. Specify the identifier for the model in the Hugging Face model hub.
4. Specify the type of NLP task. Supported values are `fill_mask`, `ner`, `question_answering`, `text_classification`, `text_embedding`, `text_expansion`, `text_similarity` and `zero_shot_classification`.
For more information about the available options, run `eland_import_hub_model` with the `--help` option.
```bash
eland_import_hub_model --help
```
### Import model with Docker [ml-nlp-pytorch-docker]
::::{important}
To use the Docker container, you need to clone the Eland repository: [https://github.com/elastic/eland](https://github.com/elastic/eland)
::::
If you want to use Eland without installing it, you can use the Docker image:
You can use the container interactively:
```bash
docker run -it --rm --network host docker.elastic.co/eland/eland
```
Running installed scripts is also possible without an interactive shell, for example:
```bash
docker run -it --rm docker.elastic.co/eland/eland \
eland_import_hub_model \
--url $ELASTICSEARCH_URL \
--hub-model-id elastic/distilbert-base-uncased-finetuned-conll03-english \
--start
```
Replace the `$ELASTICSEARCH_URL` with the URL for your Elasticsearch cluster. For authentication purposes, include an administrator username and password in the URL in the following format: `https://username:password@host:port`.
### Install models in an air-gapped environment [ml-nlp-pytorch-air-gapped]
You can install models in a restricted or closed network by pointing the `eland_import_hub_model` script to local files.
For an offline install of a Hugging Face model, the model first needs to be cloned locally; Git and [Git Large File Storage](https://git-lfs.com/) must be installed on your system.
1. Select a model you want to use from Hugging Face. Refer to the [compatible third party model](docs-content://explore-analyze/machine-learning/nlp/ml-nlp-model-ref.md) list for more information on the supported architectures.
2. Clone the selected model from Hugging Face by using the model URL. For example:
```bash
git clone https://huggingface.co/dslim/bert-base-NER
```
This command results in a local copy of the model in the directory `bert-base-NER`.
3. Use the `eland_import_hub_model` script with the `--hub-model-id` set to the directory of the cloned model to install it:
```bash
eland_import_hub_model \
--url 'XXXX' \
--hub-model-id /PATH/TO/MODEL \
--task-type ner \
--es-username elastic --es-password XXX \
--es-model-id bert-base-ner
```
If you use the Docker image to run `eland_import_hub_model` you must bind mount the model directory, so the container can read the files:
```bash
docker run --mount type=bind,source=/PATH/TO/MODEL,destination=/model,readonly -it --rm docker.elastic.co/eland/eland \
eland_import_hub_model \
--url 'XXXX' \
--hub-model-id /model \
--task-type ner \
--es-username elastic --es-password XXX \
--es-model-id bert-base-ner
```
Once it's uploaded to {{es}}, the model will have the ID specified by `--es-model-id`. If it is not set, the model ID is derived from `--hub-model-id`; spaces and path delimiters are converted to double underscores `__`.
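As an illustration of that derivation rule, a hub model ID maps onto an Elasticsearch model ID roughly as follows (a sketch of the stated behavior, not the actual implementation):

```python
# sketch: spaces and path delimiters become double underscores
hub_model_id = "elastic/distilbert-base-uncased-finetuned-conll03-english"
es_model_id = hub_model_id.replace("/", "__").replace(" ", "__")
print(es_model_id)  # elastic__distilbert-base-uncased-finetuned-conll03-english
```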
### Connect to Elasticsearch through a proxy [ml-nlp-pytorch-proxy]
Behind the scenes, Eland uses the `requests` Python library, which [allows configuring proxies through an environment variable](https://requests.readthedocs.io/en/latest/user/advanced/#proxies). For example, to use an HTTP proxy to connect to an HTTPS Elasticsearch cluster, you need to set the `HTTPS_PROXY` environment variable when invoking Eland:
```bash
HTTPS_PROXY=http://proxy-host:proxy-port eland_import_hub_model ...
```
If you disabled security on your Elasticsearch cluster, you should use `HTTP_PROXY` instead.
### Authentication methods [ml-nlp-pytorch-auth]
The following authentication options are available when using the import script:
* Elasticsearch username and password authentication (specified with the `-u` and `-p` options):
```bash
eland_import_hub_model -u <username> -p <password> --cloud-id <cloud-id> ...
```
These `-u` and `-p` options also work when you use `--url`.
* Elasticsearch username and password authentication (embedded in the URL):
```bash
eland_import_hub_model --url https://<user>:<password>@<hostname>:<port> ...
```
* Elasticsearch API key authentication:
```bash
eland_import_hub_model --es-api-key <api-key> --url https://<hostname>:<port> ...
```
* HuggingFace Hub access token (for private models):
```bash
eland_import_hub_model --hub-access-token <access-token> ...
```
### TLS/SSL [ml-nlp-pytorch-tls]
The following TLS/SSL options for Elasticsearch are available when using the import script:
* Specify alternate CA bundle to verify the cluster certificate:
```bash
eland_import_hub_model --ca-certs CA_CERTS ...
```
* Disable TLS/SSL verification altogether (strongly discouraged):
```bash
eland_import_hub_model --insecure ...
```

@@ -1,6 +0,0 @@
project: 'Eland reference'
toc:
- file: index.md
- file: installation.md
- file: dataframes.md
- file: machine-learning.md

@@ -200,7 +200,7 @@ Configuring PyCharm And Running Tests
- To test specific versions of Python run
.. code-block:: bash
nox -s test-3.12
nox -s test-3.8
Documentation

@@ -24,7 +24,7 @@
"\n",
"For this example, you will need:\n",
"\n",
"- Python 3.9 or later\n",
"- Python 3.8 or later\n",
"- An Elastic deployment\n",
" - We'll be using [Elastic Cloud](https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html) for this example (available with a [free trial](https://cloud.elastic.co/registration))\n",
"\n",

@@ -1,5 +1,5 @@
eland.DataFrame.to\_json
========================
=======================
.. currentmodule:: eland

@@ -17,7 +17,6 @@
~MLModel.delete_model
~MLModel.exists_model
~MLModel.export_model
~MLModel.import_ltr_model
~MLModel.import_model
~MLModel.predict

@@ -15,8 +15,6 @@
# specific language governing permissions and limitations
# under the License.
import warnings
from ._version import ( # noqa: F401
__author__,
__author_email__,
@@ -27,16 +25,13 @@ from ._version import ( # noqa: F401
__url__,
__version__,
)
from .common import ElandDeprecationWarning, SortOrder
from .common import SortOrder
from .dataframe import DataFrame
from .etl import csv_to_eland, eland_to_pandas, pandas_to_eland
from .index import Index
from .ndframe import NDFrame
from .series import Series
# Display Eland deprecation warnings by default
warnings.simplefilter("default", category=ElandDeprecationWarning)
__all__ = [
"DataFrame",
"Series",

@@ -18,7 +18,7 @@
__title__ = "eland"
__description__ = "Python Client and Toolkit for DataFrames, Big Data, Machine Learning and ETL in Elasticsearch"
__url__ = "https://github.com/elastic/eland"
__version__ = "9.0.1"
__version__ = "8.15.1"
__author__ = "Steve Dodson"
__author_email__ = "steve.dodson@elastic.co"
__maintainer__ = "Elastic Client Library Maintainers"

@@ -236,9 +236,14 @@ def check_cluster_version(es_client, logger):
f"Elasticsearch version {major_version} does not support NLP models. Please upgrade Elasticsearch to the latest version"
)
exit(1)
elif major_version < 9:
# PyTorch was upgraded to version 2.3.1 in 8.15.2
# and is incompatible with earlier versions
if sem_ver < (8, 15, 2):
import torch
logger.error(
"Eland 9.x does not support Elasticsearch 8.x. Please upgrade Elasticsearch first."
f"Eland uses PyTorch version {torch.__version__} which is incompatible with Elasticsearch versions prior to 8.15.2. Please upgrade Elasticsearch to at least version 8.15.2"
)
exit(1)
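The `sem_ver < (8, 15, 2)` guard above leans on Python's lexicographic tuple comparison; a quick sketch of the check, with the version parsing assumed:

```python
# sketch: parse "major.minor.patch" into an int tuple and compare element-wise
sem_ver = tuple(int(part) for part in "8.15.1".split("."))
print(sem_ver < (8, 15, 2))  # True: 8.15.1 predates the PyTorch 2.3.1 upgrade
```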

@@ -52,10 +52,6 @@ PANDAS_VERSION: Tuple[int, ...] = tuple(
_ELAND_MAJOR_VERSION = int(_eland_version.split(".")[0])
class ElandDeprecationWarning(DeprecationWarning):
"""Warning for deprecation functionality in Eland"""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
EMPTY_SERIES_DTYPE = pd.Series().dtype
@@ -309,7 +305,7 @@ def elasticsearch_date_to_pandas_date(
def ensure_es_client(
es_client: Union[str, List[str], Tuple[str, ...], Elasticsearch],
es_client: Union[str, List[str], Tuple[str, ...], Elasticsearch]
) -> Elasticsearch:
if isinstance(es_client, tuple):
es_client = list(es_client)
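Per the signature above, `ensure_es_client` accepts a URL string, a list or tuple of URLs, or an existing client; a hedged usage sketch (an instance is presumably returned unchanged):

```python
from elasticsearch import Elasticsearch
from eland.common import ensure_es_client

client = ensure_es_client("http://localhost:9200")  # built from a URL string
assert isinstance(client, Elasticsearch)
assert ensure_es_client(client) is client           # assumed pass-through for instances
```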

@@ -34,7 +34,7 @@ from pandas.io.formats.printing import pprint_thing # type: ignore
from pandas.util._validators import validate_bool_kwarg # type: ignore
import eland.plotting as gfx
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, PANDAS_VERSION, docstring_parameter
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
from eland.filter import BooleanFilter
from eland.groupby import DataFrameGroupBy
from eland.ndframe import NDFrame
@@ -411,7 +411,9 @@ class DataFrame(NDFrame):
axis = pd.DataFrame._get_axis_name(axis)
axes = {axis: labels}
elif index is not None or columns is not None:
axes = {"columns": columns, "index": index}
axes, _ = pd.DataFrame()._construct_axes_from_arguments(
(index, columns), {}
)
else:
raise ValueError(
"Need to specify at least one of 'labels', 'index' or 'columns'"
@@ -1359,7 +1361,7 @@ class DataFrame(NDFrame):
default_handler=None,
lines=False,
compression="infer",
index=None,
index=True,
indent=None,
storage_options=None,
):
@@ -1374,8 +1376,6 @@ class DataFrame(NDFrame):
--------
:pandas_api_docs:`pandas.DataFrame.to_json`
"""
if index is None and PANDAS_VERSION[0] == 1:
index = True # switch to the pandas 1 default
kwargs = {
"path_or_buf": path_or_buf,
"orient": orient,

@@ -16,7 +16,6 @@
# under the License.
import csv
import warnings
from collections import deque
from typing import Any, Dict, Generator, List, Mapping, Optional, Tuple, Union
@@ -111,15 +110,15 @@ def pandas_to_eland(
2 3.141 1 ... 3 Long text - to be indexed as es type text
<BLANKLINE>
[3 rows x 8 columns]
>>> pd_df.dtypes # doctest skip required for pandas < 2 # doctest: +SKIP
A float64
B int64
C object
D datetime64[s]
E float64
F bool
G int64
H object
>>> pd_df.dtypes
A float64
B int64
C object
D datetime64[ns]
E float64
F bool
G int64
H object
dtype: object
Convert `pandas.DataFrame` to `eland.DataFrame` - this creates an Elasticsearch index called `pandas_to_eland`.
@@ -308,9 +307,9 @@ def csv_to_eland( # type: ignore
names=None,
index_col=None,
usecols=None,
squeeze=None,
squeeze=False,
prefix=None,
mangle_dupe_cols=None,
mangle_dupe_cols=True,
# General Parsing Configuration
dtype=None,
engine=None,
@@ -358,7 +357,6 @@ def csv_to_eland( # type: ignore
low_memory: bool = _DEFAULT_LOW_MEMORY,
memory_map=False,
float_precision=None,
**extra_kwargs,
) -> "DataFrame":
"""
Read a comma-separated values (csv) file into eland.DataFrame (i.e. an Elasticsearch index).
@@ -487,6 +485,7 @@ def csv_to_eland( # type: ignore
"usecols": usecols,
"verbose": verbose,
"encoding": encoding,
"squeeze": squeeze,
"memory_map": memory_map,
"float_precision": float_precision,
"na_filter": na_filter,
@@ -495,9 +494,9 @@ def csv_to_eland( # type: ignore
"error_bad_lines": error_bad_lines,
"on_bad_lines": on_bad_lines,
"low_memory": low_memory,
"mangle_dupe_cols": mangle_dupe_cols,
"infer_datetime_format": infer_datetime_format,
"skip_blank_lines": skip_blank_lines,
**extra_kwargs,
}
if chunksize is None:
@@ -526,18 +525,6 @@ def csv_to_eland( # type: ignore
kwargs.pop("on_bad_lines")
if "squeeze" in kwargs:
kwargs.pop("squeeze")
warnings.warn(
"This argument no longer works, use .squeeze('columns') on your DataFrame instead"
)
if "mangle_dupe_cols" in kwargs:
kwargs.pop("mangle_dupe_cols")
warnings.warn(
"The mangle_dupe_cols argument no longer works. Furthermore, "
"duplicate columns will automatically get a number suffix."
)
# read csv in chunks to pandas DataFrame and dump to eland DataFrame (and Elasticsearch)
reader = pd.read_csv(filepath_or_buffer, **kwargs)
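The warning in this hunk points users at the pandas replacement for the removed `squeeze` argument; a brief sketch of that replacement:

```python
import pandas as pd

# pandas >= 2: read normally, then squeeze single-column frames to a Series
df = pd.read_csv("data.csv")    # "data.csv" is a placeholder path
series = df.squeeze("columns")  # a Series if df has one column, otherwise df unchanged
```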

@@ -712,11 +712,8 @@ class FieldMappings:
capabilities, orient="index", columns=FieldMappings.column_labels
)
self._mappings_capabilities = pd.concat(
[
self._mappings_capabilities,
capability_matrix_row,
]
self._mappings_capabilities = self._mappings_capabilities.append(
capability_matrix_row
)
def numeric_source_fields(self) -> List[str]:
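This hunk swaps `DataFrame.append`, which pandas 2.0 removed, for the equivalent `pd.concat`; a minimal before/after sketch with illustrative variable names:

```python
import pandas as pd

capabilities = pd.DataFrame({"es_field_name": ["a"]})
new_row = pd.DataFrame({"es_field_name": ["b"]})

# pandas 1.x (removed in 2.0): capabilities = capabilities.append(new_row)
capabilities = pd.concat([capabilities, new_row])  # version-safe equivalent
```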

@@ -50,7 +50,10 @@ class Index:
# index_field.setter
self._is_source_field = False
self.es_index_field = es_index_field
# The type:ignore is due to mypy not being smart enough
# to recognize the property.setter has a different type
# than the property.getter.
self.es_index_field = es_index_field # type: ignore
@property
def sort_field(self) -> str:

@@ -19,7 +19,7 @@ import base64
import gzip
import json
from abc import ABC
from typing import Any, Dict, List, Optional, Sequence, Tuple
from typing import Any, Dict, List, Optional, Sequence
def add_if_exists(d: Dict[str, Any], k: str, v: Any) -> None:
@@ -58,9 +58,6 @@ class ModelSerializer(ABC):
"ascii"
)
def bounds(self) -> Tuple[float, float]:
raise NotImplementedError
class TreeNode:
def __init__(
@@ -132,14 +129,6 @@ class Tree(ModelSerializer):
add_if_exists(d, "tree_structure", [t.to_dict() for t in self._tree_structure])
return {"tree": d}
def bounds(self) -> Tuple[float, float]:
leaf_values = [
tree_node._leaf_value[0]
for tree_node in self._tree_structure
if tree_node._leaf_value is not None
]
return min(leaf_values), max(leaf_values)
class Ensemble(ModelSerializer):
def __init__(
@@ -169,9 +158,3 @@ class Ensemble(ModelSerializer):
add_if_exists(d, "classification_weights", self._classification_weights)
add_if_exists(d, "aggregate_output", self._output_aggregator)
return {"ensemble": d}
def bounds(self) -> Tuple[float, float]:
min_bound, max_bound = tuple(
map(sum, zip(*[model.bounds() for model in self._trained_models]))
)
return min_bound, max_bound
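The removed `bounds()` methods compute each tree's (min, max) leaf values and sum them element-wise across the ensemble; a hand check of that aggregation with assumed numbers:

```python
# two trees with (min, max) leaf values; ensemble bounds are the element-wise sums
tree_bounds = [(-1.0, 2.0), (0.5, 1.0)]
min_bound, max_bound = tuple(map(sum, zip(*tree_bounds)))
print(min_bound, max_bound)  # -0.5 3.0
```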

@@ -0,0 +1,16 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

@@ -0,0 +1,226 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any, Dict
import numpy as np
from .._optional import import_optional_dependency
import_optional_dependency("sklearn", on_version="warn")
import sklearn
from sklearn.preprocessing import FunctionTransformer
class Tree:
"""Wrapper to create sklearn Tree objects from Elastic ML tree
description in JSON format.
"""
def __init__(
self,
json_tree: Dict[str, Any],
feature_names_map: Dict[str, int],
):
tree_leaf = -1
node_count = len(json_tree["tree_structure"])
children_left = np.ones((node_count,), dtype=int) * tree_leaf
children_right = np.ones((node_count,), dtype=int) * tree_leaf
feature = np.ones((node_count,), dtype=int) * -2
threshold = np.ones((node_count,), dtype=float) * -2
impurity = np.zeros((node_count,), dtype=float)
# value works only for regression and binary classification
value = np.zeros((node_count, 1, 1), dtype="<f8")
n_node_samples = np.zeros((node_count,), dtype=int)
# parse values from the JSON tree
feature_names = json_tree["feature_names"]
for json_node in json_tree["tree_structure"]:
node_id = json_node["node_index"]
if "number_samples" in json_node:
n_node_samples[node_id] = json_node["number_samples"]
else:
n_node_samples[node_id] = 0
if "leaf_value" not in json_node:
children_left[node_id] = json_node["left_child"]
children_right[node_id] = json_node["right_child"]
feature[node_id] = feature_names_map[
feature_names[json_node["split_feature"]]
]
threshold[node_id] = json_node["threshold"]
if "split_gain" in json_node:
impurity[node_id] = json_node["split_gain"]
else:
impurity[node_id] = -1
else:
value[node_id, 0, 0] = json_node["leaf_value"]
# iterate through tree to get max depth and expected values
weighted_n_node_samples = n_node_samples.copy()
self.max_depth = Tree._compute_expectations(
children_left=children_left,
children_right=children_right,
node_sample_weight=weighted_n_node_samples,
values=value,
node_index=0,
)
self.n_outputs = value.shape[-1]
# initialize the sklearn tree
self.tree = sklearn.tree._tree.Tree(
len(feature_names), np.array([1], dtype=int), 1
)
node_state = np.array(
[
(
children_left[i],
children_right[i],
feature[i],
threshold[i],
impurity[i],
n_node_samples[i],
weighted_n_node_samples[i],
True,
)
for i in range(node_count)
],
dtype={
"names": [
"left_child",
"right_child",
"feature",
"threshold",
"impurity",
"n_node_samples",
"weighted_n_node_samples",
"missing_go_to_left",
],
"formats": ["<i8", "<i8", "<i8", "<f8", "<f8", "<i8", "<f8", "u1"],
},
)
state = {
"max_depth": self.max_depth,
"node_count": node_count,
"nodes": node_state,
"values": value,
}
self.tree.__setstate__(state)
@staticmethod
def _compute_expectations(
children_left, children_right, node_sample_weight, values, node_index
) -> int:
if children_right[node_index] == -1:
return 0
left_index = children_left[node_index]
right_index = children_right[node_index]
depth_left = Tree._compute_expectations(
children_left, children_right, node_sample_weight, values, left_index
)
depth_right = Tree._compute_expectations(
children_left, children_right, node_sample_weight, values, right_index
)
left_weight = node_sample_weight[left_index]
right_weight = node_sample_weight[right_index]
v = (
(
left_weight * values[left_index, :]
+ right_weight * values[right_index, :]
)
/ (left_weight + right_weight)
if left_weight + right_weight > 0
else 0
)
values[node_index, :] = v
return max(depth_left, depth_right) + 1
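`_compute_expectations` fills each internal node's value with the sample-weighted mean of its children and returns the subtree depth; a hand check on a three-node stump, with the numbers made up:

```python
import numpy as np

# node 0 is the root; nodes 1 and 2 are leaves with values 1.0 and 3.0
values = np.zeros((3, 1, 1))      # same (node_count, 1, 1) shape as above
values[1], values[2] = 1.0, 3.0
weights = np.array([40, 10, 30])  # node_sample_weight
values[0] = (weights[1] * values[1] + weights[2] * values[2]) / (weights[1] + weights[2])
print(values[0, 0, 0])  # 2.5; the recursion would return a depth of 1
```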
class TargetMeanEncoder(FunctionTransformer):
"""FunctionTransformer implementation of the target mean encoder, which is
deserialized from the Elastic ML preprocessor description in JSON formats.
"""
def __init__(self, preprocessor: Dict[str, Any]):
self.preprocessor = preprocessor
target_map = self.preprocessor["target_mean_encoding"]["target_map"]
feature_name_out = self.preprocessor["target_mean_encoding"]["feature_name"]
self.field_name_in = self.preprocessor["target_mean_encoding"]["field"]
fallback_value = self.preprocessor["target_mean_encoding"]["default_value"]
def func(column):
return np.array(
[
(
target_map[str(category)]
if category in target_map
else fallback_value
)
for category in column
]
).reshape(-1, 1)
def feature_names_out(ft, carr):
return [feature_name_out if c == self.field_name_in else c for c in carr]
super().__init__(func=func, feature_names_out=feature_names_out)
class FrequencyEncoder(FunctionTransformer):
"""FunctionTransformer implementation of the frequency encoder, which is
deserialized from the Elastic ML preprocessor description in JSON format.
"""
def __init__(self, preprocessor: Dict[str, Any]):
self.preprocessor = preprocessor
frequency_map = self.preprocessor["frequency_encoding"]["frequency_map"]
feature_name_out = self.preprocessor["frequency_encoding"]["feature_name"]
self.field_name_in = self.preprocessor["frequency_encoding"]["field"]
fallback_value = 0.0
def func(column):
return np.array(
[
(
frequency_map[str(category)]
if category in frequency_map
else fallback_value
)
for category in column
]
).reshape(-1, 1)
def feature_names_out(ft, carr):
return [feature_name_out if c == self.field_name_in else c for c in carr]
super().__init__(func=func, feature_names_out=feature_names_out)
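A hypothetical use of `FrequencyEncoder` with a hand-written preprocessor document in the JSON shape the class expects (field names and frequencies are made up):

```python
import numpy as np

preprocessor = {
    "frequency_encoding": {
        "field": "city",
        "feature_name": "city_frequency",
        "frequency_map": {"Paris": 0.5, "Rome": 0.25},
    }
}
encoder = FrequencyEncoder(preprocessor)
print(encoder.transform(np.array(["Paris", "Oslo"])))  # [[0.5], [0.]]; unknowns fall back to 0.0
```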
class OneHotEncoder(sklearn.preprocessing.OneHotEncoder):
"""Wrapper for sklearn one-hot encoder, which is deserialized from the
Elastic ML preprocessor description in JSON format.
"""
def __init__(self, preprocessor: Dict[str, Any]):
self.preprocessor = preprocessor
self.field_name_in = self.preprocessor["one_hot_encoding"]["field"]
self.cats = [list(self.preprocessor["one_hot_encoding"]["hot_map"].keys())]
super().__init__(categories=self.cats, handle_unknown="ignore")

@@ -0,0 +1,46 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import eland
class ModelDefinitionKeyError(Exception):
"""
This exception is raised when a key is not found in the model definition.
Attributes:
missed_key (str): The key that was not found in the model definition.
available_keys (List[str]): The list of keys that are available in the model definition.
Examples:
model_definition = {"key1": "value1", "key2": "value2"}
try:
model_definition["key3"]
except KeyError as ex:
raise ModelDefinitionKeyError(ex) from ex
"""
def __init__(self, ex: KeyError):
self.missed_key = ex.args[0]
def __str__(self):
return (
f'Key "{self.missed_key}" is not available. '
+ "The model definition may have changed. "
+ "Make sure you are using an Elasticsearch version compatible "
+ f"with Eland {eland.__version__}."
)

@@ -0,0 +1,472 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from abc import ABC
from typing import Any, List, Literal, Mapping, Optional, Set, Tuple, Union
import numpy as np
from elasticsearch import Elasticsearch
from numpy.typing import ArrayLike
from .._optional import import_optional_dependency
import_optional_dependency("sklearn", on_version="warn")
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble._gb_losses import (
BinomialDeviance,
HuberLossFunction,
LeastSquaresError,
)
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils.validation import check_array
from eland.common import ensure_es_client
from eland.ml.common import TYPE_CLASSIFICATION, TYPE_REGRESSION
from ._sklearn_deserializers import Tree
from .common import ModelDefinitionKeyError
class ESGradientBoostingModel(ABC):
"""
Abstract class for converting Elastic ML model into sklearn Pipeline.
"""
def __init__(
self,
es_client: Union[str, List[str], Tuple[str, ...], "Elasticsearch"],
model_id: str,
) -> None:
"""
Parameters
----------
es_client : Elasticsearch client argument(s)
- elasticsearch-py parameters or
- elasticsearch-py instance
model_id : str
The unique identifier of the trained inference model in Elasticsearch.
Raises
------
RuntimeError
On failure to retrieve trained model information to the specified model ID.
ValueError
The model is expected to be trained in Elastic Stack. Models initially imported
from xgboost, lgbm, or sklearn are not supported.
"""
self.es_client: Elasticsearch = ensure_es_client(es_client)
self.model_id = model_id
self._trained_model_result = self.es_client.ml.get_trained_models(
model_id=self.model_id,
decompress_definition=True,
include=["hyperparameters", "definition"],
)
if (
"trained_model_configs" not in self._trained_model_result
or len(self._trained_model_result["trained_model_configs"]) == 0
):
raise RuntimeError(
f"Failed to retrieve the trained model for model ID {self.model_id!r}"
)
if "metadata" not in self._trained_model_result["trained_model_configs"][0]:
raise ValueError(
"Error initializing sklearn classifier. Incorrect prior class probability. "
+ "Note: only export of models trained in the Elastic Stack is supported."
)
preprocessors = []
if "preprocessors" in self._definition:
preprocessors = self._definition["preprocessors"]
(
self.feature_names_in_,
self.input_field_names,
) = ESGradientBoostingModel._get_feature_names_in_(
preprocessors,
self._definition["trained_model"]["ensemble"]["feature_names"],
self._trained_model_result["trained_model_configs"][0]["input"][
"field_names"
],
)
feature_names_map = {name: i for i, name in enumerate(self.feature_names_in_)}
trained_models = self._definition["trained_model"]["ensemble"]["trained_models"]
self._trees = []
for trained_model in trained_models:
self._trees.append(Tree(trained_model["tree"], feature_names_map))
# 0's tree is the constant estimator
self.n_estimators = len(trained_models) - 1
def _initialize_estimators(self, decision_tree_type) -> None:
self.estimators_ = np.ndarray(
(len(self._trees) - 1, 1), dtype=decision_tree_type
)
self.n_estimators_ = self.estimators_.shape[0]
for i in range(self.n_estimators_):
estimator = decision_tree_type()
estimator.tree_ = self._trees[i + 1].tree
estimator.n_features_in_ = self.n_features_in_
estimator.max_depth = self._max_depth
estimator.max_features_ = self.max_features_
self.estimators_[i, 0] = estimator
def _extract_common_parameters(self) -> None:
self.n_features_in_ = len(self.feature_names_in_)
self.max_features_ = self.n_features_in_
@property
def _max_depth(self) -> int:
return max(map(lambda x: x.max_depth, self._trees))
@property
def _n_outputs(self) -> int:
return self._trees[0].n_outputs
@property
def _definition(self) -> Mapping[Union[str, int], Any]:
return self._trained_model_result["trained_model_configs"][0]["definition"]
@staticmethod
def _get_feature_names_in_(
preprocessors, feature_names, field_names
) -> Tuple[List[str], Set[str]]:
input_field_names = set()
def add_input_field_name(preprocessor_type: str, feature_name: str) -> None:
if feature_name in feature_names:
input_field_names.add(preprocessor[preprocessor_type]["field"])
for preprocessor in preprocessors:
if "target_mean_encoding" in preprocessor:
add_input_field_name(
"target_mean_encoding",
preprocessor["target_mean_encoding"]["feature_name"],
)
elif "frequency_encoding" in preprocessor:
add_input_field_name(
"frequency_encoding",
preprocessor["frequency_encoding"]["feature_name"],
)
elif "one_hot_encoding" in preprocessor:
for feature_name in preprocessor["one_hot_encoding"][
"hot_map"
].values():
add_input_field_name("one_hot_encoding", feature_name)
for field_name in field_names:
if field_name in feature_names and field_name not in input_field_names:
input_field_names.add(field_name)
return feature_names, input_field_names
@property
def preprocessors(self) -> List[Any]:
"""
Returns the list of preprocessor JSON definitions.
Returns
-------
List[Any]
List of preprocessors definitions or [].
"""
if "preprocessors" in self._definition:
return self._definition["preprocessors"]
return []
def fit(self, X, y, sample_weight=None, monitor=None) -> None:
"""
Override of the sklearn fit() method. It does nothing since Elastic ML models are
trained in the Elastic Stack or imported.
"""
# Do nothing, the model is fitted using the Elasticsearch API
pass
class ESGradientBoostingClassifier(ESGradientBoostingModel, GradientBoostingClassifier):
"""
Elastic ML model wrapper compatible with sklearn GradientBoostingClassifier.
"""
def __init__(
self,
es_client: Union[str, List[str], Tuple[str, ...], "Elasticsearch"],
model_id: str,
) -> None:
"""
Parameters
----------
es_client : Elasticsearch client argument(s)
- elasticsearch-py parameters or
- elasticsearch-py instance
model_id : str
The unique identifier of the trained inference model in Elasticsearch.
Raises
------
NotImplementedError
Multi-class classification is not supported at the moment.
ValueError
The classifier should be defined for at least 2 classes.
ModelDefinitionKeyError
If required data cannot be extracted from the model definition due to a schema change.
"""
try:
ESGradientBoostingModel.__init__(self, es_client, model_id)
self._extract_common_parameters()
GradientBoostingClassifier.__init__(
self,
learning_rate=1.0,
n_estimators=self.n_estimators,
max_depth=self._max_depth,
)
if "classification_labels" in self._definition["trained_model"]["ensemble"]:
self.classes_ = np.array(
self._definition["trained_model"]["ensemble"][
"classification_labels"
]
)
else:
self.classes_ = None
self.n_outputs = self._n_outputs
if self.classes_ is not None:
self.n_classes_ = len(self.classes_)
elif self.n_outputs <= 2:
self.n_classes_ = 2
else:
self.n_classes_ = self.n_outputs
if self.n_classes_ == 2:
self._loss = BinomialDeviance(self.n_classes_)
# self.n_outputs = 1
elif self.n_classes_ > 2:
raise NotImplementedError("Only binary classification is implemented.")
else:
raise ValueError(f"At least 2 classes required. got {self.n_classes_}.")
self.init_ = self._initialize_init_()
self._initialize_estimators(DecisionTreeClassifier)
except KeyError as ex:
raise ModelDefinitionKeyError(ex) from ex
@property
def analysis_type(self) -> Literal["classification"]:
return TYPE_CLASSIFICATION
def _initialize_init_(self) -> DummyClassifier:
estimator = DummyClassifier(strategy="prior")
estimator.n_classes_ = self.n_classes_
estimator.n_outputs_ = self.n_outputs
estimator.classes_ = np.arange(self.n_classes_)
estimator._strategy = estimator.strategy
if self.n_classes_ == 2:
log_odds = self._trees[0].tree.value.flatten()[0]
if np.isnan(log_odds):
raise ValueError(
"Error initializing sklearn classifier. Incorrect prior class probability. "
+ "Note: only export of models trained in the Elastic Stack is supported."
)
class_prior = 1 / (1 + np.exp(-log_odds))
estimator.class_prior_ = np.array([1 - class_prior, class_prior])
else:
raise NotImplementedError("Only binary classification is implemented.")
return estimator
def predict_proba(
self, X, feature_names_in: Optional[Union["ArrayLike", List[str]]] = None
) -> "ArrayLike":
"""Predict class probabilities for X.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input samples.
feature_names_in : {array of string, list of string} of length n_features.
Feature names of the corresponding columns in X. Important because the column
list can be extended by ColumnTransformer through the pipeline. By default None.
Returns
-------
ArrayLike of shape (n_samples, n_classes)
The class probabilities of the input samples. The order of the
classes corresponds to that in the attribute :term:`classes_`.
"""
if feature_names_in is not None:
if X.shape[1] != len(feature_names_in):
raise ValueError(
f"Dimension mismatch: X with {X.shape[1]} columns has to be the same size as feature_names_in with {len(feature_names_in)}."
)
if isinstance(feature_names_in, np.ndarray):
feature_names_in = feature_names_in.tolist()
# select columns used by the model in the correct order
X = X[:, [feature_names_in.index(fn) for fn in self.feature_names_in_]]
X = check_array(X)
return GradientBoostingClassifier.predict_proba(self, X)
def predict(
self,
X: "ArrayLike",
feature_names_in: Optional[Union["ArrayLike", List[str]]] = None,
) -> "ArrayLike":
"""Predict class for X.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input samples.
feature_names_in : {array of string, list of string} of length n_features.
Feature names of the corresponding columns in X. Important because the column
list can be extended by ColumnTransformer through the pipeline. By default None.
Returns
-------
ArrayLike of shape (n_samples,)
The predicted values.
"""
if feature_names_in is not None:
if X.shape[1] != len(feature_names_in):
raise ValueError(
f"Dimension mismatch: X with {X.shape[1]} columns has to be the same size as feature_names_in with {len(feature_names_in)}."
)
if isinstance(feature_names_in, np.ndarray):
feature_names_in = feature_names_in.tolist()
# select columns used by the model in the correct order
X = X[:, [feature_names_in.index(fn) for fn in self.feature_names_in_]]
X = check_array(X)
return GradientBoostingClassifier.predict(self, X)
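A minimal usage sketch for the classifier, assuming a binary model trained in
the Elastic Stack under the hypothetical id "my-classifier" and two invented
feature columns:

import numpy as np

clf = ESGradientBoostingClassifier("http://localhost:9200", "my-classifier")
# Columns may arrive in any order; feature_names_in tells the wrapper how to
# map them onto clf.feature_names_in_ before scoring.
X = np.array([[0.5, 1.2], [3.4, 0.1]])
proba = clf.predict_proba(X, feature_names_in=["feature_b", "feature_a"])
labels = clf.predict(X, feature_names_in=["feature_b", "feature_a"])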
class ESGradientBoostingRegressor(ESGradientBoostingModel, GradientBoostingRegressor):
"""
Elastic ML model wrapper compatible with sklearn GradientBoostingRegressor.
"""
def __init__(
self,
es_client: Union[str, List[str], Tuple[str, ...], "Elasticsearch"],
model_id: str,
) -> None:
"""
Parameters
----------
es_client : Elasticsearch client argument(s)
- elasticsearch-py parameters or
- elasticsearch-py instance
model_id : str
The unique identifier of the trained inference model in Elasticsearch.
Raises
------
NotImplementedError
Only MSE, MSLE, and Huber loss functions are supported.
ModelDefinitionKeyError
If required data cannot be extracted from the model definition due to a schema change.
"""
try:
ESGradientBoostingModel.__init__(self, es_client, model_id)
self._extract_common_parameters()
GradientBoostingRegressor.__init__(
self,
learning_rate=1.0,
n_estimators=self.n_estimators,
max_depth=self._max_depth,
)
self.n_outputs = 1
loss_function = self._trained_model_result["trained_model_configs"][0][
"metadata"
]["analytics_config"]["analysis"][self.analysis_type]["loss_function"]
if loss_function == "mse" or loss_function == "msle":
self.criterion = "squared_error"
self._loss = LeastSquaresError()
elif loss_function == "huber":
loss_parameter = self._trained_model_result[
"trained_model_configs"
][0]["metadata"]["analytics_config"]["analysis"][self.analysis_type][
"loss_function_parameter"
]
self.criterion = "huber"
self._loss = HuberLossFunction(loss_parameter)
else:
raise NotImplementedError(
"Only MSE, MSLE and Huber loss functions are supported."
)
self.init_ = self._initialize_init_()
self._initialize_estimators(DecisionTreeRegressor)
except KeyError as ex:
raise ModelDefinitionKeyError(ex) from ex
@property
def analysis_type(self) -> Literal["regression"]:
return TYPE_REGRESSION
def _initialize_init_(self) -> DummyRegressor:
constant = self._trees[0].tree.value[0]
estimator = DummyRegressor(
strategy="constant",
constant=constant,
)
estimator.constant_ = np.array([constant])
estimator.n_outputs_ = 1
return estimator
def predict(
self,
X: "ArrayLike",
feature_names_in: Optional[Union["ArrayLike", List[str]]] = None,
) -> "ArrayLike":
"""Predict targets for X.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The input samples.
feature_names_in : {array of string, list of string} of length n_features.
Feature names of the corresponding columns in X. Important because the column
list can be extended by ColumnTransformer through the pipeline. By default None.
Returns
-------
ArrayLike of shape (n_samples,)
The predicted values.
"""
if feature_names_in is not None:
if X.shape[1] != len(feature_names_in):
raise ValueError(
f"Dimension mismatch: X with {X.shape[1]} columns has to be the same size as feature_names_in with {len(feature_names_in)}."
)
if isinstance(feature_names_in, np.ndarray):
feature_names_in = feature_names_in.tolist()
# select columns used by the model in the correct order
X = X[:, [feature_names_in.index(fn) for fn in self.feature_names_in_]]
X = check_array(X)
return GradientBoostingRegressor.predict(self, X)
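Because the wrapper behaves like a fitted sklearn estimator, it can also be
handed to tree explainers. A sketch assuming the hypothetical model id
"my-regressor" and the optional shap package (the ML tests further below use
shap.TreeExplainer in the same way):

import numpy as np
import shap

reg = ESGradientBoostingRegressor("http://localhost:9200", "my-regressor")
# Hypothetical feature matrix, columns already ordered as reg.feature_names_in_
X_ordered = np.array([[120.0, 1.0], [450.0, 3.0]])
explainer = shap.TreeExplainer(reg)
shap_values = explainer.shap_values(X_ordered)
# Predictions decompose into the expected value plus per-feature contributions.
predictions = shap_values.sum(axis=1) + explainer.expected_value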

View File

@ -38,6 +38,7 @@ if TYPE_CHECKING:
RandomForestClassifier,
RandomForestRegressor,
)
from sklearn.pipeline import Pipeline # type: ignore # noqa: F401
from sklearn.tree import ( # type: ignore # noqa: F401
DecisionTreeClassifier,
DecisionTreeRegressor,
@ -571,6 +572,83 @@ class MLModel:
return False
return True
def export_model(self) -> "Pipeline":
"""Export Elastic ML model as sklearn Pipeline.
Returns
-------
sklearn.pipeline.Pipeline
A pipeline of the model's categorical preprocessors followed by the
Elastic ML model wrapped as an sklearn estimator.
Raises
------
AssertionError
If preprocessors JSON definition has unexpected schema.
ValueError
The model is expected to be trained in Elastic Stack. Models initially imported
from xgboost, lgbm, or sklearn are not supported.
ValueError
If unexpected categorical encoding is found in the list of preprocessors.
NotImplementedError
Only regression and binary classification models are supported currently.
"""
from sklearn.compose import ColumnTransformer # type: ignore # noqa: F401
from sklearn.pipeline import Pipeline
from .exporters._sklearn_deserializers import (
FrequencyEncoder,
OneHotEncoder,
TargetMeanEncoder,
)
from .exporters.es_gb_models import (
ESGradientBoostingClassifier,
ESGradientBoostingRegressor,
)
if self.model_type == TYPE_CLASSIFICATION:
model = ESGradientBoostingClassifier(
es_client=self._client, model_id=self._model_id
)
elif self.model_type == TYPE_REGRESSION:
model = ESGradientBoostingRegressor(
es_client=self._client, model_id=self._model_id
)
else:
raise NotImplementedError(
"Only regression and binary classification models are supported currently."
)
transformers = []
for p in model.preprocessors:
assert (
len(p) == 1
), f"Unexpected preprocessor data structure: {p}. One-key mapping expected."
encoding_type = list(p.keys())[0]
field = p[encoding_type]["field"]
if encoding_type == "frequency_encoding":
transform = FrequencyEncoder(p)
transformers.append((f"{field}_{encoding_type}", transform, field))
elif encoding_type == "target_mean_encoding":
transform = TargetMeanEncoder(p)
transformers.append((f"{field}_{encoding_type}", transform, field))
elif encoding_type == "one_hot_encoding":
transform = OneHotEncoder(p)
transformers.append((f"{field}_{encoding_type}", transform, [field]))
else:
raise ValueError(
f"Unexpected categorical encoding type {encoding_type} found. "
+ "Expected encodings: frequency_encoding, target_mean_encoding, one_hot_encoding."
)
preprocessor = ColumnTransformer(
transformers=transformers,
remainder="passthrough",
verbose_feature_names_out=False,
)
pipeline = Pipeline(steps=[("preprocessor", preprocessor), ("es_model", model)])
return pipeline
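A usage sketch mirroring the export tests further below; the client address,
index name, and model id are assumptions:

import eland as ed
from eland.ml import MLModel

ed_df = ed.DataFrame("http://localhost:9200", "flights").head(10)
X = ed.eland_to_pandas(ed_df)

model = MLModel(es_client="http://localhost:9200", model_id="my-regression-model")
pipeline = model.export_model()
pipeline.fit(X)  # a no-op: the model was already trained in the Elastic Stack
predictions = pipeline.predict(
    X, feature_names_in=pipeline["preprocessor"].get_feature_names_out()
)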
@property
def _trained_model_config(self) -> Dict[str, Any]:
"""Lazily loads an ML models 'trained_model_config' information"""

View File

@ -126,7 +126,6 @@ class PyTorchModel:
def infer(
self,
docs: List[Mapping[str, str]],
inference_config: Optional[Mapping[str, Any]] = None,
timeout: str = DEFAULT_TIMEOUT,
) -> Any:
if docs is None:
@ -134,8 +133,6 @@ class PyTorchModel:
__body: Dict[str, Any] = {}
__body["docs"] = docs
if inference_config is not None:
__body["inference_config"] = inference_config
__path = f"/_ml/trained_models/{_quote(self.model_id)}/_infer"
__query: Dict[str, Any] = {}

View File

@ -86,27 +86,6 @@ class NlpXLMRobertaTokenizationConfig(NlpTokenizationConfig):
)
class NlpDebertaV2TokenizationConfig(NlpTokenizationConfig):
def __init__(
self,
*,
do_lower_case: t.Optional[bool] = None,
with_special_tokens: t.Optional[bool] = None,
max_sequence_length: t.Optional[int] = None,
truncate: t.Optional[
t.Union["t.Literal['first', 'none', 'second']", str]
] = None,
span: t.Optional[int] = None,
):
super().__init__(
configuration_type="deberta_v2",
with_special_tokens=with_special_tokens,
max_sequence_length=max_sequence_length,
truncate=truncate,
span=span,
)
class NlpBertTokenizationConfig(NlpTokenizationConfig):
def __init__(
self,

View File

@ -25,13 +25,17 @@ import os.path
import random
import re
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Set, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Tuple, Union
import torch # type: ignore
import transformers # type: ignore
from torch import Tensor
from sentence_transformers import SentenceTransformer # type: ignore
from torch import Tensor, nn
from torch.profiler import profile # type: ignore
from transformers import (
AutoConfig,
AutoModel,
AutoModelForQuestionAnswering,
BertTokenizer,
PretrainedConfig,
PreTrainedModel,
@ -44,7 +48,6 @@ from eland.ml.pytorch.nlp_ml_model import (
NerInferenceOptions,
NlpBertJapaneseTokenizationConfig,
NlpBertTokenizationConfig,
NlpDebertaV2TokenizationConfig,
NlpMPNetTokenizationConfig,
NlpRobertaTokenizationConfig,
NlpTokenizationConfig,
@ -61,13 +64,8 @@ from eland.ml.pytorch.nlp_ml_model import (
ZeroShotClassificationInferenceOptions,
)
from eland.ml.pytorch.traceable_model import TraceableModel
from eland.ml.pytorch.wrappers import (
_DistilBertWrapper,
_DPREncoderWrapper,
_QuestionAnsweringWrapperModule,
_SentenceTransformerWrapperModule,
)
DEFAULT_OUTPUT_KEY = "sentence_embedding"
SUPPORTED_TASK_TYPES = {
"fill_mask",
"ner",
@ -118,7 +116,6 @@ SUPPORTED_TOKENIZERS = (
transformers.BartTokenizer,
transformers.SqueezeBertTokenizer,
transformers.XLMRobertaTokenizer,
transformers.DebertaV2Tokenizer,
)
SUPPORTED_TOKENIZERS_NAMES = ", ".join(sorted([str(x) for x in SUPPORTED_TOKENIZERS]))
@ -173,6 +170,283 @@ def task_type_from_model_config(model_config: PretrainedConfig) -> Optional[str]
return potential_task_types.pop()
class _QuestionAnsweringWrapperModule(nn.Module): # type: ignore
"""
A wrapper around a question answering model.
Our inference engine only takes the first element if the inference response
is a tuple.
This wrapper transforms a tuple output into a stacked tensor;
otherwise it passes the output through unchanged.
"""
def __init__(self, model: PreTrainedModel):
super().__init__()
self._hf_model = model
self.config = model.config
@staticmethod
def from_pretrained(model_id: str, *, token: Optional[str] = None) -> Optional[Any]:
model = AutoModelForQuestionAnswering.from_pretrained(
model_id, token=token, torchscript=True
)
if isinstance(
model.config,
(
transformers.MPNetConfig,
transformers.XLMRobertaConfig,
transformers.RobertaConfig,
transformers.BartConfig,
),
):
return _TwoParameterQuestionAnsweringWrapper(model)
else:
return _QuestionAnsweringWrapper(model)
class _QuestionAnsweringWrapper(_QuestionAnsweringWrapperModule):
def __init__(self, model: PreTrainedModel):
super().__init__(model=model)
def forward(
self,
input_ids: Tensor,
attention_mask: Tensor,
token_type_ids: Tensor,
position_ids: Tensor,
) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
inputs = {
"input_ids": input_ids,
"attention_mask": attention_mask,
"token_type_ids": token_type_ids,
"position_ids": position_ids,
}
# remove inputs for specific model types
if isinstance(self._hf_model.config, transformers.DistilBertConfig):
del inputs["token_type_ids"]
del inputs["position_ids"]
response = self._hf_model(**inputs)
if isinstance(response, tuple):
return torch.stack(list(response), dim=0)
return response
class _TwoParameterQuestionAnsweringWrapper(_QuestionAnsweringWrapperModule):
def __init__(self, model: PreTrainedModel):
super().__init__(model=model)
def forward(self, input_ids: Tensor, attention_mask: Tensor) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
inputs = {
"input_ids": input_ids,
"attention_mask": attention_mask,
}
response = self._hf_model(**inputs)
if isinstance(response, tuple):
return torch.stack(list(response), dim=0)
return response
class _DistilBertWrapper(nn.Module): # type: ignore
"""
In Elasticsearch the BERT tokenizer is used for DistilBERT models but
the BERT tokenizer produces 4 inputs where DistilBERT models expect 2.
This wraps the model's forward function in a method that accepts the 4
arguments passed to a BERT model, then discards the token_type_ids
and the position_ids to match the wrapped DistilBERT model's forward
function.
"""
def __init__(self, model: transformers.PreTrainedModel):
super().__init__()
self._model = model
self.config = model.config
@staticmethod
def try_wrapping(model: PreTrainedModel) -> Optional[Any]:
if isinstance(model.config, transformers.DistilBertConfig):
return _DistilBertWrapper(model)
else:
return model
def forward(
self,
input_ids: Tensor,
attention_mask: Tensor,
_token_type_ids: Tensor = None,
_position_ids: Tensor = None,
) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
return self._model(input_ids=input_ids, attention_mask=attention_mask)
class _SentenceTransformerWrapperModule(nn.Module): # type: ignore
"""
A wrapper around sentence-transformer models to provide pooling,
normalization and other graph layers that are not defined in the base
HuggingFace transformer model.
"""
def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
super().__init__()
self._hf_model = model
self._st_model = SentenceTransformer(model.config.name_or_path)
self._output_key = output_key
self.config = model.config
self._remove_pooling_layer()
self._replace_transformer_layer()
@staticmethod
def from_pretrained(
model_id: str,
tokenizer: PreTrainedTokenizer,
*,
token: Optional[str] = None,
output_key: str = DEFAULT_OUTPUT_KEY,
) -> Optional[Any]:
model = AutoModel.from_pretrained(model_id, token=token, torchscript=True)
if isinstance(
tokenizer,
(
transformers.BartTokenizer,
transformers.MPNetTokenizer,
transformers.RobertaTokenizer,
transformers.XLMRobertaTokenizer,
),
):
return _TwoParameterSentenceTransformerWrapper(model, output_key)
else:
return _SentenceTransformerWrapper(model, output_key)
def _remove_pooling_layer(self) -> None:
"""
Removes any last pooling layer which is not used to create embeddings.
Leaving this layer in will cause it to return a NoneType which in turn
will fail to load in libtorch. Alternatively, we can just use the output
of the pooling layer as a dummy but this also affects (if only in a
minor way) the performance of inference, so we're better off removing
the layer if we can.
"""
if hasattr(self._hf_model, "pooler"):
self._hf_model.pooler = None
def _replace_transformer_layer(self) -> None:
"""
Replaces the HuggingFace Transformer layer in the SentenceTransformer
modules with one that has the pooling layer removed and was loaded
ready for TorchScript export.
"""
self._st_model._modules["0"].auto_model = self._hf_model
class _SentenceTransformerWrapper(_SentenceTransformerWrapperModule):
def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
super().__init__(model=model, output_key=output_key)
def forward(
self,
input_ids: Tensor,
attention_mask: Tensor,
token_type_ids: Tensor,
position_ids: Tensor,
) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
inputs = {
"input_ids": input_ids,
"attention_mask": attention_mask,
"token_type_ids": token_type_ids,
"position_ids": position_ids,
}
# remove inputs for specific model types
if isinstance(self._hf_model.config, transformers.DistilBertConfig):
del inputs["token_type_ids"]
return self._st_model(inputs)[self._output_key]
class _TwoParameterSentenceTransformerWrapper(_SentenceTransformerWrapperModule):
def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
super().__init__(model=model, output_key=output_key)
def forward(self, input_ids: Tensor, attention_mask: Tensor) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
inputs = {
"input_ids": input_ids,
"attention_mask": attention_mask,
}
return self._st_model(inputs)[self._output_key]
class _DPREncoderWrapper(nn.Module): # type: ignore
"""
AutoModel loading does not work for DPRContextEncoders; this class exists
only as a workaround. This may never be fixed upstream, so it is likely permanent.
See: https://github.com/huggingface/transformers/issues/13670
"""
_SUPPORTED_MODELS = {
transformers.DPRContextEncoder,
transformers.DPRQuestionEncoder,
}
_SUPPORTED_MODELS_NAMES = set([x.__name__ for x in _SUPPORTED_MODELS])
def __init__(
self,
model: Union[transformers.DPRContextEncoder, transformers.DPRQuestionEncoder],
):
super().__init__()
self._model = model
self.config = model.config
@staticmethod
def from_pretrained(model_id: str, *, token: Optional[str] = None) -> Optional[Any]:
config = AutoConfig.from_pretrained(model_id, token=token)
def is_compatible() -> bool:
is_dpr_model = config.model_type == "dpr"
has_architectures = (
config.architectures is not None and len(config.architectures) == 1
)
is_supported_architecture = has_architectures and (
config.architectures[0] in _DPREncoderWrapper._SUPPORTED_MODELS_NAMES
)
return is_dpr_model and is_supported_architecture
if is_compatible():
model = getattr(transformers, config.architectures[0]).from_pretrained(
model_id, torchscript=True
)
return _DPREncoderWrapper(model)
else:
return None
def forward(
self,
input_ids: Tensor,
attention_mask: Tensor,
token_type_ids: Tensor,
_position_ids: Tensor,
) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
return self._model(
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
)
class _TransformerTraceableModel(TraceableModel):
"""A base class representing a HuggingFace transformer model that can be traced."""
@ -214,15 +488,9 @@ class _TransformerTraceableModel(TraceableModel):
transformers.XLMRobertaTokenizer,
),
):
del inputs["token_type_ids"]
return (inputs["input_ids"], inputs["attention_mask"])
if isinstance(self._tokenizer, transformers.DebertaV2Tokenizer):
return (
inputs["input_ids"],
inputs["attention_mask"],
inputs["token_type_ids"],
)
position_ids = torch.arange(inputs["input_ids"].size(1), dtype=torch.long)
inputs["position_ids"] = position_ids
return (
@ -255,15 +523,6 @@ class _TraceableFillMaskModel(_TransformerTraceableModel):
)
class _TraceableTextExpansionModel(_TransformerTraceableModel):
def _prepare_inputs(self) -> transformers.BatchEncoding:
return self._tokenizer(
"This is an example sentence.",
padding="max_length",
return_tensors="pt",
)
class _TraceableNerModel(_TraceableClassificationModel):
def _prepare_inputs(self) -> transformers.BatchEncoding:
return self._tokenizer(
@ -298,7 +557,7 @@ class _TraceableTextEmbeddingModel(_TransformerTraceableModel):
def _prepare_inputs(self) -> transformers.BatchEncoding:
return self._tokenizer(
"This is an example sentence.",
padding="longest",
padding="max_length",
return_tensors="pt",
)
@ -422,12 +681,7 @@ class TransformerModel:
" ".join(m) for m, _ in sorted(ranks.items(), key=lambda kv: kv[1])
]
vocab_obj["merges"] = merges
if isinstance(self._tokenizer, transformers.DebertaV2Tokenizer):
sp_model = self._tokenizer._tokenizer.spm
else:
sp_model = getattr(self._tokenizer, "sp_model", None)
sp_model = getattr(self._tokenizer, "sp_model", None)
if sp_model:
id_correction = getattr(self._tokenizer, "fairseq_offset", 0)
scores = []
@ -465,11 +719,6 @@ class TransformerModel:
return NlpXLMRobertaTokenizationConfig(
max_sequence_length=_max_sequence_length
)
elif isinstance(self._tokenizer, transformers.DebertaV2Tokenizer):
return NlpDebertaV2TokenizationConfig(
max_sequence_length=_max_sequence_length,
do_lower_case=getattr(self._tokenizer, "do_lower_case", None),
)
else:
japanese_morphological_tokenizers = ["mecab"]
if (
@ -492,7 +741,7 @@ class TransformerModel:
# a random or very large value.
REASONABLE_MAX_LENGTH = 8192
max_len = getattr(self._tokenizer, "model_max_length", None)
if max_len is not None and max_len <= REASONABLE_MAX_LENGTH:
if max_len is not None and max_len < REASONABLE_MAX_LENGTH:
return int(max_len)
max_sizes = getattr(self._tokenizer, "max_model_input_sizes", dict())
@ -726,13 +975,6 @@ class TransformerModel:
else:
self._task_type = maybe_task_type
if self._task_type == "text_expansion":
model = transformers.AutoModelForMaskedLM.from_pretrained(
self._model_id, token=self._access_token, torchscript=True
)
model = _DistilBertWrapper.try_wrapping(model)
return _TraceableTextExpansionModel(self._tokenizer, model)
if self._task_type == "fill_mask":
model = transformers.AutoModelForMaskedLM.from_pretrained(
self._model_id, token=self._access_token, torchscript=True
@ -792,7 +1034,7 @@ class TransformerModel:
else:
raise TypeError(
f"Task {self._task_type} is not supported, must be one of: {SUPPORTED_TASK_TYPES_NAMES}"
f"Unknown task type {self._task_type}, must be one of: {SUPPORTED_TASK_TYPES_NAMES}"
)
def elasticsearch_model_id(self) -> str:
@ -823,5 +1065,9 @@ def elasticsearch_model_id(model_id: str) -> str:
"""
id = re.sub(r"[\s\\/]", "__", model_id).lower()[-64:]
id = id.removeprefix("__")
if id.startswith("__"):
# This check is only needed as long as Eland supports Python 3.8
# str.removeprefix was introduced in Python 3.9 and can be used
# once 3.8 support is dropped
id = id[2:]
return id
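A quick illustration, hand-computed from the substitution above (the Hugging
Face model id is just an example):

>>> elasticsearch_model_id("sentence-transformers/all-MiniLM-L6-v2")
'sentence-transformers__all-minilm-l6-v2'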

View File

@ -1,317 +0,0 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
This module contains the wrapper classes for the Hugging Face models.
Wrapping is necessary to ensure that the forward method of the model
is called with the same arguments the ml-cpp pytorch_inference process
uses.
"""
from typing import Any, Optional, Union
import torch # type: ignore
import transformers # type: ignore
from sentence_transformers import SentenceTransformer # type: ignore
from torch import Tensor, nn
from transformers import (
AutoConfig,
AutoModel,
AutoModelForQuestionAnswering,
PreTrainedModel,
PreTrainedTokenizer,
)
DEFAULT_OUTPUT_KEY = "sentence_embedding"
class _QuestionAnsweringWrapperModule(nn.Module): # type: ignore
"""
A wrapper around a question answering model.
Our inference engine only takes the first element if the inference response
is a tuple.
This wrapper transforms a tuple output into a stacked tensor;
otherwise it passes the output through unchanged.
"""
def __init__(self, model: PreTrainedModel):
super().__init__()
self._hf_model = model
self.config = model.config
@staticmethod
def from_pretrained(model_id: str, *, token: Optional[str] = None) -> Optional[Any]:
model = AutoModelForQuestionAnswering.from_pretrained(
model_id, token=token, torchscript=True
)
if isinstance(
model.config,
(
transformers.MPNetConfig,
transformers.XLMRobertaConfig,
transformers.RobertaConfig,
transformers.BartConfig,
),
):
return _TwoParameterQuestionAnsweringWrapper(model)
else:
return _QuestionAnsweringWrapper(model)
class _QuestionAnsweringWrapper(_QuestionAnsweringWrapperModule):
def __init__(self, model: PreTrainedModel):
super().__init__(model=model)
def forward(
self,
input_ids: Tensor,
attention_mask: Tensor,
token_type_ids: Tensor,
position_ids: Tensor,
) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
inputs = {
"input_ids": input_ids,
"attention_mask": attention_mask,
"token_type_ids": token_type_ids,
"position_ids": position_ids,
}
# remove inputs for specific model types
if isinstance(self._hf_model.config, transformers.DistilBertConfig):
del inputs["token_type_ids"]
del inputs["position_ids"]
response = self._hf_model(**inputs)
if isinstance(response, tuple):
return torch.stack(list(response), dim=0)
return response
class _TwoParameterQuestionAnsweringWrapper(_QuestionAnsweringWrapperModule):
def __init__(self, model: PreTrainedModel):
super().__init__(model=model)
def forward(self, input_ids: Tensor, attention_mask: Tensor) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
inputs = {
"input_ids": input_ids,
"attention_mask": attention_mask,
}
response = self._hf_model(**inputs)
if isinstance(response, tuple):
return torch.stack(list(response), dim=0)
return response
class _DistilBertWrapper(nn.Module): # type: ignore
"""
In Elasticsearch the BERT tokenizer is used for DistilBERT models but
the BERT tokenizer produces 4 inputs where DistilBERT models expect 2.
This wraps the model's forward function in a method that accepts the 4
arguments passed to a BERT model, then discards the token_type_ids
and the position_ids to match the wrapped DistilBERT model's forward
function.
"""
def __init__(self, model: transformers.PreTrainedModel):
super().__init__()
self._model = model
self.config = model.config
@staticmethod
def try_wrapping(model: PreTrainedModel) -> Optional[Any]:
if isinstance(model.config, transformers.DistilBertConfig):
return _DistilBertWrapper(model)
else:
return model
def forward(
self,
input_ids: Tensor,
attention_mask: Tensor,
_token_type_ids: Tensor = None,
_position_ids: Tensor = None,
) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
return self._model(input_ids=input_ids, attention_mask=attention_mask)
class _SentenceTransformerWrapperModule(nn.Module): # type: ignore
"""
A wrapper around sentence-transformer models to provide pooling,
normalization and other graph layers that are not defined in the base
HuggingFace transformer model.
"""
def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
super().__init__()
self._hf_model = model
self._st_model = SentenceTransformer(model.config.name_or_path)
self._output_key = output_key
self.config = model.config
self._remove_pooling_layer()
self._replace_transformer_layer()
@staticmethod
def from_pretrained(
model_id: str,
tokenizer: PreTrainedTokenizer,
*,
token: Optional[str] = None,
output_key: str = DEFAULT_OUTPUT_KEY,
) -> Optional[Any]:
model = AutoModel.from_pretrained(model_id, token=token, torchscript=True)
if isinstance(
tokenizer,
(
transformers.BartTokenizer,
transformers.MPNetTokenizer,
transformers.RobertaTokenizer,
transformers.XLMRobertaTokenizer,
transformers.DebertaV2Tokenizer,
),
):
return _TwoParameterSentenceTransformerWrapper(model, output_key)
else:
return _SentenceTransformerWrapper(model, output_key)
def _remove_pooling_layer(self) -> None:
"""
Removes any last pooling layer which is not used to create embeddings.
Leaving this layer in will cause it to return a NoneType which in turn
will fail to load in libtorch. Alternatively, we can just use the output
of the pooling layer as a dummy but this also affects (if only in a
minor way) the performance of inference, so we're better off removing
the layer if we can.
"""
if hasattr(self._hf_model, "pooler"):
self._hf_model.pooler = None
def _replace_transformer_layer(self) -> None:
"""
Replaces the HuggingFace Transformer layer in the SentenceTransformer
modules with one that has the pooling layer removed and was loaded
ready for TorchScript export.
"""
self._st_model._modules["0"].auto_model = self._hf_model
class _SentenceTransformerWrapper(_SentenceTransformerWrapperModule):
def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
super().__init__(model=model, output_key=output_key)
def forward(
self,
input_ids: Tensor,
attention_mask: Tensor,
token_type_ids: Tensor,
position_ids: Tensor,
) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
inputs = {
"input_ids": input_ids,
"attention_mask": attention_mask,
"token_type_ids": token_type_ids,
"position_ids": position_ids,
}
# remove inputs for specific model types
if isinstance(self._hf_model.config, transformers.DistilBertConfig):
del inputs["token_type_ids"]
return self._st_model(inputs)[self._output_key]
class _TwoParameterSentenceTransformerWrapper(_SentenceTransformerWrapperModule):
def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
super().__init__(model=model, output_key=output_key)
def forward(self, input_ids: Tensor, attention_mask: Tensor) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
inputs = {
"input_ids": input_ids,
"attention_mask": attention_mask,
}
return self._st_model(inputs)[self._output_key]
class _DPREncoderWrapper(nn.Module): # type: ignore
"""
AutoModel loading does not work for DPRContextEncoders; this class exists
only as a workaround. This may never be fixed upstream, so it is likely permanent.
See: https://github.com/huggingface/transformers/issues/13670
"""
_SUPPORTED_MODELS = {
transformers.DPRContextEncoder,
transformers.DPRQuestionEncoder,
}
_SUPPORTED_MODELS_NAMES = set([x.__name__ for x in _SUPPORTED_MODELS])
def __init__(
self,
model: Union[transformers.DPRContextEncoder, transformers.DPRQuestionEncoder],
):
super().__init__()
self._model = model
self.config = model.config
@staticmethod
def from_pretrained(model_id: str, *, token: Optional[str] = None) -> Optional[Any]:
config = AutoConfig.from_pretrained(model_id, token=token)
def is_compatible() -> bool:
is_dpr_model = config.model_type == "dpr"
has_architectures = (
config.architectures is not None and len(config.architectures) == 1
)
is_supported_architecture = has_architectures and (
config.architectures[0] in _DPREncoderWrapper._SUPPORTED_MODELS_NAMES
)
return is_dpr_model and is_supported_architecture
if is_compatible():
model = getattr(transformers, config.architectures[0]).from_pretrained(
model_id, torchscript=True
)
return _DPREncoderWrapper(model)
else:
return None
def forward(
self,
input_ids: Tensor,
attention_mask: Tensor,
token_type_ids: Tensor,
_position_ids: Tensor,
) -> Tensor:
"""Wrap the input and output to conform to the native process interface."""
return self._model(
input_ids=input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids,
)

View File

@ -40,12 +40,11 @@ from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple, Union
import numpy as np
import pandas as pd # type: ignore
from pandas.core.indexes.frozen import FrozenList
from pandas.io.common import _expand_user, stringify_path # type: ignore
import eland.plotting
from eland.arithmetics import ArithmeticNumber, ArithmeticSeries, ArithmeticString
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, PANDAS_VERSION, docstring_parameter
from eland.common import DEFAULT_NUM_ROWS_DISPLAYED, docstring_parameter
from eland.filter import (
BooleanFilter,
Equal,
@ -293,26 +292,18 @@ class Series(NDFrame):
Examples
--------
>>> df = ed.DataFrame('http://localhost:9200', 'flights')
>>> df['Carrier'].value_counts() # doctest: +SKIP
Carrier
>>> df['Carrier'].value_counts()
Logstash Airways 3331
JetBeats 3274
Kibana Airlines 3234
ES-Air 3220
Name: count, dtype: int64
Name: Carrier, dtype: int64
"""
if not isinstance(es_size, int):
raise TypeError("es_size must be a positive integer.")
elif es_size <= 0:
raise ValueError("es_size must be a positive integer.")
value_counts = self._query_compiler.value_counts(es_size)
# https://pandas.pydata.org/docs/whatsnew/v2.0.0.html#value-counts-sets-the-resulting-name-to-count
if PANDAS_VERSION[0] == 2:
value_counts.name = "count"
value_counts.index.names = FrozenList([self.es_field_name])
value_counts.index.name = self.es_field_name
return value_counts
return self._query_compiler.value_counts(es_size)
# dtype not implemented for Series as causes query to fail
# in pandas.core.computation.ops.Term.type

View File

@ -16,6 +16,7 @@
# under the License.
import os
import subprocess
from pathlib import Path
import nox
@ -57,10 +58,10 @@ TYPED_FILES = (
@nox.session(reuse_venv=True, python="3.11")
def format(session):
session.install("black ~= 25.0", "isort", "flynt")
session.install("black", "isort", "flynt")
session.run("python", "utils/license-headers.py", "fix", *SOURCE_FILES)
session.run("flynt", *SOURCE_FILES)
session.run("black", "--target-version=py39", *SOURCE_FILES)
session.run("black", "--target-version=py38", *SOURCE_FILES)
session.run("isort", "--profile=black", *SOURCE_FILES)
lint(session)
@ -69,34 +70,38 @@ def format(session):
def lint(session):
# Install numpy to use its mypy plugin
# https://numpy.org/devdocs/reference/typing.html#mypy-plugin
session.install("black ~= 25.0", "flake8", "mypy", "isort", "numpy")
session.install("black", "flake8", "mypy", "isort", "numpy")
session.install(".")
session.run("python", "utils/license-headers.py", "check", *SOURCE_FILES)
session.run("black", "--check", "--target-version=py39", *SOURCE_FILES)
session.run("black", "--check", "--target-version=py38", *SOURCE_FILES)
session.run("isort", "--check", "--profile=black", *SOURCE_FILES)
session.run("flake8", "--extend-ignore=E203,E402,E501,E704,E712", *SOURCE_FILES)
# TODO: When all files are typed we can change this to .run("mypy", "--strict", "eland/")
stdout = session.run(
"mypy",
"--show-error-codes",
"--strict",
*TYPED_FILES,
success_codes=(0, 1),
silent=True,
)
session.log("mypy --show-error-codes --strict eland/")
for typed_file in TYPED_FILES:
if not os.path.isfile(typed_file):
session.error(f"The file {typed_file!r} couldn't be found")
process = subprocess.run(
["mypy", "--show-error-codes", "--strict", typed_file],
env=session.env,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
# Ensure that mypy itself ran successfully
assert process.returncode in (0, 1)
errors = []
for line in stdout.splitlines():
filepath = line.partition(":")[0]
if filepath in TYPED_FILES:
errors.append(line)
if errors:
session.error("\n" + "\n".join(sorted(set(errors))))
errors = []
for line in process.stdout.decode().split("\n"):
filepath = line.partition(":")[0]
if filepath in TYPED_FILES:
errors.append(line)
if errors:
session.error("\n" + "\n".join(sorted(set(errors))))
@nox.session(python=["3.9", "3.10", "3.11", "3.12"])
@nox.parametrize("pandas_version", ["1.5.0", "2.2.3"])
@nox.session(python=["3.8", "3.9", "3.10", "3.11"])
@nox.parametrize("pandas_version", ["1.5.0"])
def test(session, pandas_version: str):
session.install("-r", "requirements-dev.txt")
session.install(".")
@ -116,6 +121,9 @@ def test(session, pandas_version: str):
"--nbval",
)
# PyTorch 2.3.1 doesn't support Python 3.12
if session.python == "3.12":
pytest_args += ("--ignore=eland/ml/pytorch",)
session.run(
*pytest_args,
*(session.posargs or ("eland/", "tests/")),
@ -132,6 +140,7 @@ def test(session, pandas_version: str):
"scikit-learn",
"xgboost",
"lightgbm",
"shap",
)
session.run("pytest", "tests/ml/")

View File

@ -10,6 +10,7 @@ pytest>=5.2.1
pytest-mock
pytest-cov
nbval
shap==0.43.0
#
# Docs

View File

@ -38,10 +38,10 @@ CLASSIFIERS = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering",
]
@ -56,16 +56,16 @@ with open(path.join(here, "README.md"), "r", "utf-8") as f:
extras = {
"xgboost": ["xgboost>=0.90,<2"],
"scikit-learn": ["scikit-learn>=1.3,<1.6"],
"lightgbm": ["lightgbm>=3,<5"],
"scikit-learn": ["scikit-learn>=1.3,<1.4"],
"lightgbm": ["lightgbm>=2,<4"],
"pytorch": [
"requests<3",
"torch==2.5.1",
"torch==2.3.1",
"tqdm",
"sentence-transformers>=2.1.0,<=2.7.0",
# sentencepiece is a required dependency for the slow tokenizers
# https://huggingface.co/transformers/v4.4.2/migration.html#sentencepiece-is-removed-from-the-required-dependencies
"transformers[sentencepiece]>=4.47.0",
"transformers[sentencepiece]>=4.31.0,<4.44.0",
],
}
extras["all"] = list({dep for deps in extras.values() for dep in deps})
@ -86,8 +86,8 @@ setup(
keywords="elastic eland pandas python",
packages=find_packages(include=["eland", "eland.*"]),
install_requires=[
"elasticsearch>=9,<10",
"pandas>=1.5,<3",
"elasticsearch>=8.3,<9",
"pandas>=1.5,<2",
"matplotlib>=3.6",
"numpy>=1.2.0,<2",
"packaging",
@ -95,7 +95,7 @@ setup(
entry_points={
"console_scripts": "eland_import_hub_model=eland.cli.eland_import_hub_model:main"
},
python_requires=">=3.9,<3.13",
python_requires=">=3.8,<3.12",
package_data={"eland": ["py.typed"]},
include_package_data=True,
zip_safe=False,

View File

@ -24,7 +24,6 @@ import pandas as pd
from pandas.testing import assert_frame_equal, assert_series_equal
import eland as ed
from eland.common import PANDAS_VERSION
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
@ -46,10 +45,7 @@ with gzip.open(FLIGHTS_FILE_NAME) as f:
_pd_flights = pd.DataFrame.from_records(flight_records).reindex(
_ed_flights.columns, axis=1
)
if PANDAS_VERSION[0] >= 2:
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"], format="mixed")
else:
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
_pd_flights["timestamp"] = pd.to_datetime(_pd_flights["timestamp"])
# Mimic what copy_to in an Elasticsearch mapping would do, combining the two fields in a list
_pd_flights["Cities"] = _pd_flights.apply(
lambda x: list(sorted([x["OriginCityName"], x["DestCityName"]])), axis=1
@ -66,7 +62,7 @@ _pd_ecommerce["products.created_on"] = _pd_ecommerce["products.created_on"].appl
)
_pd_ecommerce.insert(2, "customer_birth_date", None)
_pd_ecommerce.index = _pd_ecommerce.index.map(str) # make index 'object' not int
_pd_ecommerce["customer_birth_date"].astype("datetime64[ns]")
_pd_ecommerce["customer_birth_date"].astype("datetime64")
_ed_ecommerce = ed.DataFrame(ES_TEST_CLIENT, ECOMMERCE_INDEX_NAME)

View File

@ -77,16 +77,7 @@ class SymmetricAPIChecker:
pd_exc = e
self.check_exception(ed_exc, pd_exc)
try:
self.check_values(ed_obj, pd_obj)
except AssertionError as e:
# This is an attribute we allow to differ when comparing zero-length objects
if (
'Attribute "inferred_type" are different' in repr(e)
and len(ed_obj) == 0
and len(pd_obj) == 0
):
self.check_values(ed_obj, pd_obj, check_index_type=False)
self.check_values(ed_obj, pd_obj)
if isinstance(ed_obj, (ed.DataFrame, ed.Series)):
return SymmetricAPIChecker(ed_obj, pd_obj)
@ -94,16 +85,16 @@ class SymmetricAPIChecker:
return f
def check_values(self, ed_obj, pd_obj, **kwargs):
def check_values(self, ed_obj, pd_obj):
"""Checks that any two values coming from eland and pandas are equal"""
if isinstance(ed_obj, ed.DataFrame):
assert_pandas_eland_frame_equal(pd_obj, ed_obj, **kwargs)
assert_pandas_eland_frame_equal(pd_obj, ed_obj)
elif isinstance(ed_obj, ed.Series):
assert_pandas_eland_series_equal(pd_obj, ed_obj, **kwargs)
assert_pandas_eland_series_equal(pd_obj, ed_obj)
elif isinstance(ed_obj, pd.DataFrame):
assert_frame_equal(ed_obj, pd_obj, **kwargs)
assert_frame_equal(ed_obj, pd_obj)
elif isinstance(ed_obj, pd.Series):
assert_series_equal(ed_obj, pd_obj, **kwargs)
assert_series_equal(ed_obj, pd_obj)
elif isinstance(ed_obj, pd.Index):
assert ed_obj.equals(pd_obj)
else:

View File

@ -87,8 +87,6 @@ class TestDataFrameDateTime(TestData):
},
index=["0", "1", "2"],
)
# https://pandas.pydata.org/docs/whatsnew/v2.0.0.html#construction-with-datetime64-or-timedelta64-dtype-with-unsupported-resolution
df["D"] = df["D"].astype("datetime64[ns]")
expected_mappings = {
"mappings": {

View File

@ -33,17 +33,9 @@ class TestDataFrameDescribe(TestData):
["Cancelled", "FlightDelay"], axis="columns"
)
# Pandas >= 2 calculates aggregations such as min and max for timestamps too
# This could be implemented in eland, but as of yet this is not the case
# We therefore remove it before the comparison
if "timestamp" in pd_describe.columns:
pd_describe = pd_describe.drop(["timestamp"], axis="columns")
# Pandas >= 2 orders the aggregations differently than Pandas < 2
# A sort_index is applied so tests will succeed in both environments
assert_frame_equal(
pd_describe.drop(["25%", "50%", "75%"], axis="index").sort_index(),
ed_describe.drop(["25%", "50%", "75%"], axis="index").sort_index(),
pd_describe.drop(["25%", "50%", "75%"], axis="index"),
ed_describe.drop(["25%", "50%", "75%"], axis="index"),
check_exact=False,
rtol=True,
)

View File

@ -99,7 +99,7 @@ class TestDataFrameHeadTail(TestData):
ed_head_0 = ed_flights.head(0)
pd_head_0 = pd_flights.head(0)
assert_pandas_eland_frame_equal(pd_head_0, ed_head_0, check_index_type=False)
assert_pandas_eland_frame_equal(pd_head_0, ed_head_0)
def test_doc_test_tail(self):
df = self.ed_flights()

View File

@ -22,7 +22,6 @@ import pandas as pd
import pytest
from pandas.testing import assert_frame_equal, assert_series_equal
from eland.common import PANDAS_VERSION
from tests.common import TestData, assert_almost_equal
@ -75,8 +74,6 @@ class TestDataFrameMetrics(TestData):
logger.setLevel(logging.DEBUG)
for func in self.extended_funcs:
if PANDAS_VERSION[0] >= 2 and func == "mad":
continue
pd_metric = getattr(pd_flights, func)(
**({"numeric_only": True} if func != "mad" else {})
)
@ -95,8 +92,6 @@ class TestDataFrameMetrics(TestData):
ed_flights_1 = ed_flights[ed_flights.FlightNum == "9HY9SWR"][["AvgTicketPrice"]]
for func in self.extended_funcs:
if PANDAS_VERSION[0] >= 2 and func == "mad":
continue
pd_metric = getattr(pd_flights_1, func)()
ed_metric = getattr(ed_flights_1, func)(numeric_only=False)
@ -107,8 +102,6 @@ class TestDataFrameMetrics(TestData):
ed_flights_0 = ed_flights[ed_flights.FlightNum == "XXX"][["AvgTicketPrice"]]
for func in self.extended_funcs:
if PANDAS_VERSION[0] >= 2 and func == "mad":
continue
pd_metric = getattr(pd_flights_0, func)()
ed_metric = getattr(ed_flights_0, func)(numeric_only=False)
@ -498,13 +491,8 @@ class TestDataFrameMetrics(TestData):
["AvgTicketPrice", "FlightDelayMin", "dayOfWeek"]
)
if PANDAS_VERSION[0] == 1:
pd_quantile = pd_flights.agg(["quantile", "min"], numeric_only=numeric_only)
ed_quantile = ed_flights.agg(["quantile", "min"], numeric_only=numeric_only)
else: # numeric_only is no longer available for pandas >= 2
pd_quantile = pd_flights.agg(["quantile", "min"])
ed_quantile = ed_flights.agg(["quantile", "min"])
pd_quantile = pd_flights.agg(["quantile", "min"], numeric_only=numeric_only)
ed_quantile = ed_flights.agg(["quantile", "min"], numeric_only=numeric_only)
assert_frame_equal(
pd_quantile, ed_quantile, check_exact=False, rtol=4, check_dtype=False

View File

@ -69,12 +69,6 @@ class TestDataFrameUtils(TestData):
)
ed_df_head = ed_df.head()
# https://pandas.pydata.org/docs/whatsnew/v2.0.0.html#construction-with-datetime64-or-timedelta64-dtype-with-unsupported-resolution
df["D"] = df["D"].astype("datetime64[ns]")
df["H"] = (
df["H"].dt.tz_localize(None).astype("datetime64[ns]").dt.tz_localize("UTC")
)
assert_pandas_eland_frame_equal(df, ed_df_head)
ES_TEST_CLIENT.indices.delete(index=index_name)

View File

@ -39,7 +39,6 @@ try:
from eland.ml.pytorch import (
FillMaskInferenceOptions,
NlpBertTokenizationConfig,
NlpDebertaV2TokenizationConfig,
NlpMPNetTokenizationConfig,
NlpRobertaTokenizationConfig,
NlpXLMRobertaTokenizationConfig,
@ -58,6 +57,10 @@ except ImportError:
from tests import ES_VERSION
pytestmark = [
pytest.mark.skipif(
ES_VERSION < (8, 15, 1),
reason="Eland uses Pytorch 2.3.1, versions of Elasticsearch prior to 8.15.1 are incompatible with PyTorch 2.3.1",
),
pytest.mark.skipif(
not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run"
),
@ -146,14 +149,6 @@ if HAS_PYTORCH and HAS_SKLEARN and HAS_TRANSFORMERS:
1024,
None,
),
(
"microsoft/deberta-v3-xsmall",
"fill_mask",
FillMaskInferenceOptions,
NlpDebertaV2TokenizationConfig,
512,
None,
),
]
else:
MODEL_CONFIGURATIONS = []

View File

@ -38,6 +38,10 @@ except ImportError:
from tests import ES_TEST_CLIENT, ES_VERSION
pytestmark = [
pytest.mark.skipif(
ES_VERSION < (8, 15, 2),
reason="Eland uses Pytorch 2.3.1, versions of Elasticsearch prior to 8.15.2 are incompatible with PyTorch 2.3.1",
),
pytest.mark.skipif(
not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run"
),
@ -63,8 +67,6 @@ TEXT_EMBEDDING_MODELS = [
)
]
TEXT_SIMILARITY_MODELS = ["mixedbread-ai/mxbai-rerank-xsmall-v1"]
@pytest.fixture(scope="function", autouse=True)
def setup_and_tear_down():
@ -133,25 +135,3 @@ class TestPytorchModel:
)
> 0
)
@pytest.mark.skipif(
ES_VERSION < (8, 16, 0), reason="requires 8.16.0 for DeBERTa models"
)
@pytest.mark.parametrize("model_id", TEXT_SIMILARITY_MODELS)
def test_text_similarity(self, model_id):
with tempfile.TemporaryDirectory() as tmp_dir:
ptm = download_model_and_start_deployment(
tmp_dir, False, model_id, "text_similarity"
)
result = ptm.infer(
docs=[
{
"text_field": "The Amazon rainforest covers most of the Amazon basin in South America"
},
{"text_field": "Paris is the capital of France"},
],
inference_config={"text_similarity": {"text": "France"}},
)
assert result.body["inference_results"][0]["predicted_value"] < 0
assert result.body["inference_results"][1]["predicted_value"] > 0

View File

@ -15,18 +15,20 @@
# specific language governing permissions and limitations
# under the License.
from operator import itemgetter
from typing import Tuple
import numpy as np
import pytest
import eland as ed
from eland.ml import MLModel
from eland.ml.ltr import FeatureLogger, LTRModelConfig, QueryFeatureExtractor
from eland.ml.transformers import get_model_transformer
from tests import (
ES_IS_SERVERLESS,
ES_TEST_CLIENT,
ES_VERSION,
FLIGHTS_SMALL_INDEX_NAME,
NATIONAL_PARKS_INDEX_NAME,
)
@ -53,16 +55,26 @@ try:
except ImportError:
HAS_LIGHTGBM = False
try:
import shap
HAS_SHAP = True
except ImportError:
HAS_SHAP = False
requires_sklearn = pytest.mark.skipif(
not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run"
not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run."
)
requires_xgboost = pytest.mark.skipif(
not HAS_XGBOOST, reason="This test requires 'xgboost' package to run"
not HAS_XGBOOST, reason="This test requires 'xgboost' package to run."
)
requires_shap = pytest.mark.skipif(
not HAS_SHAP, reason="This test requires 'shap' package to run."
)
requires_no_ml_extras = pytest.mark.skipif(
HAS_SKLEARN or HAS_XGBOOST,
reason="This test requires 'scikit-learn' and 'xgboost' to not be installed",
reason="This test requires 'scikit-learn' and 'xgboost' to not be installed.",
)
requires_lightgbm = pytest.mark.skipif(
@ -96,6 +108,102 @@ def check_prediction_equality(es_model: MLModel, py_model, test_data):
np.testing.assert_almost_equal(test_results, es_results, decimal=2)
def yield_model_id(analysis, analyzed_fields):
import random
import string
import time
suffix = "".join(random.choices(string.ascii_lowercase, k=4))
job_id = "test-flights-regression-" + suffix
dest = job_id + "-dest"
response = ES_TEST_CLIENT.ml.put_data_frame_analytics(
id=job_id,
analysis=analysis,
dest={"index": dest},
source={"index": [FLIGHTS_SMALL_INDEX_NAME]},
analyzed_fields=analyzed_fields,
)
assert response.meta.status == 200
response = ES_TEST_CLIENT.ml.start_data_frame_analytics(id=job_id)
assert response.meta.status == 200
# give the analytics job a moment to finish and register the trained model
time.sleep(2)
response = ES_TEST_CLIENT.ml.get_trained_models(model_id=job_id + "*")
assert response.meta.status == 200
assert response.body["count"] == 1
model_id = response.body["trained_model_configs"][0]["model_id"]
yield model_id
ES_TEST_CLIENT.ml.delete_data_frame_analytics(id=job_id)
ES_TEST_CLIENT.indices.delete(index=dest)
ES_TEST_CLIENT.ml.delete_trained_model(model_id=model_id)
@pytest.fixture(params=[[0, 4], [0, 1], range(5)])
def regression_model_id(request):
analysis = {
"regression": {
"dependent_variable": "FlightDelayMin",
"max_trees": 3,
"num_top_feature_importance_values": 0,
"max_optimization_rounds_per_hyperparameter": 1,
"prediction_field_name": "FlightDelayMin_prediction",
"training_percent": 30,
"randomize_seed": 1000,
"loss_function": "mse",
"early_stopping_enabled": True,
}
}
all_includes = [
"FlightDelayMin",
"FlightDelayType",
"FlightTimeMin",
"DistanceMiles",
"OriginAirportID",
]
includes = [all_includes[i] for i in request.param]
analyzed_fields = {
"includes": includes,
"excludes": [],
}
yield from yield_model_id(analysis=analysis, analyzed_fields=analyzed_fields)
@pytest.fixture(params=[[0, 6], [5, 6], range(7)])
def classification_model_id(request):
analysis = {
"classification": {
"dependent_variable": "Cancelled",
"max_trees": 5,
"num_top_feature_importance_values": 0,
"max_optimization_rounds_per_hyperparameter": 1,
"prediction_field_name": "Cancelled_prediction",
"training_percent": 50,
"randomize_seed": 1000,
"num_top_classes": -1,
"class_assignment_objective": "maximize_accuracy",
"early_stopping_enabled": True,
}
}
all_includes = [
"OriginWeather",
"OriginAirportID",
"DestCityName",
"DestWeather",
"DestRegion",
"AvgTicketPrice",
"Cancelled",
]
includes = [all_includes[i] for i in request.param]
analyzed_fields = {
"includes": includes,
"excludes": [],
}
yield from yield_model_id(analysis=analysis, analyzed_fields=analyzed_fields)
def randomize_model_id(prefix, suffix_size=10):
import random
import string
@ -220,46 +328,6 @@ class TestMLModel:
# Clean up
es_model.delete_model()
def _normalize_ltr_score_from_XGBRanker(self, ranker, ltr_model_config, scores):
"""Normalize the scores of an XGBRanker model as ES implementation of LTR would do.
Parameters
----------
ranker : XGBRanker
The XGBRanker model to retrieve the minimum score from.
ltr_model_config : LTRModelConfig
LTR model config.
Returns
-------
scores : List[float]
Normalized scores for the model.
"""
should_rescore = (
(ES_VERSION[0] == 8 and ES_VERSION >= (8, 19))
or (
ES_VERSION[0] == 9
and (ES_VERSION[1] >= 1 or (ES_VERSION[1] == 0 and ES_VERSION[2] >= 1))
)
or ES_IS_SERVERLESS
)
if should_rescore:
# In 8.19+, 9.0.1 and 9.1, the scores are normalized if there are negative scores
min_model_score, _ = (
get_model_transformer(
ranker, feature_names=ltr_model_config.feature_names
)
.transform()
.bounds()
)
if min_model_score < 0:
scores = [score - min_model_score for score in scores]
return scores
@requires_elasticsearch_version((8, 12))
@requires_xgboost
@pytest.mark.parametrize("compress_model_definition", [True, False])
@ -371,11 +439,6 @@ class TestMLModel:
],
reverse=True,
)
expected_scores = self._normalize_ltr_score_from_XGBRanker(
ranker, ltr_model_config, expected_scores
)
np.testing.assert_almost_equal(expected_scores, doc_scores, decimal=2)
# Verify prediction is not supported for LTR
@ -727,3 +790,172 @@ class TestMLModel:
# Clean up
es_model.delete_model()
@requires_sklearn
@requires_shap
def test_export_regressor(self, regression_model_id):
ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME).head(10)
types = dict(ed_flights.dtypes)
X = ed_flights.to_pandas().astype(types)
model = MLModel(es_client=ES_TEST_CLIENT, model_id=regression_model_id)
pipeline = model.export_model()
pipeline.fit(X)
predictions_sklearn = pipeline.predict(
X, feature_names_in=pipeline["preprocessor"].get_feature_names_out()
)
response = ES_TEST_CLIENT.ml.infer_trained_model(
model_id=regression_model_id,
docs=X[pipeline["es_model"].input_field_names].to_dict("records"),
)
predictions_es = np.array(
list(
map(
itemgetter("FlightDelayMin_prediction"),
response.body["inference_results"],
)
)
)
np.testing.assert_array_almost_equal(predictions_sklearn, predictions_es)
import pandas as pd
X_transformed = pipeline["preprocessor"].transform(X=X)
X_transformed = pd.DataFrame(
X_transformed, columns=pipeline["preprocessor"].get_feature_names_out()
)
explainer = shap.TreeExplainer(pipeline["es_model"])
shap_values = explainer.shap_values(
X_transformed[pipeline["es_model"].feature_names_in_]
)
np.testing.assert_array_almost_equal(
predictions_sklearn, shap_values.sum(axis=1) + explainer.expected_value
)
@requires_sklearn
def test_export_classification(self, classification_model_id):
ed_flights = ed.DataFrame(ES_TEST_CLIENT, FLIGHTS_SMALL_INDEX_NAME).head(10)
X = ed.eland_to_pandas(ed_flights)
model = MLModel(es_client=ES_TEST_CLIENT, model_id=classification_model_id)
pipeline = model.export_model()
pipeline.fit(X)
predictions_sklearn = pipeline.predict(
X, feature_names_in=pipeline["preprocessor"].get_feature_names_out()
)
prediction_proba_sklearn = pipeline.predict_proba(
X, feature_names_in=pipeline["preprocessor"].get_feature_names_out()
).max(axis=1)
response = ES_TEST_CLIENT.ml.infer_trained_model(
model_id=classification_model_id,
docs=X[pipeline["es_model"].input_field_names].to_dict("records"),
)
predictions_es = np.array(
list(
map(
lambda x: str(int(x["Cancelled_prediction"])),
response.body["inference_results"],
)
)
)
prediction_proba_es = np.array(
list(
map(
itemgetter("prediction_probability"),
response.body["inference_results"],
)
)
)
np.testing.assert_array_almost_equal(
prediction_proba_sklearn, prediction_proba_es
)
np.testing.assert_array_equal(predictions_sklearn, predictions_es)
import pandas as pd
X_transformed = pipeline["preprocessor"].transform(X=X)
X_transformed = pd.DataFrame(
X_transformed, columns=pipeline["preprocessor"].get_feature_names_out()
)
explainer = shap.TreeExplainer(pipeline["es_model"])
shap_values = explainer.shap_values(
X_transformed[pipeline["es_model"].feature_names_in_]
)
log_odds = shap_values.sum(axis=1) + explainer.expected_value
prediction_proba_shap = 1 / (1 + np.exp(-log_odds))
# use probability of the predicted class
prediction_proba_shap[prediction_proba_shap < 0.5] = (
1 - prediction_proba_shap[prediction_proba_shap < 0.5]
)
np.testing.assert_array_almost_equal(
prediction_proba_sklearn, prediction_proba_shap
)
@requires_xgboost
@requires_sklearn
@pytest.mark.parametrize("objective", ["binary:logistic", "reg:squarederror"])
def test_xgb_import_export(self, objective):
booster = "gbtree"
if objective.startswith("binary:"):
training_data = datasets.make_classification(n_features=5)
xgb_model = XGBClassifier(
booster=booster, objective=objective, use_label_encoder=False
)
else:
training_data = datasets.make_regression(n_features=5)
xgb_model = XGBRegressor(
booster=booster, objective=objective, use_label_encoder=False
)
# Train model
xgb_model.fit(training_data[0], training_data[1])
# Serialise the models to Elasticsearch
feature_names = ["feature0", "feature1", "feature2", "feature3", "feature4"]
model_id = "test_xgb_model"
es_model = MLModel.import_model(
ES_TEST_CLIENT, model_id, xgb_model, feature_names, es_if_exists="replace"
)
# Export is supposed to fail
with pytest.raises(ValueError) as ex:
es_model.export_model()
assert ex.match("Error initializing sklearn classifier.")
# Clean up
es_model.delete_model()
@requires_lightgbm
@pytest.mark.parametrize("objective", ["regression", "binary"])
def test_lgbm_import_export(self, objective):
booster = "gbdt"
if objective == "binary":
training_data = datasets.make_classification(n_features=5)
lgbm_model = LGBMClassifier(boosting_type=booster, objective=objective)
else:
training_data = datasets.make_regression(n_features=5)
lgbm_model = LGBMRegressor(boosting_type=booster, objective=objective)
# Train model
lgbm_model.fit(training_data[0], training_data[1])
# Serialise the model to Elasticsearch
feature_names = ["feature0", "feature1", "feature2", "feature3", "feature4"]
model_id = "test_lgbm_model"
es_model = MLModel.import_model(
ES_TEST_CLIENT, model_id, lgbm_model, feature_names, es_if_exists="replace"
)
# Export is expected to fail
with pytest.raises(ValueError) as ex:
es_model.export_model()
assert ex.match("Error initializing sklearn classifier.")
# Clean up
es_model.delete_model()

View File

@ -1647,14 +1647,6 @@
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/code/eland/.nox/test-3-12-pandas_version-2-2-3/lib/python3.12/site-packages/eland/series.py:464: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
" return self._query_compiler.dtypes[0]\n"
]
},
{
"data": {
"text/html": [
@ -1800,9 +1792,6 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"# The ignore statement above is because of output difference between Pandas 1 and 2\n",
"# and can be removed once Pandas 1 support is dropped\n",
"ed_flights.query('Carrier == \"Kibana Airlines\" & AvgTicketPrice > 900.0 & Cancelled == True')"
]
},
@ -2388,8 +2377,8 @@
" <th>AvgTicketPrice</th>\n",
" <th>DistanceKilometers</th>\n",
" <th>...</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>dayOfWeek</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
@ -2399,15 +2388,23 @@
" <td>13059.000000</td>\n",
" <td>...</td>\n",
" <td>13059.000000</td>\n",
" <td>13059</td>\n",
" <td>13059.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>628.253689</td>\n",
" <td>7092.142455</td>\n",
" <td>...</td>\n",
" <td>511.127842</td>\n",
" <td>2.835975</td>\n",
" <td>2018-01-21 19:20:45.564438016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>266.396861</td>\n",
" <td>4578.438497</td>\n",
" <td>...</td>\n",
" <td>334.753952</td>\n",
" <td>1.939439</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
@ -2415,65 +2412,57 @@
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>2018-01-01 00:00:00</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>409.893816</td>\n",
" <td>2459.705673</td>\n",
" <td>...</td>\n",
" <td>252.333192</td>\n",
" <td>1.000000</td>\n",
" <td>2018-01-11 05:16:25.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>640.556668</td>\n",
" <td>7610.330866</td>\n",
" <td>...</td>\n",
" <td>503.045170</td>\n",
" <td>3.000000</td>\n",
" <td>2018-01-22 00:32:11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>842.185470</td>\n",
" <td>9736.637600</td>\n",
" <td>...</td>\n",
" <td>720.416036</td>\n",
" <td>4.000000</td>\n",
" <td>2018-02-01 04:51:18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1199.729053</td>\n",
" <td>19881.482315</td>\n",
" <td>...</td>\n",
" <td>1902.902032</td>\n",
" <td>6.000000</td>\n",
" <td>2018-02-11 23:50:12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>266.396861</td>\n",
" <td>4578.438497</td>\n",
" <td>...</td>\n",
" <td>1.939439</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 8 columns</p>\n",
"<p>8 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice DistanceKilometers ... dayOfWeek timestamp\n",
"count 13059.000000 13059.000000 ... 13059.000000 13059\n",
"mean 628.253689 7092.142455 ... 2.835975 2018-01-21 19:20:45.564438016\n",
"min 100.020528 0.000000 ... 0.000000 2018-01-01 00:00:00\n",
"25% 409.893816 2459.705673 ... 1.000000 2018-01-11 05:16:25.500000\n",
"50% 640.556668 7610.330866 ... 3.000000 2018-01-22 00:32:11\n",
"75% 842.185470 9736.637600 ... 4.000000 2018-02-01 04:51:18\n",
"max 1199.729053 19881.482315 ... 6.000000 2018-02-11 23:50:12\n",
"std 266.396861 4578.438497 ... 1.939439 NaN\n",
" AvgTicketPrice DistanceKilometers ... FlightTimeMin dayOfWeek\n",
"count 13059.000000 13059.000000 ... 13059.000000 13059.000000\n",
"mean 628.253689 7092.142455 ... 511.127842 2.835975\n",
"std 266.396861 4578.438497 ... 334.753952 1.939439\n",
"min 100.020528 0.000000 ... 0.000000 0.000000\n",
"25% 409.893816 2459.705673 ... 252.333192 1.000000\n",
"50% 640.556668 7610.330866 ... 503.045170 3.000000\n",
"75% 842.185470 9736.637600 ... 720.416036 4.000000\n",
"max 1199.729053 19881.482315 ... 1902.902032 6.000000\n",
"\n",
"[8 rows x 8 columns]"
"[8 rows x 7 columns]"
]
},
"execution_count": 39,
@ -2482,8 +2471,6 @@
}
],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
"# Once support for pandas <2 is dropped, this and the line above can be removed\n",
"pd_flights.describe()"
]
},
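
The describe() hunks above reflect a pandas behaviour change rather than an eland one: pandas 2.x always includes datetime columns in describe() output (with NaN for statistics such as std that are not defined for them), while pandas 1.x excluded datetime columns by default. A minimal standalone sketch of the difference, not part of the diff itself:

import pandas as pd

df = pd.DataFrame(
    {
        "price": [100.0, 640.0, 1199.0],
        "timestamp": pd.to_datetime(["2018-01-01", "2018-01-22", "2018-02-11"]),
    }
)

# Under pandas 2.x the "timestamp" column is summarised alongside the numeric
# one (count/mean/min/25%/50%/75%/max, NaN for std), matching the 8-column
# table on the main side; under pandas 1.x it is dropped by default, matching
# the 7-column table on the v8.15.1 side.
print(df.describe())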

View File

@ -58,9 +58,7 @@ class TestSeriesFilter(TestData):
ed_ser = ed_flights_small.filter(items=items, axis=0)
pd_ser = pd_flights_small.filter(items=items, axis=0)
# For an empty Series, eland reports the datatype it knows from the Elasticsearch index,
# whereas pandas reports the datatype of the empty result
assert_pandas_eland_series_equal(pd_ser, ed_ser, check_index_type=False)
assert_pandas_eland_series_equal(pd_ser, ed_ser)
def test_flights_filter_index_like_and_regex(self):
ed_flights_small = self.ed_flights_small()["FlightDelayType"]

View File

@ -24,7 +24,6 @@ import pandas as pd
import pytest
from pandas.testing import assert_series_equal
from eland.common import PANDAS_VERSION
from tests.common import TestData, assert_almost_equal
@ -43,8 +42,6 @@ class TestSeriesMetrics(TestData):
ed_flights = self.ed_flights()["AvgTicketPrice"]
for func in self.all_funcs:
if PANDAS_VERSION[0] >= 2 and func == "mad":
continue
pd_metric = getattr(pd_flights, func)()
ed_metric = getattr(ed_flights, func)()
@ -90,8 +87,6 @@ class TestSeriesMetrics(TestData):
ed_ecommerce = self.ed_ecommerce()[column]
for func in self.all_funcs:
if PANDAS_VERSION[0] >= 2 and func == "mad":
continue
pd_metric = getattr(pd_ecommerce, func)()
ed_metric = getattr(ed_ecommerce, func)(
**({"numeric_only": True} if (func != "nunique") else {})