From 51a2b9cc19ac52ee0ca1435bd18e44dfc4290c2b Mon Sep 17 00:00:00 2001
From: "Mark J. Hoy" <mark.hoy@elastic.co>
Date: Wed, 23 Apr 2025 11:53:32 -0400
Subject: [PATCH] Add 9.1.0 Snapshot to Build and Fix test_ml_model Tests to
 Normalize Expected Scores if Min Score is Less Than Zero (#777)

* normalized expected scores if min is < 0

* only normalize scores for ES after 8.19+ / 9.1+

* add 9.1.0 snapshot to build matrix

* get min score from booster trees

* removing typing on function definition

* properly flatten our tree leaf scores

* simplify getting min score

* debugging messages

* get all the matches in better way

* Fix model score normalization.

* lint

* lint again

* lint; correct return for bounds map/list

* revert to Aurelien's fix

* re-lint :/

---------

Co-authored-by: Aurelien FOUCRET <aurelien.foucret@elastic.co>
---
 .buildkite/pipeline.yml          |  3 ++-
 eland/ml/_model_serializer.py    | 19 +++++++++++++++-
 tests/ml/test_ml_model_pytest.py | 39 ++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 6b4c630..6402cc9 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -45,5 +45,6 @@ steps:
           - '3.10'
           - '3.9'
         stack:          
-          - '9.0.0-SNAPSHOT'
+          - '9.0.0'
+          - '9.1.0-SNAPSHOT'
     command: ./.buildkite/run-tests
diff --git a/eland/ml/_model_serializer.py b/eland/ml/_model_serializer.py
index efea762..d5ecb28 100644
--- a/eland/ml/_model_serializer.py
+++ b/eland/ml/_model_serializer.py
@@ -19,7 +19,7 @@ import base64
 import gzip
 import json
 from abc import ABC
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 
 def add_if_exists(d: Dict[str, Any], k: str, v: Any) -> None:
@@ -58,6 +58,9 @@ class ModelSerializer(ABC):
             "ascii"
         )
 
+    def bounds(self) -> Tuple[float, float]:
+        raise NotImplementedError  # base stub; Tree and Ensemble override this
+
 
 class TreeNode:
     def __init__(
@@ -129,6 +132,14 @@ class Tree(ModelSerializer):
         add_if_exists(d, "tree_structure", [t.to_dict() for t in self._tree_structure])
         return {"tree": d}
 
+    def bounds(self) -> Tuple[float, float]:
+        """Return (min, max) of the first leaf value across this tree's leaf nodes."""
+        leaf_payloads = (node._leaf_value for node in self._tree_structure)
+        firsts = [payload[0] for payload in leaf_payloads if payload is not None]
+        # min()/max() raise ValueError if no node carries a leaf value.
+        lowest, highest = min(firsts), max(firsts)
+        return lowest, highest
+
 
 class Ensemble(ModelSerializer):
     def __init__(
@@ -158,3 +169,9 @@ class Ensemble(ModelSerializer):
         add_if_exists(d, "classification_weights", self._classification_weights)
         add_if_exists(d, "aggregate_output", self._output_aggregator)
         return {"ensemble": d}
+
+    def bounds(self) -> Tuple[float, float]:
+        """Return the sums of the sub-models' lower and upper bounds."""
+        per_model = [model.bounds() for model in self._trained_models]
+        lows, highs = zip(*per_model)
+        return sum(lows), sum(highs)
diff --git a/tests/ml/test_ml_model_pytest.py b/tests/ml/test_ml_model_pytest.py
index 6342f90..71e1234 100644
--- a/tests/ml/test_ml_model_pytest.py
+++ b/tests/ml/test_ml_model_pytest.py
@@ -22,6 +22,7 @@ import pytest
 
 from eland.ml import MLModel
 from eland.ml.ltr import FeatureLogger, LTRModelConfig, QueryFeatureExtractor
+from eland.ml.transformers import get_model_transformer
 from tests import (
     ES_IS_SERVERLESS,
     ES_TEST_CLIENT,
@@ -219,6 +220,39 @@ class TestMLModel:
         # Clean up
         es_model.delete_model()
 
+    def _normalize_ltr_score_from_XGBRanker(self, ranker, ltr_model_config, scores):
+        """Shift expected LTR scores as ES does for models that can score below 0.
+
+        Parameters
+        ----------
+        ranker : XGBRanker
+            Model whose lower score bound is looked up.
+        ltr_model_config : LTRModelConfig
+            LTR model config; its feature names are passed to the transformer.
+        scores : List[float]
+            Expected scores to adjust.
+
+        Returns
+        -------
+        scores : List[float]
+            Scores minus the model's lower bound when that bound is negative
+            and the target ES normalizes scores; otherwise the input unchanged.
+        """
+        # Negative-score normalization applies to 8.19+ (8.x), 9.1+ and serverless.
+        on_8x = ES_VERSION[0] == 8 and ES_VERSION >= (8, 19)
+        on_9x = ES_VERSION >= (9, 1)
+        if not (on_8x or on_9x or ES_IS_SERVERLESS):
+            return scores
+
+        transformer = get_model_transformer(
+            ranker, feature_names=ltr_model_config.feature_names
+        )
+        lowest_score, _ = transformer.transform().bounds()
+        if lowest_score < 0:
+            # Shift so that the model's lower bound maps to zero.
+            return [value - lowest_score for value in scores]
+        return scores
+
     @requires_elasticsearch_version((8, 12))
     @requires_xgboost
     @pytest.mark.parametrize("compress_model_definition", [True, False])
@@ -330,6 +364,11 @@ class TestMLModel:
             ],
             reverse=True,
         )
+
+        expected_scores = self._normalize_ltr_score_from_XGBRanker(
+            ranker, ltr_model_config, expected_scores
+        )
+
         np.testing.assert_almost_equal(expected_scores, doc_scores, decimal=2)
 
         # Verify prediction is not supported for LTR