Add 9.1.0 Snapshot to Build and Fix test_ml_model Tests to Normalize Expected Scores if Min Score is Less Than Zero (#777)

* normalize expected scores if min is < 0 * only normalize scores for ES after 8.19+ / 9.1+ * add 9.1.0 snapshot to build matrix * get min score from booster trees * removing typing on function definition * properly flatten our tree leaf scores * simplify getting min score * debugging messages * get all the matches in a better way * Fix model score normalization. * lint * lint again * lint; correct return for bounds map/list * revert to Aurelien's fix * re-lint :/ --------- Co-authored-by: Aurelien FOUCRET <aurelien.foucret@elastic.co>
2025-07-11 00:02:14 +08:00 · 2025-04-23 11:53:32 -04:00 · 2025-04-23 11:53:32 -04:00 · 51a2b9cc19
commit 51a2b9cc19
parent a9c36927f6
3 changed files with 59 additions and 2 deletions
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@ -45,5 +45,6 @@ steps:
          - '3.10'
          - '3.9'
        stack:          
-          - '9.0.0-SNAPSHOT'
+          - '9.0.0'
          - '9.1.0-SNAPSHOT'
    command: ./.buildkite/run-tests
--- a/eland/ml/_model_serializer.py
+++ b/eland/ml/_model_serializer.py
@ -19,7 +19,7 @@ import base64
 import gzip
 import json
 from abc import ABC
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Any, Dict, List, Optional, Sequence, Tuple
 def add_if_exists(d: Dict[str, Any], k: str, v: Any) -> None:
@ -58,6 +58,9 @@ class ModelSerializer(ABC):
            "ascii"
        )
    def bounds(self) -> Tuple[float, float]:
        """Return the ``(minimum, maximum)`` output bounds of the model.

        This base implementation is a placeholder: concrete serializers
        are expected to override it.

        Raises
        ------
        NotImplementedError
            Always, when called on a serializer that does not override it.
        """
        raise NotImplementedError()
 class TreeNode:
    def __init__(
@ -129,6 +132,14 @@ class Tree(ModelSerializer):
        add_if_exists(d, "tree_structure", [t.to_dict() for t in self._tree_structure])
        return {"tree": d}
    def bounds(self) -> Tuple[float, float]:
        """Return the smallest and largest leaf value of this tree.

        Only the first entry of each node's ``_leaf_value`` is considered;
        nodes whose ``_leaf_value`` is ``None`` are skipped.

        Returns
        -------
        Tuple[float, float]
            ``(min, max)`` over the collected leaf values.

        Raises
        ------
        ValueError
            If no node in the tree carries a leaf value.
        """
        leaf_scores = []
        for node in self._tree_structure:
            if node._leaf_value is None:
                # Not a leaf: contributes nothing to the bounds.
                continue
            leaf_scores.append(node._leaf_value[0])

        lowest = min(leaf_scores)
        highest = max(leaf_scores)
        return lowest, highest
 class Ensemble(ModelSerializer):
    def __init__(
@ -158,3 +169,9 @@ class Ensemble(ModelSerializer):
        add_if_exists(d, "classification_weights", self._classification_weights)
        add_if_exists(d, "aggregate_output", self._output_aggregator)
        return {"ensemble": d}
    def bounds(self) -> Tuple[float, float]:
        """Return the summed output bounds of every model in the ensemble.

        Each entry of ``_trained_models`` is asked for its own
        ``(min, max)`` bounds; the lower bounds are added together and the
        upper bounds are added together.

        NOTE(review): this is a plain sum that does not read the output
        aggregator or any weights -- confirm that matches the configured
        aggregation.

        Returns
        -------
        Tuple[float, float]
            ``(sum of minimums, sum of maximums)``.
        """
        # Transpose [(lo, hi), (lo, hi), ...] into (lo, lo, ...), (hi, hi, ...).
        lower_bounds, upper_bounds = zip(
            *(model.bounds() for model in self._trained_models)
        )
        return sum(lower_bounds), sum(upper_bounds)
--- a/tests/ml/test_ml_model_pytest.py
+++ b/tests/ml/test_ml_model_pytest.py
@ -22,6 +22,7 @@ import pytest
 from eland.ml import MLModel
 from eland.ml.ltr import FeatureLogger, LTRModelConfig, QueryFeatureExtractor
 from eland.ml.transformers import get_model_transformer
 from tests import (
    ES_IS_SERVERLESS,
    ES_TEST_CLIENT,
@ -219,6 +220,39 @@ class TestMLModel:
        # Clean up
        es_model.delete_model()
    def _normalize_ltr_score_from_XGBRanker(self, ranker, ltr_model_config, scores):
        """Shift expected XGBRanker scores the way the ES LTR implementation does.

        Parameters
        ----------
        ranker : XGBRanker
            The XGBRanker model to retrieve the minimum score from.
        ltr_model_config : LTRModelConfig
            LTR model config; its ``feature_names`` are passed to the
            model transformer.
        scores : List[float]
            Raw scores to normalize.

        Returns
        -------
        scores : List[float]
            The scores, each reduced by the model's minimum score when that
            minimum is negative and the target ES normalizes; otherwise the
            input scores unchanged.
        """
        # In 8.19 and 9.1, the scores are normalized if there are negative scores
        normalization_applies = (
            (ES_VERSION[0] == 8 and ES_VERSION >= (8, 19))
            or ES_VERSION >= (9, 1)
            or ES_IS_SERVERLESS
        )
        if not normalization_applies:
            return scores

        transformer = get_model_transformer(
            ranker, feature_names=ltr_model_config.feature_names
        )
        lowest_model_score, _ = transformer.transform().bounds()

        if lowest_model_score < 0:
            return [score - lowest_model_score for score in scores]
        return scores
    @requires_elasticsearch_version((8, 12))
    @requires_xgboost
    @pytest.mark.parametrize("compress_model_definition", [True, False])
@ -330,6 +364,11 @@ class TestMLModel:
            ],
            reverse=True,
        )
        expected_scores = self._normalize_ltr_score_from_XGBRanker(
            ranker, ltr_model_config, expected_scores
        )
        np.testing.assert_almost_equal(expected_scores, doc_scores, decimal=2)
        # Verify prediction is not supported for LTR