Add prefix_string config option to the import model hub script (#642)

2025-07-11 00:02:14 +08:00 · 2024-01-19 08:06:57 +00:00 · 2024-01-19 08:06:57 +00:00 · 64216d44fb
commit 64216d44fb
parent 0a6e3db157
4 changed files with 90 additions and 3 deletions
--- a/eland/cli/eland_import_hub_model.py
+++ b/eland/cli/eland_import_hub_model.py
@ -128,6 +128,19 @@ def get_arg_parser():
        "--ca-certs", required=False, default=DEFAULT, help="Path to CA bundle"
    )

+    parser.add_argument(
+        "--ingest-prefix",
+        required=False,
+        default=None,
+        help="String to prepend to model input at ingest",
+    )
+    parser.add_argument(
+        "--search-prefix",
+        required=False,
+        default=None,
+        help="String to prepend to model input at search",
+    )
+
    return parser


@ -244,6 +257,8 @@ def main():
                task_type=args.task_type,
                es_version=cluster_version,
                quantize=args.quantize,
+                ingest_prefix=args.ingest_prefix,
+                search_prefix=args.search_prefix,
            )
            model_path, config, vocab_path = tm.save(tmp_dir)
        except TaskTypeError as err:
--- a/eland/ml/pytorch/nlp_ml_model.py
+++ b/eland/ml/pytorch/nlp_ml_model.py
@ -308,6 +308,23 @@ class TrainedModelInput:
        return self.__dict__


+class PrefixStrings:
+    def __init__(
+        self, *, ingest_prefix: t.Optional[str], search_prefix: t.Optional[str]
+    ):
+        self.ingest_prefix = ingest_prefix
+        self.search_prefix = search_prefix
+
+    def to_dict(self) -> t.Dict[str, t.Any]:
+        config = {}
+        if self.ingest_prefix is not None:
+            config["ingest"] = self.ingest_prefix
+        if self.search_prefix is not None:
+            config["search"] = self.search_prefix
+
+        return config
+
+
 class NlpTrainedModelConfig:
    def __init__(
        self,
@ -318,6 +335,7 @@ class NlpTrainedModelConfig:
        metadata: t.Optional[dict] = None,
        model_type: t.Union["t.Literal['pytorch']", str] = "pytorch",
        tags: t.Optional[t.Union[t.List[str], t.Tuple[str, ...]]] = None,
+        prefix_strings: t.Optional[PrefixStrings],
    ):
        self.tags = tags
        self.description = description
@ -325,6 +343,7 @@ class NlpTrainedModelConfig:
        self.input = input
        self.metadata = metadata
        self.model_type = model_type
+        self.prefix_strings = prefix_strings

    def to_dict(self) -> t.Dict[str, t.Any]:
        return {
--- a/eland/ml/pytorch/transformers.py
+++ b/eland/ml/pytorch/transformers.py
@ -53,6 +53,7 @@ from eland.ml.pytorch.nlp_ml_model import (
    NlpTrainedModelConfig,
    NlpXLMRobertaTokenizationConfig,
    PassThroughInferenceOptions,
+    PrefixStrings,
    QuestionAnsweringInferenceOptions,
    TextClassificationInferenceOptions,
    TextEmbeddingInferenceOptions,
@ -596,6 +597,8 @@ class TransformerModel:
        es_version: Optional[Tuple[int, int, int]] = None,
        quantize: bool = False,
        access_token: Optional[str] = None,
+        ingest_prefix: Optional[str] = None,
+        search_prefix: Optional[str] = None,
    ):
        """
        Loads a model from the Hugging Face repository or local file and creates
@ -618,11 +621,22 @@ class TransformerModel:

        quantize: bool, default False
            Quantize the model.
+
+        access_token: Optional[str]
+            For the HuggingFace Hub private model access
+
+        ingest_prefix: Optional[str]
+            Prefix string to prepend to input at ingest
+
+        search_prefix: Optional[str]
+            Prefix string to prepend to input at search
        """

        self._model_id = model_id
        self._access_token = access_token
        self._task_type = task_type.replace("-", "_")
+        self._ingest_prefix = ingest_prefix
+        self._search_prefix = search_prefix

        # load Hugging Face model and tokenizer
        # use padding in the tokenizer to ensure max length sequences are used for tracing (at call time)
@ -783,6 +797,19 @@ class TransformerModel:
            "per_allocation_memory_bytes": per_allocation_memory_bytes,
        }

+        prefix_strings = (
+            PrefixStrings(
+                ingest_prefix=self._ingest_prefix, search_prefix=self._search_prefix
+            )
+            if self._ingest_prefix or self._search_prefix
+            else None
+        )
+        prefix_strings_supported = es_version is None or es_version >= (8, 12, 0)
+        if not prefix_strings_supported and prefix_strings:
+            raise Exception(
+                f"The Elasticsearch cluster version {es_version} does not support prefix strings. Support was added in version 8.12.0"
+            )
+
        return NlpTrainedModelConfig(
            description=f"Model {self._model_id} for task type '{self._task_type}'",
            model_type="pytorch",
@ -791,6 +818,7 @@ class TransformerModel:
                field_names=["text_field"],
            ),
            metadata=metadata,
+            prefix_strings=prefix_strings,
        )

    def _get_per_deployment_memory(self) -> float:
--- a/tests/ml/pytorch/test_pytorch_model_config_pytest.py
+++ b/tests/ml/pytorch/test_pytorch_model_config_pytest.py
@ -154,13 +154,13 @@ else:
    MODEL_CONFIGURATIONS = []


-@pytest.mark.skip(reason="https://github.com/elastic/eland/issues/633")
 class TestModelConfguration:
+    @pytest.mark.skip(reason="https://github.com/elastic/eland/issues/633")
    @pytest.mark.parametrize(
        "model_id,task_type,config_type,tokenizer_type,max_sequence_len,embedding_size",
        MODEL_CONFIGURATIONS,
    )
-    def test_text_prediction(
+    def test_model_config(
        self,
        model_id,
        task_type,
@ -170,7 +170,6 @@ class TestModelConfguration:
        embedding_size,
    ):
        with tempfile.TemporaryDirectory() as tmp_dir:
-            print("loading model " + model_id)
            tm = TransformerModel(
                model_id=model_id,
                task_type=task_type,
@ -183,6 +182,7 @@ class TestModelConfguration:
            assert isinstance(config.inference_config, config_type)
            tokenization = config.inference_config.tokenization
            assert isinstance(config.metadata, dict)
+            assert config.prefix_strings is None
            assert (
                "per_deployment_memory_bytes" in config.metadata
                and config.metadata["per_deployment_memory_bytes"] > 0
@ -210,3 +210,28 @@ class TestModelConfguration:
                assert len(config.inference_config.classification_labels) > 0

            del tm
+
+    def test_model_config_with_prefix_string(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            tm = TransformerModel(
+                model_id="sentence-transformers/all-distilroberta-v1",
+                task_type="text_embedding",
+                es_version=(8, 12, 0),
+                quantize=False,
+                ingest_prefix="INGEST:",
+                search_prefix="SEARCH:",
+            )
+            _, config, _ = tm.save(tmp_dir)
+            assert config.prefix_strings.to_dict()["ingest"] == "INGEST:"
+            assert config.prefix_strings.to_dict()["search"] == "SEARCH:"
+
+    def test_model_config_with_prefix_string_not_supported(self):
+        with pytest.raises(Exception):
+            TransformerModel(
+                model_id="sentence-transformers/all-distilroberta-v1",
+                task_type="text_embedding",
+                es_version=(8, 11, 0),
+                quantize=False,
+                ingest_prefix="INGEST:",
+                search_prefix="SEARCH:",
+            )