[ML] add support for question_answering NLP tasks (#457)

Adds support for `question_answering` NLP models within the PyTorch model uploader.

Related: https://github.com/elastic/elasticsearch/pull/85958
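
For context, this lets an extractive QA model be uploaded with the same flow as the other task types by passing `question_answering` as the task type. A minimal sketch, not part of this diff: the cluster URL and `deepset/roberta-base-squad2` are placeholders, and the `save()` / `PyTorchModel.import_model()` calls follow the pattern in the eland docs, so their exact signatures may differ between versions.

    import tempfile

    from elasticsearch import Elasticsearch
    from eland.ml.pytorch import PyTorchModel
    from eland.ml.pytorch.transformers import TransformerModel

    es = Elasticsearch("http://localhost:9200")  # placeholder cluster URL

    # TransformerModel(model_id, task_type, quantize) matches the constructor shown below.
    tm = TransformerModel("deepset/roberta-base-squad2", "question_answering", quantize=False)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Trace the model locally, then push it to the cluster under a sanitized model id.
        model_path, config, vocab_path = tm.save(tmp_dir)
        ptm = PyTorchModel(es, tm.elasticsearch_model_id())
        ptm.import_model(
            model_path=model_path, config_path=None, vocab_path=vocab_path, config=config
        )

Once uploaded, the model is started and evaluated through the Elasticsearch ML trained models APIs (see the linked Elasticsearch PR for the server-side task).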
commit 70fadc9986 (parent afe08f8107)
Benjamin Trent authored on 2022-05-04 13:15:33 -04:00; committed by GitHub
2 changed files with 156 additions and 17 deletions

eland/ml/pytorch/nlp_ml_model.py

@@ -19,8 +19,22 @@ import typing as t
 class NlpTokenizationConfig:
-    def __init__(self, *, configuration_type: str):
+    def __init__(
+        self,
+        *,
+        configuration_type: str,
+        with_special_tokens: t.Optional[bool] = None,
+        max_sequence_length: t.Optional[int] = None,
+        truncate: t.Optional[
+            t.Union["t.Literal['first', 'none', 'second']", str]
+        ] = None,
+        span: t.Optional[int] = None,
+    ):
         self.name = configuration_type
+        self.with_special_tokens = with_special_tokens
+        self.max_sequence_length = max_sequence_length
+        self.truncate = truncate
+        self.span = span

     def to_dict(self):
         return {
@@ -42,12 +56,14 @@ class NlpRobertaTokenizationConfig(NlpTokenizationConfig):
         ] = None,
         span: t.Optional[int] = None,
     ):
-        super().__init__(configuration_type="roberta")
+        super().__init__(
+            configuration_type="roberta",
+            with_special_tokens=with_special_tokens,
+            max_sequence_length=max_sequence_length,
+            truncate=truncate,
+            span=span,
+        )
         self.add_prefix_space = add_prefix_space
-        self.with_special_tokens = with_special_tokens
-        self.max_sequence_length = max_sequence_length
-        self.truncate = truncate
-        self.span = span

 class NlpBertTokenizationConfig(NlpTokenizationConfig):
@@ -62,12 +78,14 @@ class NlpBertTokenizationConfig(NlpTokenizationConfig):
         ] = None,
         span: t.Optional[int] = None,
     ):
-        super().__init__(configuration_type="bert")
+        super().__init__(
+            configuration_type="bert",
+            with_special_tokens=with_special_tokens,
+            max_sequence_length=max_sequence_length,
+            truncate=truncate,
+            span=span,
+        )
         self.do_lower_case = do_lower_case
-        self.with_special_tokens = with_special_tokens
-        self.max_sequence_length = max_sequence_length
-        self.truncate = truncate
-        self.span = span

 class NlpMPNetTokenizationConfig(NlpTokenizationConfig):
@@ -82,12 +100,14 @@ class NlpMPNetTokenizationConfig(NlpTokenizationConfig):
         ] = None,
         span: t.Optional[int] = None,
     ):
-        super().__init__(configuration_type="mpnet")
+        super().__init__(
+            configuration_type="mpnet",
+            with_special_tokens=with_special_tokens,
+            max_sequence_length=max_sequence_length,
+            truncate=truncate,
+            span=span,
+        )
         self.do_lower_case = do_lower_case
-        self.with_special_tokens = with_special_tokens
-        self.max_sequence_length = max_sequence_length
-        self.truncate = truncate
-        self.span = span

 class InferenceConfig:
@@ -180,6 +200,24 @@ class PassThroughInferenceOptions(InferenceConfig):
         self.results_field = results_field

+class QuestionAnsweringInferenceOptions(InferenceConfig):
+    def __init__(
+        self,
+        *,
+        tokenization: NlpTokenizationConfig,
+        results_field: t.Optional[str] = None,
+        max_answer_length: t.Optional[int] = None,
+        question: t.Optional[str] = None,
+        num_top_classes: t.Optional[int] = None,
+    ):
+        super().__init__(configuration_type="question_answering")
+        self.tokenization = tokenization
+        self.results_field = results_field
+        self.max_answer_length = max_answer_length
+        self.question = question
+        self.num_top_classes = num_top_classes

 class TextEmbeddingInferenceOptions(InferenceConfig):
     def __init__(
         self,

eland/ml/pytorch/transformers.py

@@ -32,6 +32,7 @@ from torch import Tensor, nn
 from transformers import (
     AutoConfig,
     AutoModel,
+    AutoModelForQuestionAnswering,
     PreTrainedModel,
     PreTrainedTokenizer,
     PreTrainedTokenizerFast,
@@ -46,6 +47,7 @@ from eland.ml.pytorch.nlp_ml_model import (
     NlpTokenizationConfig,
     NlpTrainedModelConfig,
     PassThroughInferenceOptions,
+    QuestionAnsweringInferenceOptions,
     TextClassificationInferenceOptions,
     TextEmbeddingInferenceOptions,
     TrainedModelInput,
@@ -59,6 +61,7 @@ SUPPORTED_TASK_TYPES = {
     "text_classification",
     "text_embedding",
     "zero_shot_classification",
+    "question_answering",
 }
 TASK_TYPE_TO_INFERENCE_CONFIG = {
     "fill_mask": FillMaskInferenceOptions,
@@ -67,6 +70,7 @@ TASK_TYPE_TO_INFERENCE_CONFIG = {
     "text_embedding": TextEmbeddingInferenceOptions,
     "zero_shot_classification": ZeroShotClassificationInferenceOptions,
     "pass_through": PassThroughInferenceOptions,
+    "question_answering": QuestionAnsweringInferenceOptions,
 }
 SUPPORTED_TASK_TYPES_NAMES = ", ".join(sorted(SUPPORTED_TASK_TYPES))
 SUPPORTED_TOKENIZERS = (
@@ -92,6 +96,86 @@ TracedModelTypes = Union[
 ]

+class _QuestionAnsweringWrapperModule(nn.Module):  # type: ignore
+    """
+    A wrapper around a question answering model.
+    Our inference engine only takes the first element if the inference response
+    is a tuple.
+
+    This wrapper transforms the output into a stacked tensor if it is a tuple;
+    otherwise it passes it through unchanged.
+    """
+
+    def __init__(self, model: PreTrainedModel):
+        super().__init__()
+        self._hf_model = model
+        self.config = model.config
+
+    @staticmethod
+    def from_pretrained(model_id: str) -> Optional[Any]:
+        model = AutoModelForQuestionAnswering.from_pretrained(
+            model_id, torchscript=True
+        )
+        if isinstance(
+            model.config,
+            (
+                transformers.MPNetConfig,
+                transformers.RobertaConfig,
+                transformers.BartConfig,
+            ),
+        ):
+            return _TwoParameterQuestionAnsweringWrapper(model)
+        else:
+            return _QuestionAnsweringWrapper(model)
+
+class _QuestionAnsweringWrapper(_QuestionAnsweringWrapperModule):
+    def __init__(self, model: PreTrainedModel):
+        super().__init__(model=model)
+
+    def forward(
+        self,
+        input_ids: Tensor,
+        attention_mask: Tensor,
+        token_type_ids: Tensor,
+        position_ids: Tensor,
+    ) -> Tensor:
+        """Wrap the input and output to conform to the native process interface."""
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "token_type_ids": token_type_ids,
+            "position_ids": position_ids,
+        }
+        # remove inputs for specific model types
+        if isinstance(self._hf_model.config, transformers.DistilBertConfig):
+            del inputs["token_type_ids"]
+            del inputs["position_ids"]
+        response = self._hf_model(**inputs)
+        if isinstance(response, tuple):
+            return torch.stack(list(response), dim=0)
+        return response
+
+class _TwoParameterQuestionAnsweringWrapper(_QuestionAnsweringWrapperModule):
+    def __init__(self, model: PreTrainedModel):
+        super().__init__(model=model)
+
+    def forward(self, input_ids: Tensor, attention_mask: Tensor) -> Tensor:
+        """Wrap the input and output to conform to the native process interface."""
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+        }
+        response = self._hf_model(**inputs)
+        if isinstance(response, tuple):
+            return torch.stack(list(response), dim=0)
+        return response

 class _DistilBertWrapper(nn.Module):  # type: ignore
     """
     A simple wrapper around DistilBERT model which makes the model inputs
@@ -404,6 +488,16 @@ class _TraceableZeroShotClassificationModel(_TraceableClassificationModel):
         )

+class _TraceableQuestionAnsweringModel(_TraceableModel):
+    def _prepare_inputs(self) -> transformers.BatchEncoding:
+        return self._tokenizer(
+            "What is the meaning of life?"
+            "The meaning of life, according to the hitchikers guide, is 42.",
+            padding="max_length",
+            return_tensors="pt",
+        )

 class TransformerModel:
     def __init__(self, model_id: str, task_type: str, quantize: bool = False):
         self._model_id = model_id
@@ -472,6 +566,11 @@ class TransformerModel:
     def _create_config(self) -> NlpTrainedModelConfig:
         tokenization_config = self._create_tokenization_config()
+        # Set SQuAD well-known defaults
+        if self._task_type == "question_answering":
+            tokenization_config.max_sequence_length = 386
+            tokenization_config.span = 128
+            tokenization_config.truncate = "none"
         inference_config = (
             TASK_TYPE_TO_INFERENCE_CONFIG[self._task_type](
                 tokenization=tokenization_config,
@@ -530,7 +629,9 @@ class TransformerModel:
             )
             model = _DistilBertWrapper.try_wrapping(model)
             return _TraceableZeroShotClassificationModel(self._tokenizer, model)
+        elif self._task_type == "question_answering":
+            model = _QuestionAnsweringWrapperModule.from_pretrained(self._model_id)
+            return _TraceableQuestionAnsweringModel(self._tokenizer, model)
         else:
             raise TypeError(
                 f"Unknown task type {self._task_type}, must be one of: {SUPPORTED_TASK_TYPES_NAMES}"