Mirror of https://github.com/elastic/eland.git, synced 2025-07-11 00:02:14 +08:00

Compare commits

13 commits (SHA1):
cef4710695
44ead02b05
cb7c4fb122
9e8f164677
3c3ffd7403
f5c2dcfc9d
878cde6126
ec45c395fd
00dc55b3bd
8147eb517a
4728d9b648
51a2b9cc19
a9c36927f6
@@ -45,5 +45,6 @@ steps:
 - '3.10'
 - '3.9'
 stack:
-- '9.0.0-SNAPSHOT'
+- '9.0.0'
+- '9.1.0-SNAPSHOT'
 command: ./.buildkite/run-tests
.github/workflows/docs-build.yml (vendored, 2 changed lines)

@@ -16,4 +16,4 @@ jobs:
 deployments: write
 id-token: write
 contents: read
-pull-requests: read
+pull-requests: write
@@ -2,6 +2,14 @@
 Changelog
 =========
 
+9.0.1 (2025-04-30)
+------------------
+
+* Forbid Elasticsearch 8 client or server (`#780 <https://github.com/elastic/eland/pull/780>`_)
+* Fix DeBERTa tokenization (`#769 <https://github.com/elastic/eland/pull/769>`_)
+* Upgrade PyTorch to 2.5.1 (`#785 <https://github.com/elastic/eland/pull/785>`_)
+* Upgrade LightGBM to 4.6.0 (`#782 <https://github.com/elastic/eland/pull/782>`_)
+
 9.0.0 (2025-04-15)
 ------------------
 
@@ -78,9 +78,15 @@ Once your changes and tests are ready to submit for review:
 # Run Auto-format, lint, mypy type checker for your changes
 $ nox -s format
 
-# Run the test suite
-$ pytest --doctest-modules eland/ tests/
-$ pytest --nbval tests/notebook/
+# Launch Elasticsearch with a trial licence and ML enabled
+$ docker run --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" -e "xpack.security.enabled=false" -e "xpack.license.self_generated.type=trial" docker.elastic.co/elasticsearch/elasticsearch:9.0.0
+
+# See all test suites
+$ nox -l
+# Run a specific test suite
+$ nox -rs "test-3.12(pandas_version='2.2.3')"
+# Run a specific test
+$ nox -rs "test-3.12(pandas_version='2.2.3')" -- -k test_learning_to_rank
 
 ```
 
@@ -18,7 +18,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
     python3 -m pip install \
     --no-cache-dir --disable-pip-version-check --extra-index-url https://download.pytorch.org/whl/cpu \
-    torch==2.3.1+cpu .[all]; \
+    torch==2.5.1+cpu .[all]; \
 else \
     python3 -m pip install \
     --no-cache-dir --disable-pip-version-check \
@@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
 if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
     python3 -m pip install \
     --no-cache-dir --disable-pip-version-check --extra-index-url https://download.pytorch.org/whl/cpu \
-    torch==2.3.1+cpu .[all]; \
+    torch==2.5.1+cpu .[all]; \
 else \
     python3 -m pip install \
     --no-cache-dir --disable-pip-version-check \
@@ -53,7 +53,8 @@ $ conda install -c conda-forge eland
 
 ### Compatibility
 
-- Supports Python 3.9, 3.10, 3.11, 3.12 and Pandas 1.5
+- Supports Python 3.9, 3.10, 3.11 and 3.12.
+- Supports Pandas 1.5 and 2.
 - Supports Elasticsearch 8+ clusters, recommended 8.16 or later for all features to work.
   If you are using the NLP with PyTorch feature make sure your Eland minor version matches the minor
   version of your Elasticsearch cluster. For all other features it is sufficient for the major versions
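For orientation, here is a minimal sketch of what that compatibility promise looks like in practice. The cluster URL and the "flights" index name are assumptions for illustration, not part of this change:

```python
# Minimal sketch, assuming a local cluster at http://localhost:9200 and an
# index named "flights"; both are illustrative, not taken from this diff.
import eland as ed

df = ed.DataFrame("http://localhost:9200", es_index_pattern="flights")
print(df.shape)    # dimensions are resolved lazily against Elasticsearch
print(df.head(5))  # only the first five documents are fetched
```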
@@ -18,7 +18,7 @@
 __title__ = "eland"
 __description__ = "Python Client and Toolkit for DataFrames, Big Data, Machine Learning and ETL in Elasticsearch"
 __url__ = "https://github.com/elastic/eland"
-__version__ = "9.0.0"
+__version__ = "9.0.1"
 __author__ = "Steve Dodson"
 __author_email__ = "steve.dodson@elastic.co"
 __maintainer__ = "Elastic Client Library Maintainers"
@@ -236,14 +236,9 @@ def check_cluster_version(es_client, logger):
             f"Elasticsearch version {major_version} does not support NLP models. Please upgrade Elasticsearch to the latest version"
         )
         exit(1)
-
-    # PyTorch was upgraded to version 2.3.1 in 8.15.2
-    # and is incompatible with earlier versions
-    if sem_ver < (8, 15, 2):
-        import torch
-
+    elif major_version < 9:
         logger.error(
-            f"Eland uses PyTorch version {torch.__version__} which is incompatible with Elasticsearch versions prior to 8.15.2. Please upgrade Elasticsearch to at least version 8.15.2"
+            "Eland 9.x does not support Elasticsearch 8.x. Please upgrade Elasticsearch first."
        )
        exit(1)
 
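The effect of the hunk is easier to see as straight-line code. A hedged sketch of just the new branch follows; the helper name is made up, and the real logic lives inside eland's check_cluster_version:

```python
# Hedged sketch of the new guard only; check_major_version is a made-up name.
def check_major_version(major_version: int, logger) -> None:
    if major_version < 9:
        # Eland 9.x rejects Elasticsearch 8.x outright, replacing the old
        # PyTorch-compatibility check against Elasticsearch 8.15.2.
        logger.error(
            "Eland 9.x does not support Elasticsearch 8.x. Please upgrade Elasticsearch first."
        )
        raise SystemExit(1)
```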
@@ -50,10 +50,7 @@ class Index:
         # index_field.setter
         self._is_source_field = False
 
-        # The type:ignore is due to mypy not being smart enough
-        # to recognize the property.setter has a different type
-        # than the property.getter.
-        self.es_index_field = es_index_field  # type: ignore
+        self.es_index_field = es_index_field
 
     @property
     def sort_field(self) -> str:
@@ -19,7 +19,7 @@ import base64
 import gzip
 import json
 from abc import ABC
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 
 def add_if_exists(d: Dict[str, Any], k: str, v: Any) -> None:
@@ -58,6 +58,9 @@ class ModelSerializer(ABC):
             "ascii"
         )
 
+    def bounds(self) -> Tuple[float, float]:
+        raise NotImplementedError
+
 
 class TreeNode:
     def __init__(
@@ -129,6 +132,14 @@ class Tree(ModelSerializer):
         add_if_exists(d, "tree_structure", [t.to_dict() for t in self._tree_structure])
         return {"tree": d}
 
+    def bounds(self) -> Tuple[float, float]:
+        leaf_values = [
+            tree_node._leaf_value[0]
+            for tree_node in self._tree_structure
+            if tree_node._leaf_value is not None
+        ]
+        return min(leaf_values), max(leaf_values)
+
 
 class Ensemble(ModelSerializer):
     def __init__(
@@ -158,3 +169,9 @@ class Ensemble(ModelSerializer):
         add_if_exists(d, "classification_weights", self._classification_weights)
         add_if_exists(d, "aggregate_output", self._output_aggregator)
         return {"ensemble": d}
+
+    def bounds(self) -> Tuple[float, float]:
+        min_bound, max_bound = tuple(
+            map(sum, zip(*[model.bounds() for model in self._trained_models]))
+        )
+        return min_bound, max_bound
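Tree.bounds() returns the smallest and largest leaf value of a single tree, and Ensemble.bounds() sums those per-tree bounds element-wise, giving the lowest and highest score the whole ensemble can produce. A tiny illustration with made-up numbers:

```python
# Illustration only (hypothetical per-tree bounds, all exactly representable floats).
tree_bounds = [(-0.5, 1.0), (-0.25, 0.5), (0.0, 0.75)]  # (min leaf, max leaf) per tree

# zip(*...) groups all the minimums and all the maximums;
# map(sum, ...) adds each group, exactly as Ensemble.bounds() does.
min_bound, max_bound = tuple(map(sum, zip(*tree_bounds)))
assert (min_bound, max_bound) == (-0.75, 2.25)
```

The LTR test further down uses this lower bound to shift negative XGBRanker scores, mirroring the rescoring Elasticsearch applies at query time.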
@@ -126,6 +126,7 @@ class PyTorchModel:
     def infer(
         self,
         docs: List[Mapping[str, str]],
+        inference_config: Optional[Mapping[str, Any]] = None,
         timeout: str = DEFAULT_TIMEOUT,
     ) -> Any:
         if docs is None:
@@ -133,6 +134,8 @@ class PyTorchModel:
 
         __body: Dict[str, Any] = {}
         __body["docs"] = docs
+        if inference_config is not None:
+            __body["inference_config"] = inference_config
 
         __path = f"/_ml/trained_models/{_quote(self.model_id)}/_infer"
         __query: Dict[str, Any] = {}
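With the new parameter, a task-specific configuration can be sent alongside the documents. A hedged usage sketch follows; the client URL and the deployed model id are assumptions, and the text_similarity payload mirrors the test added at the end of this diff:

```python
# Hedged sketch: assumes a running Elasticsearch 9 cluster and an already
# deployed trained model; the URL and model id below are illustrative.
from elasticsearch import Elasticsearch
from eland.ml.pytorch import PyTorchModel

es = Elasticsearch("http://localhost:9200")
ptm = PyTorchModel(es, "mixedbread-ai__mxbai-rerank-xsmall-v1")

result = ptm.infer(
    docs=[{"text_field": "Paris is the capital of France"}],
    inference_config={"text_similarity": {"text": "France"}},
)
print(result.body["inference_results"][0]["predicted_value"])
```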
@@ -86,7 +86,7 @@ class NlpXLMRobertaTokenizationConfig(NlpTokenizationConfig):
         )
 
 
-class DebertaV2Config(NlpTokenizationConfig):
+class NlpDebertaV2TokenizationConfig(NlpTokenizationConfig):
     def __init__(
         self,
         *,
@@ -25,17 +25,13 @@ import os.path
 import random
 import re
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Dict, List, Optional, Set, Tuple, Union
 
 import torch  # type: ignore
 import transformers  # type: ignore
-from sentence_transformers import SentenceTransformer  # type: ignore
-from torch import Tensor, nn
+from torch import Tensor
 from torch.profiler import profile  # type: ignore
 from transformers import (
-    AutoConfig,
-    AutoModel,
-    AutoModelForQuestionAnswering,
     BertTokenizer,
     PretrainedConfig,
     PreTrainedModel,
@@ -44,11 +40,11 @@ from transformers import (
 )
 
 from eland.ml.pytorch.nlp_ml_model import (
-    DebertaV2Config,
     FillMaskInferenceOptions,
     NerInferenceOptions,
     NlpBertJapaneseTokenizationConfig,
     NlpBertTokenizationConfig,
+    NlpDebertaV2TokenizationConfig,
     NlpMPNetTokenizationConfig,
     NlpRobertaTokenizationConfig,
     NlpTokenizationConfig,
@@ -65,8 +61,13 @@ from eland.ml.pytorch.nlp_ml_model import (
     ZeroShotClassificationInferenceOptions,
 )
 from eland.ml.pytorch.traceable_model import TraceableModel
+from eland.ml.pytorch.wrappers import (
+    _DistilBertWrapper,
+    _DPREncoderWrapper,
+    _QuestionAnsweringWrapperModule,
+    _SentenceTransformerWrapperModule,
+)
 
-DEFAULT_OUTPUT_KEY = "sentence_embedding"
 SUPPORTED_TASK_TYPES = {
     "fill_mask",
     "ner",
@@ -172,284 +173,6 @@ def task_type_from_model_config(model_config: PretrainedConfig) -> Optional[str]
     return potential_task_types.pop()
 
 
[Removed here: the _QuestionAnsweringWrapperModule, _QuestionAnsweringWrapper, _TwoParameterQuestionAnsweringWrapper, _DistilBertWrapper, _SentenceTransformerWrapperModule, _SentenceTransformerWrapper, _TwoParameterSentenceTransformerWrapper and _DPREncoderWrapper classes. They move unchanged into the new file eland/ml/pytorch/wrappers.py, reproduced in full below.]
 
 class _TransformerTraceableModel(TraceableModel):
     """A base class representing a HuggingFace transformer model that can be traced."""
 
@@ -489,12 +212,17 @@ class _TransformerTraceableModel(TraceableModel):
                 transformers.MPNetTokenizer,
                 transformers.RobertaTokenizer,
                 transformers.XLMRobertaTokenizer,
-                transformers.DebertaV2Tokenizer,
             ),
         ):
-            del inputs["token_type_ids"]
             return (inputs["input_ids"], inputs["attention_mask"])
 
+        if isinstance(self._tokenizer, transformers.DebertaV2Tokenizer):
+            return (
+                inputs["input_ids"],
+                inputs["attention_mask"],
+                inputs["token_type_ids"],
+            )
+
         position_ids = torch.arange(inputs["input_ids"].size(1), dtype=torch.long)
         inputs["position_ids"] = position_ids
         return (
@@ -694,7 +422,12 @@ class TransformerModel:
                 " ".join(m) for m, _ in sorted(ranks.items(), key=lambda kv: kv[1])
             ]
             vocab_obj["merges"] = merges
 
+        if isinstance(self._tokenizer, transformers.DebertaV2Tokenizer):
+            sp_model = self._tokenizer._tokenizer.spm
+        else:
             sp_model = getattr(self._tokenizer, "sp_model", None)
 
         if sp_model:
             id_correction = getattr(self._tokenizer, "fairseq_offset", 0)
             scores = []
@@ -733,7 +466,7 @@ class TransformerModel:
                 max_sequence_length=_max_sequence_length
             )
         elif isinstance(self._tokenizer, transformers.DebertaV2Tokenizer):
-            return DebertaV2Config(
+            return NlpDebertaV2TokenizationConfig(
                 max_sequence_length=_max_sequence_length,
                 do_lower_case=getattr(self._tokenizer, "do_lower_case", None),
             )
eland/ml/pytorch/wrappers.py (new file, 317 lines)

@@ -0,0 +1,317 @@
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
This module contains the wrapper classes for the Hugging Face models.
Wrapping is necessary to ensure that the forward method of the model
is called with the same arguments the ml-cpp pytorch_inference process
uses.
"""

from typing import Any, Optional, Union

import torch  # type: ignore
import transformers  # type: ignore
from sentence_transformers import SentenceTransformer  # type: ignore
from torch import Tensor, nn
from transformers import (
    AutoConfig,
    AutoModel,
    AutoModelForQuestionAnswering,
    PreTrainedModel,
    PreTrainedTokenizer,
)

DEFAULT_OUTPUT_KEY = "sentence_embedding"


class _QuestionAnsweringWrapperModule(nn.Module):  # type: ignore
    """
    A wrapper around a question answering model.
    Our inference engine only takes the first tuple if the inference response
    is a tuple.

    This wrapper transforms the output to be a stacked tensor if its a tuple.

    Otherwise it passes it through
    """

    def __init__(self, model: PreTrainedModel):
        super().__init__()
        self._hf_model = model
        self.config = model.config

    @staticmethod
    def from_pretrained(model_id: str, *, token: Optional[str] = None) -> Optional[Any]:
        model = AutoModelForQuestionAnswering.from_pretrained(
            model_id, token=token, torchscript=True
        )
        if isinstance(
            model.config,
            (
                transformers.MPNetConfig,
                transformers.XLMRobertaConfig,
                transformers.RobertaConfig,
                transformers.BartConfig,
            ),
        ):
            return _TwoParameterQuestionAnsweringWrapper(model)
        else:
            return _QuestionAnsweringWrapper(model)


class _QuestionAnsweringWrapper(_QuestionAnsweringWrapperModule):
    def __init__(self, model: PreTrainedModel):
        super().__init__(model=model)

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor,
        token_type_ids: Tensor,
        position_ids: Tensor,
    ) -> Tensor:
        """Wrap the input and output to conform to the native process interface."""

        inputs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "token_type_ids": token_type_ids,
            "position_ids": position_ids,
        }

        # remove inputs for specific model types
        if isinstance(self._hf_model.config, transformers.DistilBertConfig):
            del inputs["token_type_ids"]
            del inputs["position_ids"]
        response = self._hf_model(**inputs)
        if isinstance(response, tuple):
            return torch.stack(list(response), dim=0)
        return response


class _TwoParameterQuestionAnsweringWrapper(_QuestionAnsweringWrapperModule):
    def __init__(self, model: PreTrainedModel):
        super().__init__(model=model)

    def forward(self, input_ids: Tensor, attention_mask: Tensor) -> Tensor:
        """Wrap the input and output to conform to the native process interface."""
        inputs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
        }
        response = self._hf_model(**inputs)
        if isinstance(response, tuple):
            return torch.stack(list(response), dim=0)
        return response


class _DistilBertWrapper(nn.Module):  # type: ignore
    """
    In Elasticsearch the BERT tokenizer is used for DistilBERT models but
    the BERT tokenizer produces 4 inputs where DistilBERT models expect 2.

    Wrap the model's forward function in a method that accepts the 4
    arguments passed to a BERT model then discard the token_type_ids
    and the position_ids to match the wrapped DistilBERT model forward
    function
    """

    def __init__(self, model: transformers.PreTrainedModel):
        super().__init__()
        self._model = model
        self.config = model.config

    @staticmethod
    def try_wrapping(model: PreTrainedModel) -> Optional[Any]:
        if isinstance(model.config, transformers.DistilBertConfig):
            return _DistilBertWrapper(model)
        else:
            return model

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor,
        _token_type_ids: Tensor = None,
        _position_ids: Tensor = None,
    ) -> Tensor:
        """Wrap the input and output to conform to the native process interface."""

        return self._model(input_ids=input_ids, attention_mask=attention_mask)


class _SentenceTransformerWrapperModule(nn.Module):  # type: ignore
    """
    A wrapper around sentence-transformer models to provide pooling,
    normalization and other graph layers that are not defined in the base
    HuggingFace transformer model.
    """

    def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
        super().__init__()
        self._hf_model = model
        self._st_model = SentenceTransformer(model.config.name_or_path)
        self._output_key = output_key
        self.config = model.config

        self._remove_pooling_layer()
        self._replace_transformer_layer()

    @staticmethod
    def from_pretrained(
        model_id: str,
        tokenizer: PreTrainedTokenizer,
        *,
        token: Optional[str] = None,
        output_key: str = DEFAULT_OUTPUT_KEY,
    ) -> Optional[Any]:
        model = AutoModel.from_pretrained(model_id, token=token, torchscript=True)
        if isinstance(
            tokenizer,
            (
                transformers.BartTokenizer,
                transformers.MPNetTokenizer,
                transformers.RobertaTokenizer,
                transformers.XLMRobertaTokenizer,
                transformers.DebertaV2Tokenizer,
            ),
        ):
            return _TwoParameterSentenceTransformerWrapper(model, output_key)
        else:
            return _SentenceTransformerWrapper(model, output_key)

    def _remove_pooling_layer(self) -> None:
        """
        Removes any last pooling layer which is not used to create embeddings.
        Leaving this layer in will cause it to return a NoneType which in turn
        will fail to load in libtorch. Alternatively, we can just use the output
        of the pooling layer as a dummy but this also affects (if only in a
        minor way) the performance of inference, so we're better off removing
        the layer if we can.
        """

        if hasattr(self._hf_model, "pooler"):
            self._hf_model.pooler = None

    def _replace_transformer_layer(self) -> None:
        """
        Replaces the HuggingFace Transformer layer in the SentenceTransformer
        modules so we can set it with one that has pooling layer removed and
        was loaded ready for TorchScript export.
        """

        self._st_model._modules["0"].auto_model = self._hf_model


class _SentenceTransformerWrapper(_SentenceTransformerWrapperModule):
    def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
        super().__init__(model=model, output_key=output_key)

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor,
        token_type_ids: Tensor,
        position_ids: Tensor,
    ) -> Tensor:
        """Wrap the input and output to conform to the native process interface."""

        inputs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "token_type_ids": token_type_ids,
            "position_ids": position_ids,
        }

        # remove inputs for specific model types
        if isinstance(self._hf_model.config, transformers.DistilBertConfig):
            del inputs["token_type_ids"]

        return self._st_model(inputs)[self._output_key]


class _TwoParameterSentenceTransformerWrapper(_SentenceTransformerWrapperModule):
    def __init__(self, model: PreTrainedModel, output_key: str = DEFAULT_OUTPUT_KEY):
        super().__init__(model=model, output_key=output_key)

    def forward(self, input_ids: Tensor, attention_mask: Tensor) -> Tensor:
        """Wrap the input and output to conform to the native process interface."""
        inputs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
        }
        return self._st_model(inputs)[self._output_key]


class _DPREncoderWrapper(nn.Module):  # type: ignore
    """
    AutoModel loading does not work for DPRContextEncoders, this only exists as
    a workaround. This may never be fixed so this is likely permanent.
    See: https://github.com/huggingface/transformers/issues/13670
    """

    _SUPPORTED_MODELS = {
        transformers.DPRContextEncoder,
        transformers.DPRQuestionEncoder,
    }
    _SUPPORTED_MODELS_NAMES = set([x.__name__ for x in _SUPPORTED_MODELS])

    def __init__(
        self,
        model: Union[transformers.DPRContextEncoder, transformers.DPRQuestionEncoder],
    ):
        super().__init__()
        self._model = model
        self.config = model.config

    @staticmethod
    def from_pretrained(model_id: str, *, token: Optional[str] = None) -> Optional[Any]:
        config = AutoConfig.from_pretrained(model_id, token=token)

        def is_compatible() -> bool:
            is_dpr_model = config.model_type == "dpr"
            has_architectures = (
                config.architectures is not None and len(config.architectures) == 1
            )
            is_supported_architecture = has_architectures and (
                config.architectures[0] in _DPREncoderWrapper._SUPPORTED_MODELS_NAMES
            )
            return is_dpr_model and is_supported_architecture

        if is_compatible():
            model = getattr(transformers, config.architectures[0]).from_pretrained(
                model_id, torchscript=True
            )
            return _DPREncoderWrapper(model)
        else:
            return None

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor,
        token_type_ids: Tensor,
        _position_ids: Tensor,
    ) -> Tensor:
        """Wrap the input and output to conform to the native process interface."""

        return self._model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
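These classes are private, but the intended call pattern is visible from the loader code above. A small hedged sketch using the DistilBERT wrapper; eland normally performs this wrapping internally while tracing, and the model id below is only an example:

```python
# Hedged sketch: eland calls try_wrapping itself during model tracing;
# the model id is illustrative.
import transformers
from eland.ml.pytorch.wrappers import _DistilBertWrapper

model = transformers.AutoModel.from_pretrained(
    "distilbert-base-uncased", torchscript=True
)
wrapped = _DistilBertWrapper.try_wrapping(model)
# `wrapped` accepts the four BERT-style tensors that the ml-cpp
# pytorch_inference process sends, and drops the two DistilBERT does not
# use before delegating to the underlying model.
```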
@@ -116,9 +116,6 @@ def test(session, pandas_version: str):
         "--nbval",
     )
 
-    # PyTorch 2.3.1 doesn't support Python 3.12
-    if session.python == "3.12":
-        pytest_args += ("--ignore=eland/ml/pytorch",)
     session.run(
         *pytest_args,
         *(session.posargs or ("eland/", "tests/")),
setup.py (6 changed lines)

@@ -57,10 +57,10 @@ with open(path.join(here, "README.md"), "r", "utf-8") as f:
 extras = {
     "xgboost": ["xgboost>=0.90,<2"],
     "scikit-learn": ["scikit-learn>=1.3,<1.6"],
-    "lightgbm": ["lightgbm>=2,<4"],
+    "lightgbm": ["lightgbm>=3,<5"],
     "pytorch": [
         "requests<3",
-        "torch==2.3.1",
+        "torch==2.5.1",
         "tqdm",
         "sentence-transformers>=2.1.0,<=2.7.0",
         # sentencepiece is a required dependency for the slow tokenizers
@@ -86,7 +86,7 @@ setup(
     keywords="elastic eland pandas python",
     packages=find_packages(include=["eland", "eland.*"]),
     install_requires=[
-        "elasticsearch>=8.3,<9",
+        "elasticsearch>=9,<10",
         "pandas>=1.5,<3",
         "matplotlib>=3.6",
         "numpy>=1.2.0,<2",
@@ -39,6 +39,7 @@ try:
     from eland.ml.pytorch import (
         FillMaskInferenceOptions,
         NlpBertTokenizationConfig,
+        NlpDebertaV2TokenizationConfig,
         NlpMPNetTokenizationConfig,
         NlpRobertaTokenizationConfig,
         NlpXLMRobertaTokenizationConfig,
@@ -57,10 +58,6 @@ except ImportError:
 from tests import ES_VERSION
 
 pytestmark = [
-    pytest.mark.skipif(
-        ES_VERSION < (8, 15, 1),
-        reason="Eland uses Pytorch 2.3.1, versions of Elasticsearch prior to 8.15.1 are incompatible with PyTorch 2.3.1",
-    ),
     pytest.mark.skipif(
         not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run"
     ),
@@ -149,6 +146,14 @@ if HAS_PYTORCH and HAS_SKLEARN and HAS_TRANSFORMERS:
             1024,
             None,
         ),
+        (
+            "microsoft/deberta-v3-xsmall",
+            "fill_mask",
+            FillMaskInferenceOptions,
+            NlpDebertaV2TokenizationConfig,
+            512,
+            None,
+        ),
     ]
 else:
     MODEL_CONFIGURATIONS = []
@@ -38,10 +38,6 @@ except ImportError:
 from tests import ES_TEST_CLIENT, ES_VERSION
 
 pytestmark = [
-    pytest.mark.skipif(
-        ES_VERSION < (8, 15, 2),
-        reason="Eland uses Pytorch 2.3.1, versions of Elasticsearch prior to 8.15.2 are incompatible with PyTorch 2.3.1",
-    ),
     pytest.mark.skipif(
         not HAS_SKLEARN, reason="This test requires 'scikit-learn' package to run"
     ),
@@ -67,6 +63,8 @@ TEXT_EMBEDDING_MODELS = [
     )
 ]
 
+TEXT_SIMILARITY_MODELS = ["mixedbread-ai/mxbai-rerank-xsmall-v1"]
+
 
 @pytest.fixture(scope="function", autouse=True)
 def setup_and_tear_down():
@@ -135,3 +133,25 @@ class TestPytorchModel:
             )
             > 0
         )
+
+    @pytest.mark.skipif(
+        ES_VERSION < (8, 16, 0), reason="requires 8.16.0 for DeBERTa models"
+    )
+    @pytest.mark.parametrize("model_id", TEXT_SIMILARITY_MODELS)
+    def test_text_similarity(self, model_id):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            ptm = download_model_and_start_deployment(
+                tmp_dir, False, model_id, "text_similarity"
+            )
+            result = ptm.infer(
+                docs=[
+                    {
+                        "text_field": "The Amazon rainforest covers most of the Amazon basin in South America"
+                    },
+                    {"text_field": "Paris is the capital of France"},
+                ],
+                inference_config={"text_similarity": {"text": "France"}},
+            )
+
+            assert result.body["inference_results"][0]["predicted_value"] < 0
+            assert result.body["inference_results"][1]["predicted_value"] > 0
@@ -22,6 +22,7 @@ import pytest
 
 from eland.ml import MLModel
 from eland.ml.ltr import FeatureLogger, LTRModelConfig, QueryFeatureExtractor
+from eland.ml.transformers import get_model_transformer
 from tests import (
     ES_IS_SERVERLESS,
     ES_TEST_CLIENT,
@@ -219,6 +220,46 @@ class TestMLModel:
         # Clean up
         es_model.delete_model()
 
+    def _normalize_ltr_score_from_XGBRanker(self, ranker, ltr_model_config, scores):
+        """Normalize the scores of an XGBRanker model as ES implementation of LTR would do.
+
+        Parameters
+        ----------
+        ranker : XGBRanker
+            The XGBRanker model to retrieve the minimum score from.
+
+        ltr_model_config : LTRModelConfig
+            LTR model config.
+
+        Returns
+        -------
+        scores : List[float]
+            Normalized scores for the model.
+        """
+
+        should_rescore = (
+            (ES_VERSION[0] == 8 and ES_VERSION >= (8, 19))
+            or (
+                ES_VERSION[0] == 9
+                and (ES_VERSION[1] >= 1 or (ES_VERSION[1] == 0 and ES_VERSION[2] >= 1))
+            )
+            or ES_IS_SERVERLESS
+        )
+
+        if should_rescore:
+            # In 8.19+, 9.0.1 and 9.1, the scores are normalized if there are negative scores
+            min_model_score, _ = (
+                get_model_transformer(
+                    ranker, feature_names=ltr_model_config.feature_names
+                )
+                .transform()
+                .bounds()
+            )
+            if min_model_score < 0:
+                scores = [score - min_model_score for score in scores]
+
+        return scores
+
     @requires_elasticsearch_version((8, 12))
     @requires_xgboost
     @pytest.mark.parametrize("compress_model_definition", [True, False])
@@ -330,6 +371,11 @@ class TestMLModel:
             ],
             reverse=True,
         )
+
+        expected_scores = self._normalize_ltr_score_from_XGBRanker(
+            ranker, ltr_model_config, expected_scores
+        )
+
         np.testing.assert_almost_equal(expected_scores, doc_scores, decimal=2)
 
         # Verify prediction is not supported for LTR
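The helper reproduces the server-side rescoring rule: when a model can emit negative scores, every score is shifted up by the model's minimum possible score so that ranking scores stay non-negative. With made-up numbers:

```python
# Illustration with hypothetical values; Elasticsearch applies the same shift
# when the model's lower bound (from .bounds()) is negative.
min_model_score = -3.25
raw_scores = [2.5, -1.0, 0.25]

rescored = [score - min_model_score for score in raw_scores]
assert rescored == [5.75, 2.25, 3.5]
```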