Compare commits

...

5 Commits
v9.0.1 ... main

Author SHA1 Message Date
David Kyle
bebb9d52e5
Upgrade Sentence Transformers to v5 (#801)
Sentence Transformers v5 adds support for sparse embedding models and is now necessary for importing sparse models such as https://huggingface.co/naver/splade-v3-distilbert.
2025-07-23 08:07:29 +01:00
Colleen McGinnis
117f61b010
add products to docset.yml (#797) 2025-07-23 10:32:54 +04:00
Jan Calanog
cef4710695
docs-builder: add pull-requests: write permission to docs-build workflow (#800) 2025-06-23 15:39:36 +04:00
Quentin Pradet
44ead02b05
Fix lint (#798) 2025-06-05 15:52:19 +04:00
Miguel Grinberg
cb7c4fb122
Update README.md (#796)
Update Pandas support to include v2
2025-05-16 15:56:20 +01:00
6 changed files with 29 additions and 8 deletions

View File

@@ -16,4 +16,4 @@ jobs:
deployments: write deployments: write
id-token: write id-token: write
contents: read contents: read
pull-requests: read pull-requests: write

View File

@@ -53,7 +53,8 @@ $ conda install -c conda-forge eland
### Compatibility ### Compatibility
- Supports Python 3.9, 3.10, 3.11, 3.12 and Pandas 1.5 - Supports Python 3.9, 3.10, 3.11 and 3.12.
- Supports Pandas 1.5 and 2.
- Supports Elasticsearch 8+ clusters, recommended 8.16 or later for all features to work. - Supports Elasticsearch 8+ clusters, recommended 8.16 or later for all features to work.
If you are using the NLP with PyTorch feature make sure your Eland minor version matches the minor If you are using the NLP with PyTorch feature make sure your Eland minor version matches the minor
version of your Elasticsearch cluster. For all other features it is sufficient for the major versions version of your Elasticsearch cluster. For all other features it is sufficient for the major versions

View File

@@ -1,4 +1,6 @@
project: 'Eland Python client' project: 'Eland Python client'
products:
- id: elasticsearch-client
cross_links: cross_links:
- docs-content - docs-content
toc: toc:

View File

@@ -50,10 +50,7 @@ class Index:
# index_field.setter # index_field.setter
self._is_source_field = False self._is_source_field = False
# The type:ignore is due to mypy not being smart enough self.es_index_field = es_index_field
# to recognize the property.setter has a different type
# than the property.getter.
self.es_index_field = es_index_field # type: ignore
@property @property
def sort_field(self) -> str: def sort_field(self) -> str:

View File

@@ -62,10 +62,10 @@ extras = {
"requests<3", "requests<3",
"torch==2.5.1", "torch==2.5.1",
"tqdm", "tqdm",
"sentence-transformers>=2.1.0,<=2.7.0", "sentence-transformers>=5.0.0,<6.0.0",
# sentencepiece is a required dependency for the slow tokenizers # sentencepiece is a required dependency for the slow tokenizers
# https://huggingface.co/transformers/v4.4.2/migration.html#sentencepiece-is-removed-from-the-required-dependencies # https://huggingface.co/transformers/v4.4.2/migration.html#sentencepiece-is-removed-from-the-required-dependencies
"transformers[sentencepiece]>=4.47.0", "transformers[sentencepiece]>=4.47.0,<4.50.3",
], ],
} }
extras["all"] = list({dep for deps in extras.values() for dep in deps}) extras["all"] = list({dep for deps in extras.values() for dep in deps})

View File

@@ -65,6 +65,8 @@ TEXT_EMBEDDING_MODELS = [
TEXT_SIMILARITY_MODELS = ["mixedbread-ai/mxbai-rerank-xsmall-v1"] TEXT_SIMILARITY_MODELS = ["mixedbread-ai/mxbai-rerank-xsmall-v1"]
TEXT_EXPANSION_MODELS = ["naver/splade-v3-distilbert"]
@pytest.fixture(scope="function", autouse=True) @pytest.fixture(scope="function", autouse=True)
def setup_and_tear_down(): def setup_and_tear_down():
@@ -155,3 +157,22 @@ class TestPytorchModel:
assert result.body["inference_results"][0]["predicted_value"] < 0 assert result.body["inference_results"][0]["predicted_value"] < 0
assert result.body["inference_results"][1]["predicted_value"] > 0 assert result.body["inference_results"][1]["predicted_value"] > 0
@pytest.mark.skipif(ES_VERSION < (9, 0, 0), reason="requires current major version")
@pytest.mark.parametrize("model_id", TEXT_EXPANSION_MODELS)
def test_text_expansion(self, model_id):
with tempfile.TemporaryDirectory() as tmp_dir:
ptm = download_model_and_start_deployment(
tmp_dir, False, model_id, "text_expansion"
)
result = ptm.infer(
docs=[
{
"text_field": "The Amazon rainforest covers most of the Amazon basin in South America"
},
{"text_field": "Paris is the capital of France"},
]
)
assert len(result.body["inference_results"][0]["predicted_value"]) > 0
assert len(result.body["inference_results"][1]["predicted_value"]) > 0