mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
120 lines
4.2 KiB
Plaintext
120 lines
4.2 KiB
Plaintext
[[machine-learning]]
|
|
== Machine Learning
|
|
|
|
[discrete]
|
|
[[ml-trained-models]]
|
|
=== Trained models
|
|
|
|
Eland allows trained models from the scikit-learn, XGBoost,
|
|
and LightGBM libraries to be serialized and used as an inference
|
|
model in {es}.
|
|
|
|
[source,python]
|
|
------------------------
|
|
>>> from xgboost import XGBClassifier
|
|
>>> from eland.ml import MLModel
|
|
|
|
# Train and exercise an XGBoost ML model locally
|
|
>>> xgb_model = XGBClassifier(booster="gbtree")
|
|
>>> xgb_model.fit(training_data[0], training_data[1])
|
|
|
|
>>> xgb_model.predict(training_data[0])
|
|
[0 1 1 0 1 0 0 0 1 0]
|
|
|
|
# Import the model into Elasticsearch
|
|
>>> es_model = MLModel.import_model(
|
|
es_client="http://localhost:9200",
|
|
model_id="xgb-classifier",
|
|
model=xgb_model,
|
|
feature_names=["f0", "f1", "f2", "f3", "f4"],
|
|
)
|
|
|
|
# Exercise the ML model in Elasticsearch with the training data
|
|
>>> es_model.predict(training_data[0])
|
|
[0 1 1 0 1 0 0 0 1 0]
|
|
------------------------
|
|
|
|
[discrete]
|
|
[[ml-nlp-pytorch]]
|
|
=== Natural language processing (NLP) with PyTorch
|
|
|
|
For NLP tasks, Eland enables you to import PyTorch-trained BERT models into {es}.
|
|
Models can be either plain PyTorch models or supported
|
|
https://huggingface.co/transformers[transformers] models from the
|
|
https://huggingface.co/models[Hugging Face model hub]. For example:
|
|
|
|
[source,bash]
|
|
------------------------
|
|
$ eland_import_hub_model <authentication> \ <1>
|
|
--url http://localhost:9200/ \ <2>
|
|
--hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \ <3>
|
|
--task-type ner \ <4>
|
|
--start
|
|
------------------------
|
|
<1> Use an authentication method to access your cluster. Refer to <<ml-nlp-pytorch-auth>>.
|
|
<2> The cluster URL. Alternatively, use `--cloud-id`.
|
|
<3> Specify the identifier for the model in the Hugging Face model hub.
|
|
<4> Specify the type of NLP task. Supported values are `fill_mask`, `ner`,
|
|
`text_classification`, `text_embedding`, and `zero_shot_classification`.
|
|
|
|
[source,python]
|
|
------------------------
|
|
>>> import elasticsearch
|
|
>>> from pathlib import Path
|
|
>>> from eland.ml.pytorch import PyTorchModel
|
|
>>> from eland.ml.pytorch.transformers import TransformerModel
|
|
|
|
# Load a Hugging Face transformers model directly from the model hub
|
|
>>> tm = TransformerModel("elastic/distilbert-base-cased-finetuned-conll03-english", "ner")
|
|
Downloading: 100%|██████████| 257/257 [00:00<00:00, 108kB/s]
|
|
Downloading: 100%|██████████| 954/954 [00:00<00:00, 372kB/s]
|
|
Downloading: 100%|██████████| 208k/208k [00:00<00:00, 668kB/s]
|
|
Downloading: 100%|██████████| 112/112 [00:00<00:00, 43.9kB/s]
|
|
Downloading: 100%|██████████| 249M/249M [00:23<00:00, 11.2MB/s]
|
|
|
|
# Export the model in a TorchScript representation which Elasticsearch uses
|
|
>>> tmp_path = "models"
|
|
>>> Path(tmp_path).mkdir(parents=True, exist_ok=True)
|
|
>>> model_path, config_path, vocab_path = tm.save(tmp_path)
|
|
|
|
# Import model into Elasticsearch
|
|
>>> es = elasticsearch.Elasticsearch("http://elastic:mlqa_admin@localhost:9200", timeout=300) # 5 minute timeout
|
|
>>> ptm = PyTorchModel(es, tm.elasticsearch_model_id())
|
|
>>> ptm.import_model(model_path, config_path, vocab_path)
|
|
100%|██████████| 63/63 [00:12<00:00, 5.02it/s]
|
|
------------------------
|
|
|
|
[discrete]
|
|
[[ml-nlp-pytorch-auth]]
|
|
==== Authentication methods
|
|
|
|
The following authentication options are available when using the import script:
|
|
|
|
* username and password authentication (specified with the `-u` and `-p` options):
|
|
+
|
|
--
|
|
[source,bash]
|
|
--------------------------------------------------
|
|
eland_import_hub_model -u <username> -p <password> --cloud-id <cloud-id> ...
|
|
--------------------------------------------------
|
|
These `-u` and `-p` options also work when you use `--url`.
|
|
--
|
|
|
|
* username and password authentication (embedded in the URL):
|
|
+
|
|
--
|
|
[source,bash]
|
|
--------------------------------------------------
|
|
eland_import_hub_model --url https://<user>:<password>@<hostname>:<port> ...
|
|
--------------------------------------------------
|
|
--
|
|
|
|
* API key authentication:
|
|
+
|
|
--
|
|
[source,bash]
|
|
--------------------------------------------------
|
|
eland_import_hub_model --es-api-key <api-key> --url https://<hostname>:<port> ...
|
|
--------------------------------------------------
|
|
--
|