From df51f8af0743a333115244fa33ca4a2c4527dce3 Mon Sep 17 00:00:00 2001 From: Josh Devins Date: Thu, 28 Oct 2021 19:05:39 +0200 Subject: [PATCH] Document how to install transitive binary dependencies, add repo Dockerfile Co-authored-by: Seth Michael Larson --- Dockerfile | 14 ++++++++++++++ README.md | 53 +++++++++++++++++++++++++++++++++++++++++++++++------ setup.py | 24 +++++++++++++----------- 3 files changed, 74 insertions(+), 17 deletions(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..75ab72d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,14 @@ +FROM debian:11.1 + +RUN apt-get update && \ + apt-get install -y build-essential pkg-config cmake \ + python3-dev python3-pip python3-venv \ + libzip-dev libjpeg-dev && \ + apt-get clean + +ADD . /eland +WORKDIR /eland + +RUN python3 -m pip install --no-cache-dir --disable-pip-version-check .[all] + +CMD ["/bin/sh"] diff --git a/README.md b/README.md index 0bde260..4c21f7d 100644 --- a/README.md +++ b/README.md @@ -22,16 +22,16 @@ ## About -Eland is a Python Elasticsearch client for exploring and -analyzing data in Elasticsearch with a familiar Pandas-compatible API. +Eland is a Python Elasticsearch client for exploring and analyzing data in Elasticsearch with a familiar +Pandas-compatible API. Where possible the package uses existing Python APIs and data structures to make it easy to switch between numpy, -pandas, scikit-learn to their Elasticsearch powered equivalents. In general, the data resides in Elasticsearch and +pandas, or scikit-learn to their Elasticsearch powered equivalents. In general, the data resides in Elasticsearch and not in memory, which allows Eland to access large datasets stored in Elasticsearch. 
-Eland also provides tools to upload trained machine learning models from your -common libraries like [scikit-learn](https://scikit-learn.org), [XGBoost](https://xgboost.readthedocs.io), -and [LightGBM](https://lightgbm.readthedocs.io) into Elasticsearch. +Eland also provides tools to upload trained machine learning models from common libraries like +[scikit-learn](https://scikit-learn.org), [XGBoost](https://xgboost.readthedocs.io), and +[LightGBM](https://lightgbm.readthedocs.io) into Elasticsearch. ## Getting Started @@ -52,6 +52,47 @@ $ conda install -c conda-forge eland - Supports Python 3.7+ and Pandas 1.3 - Supports Elasticsearch clusters that are 7.11+, recommended 7.14 or later for all features to work. +### Prerequisites + +Users installing Eland on Debian-based distributions may need to install prerequisite packages for the transitive +dependencies of Eland: + +```bash +$ sudo apt-get install -y \ + build-essential pkg-config cmake \ + python3-dev libzip-dev libjpeg-dev +``` + +Note that other distributions such as CentOS, Red Hat, Arch, etc. may require using a different package manager and +specifying different package names. + +### Docker + +Users wishing to use Eland without installing it, in order to just run the available scripts, can build the Docker +image: + +```bash +$ docker build -t elastic/eland . +``` + +A container can now be run interactively from the image: + +```bash +$ docker run -it --rm --network host elastic/eland +``` + +Running installed scripts is also possible without an interactive shell, e.g.: + +```bash +$ docker run -it --rm --network host \ + elastic/eland \ + eland_import_hub_model \ + --url http://host.docker.internal:9200/ \ + --hub-model-id elastic/distilbert-base-cased-finetuned-conll03-english \ + --task-type ner \ + --start +``` + ### Connecting to Elasticsearch Eland uses the [Elasticsearch low level client](https://elasticsearch-py.readthedocs.io) to connect to Elasticsearch. 
diff --git a/setup.py b/setup.py index b21d82a..4f0f2c5 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,18 @@ with open(path.join(here, "README.md"), "r", "utf-8") as f: last_html_index = i + 1 long_description = "\n".join(lines[last_html_index:]) +extras = { + "xgboost": ["xgboost>=0.90,<2"], + "scikit-learn": ["scikit-learn>=0.22.1,<1"], + "lightgbm": ["lightgbm>=2,<4"], + "pytorch": [ + "huggingface-hub>=0.0.17,<1", + "sentence-transformers>=2.0.0,<3", + "torch>=1.9.0,<2", + "transformers[torch]>=4.11.0,<5", + ], +} +extras["all"] = sorted({dep for deps in extras.values() for dep in deps}) setup( name=about["__title__"], @@ -81,15 +93,5 @@ setup( package_data={"eland": ["py.typed"]}, include_package_data=True, zip_safe=False, - extras_require={ - "xgboost": ["xgboost>=0.90,<2"], - "scikit-learn": ["scikit-learn>=0.22.1,<1"], - "lightgbm": ["lightgbm>=2,<4"], - "pytorch": [ - "huggingface-hub>=0.0.17,<1", - "sentence-transformers>=2.0.0,<3", - "torch>=1.9.0,<2", - "transformers[torch]>=4.11.0<5", - ], - }, + extras_require=extras, )