mirror of
https://github.com/elastic/eland.git
synced 2025-07-24 00:00:39 +08:00
Update and rearrange documentation
This commit is contained in:
parent
46533ede98
commit
661b33dd0a
@ -1 +1,2 @@
|
|||||||
include LICENSE.txt
|
include LICENSE.txt
|
||||||
|
include README.md
|
||||||
|
357
README.md
357
README.md
@ -1,179 +1,31 @@
|
|||||||
_Note, this project is still very much a work in progress and in an alpha state; input and contributions welcome!_
|
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<a href="https://github.com/elastic/eland">
|
<a href="https://github.com/elastic/eland">
|
||||||
<img src="./docs/source/logo/eland.png" width="30%" alt="eland" />
|
<img src="https://raw.githubusercontent.com/elastic/eland/master/docs/source/logo/eland.png" width="30%" alt="Eland" />
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
<table>
|
<p align="center">
|
||||||
<tr>
|
<a href="https://pypi.org/project/eland"><img src="https://img.shields.io/pypi/v/eland.svg" alt="PyPI Version"></a>
|
||||||
<td>PyPI</td>
|
<a href="https://anaconda.org/conda-forge/eland"><img src="https://img.shields.io/conda/vn/conda-forge/eland" alt="Conda Version"></a>
|
||||||
<td>
|
<a href="https://pepy.tech/project/eland"><img src="https://pepy.tech/badge/eland" alt="Downloads"></a>
|
||||||
<a href="https://pypi.org/project/eland/">
|
<a href="https://pypi.org/project/eland"><img src="https://img.shields.io/pypi/status/eland.svg" alt="Package Status"></a>
|
||||||
<img src="https://img.shields.io/pypi/v/eland.svg" alt="latest release"/>
|
<a href="https://clients-ci.elastic.co/job/elastic+eland+master"><img src="https://clients-ci.elastic.co/buildStatus/icon?job=elastic%2Beland%2Bmaster" alt="Build Status"></a>
|
||||||
</a>
|
<a href="https://github.com/elastic/eland/blob/master/LICENSE.txt"><img src="https://img.shields.io/pypi/l/eland.svg" alt="License"></a>
|
||||||
</td>
|
</p>
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Conda Forge</td>
|
|
||||||
<td>
|
|
||||||
<a href="https://anaconda.org/conda-forge/eland">
|
|
||||||
<img src="https://img.shields.io/conda/vn/conda-forge/eland" alt="latest release"/>
|
|
||||||
</a>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Package Status</td>
|
|
||||||
<td>
|
|
||||||
<a href="https://pypi.org/project/eland/">
|
|
||||||
<img src="https://img.shields.io/pypi/status/eland.svg" alt="status" />
|
|
||||||
</a>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>License</td>
|
|
||||||
<td>
|
|
||||||
<a href="https://github.com/elastic/eland/blob/master/LICENSE.txt">
|
|
||||||
<img src="https://img.shields.io/pypi/l/eland.svg" alt="license" />
|
|
||||||
</a>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Build Status</td>
|
|
||||||
<td>
|
|
||||||
<a href="https://clients-ci.elastic.co/job/elastic+eland+master/">
|
|
||||||
<img src="https://clients-ci.elastic.co/buildStatus/icon?job=elastic%2Beland%2Bmaster" alt="Build Status" />
|
|
||||||
</a>
|
|
||||||
</td>
|
|
||||||
</tr>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
# What is it?
|
Eland is a Python Elasticsearch client for exploring and
|
||||||
|
analyzing data in Elasticsearch with a familiar Pandas-compatible API.
|
||||||
Eland is a Python Elasticsearch client for exploring and analyzing data
|
|
||||||
residing in Elasticsearch with a familiar Pandas-compatible API.
|
|
||||||
|
|
||||||
Where possible the package uses existing Python APIs and data structures to make it easy to switch between numpy,
|
Where possible the package uses existing Python APIs and data structures to make it easy to switch between numpy,
|
||||||
pandas, scikit-learn to their Elasticsearch powered equivalents. In general, the data resides in Elasticsearch and
|
pandas, scikit-learn to their Elasticsearch powered equivalents. In general, the data resides in Elasticsearch and
|
||||||
not in memory, which allows Eland to access large datasets stored in Elasticsearch.
|
not in memory, which allows Eland to access large datasets stored in Elasticsearch.
|
||||||
|
|
||||||
For example, to explore data in a large Elasticsearch index, simply create an eland DataFrame from an Elasticsearch
|
Eland also provides tools to upload trained machine learning models from your
|
||||||
index pattern, and explore using an API that mirrors a subset of the pandas.DataFrame API:
|
common libraries like [scikit-learn](https://scikit-learn.org), [XGBoost](https://xgboost.readthedocs.io),
|
||||||
|
and [LightGBM](https://lightgbm.readthedocs.io) into Elasticsearch.
|
||||||
|
|
||||||
```
|
## Getting Started
|
||||||
>>> import eland as ed
|
|
||||||
|
|
||||||
>>> # Connect to 'flights' index via localhost Elasticsearch node
|
Eland can be installed from [PyPI](https://pypi.org/project/eland) with Pip:
|
||||||
>>> df = ed.DataFrame('localhost:9200', 'flights')
|
|
||||||
|
|
||||||
>>> df.head()
|
|
||||||
AvgTicketPrice Cancelled ... dayOfWeek timestamp
|
|
||||||
0 841.265642 False ... 0 2018-01-01 00:00:00
|
|
||||||
1 882.982662 False ... 0 2018-01-01 18:27:00
|
|
||||||
2 190.636904 False ... 0 2018-01-01 17:11:14
|
|
||||||
3 181.694216 True ... 0 2018-01-01 10:33:28
|
|
||||||
4 730.041778 False ... 0 2018-01-01 05:13:00
|
|
||||||
|
|
||||||
[5 rows x 27 columns]
|
|
||||||
|
|
||||||
>>> df.describe()
|
|
||||||
AvgTicketPrice DistanceKilometers ... FlightTimeMin dayOfWeek
|
|
||||||
count 13059.000000 13059.000000 ... 13059.000000 13059.000000
|
|
||||||
mean 628.253689 7092.142457 ... 511.127842 2.835975
|
|
||||||
std 266.386661 4578.263193 ... 334.741135 1.939365
|
|
||||||
min 100.020531 0.000000 ... 0.000000 0.000000
|
|
||||||
25% 410.008918 2470.545974 ... 251.739008 1.000000
|
|
||||||
50% 640.387285 7612.072403 ... 503.148975 3.000000
|
|
||||||
75% 842.262193 9735.660463 ... 720.505705 4.239865
|
|
||||||
max 1199.729004 19881.482422 ... 1902.901978 6.000000
|
|
||||||
|
|
||||||
[8 rows x 7 columns]
|
|
||||||
>>> df[['Carrier', 'AvgTicketPrice', 'Cancelled']]
|
|
||||||
Carrier AvgTicketPrice Cancelled
|
|
||||||
0 Kibana Airlines 841.265642 False
|
|
||||||
1 Logstash Airways 882.982662 False
|
|
||||||
2 Logstash Airways 190.636904 False
|
|
||||||
3 Kibana Airlines 181.694216 True
|
|
||||||
4 Kibana Airlines 730.041778 False
|
|
||||||
... ... ... ...
|
|
||||||
13054 Logstash Airways 1080.446279 False
|
|
||||||
13055 Logstash Airways 646.612941 False
|
|
||||||
13056 Logstash Airways 997.751876 False
|
|
||||||
13057 JetBeats 1102.814465 False
|
|
||||||
13058 JetBeats 858.144337 False
|
|
||||||
|
|
||||||
[13059 rows x 3 columns]
|
|
||||||
|
|
||||||
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
|
|
||||||
AvgTicketPrice Cancelled ... dayOfWeek timestamp
|
|
||||||
8 960.869736 True ... 0 2018-01-01 12:09:35
|
|
||||||
26 975.812632 True ... 0 2018-01-01 15:38:32
|
|
||||||
311 946.358410 True ... 0 2018-01-01 11:51:12
|
|
||||||
651 975.383864 True ... 2 2018-01-03 21:13:17
|
|
||||||
950 907.836523 True ... 2 2018-01-03 05:14:51
|
|
||||||
|
|
||||||
[5 rows x 27 columns]
|
|
||||||
|
|
||||||
>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
|
|
||||||
DistanceKilometers AvgTicketPrice
|
|
||||||
sum 9.261629e+07 8.204365e+06
|
|
||||||
min 0.000000e+00 1.000205e+02
|
|
||||||
std 4.578263e+03 2.663867e+02
|
|
||||||
|
|
||||||
>>> df[['Carrier', 'Origin', 'Dest']].nunique()
|
|
||||||
Carrier 4
|
|
||||||
Origin 156
|
|
||||||
Dest 156
|
|
||||||
dtype: int64
|
|
||||||
|
|
||||||
>>> s = df.AvgTicketPrice * 2 + df.DistanceKilometers - df.FlightDelayMin
|
|
||||||
>>> s
|
|
||||||
0 18174.857422
|
|
||||||
1 10589.365723
|
|
||||||
2 381.273804
|
|
||||||
3 739.126221
|
|
||||||
4 14818.327637
|
|
||||||
...
|
|
||||||
13054 10219.474121
|
|
||||||
13055 8381.823975
|
|
||||||
13056 12661.157104
|
|
||||||
13057 20819.488281
|
|
||||||
13058 18315.431274
|
|
||||||
Length: 13059, dtype: float64
|
|
||||||
>>> print(s.es_info())
|
|
||||||
index_pattern: flights
|
|
||||||
Index:
|
|
||||||
index_field: _id
|
|
||||||
is_source_field: False
|
|
||||||
Mappings:
|
|
||||||
capabilities:
|
|
||||||
es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name
|
|
||||||
NaN script_field_None False double None float64 True True True script_field_None
|
|
||||||
Operations:
|
|
||||||
tasks: []
|
|
||||||
size: None
|
|
||||||
sort_params: None
|
|
||||||
_source: ['script_field_None']
|
|
||||||
body: {'script_fields': {'script_field_None': {'script': {'source': "(((doc['AvgTicketPrice'].value * 2) + doc['DistanceKilometers'].value) - doc['FlightDelayMin'].value)"}}}}
|
|
||||||
post_processing: []
|
|
||||||
|
|
||||||
>>> pd_df = ed.eland_to_pandas(df)
|
|
||||||
>>> pd_df.head()
|
|
||||||
AvgTicketPrice Cancelled ... dayOfWeek timestamp
|
|
||||||
0 841.265642 False ... 0 2018-01-01 00:00:00
|
|
||||||
1 882.982662 False ... 0 2018-01-01 18:27:00
|
|
||||||
2 190.636904 False ... 0 2018-01-01 17:11:14
|
|
||||||
3 181.694216 True ... 0 2018-01-01 10:33:28
|
|
||||||
4 730.041778 False ... 0 2018-01-01 05:13:00
|
|
||||||
|
|
||||||
[5 rows x 27 columns]
|
|
||||||
```
|
|
||||||
|
|
||||||
See [docs](https://eland.readthedocs.io/en/latest) and [demo_notebook.ipynb](https://eland.readthedocs.io/en/latest/examples/demo_notebook.html) for more examples.
|
|
||||||
|
|
||||||
## Where to get it
|
|
||||||
|
|
||||||
Eland can be installed from [PyPI](https://pypi.org/project/eland) via pip:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ python -m pip install eland
|
$ python -m pip install eland
|
||||||
@ -185,88 +37,129 @@ Eland can also be installed from [Conda Forge](https://anaconda.org/conda-forge/
|
|||||||
$ conda install -c conda-forge eland
|
$ conda install -c conda-forge eland
|
||||||
```
|
```
|
||||||
|
|
||||||
The [source code](https://github.com/elastic/eland) is currently available on GitHub.
|
### Supported Versions
|
||||||
|
|
||||||
## Versions and Compatibility
|
- Supports Python 3.6+ and Pandas 1.0.0+
|
||||||
|
- Supports Elasticsearch clusters that are 7.x+; 7.6 or later is recommended for all features to work.
|
||||||
|
|
||||||
### Python Version Support
|
### Connecting to Elasticsearch
|
||||||
|
|
||||||
Officially Python 3.6 and above.
|
Eland uses the [Elasticsearch low level client](https://elasticsearch-py.readthedocs.io) to connect to Elasticsearch.
|
||||||
|
This client supports a range of [connection options and authentication options](https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch).
|
||||||
|
|
||||||
eland depends on pandas version 1.0.0+.
|
You can pass either an instance of `elasticsearch.Elasticsearch` to Eland APIs
|
||||||
|
or a string containing the host to connect to:
|
||||||
|
|
||||||
### Elasticsearch Versions
|
```python
|
||||||
|
import eland as ed
|
||||||
|
|
||||||
eland is versioned like the Elastic stack (eland 7.5.1 is compatible with Elasticsearch 7.x up to 7.5.1)
|
# Connecting to an Elasticsearch instance running on 'localhost:9200'
|
||||||
|
df = ed.DataFrame("localhost:9200", es_index_pattern="flights")
|
||||||
|
|
||||||
A major version of the client is compatible with the same major version of Elasticsearch.
|
# Connecting to an Elastic Cloud instance
|
||||||
|
from elasticsearch import Elasticsearch
|
||||||
No compatibility assurances are given between different major versions of the client and Elasticsearch.
|
|
||||||
Major differences likely exist between major versions of Elasticsearch,
|
|
||||||
particularly around request and response object formats, but also around API urls and behaviour.
|
|
||||||
|
|
||||||
## Connecting to Elasticsearch
|
|
||||||
|
|
||||||
eland uses the [Elasticsearch low level client](https://elasticsearch-py.readthedocs.io/) to connect to Elasticsearch.
|
|
||||||
This client supports a range of [connection options and authentication mechanisms]
|
|
||||||
(https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch).
|
|
||||||
|
|
||||||
### Basic Connection Options
|
|
||||||
|
|
||||||
|
es = Elasticsearch(
|
||||||
|
cloud_id="cluster-name:...",
|
||||||
|
http_auth=("elastic", "<password>")
|
||||||
|
)
|
||||||
|
df = ed.DataFrame(es, es_index_pattern="flights")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## DataFrames in Eland
|
||||||
|
|
||||||
|
`eland.DataFrame` wraps an Elasticsearch index in a Pandas-like API
|
||||||
|
and defers all processing and filtering of data to Elasticsearch
|
||||||
|
instead of your local machine. This means you can process large
|
||||||
|
amounts of data within Elasticsearch from a Jupyter Notebook
|
||||||
|
without overloading your machine.
|
||||||
|
|
||||||
|
➤ [Eland DataFrame API documentation](https://eland.readthedocs.io/en/latest/reference/dataframe.html)
|
||||||
|
|
||||||
|
➤ [Advanced examples in a Jupyter Notebook](https://eland.readthedocs.io/en/latest/examples/demo_notebook.html)
|
||||||
|
|
||||||
|
```python
|
||||||
>>> import eland as ed
|
>>> import eland as ed
|
||||||
|
|
||||||
>>> # Connect to flights index via localhost Elasticsearch node
|
>>> # Connect to 'flights' index via localhost Elasticsearch node
|
||||||
>>> ed.DataFrame('localhost', 'flights')
|
>>> df = ed.DataFrame('localhost:9200', 'flights')
|
||||||
|
|
||||||
>>> # Connect to flights index via localhost Elasticsearch node on port 9200
|
# eland.DataFrame instance has the same API as pandas.DataFrame
|
||||||
>>> ed.DataFrame('localhost:9200', 'flights')
|
# except all data is in Elasticsearch. See .info() memory usage.
|
||||||
|
>>> df.head()
|
||||||
|
AvgTicketPrice Cancelled ... dayOfWeek timestamp
|
||||||
|
0 841.265642 False ... 0 2018-01-01 00:00:00
|
||||||
|
1 882.982662 False ... 0 2018-01-01 18:27:00
|
||||||
|
2 190.636904 False ... 0 2018-01-01 17:11:14
|
||||||
|
3 181.694216 True ... 0 2018-01-01 10:33:28
|
||||||
|
4 730.041778 False ... 0 2018-01-01 05:13:00
|
||||||
|
|
||||||
>>> # Connect to flights index via localhost Elasticsearch node on port 9200 with <user>:<password> credentials
|
[5 rows x 27 columns]
|
||||||
>>> ed.DataFrame('http://<user>:<password>@localhost:9200', 'flights')
|
|
||||||
|
|
||||||
>>> # Connect to flights index via ssl
|
>>> df.info()
|
||||||
>>> es = Elasticsearch(
|
<class 'eland.dataframe.DataFrame'>
|
||||||
'https://<user>:<password>@localhost:443',
|
Index: 13059 entries, 0 to 13058
|
||||||
use_ssl=True,
|
Data columns (total 27 columns):
|
||||||
verify_certs=True,
|
# Column Non-Null Count Dtype
|
||||||
ca_certs='/path/to/ca.crt'
|
--- ------ -------------- -----
|
||||||
|
0 AvgTicketPrice 13059 non-null float64
|
||||||
|
1 Cancelled 13059 non-null bool
|
||||||
|
2 Carrier 13059 non-null object
|
||||||
|
...
|
||||||
|
24 OriginWeather 13059 non-null object
|
||||||
|
25 dayOfWeek 13059 non-null int64
|
||||||
|
26 timestamp 13059 non-null datetime64[ns]
|
||||||
|
dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)
|
||||||
|
memory usage: 80.0 bytes
|
||||||
|
|
||||||
|
# Filtering of rows using comparisons
|
||||||
|
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
|
||||||
|
AvgTicketPrice Cancelled ... dayOfWeek timestamp
|
||||||
|
8 960.869736 True ... 0 2018-01-01 12:09:35
|
||||||
|
26 975.812632 True ... 0 2018-01-01 15:38:32
|
||||||
|
311 946.358410 True ... 0 2018-01-01 11:51:12
|
||||||
|
651 975.383864 True ... 2 2018-01-03 21:13:17
|
||||||
|
950 907.836523 True ... 2 2018-01-03 05:14:51
|
||||||
|
|
||||||
|
[5 rows x 27 columns]
|
||||||
|
|
||||||
|
# Running aggregations across an index
|
||||||
|
>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
|
||||||
|
DistanceKilometers AvgTicketPrice
|
||||||
|
sum 9.261629e+07 8.204365e+06
|
||||||
|
min 0.000000e+00 1.000205e+02
|
||||||
|
std 4.578263e+03 2.663867e+02
|
||||||
|
```
|
||||||
|
|
||||||
|
## Machine Learning in Eland
|
||||||
|
|
||||||
|
Eland allows transforming trained models from scikit-learn, XGBoost, and LightGBM libraries
|
||||||
|
to be serialized and used as an inference model in Elasticsearch.
|
||||||
|
|
||||||
|
➤ [Eland Machine Learning API documentation](https://eland.readthedocs.io/en/latest/reference/ml.html)
|
||||||
|
|
||||||
|
➤ [Read more about Machine Learning in Elasticsearch](https://www.elastic.co/guide/en/machine-learning/current/ml-getting-started.html)
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> from xgboost import XGBClassifier
|
||||||
|
>>> from eland.ml import ImportedMLModel
|
||||||
|
|
||||||
|
# Train and exercise an XGBoost ML model locally
|
||||||
|
>>> xgb_model = XGBClassifier(booster="gbtree")
|
||||||
|
>>> xgb_model.fit(training_data[0], training_data[1])
|
||||||
|
|
||||||
|
>>> xgb_model.predict(training_data[0])
|
||||||
|
[0 1 1 0 1 0 0 0 1 0]
|
||||||
|
|
||||||
|
# Import the model into Elasticsearch
|
||||||
|
>>> es_model = ImportedMLModel(
|
||||||
|
es_client="localhost:9200",
|
||||||
|
model_id="xgb-classifier",
|
||||||
|
model=xgb_model,
|
||||||
|
feature_names=["f0", "f1", "f2", "f3", "f4"],
|
||||||
)
|
)
|
||||||
>>> ed.DataFrame(es, 'flights')
|
|
||||||
|
|
||||||
>>> # Connect to flights index via ssl using Urllib3HttpConnection options
|
# Exercise the ML model in Elasticsearch with the training data
|
||||||
>>> es = Elasticsearch(
|
>>> es_model.predict(training_data[0])
|
||||||
['localhost:443', 'other_host:443'],
|
[0 1 1 0 1 0 0 0 1 0]
|
||||||
use_ssl=True,
|
|
||||||
verify_certs=True,
|
|
||||||
ca_certs='/path/to/CA_certs',
|
|
||||||
client_cert='/path/to/clientcert.pem',
|
|
||||||
client_key='/path/to/clientkey.pem'
|
|
||||||
)
|
|
||||||
>>> ed.DataFrame(es, 'flights')
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Connecting to an Elasticsearch Cloud Cluster
|
|
||||||
|
|
||||||
```
|
|
||||||
>>> import eland as ed
|
|
||||||
>>> from elasticsearch import Elasticsearch
|
|
||||||
|
|
||||||
>>> es = Elasticsearch(cloud_id="<cloud_id>", http_auth=('<user>','<password>'))
|
|
||||||
|
|
||||||
>>> es.info()
|
|
||||||
{'name': 'instance-0000000000', 'cluster_name': 'bf900cfce5684a81bca0be0cce5913bc', 'cluster_uuid': 'xLPvrV3jQNeadA7oM4l1jA', 'version': {'number': '7.4.2', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': '2f90bbf7b93631e52bafb59b3b049cb44ec25e96', 'build_date': '2019-10-28T20:40:44.881551Z', 'build_snapshot': False, 'lucene_version': '8.2.0', 'minimum_wire_compatibility_version': '6.8.0', 'minimum_index_compatibility_version': '6.0.0-beta1'}, 'tagline': 'You Know, for Search'}
|
|
||||||
|
|
||||||
>>> df = ed.read_es(es, 'reviews')
|
|
||||||
```
|
|
||||||
|
|
||||||
## Why eland?
|
|
||||||
|
|
||||||
Naming is difficult, but as we had to call it something:
|
|
||||||
|
|
||||||
* eland: elastic and data
|
|
||||||
* eland: 'Elk/Moose' in Dutch (Alces alces)
|
|
||||||
* [Elandsgracht](https://goo.gl/maps/3hGBMqeGRcsBJfKx8): Amsterdam street near Elastic's Amsterdam office
|
|
||||||
|
|
||||||
[Pronunciation](https://commons.wikimedia.org/wiki/File:Nl-eland.ogg): /ˈeːlɑnt/
|
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
=====================
|
=====================
|
||||||
Contributing to eland
|
Contributing to Eland
|
||||||
=====================
|
=====================
|
||||||
|
|
||||||
Eland is an open source project and we love to receive contributions
|
Eland is an open source project and we love to receive contributions
|
||||||
|
@ -58,4 +58,3 @@ the ``pandas.DataFrame`` API. This resolves some of the issues above as:
|
|||||||
* Creating a new ``eland.DataFrame`` API gives us full flexibility in terms of implementation. However,
|
* Creating a new ``eland.DataFrame`` API gives us full flexibility in terms of implementation. However,
|
||||||
it does create a large amount of work which may duplicate a lot of the ``pandas`` code - for example,
|
it does create a large amount of work which may duplicate a lot of the ``pandas`` code - for example,
|
||||||
printing objects etc. - this creates maintenance issues etc.
|
printing objects etc. - this creates maintenance issues etc.
|
||||||
|
|
@ -8,3 +8,4 @@ Development
|
|||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
contributing.rst
|
contributing.rst
|
||||||
|
implementation.rst
|
||||||
|
@ -32,7 +32,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Compare eland DataFrame vs pandas DataFrame"
|
"## Compare Eland DataFrame vs pandas DataFrame"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -52,7 +52,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"ed_flights = ed.read_es('localhost', 'flights')"
|
"ed_flights = ed.DataFrame('localhost', 'flights')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -38,7 +38,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"df = ed.read_csv(\"data/online-retail.csv.gz\",\n",
|
"df = ed.csv_to_eland(\"data/online-retail.csv.gz\",\n",
|
||||||
" es_client='localhost', \n",
|
" es_client='localhost', \n",
|
||||||
" es_dest_index='online-retail', \n",
|
" es_dest_index='online-retail', \n",
|
||||||
" es_if_exists='replace', \n",
|
" es_if_exists='replace', \n",
|
||||||
@ -390,7 +390,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### selecting columns\n",
|
"### Selecting columns\n",
|
||||||
"\n",
|
"\n",
|
||||||
"you can also pass a list of columns to select columns from the data frame in a specified order."
|
"you can also pass a list of columns to select columns from the data frame in a specified order."
|
||||||
]
|
]
|
||||||
|
@ -1,10 +0,0 @@
|
|||||||
.. _implementation:
|
|
||||||
|
|
||||||
====================
|
|
||||||
Implementation Notes
|
|
||||||
====================
|
|
||||||
|
|
||||||
.. toctree::
|
|
||||||
:maxdepth: 2
|
|
||||||
|
|
||||||
details.rst
|
|
@ -1,5 +1,3 @@
|
|||||||
.. eland documentation master file, created by
|
|
||||||
|
|
||||||
.. module:: eland
|
.. module:: eland
|
||||||
|
|
||||||
**************************************************************
|
**************************************************************
|
||||||
@ -11,10 +9,10 @@ Eland: DataFrames and Machine Learning backed by Elasticsearch
|
|||||||
**Useful links**:
|
**Useful links**:
|
||||||
`Source Repository <https://github.com/elastic/eland>`__ |
|
`Source Repository <https://github.com/elastic/eland>`__ |
|
||||||
`Issues & Ideas <https://github.com/elastic/eland/issues>`__ |
|
`Issues & Ideas <https://github.com/elastic/eland/issues>`__ |
|
||||||
`Q&A Support <https://discuss.elastic.co>`__ |
|
`Q&A Support <https://discuss.elastic.co>`__
|
||||||
|
|
||||||
Eland is a Python Elasticsearch client for exploring and analyzing data
|
Eland is a Python Elasticsearch client for exploring and analyzing data
|
||||||
residing in Elasticsearch with a familiar Pandas-compatible API.
|
in Elasticsearch with a familiar Pandas-compatible API.
|
||||||
|
|
||||||
Where possible the package uses existing Python APIs and data structures to make it easy to switch between numpy,
|
Where possible the package uses existing Python APIs and data structures to make it easy to switch between numpy,
|
||||||
pandas, scikit-learn to their Elasticsearch powered equivalents. In general, the data resides in Elasticsearch and
|
pandas, scikit-learn to their Elasticsearch powered equivalents. In general, the data resides in Elasticsearch and
|
||||||
@ -48,30 +46,27 @@ If you're new to Elasticsearch we recommend `reading the documentation <https://
|
|||||||
:hidden:
|
:hidden:
|
||||||
|
|
||||||
reference/index
|
reference/index
|
||||||
implementation/index
|
reference/ml
|
||||||
development/index
|
|
||||||
examples/index
|
examples/index
|
||||||
|
development/index
|
||||||
|
|
||||||
* :doc:`reference/index`
|
* :doc:`reference/index`
|
||||||
|
|
||||||
* :doc:`reference/supported_apis`
|
* :doc:`reference/supported_apis`
|
||||||
* :doc:`reference/io`
|
|
||||||
* :doc:`reference/general_utility_functions`
|
|
||||||
* :doc:`reference/dataframe`
|
* :doc:`reference/dataframe`
|
||||||
* :doc:`reference/series`
|
* :doc:`reference/series`
|
||||||
* :doc:`reference/indexing`
|
|
||||||
* :doc:`reference/ml`
|
* :doc:`reference/ml`
|
||||||
|
* :doc:`reference/indexing`
|
||||||
* :doc:`implementation/index`
|
* :doc:`reference/general_utility_functions`
|
||||||
|
* :doc:`reference/io`
|
||||||
* :doc:`implementation/details`
|
|
||||||
|
|
||||||
* :doc:`development/index`
|
* :doc:`development/index`
|
||||||
|
|
||||||
* :doc:`development/contributing`
|
* :doc:`development/contributing`
|
||||||
|
* :doc:`development/implementation`
|
||||||
|
|
||||||
* :doc:`examples/index`
|
* :doc:`examples/index`
|
||||||
|
|
||||||
* :doc:`examples/demo_notebook`
|
* :doc:`examples/demo_notebook`
|
||||||
|
* :doc:`examples/introduction_to_eland_webinar`
|
||||||
* :doc:`examples/online_retail_analysis`
|
* :doc:`examples/online_retail_analysis`
|
||||||
|
6
docs/source/reference/api/eland.DataFrame.ndim.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.ndim.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.ndim
|
||||||
|
====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: DataFrame.ndim
|
6
docs/source/reference/api/eland.DataFrame.size.rst
Normal file
6
docs/source/reference/api/eland.DataFrame.size.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.DataFrame.size
|
||||||
|
====================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: DataFrame.size
|
6
docs/source/reference/api/eland.Series.dtype.rst
Normal file
6
docs/source/reference/api/eland.Series.dtype.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.dtype
|
||||||
|
==================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.dtype
|
6
docs/source/reference/api/eland.Series.dtypes.rst
Normal file
6
docs/source/reference/api/eland.Series.dtypes.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.dtypes
|
||||||
|
===================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.dtypes
|
6
docs/source/reference/api/eland.Series.ndim.rst
Normal file
6
docs/source/reference/api/eland.Series.ndim.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.ndim
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.ndim
|
6
docs/source/reference/api/eland.Series.size.rst
Normal file
6
docs/source/reference/api/eland.Series.size.rst
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
eland.Series.size
|
||||||
|
=================
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. autoattribute:: Series.size
|
@ -0,0 +1,6 @@
|
|||||||
|
eland.ml.MLModel.delete_model
|
||||||
|
=============================
|
||||||
|
|
||||||
|
.. currentmodule:: eland.ml
|
||||||
|
|
||||||
|
.. automethod:: MLModel.delete_model
|
@ -0,0 +1,6 @@
|
|||||||
|
eland.ml.MLModel.exists_model
|
||||||
|
=============================
|
||||||
|
|
||||||
|
.. currentmodule:: eland.ml
|
||||||
|
|
||||||
|
.. automethod:: MLModel.exists_model
|
@ -14,8 +14,6 @@ Constructor
|
|||||||
|
|
||||||
Attributes and underlying data
|
Attributes and underlying data
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
**Axes**
|
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
|
|
||||||
@ -26,6 +24,8 @@ Attributes and underlying data
|
|||||||
DataFrame.values
|
DataFrame.values
|
||||||
DataFrame.empty
|
DataFrame.empty
|
||||||
DataFrame.shape
|
DataFrame.shape
|
||||||
|
DataFrame.ndim
|
||||||
|
DataFrame.size
|
||||||
|
|
||||||
Indexing, iteration
|
Indexing, iteration
|
||||||
~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
.. _api:
|
.. _api:
|
||||||
|
|
||||||
=============
|
=============
|
||||||
API reference
|
API Reference
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This page gives an overview of all public eland objects, functions and
|
This page gives an overview of all public eland objects, functions and
|
||||||
@ -11,9 +11,9 @@ methods. All classes and functions exposed in ``eland.*`` namespace are public.
|
|||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|
||||||
supported_apis
|
supported_apis
|
||||||
io
|
|
||||||
general_utility_functions
|
|
||||||
dataframe
|
dataframe
|
||||||
series
|
series
|
||||||
indexing
|
|
||||||
ml
|
ml
|
||||||
|
indexing
|
||||||
|
general_utility_functions
|
||||||
|
io
|
||||||
|
@ -13,9 +13,10 @@ To use the Elastic Stack machine learning features, you must have the appropriat
|
|||||||
learning node in your Elasticsearch cluster. If Elastic Stack security features are enabled, you must also ensure
|
learning node in your Elasticsearch cluster. If Elastic Stack security features are enabled, you must also ensure
|
||||||
your users have the necessary privileges.
|
your users have the necessary privileges.
|
||||||
|
|
||||||
The fastest way to get started with machine learning features is to start a free 14-day trial of Elasticsearch Service in the cloud.
|
The fastest way to get started with machine learning features is to
|
||||||
|
`start a free 14-day trial of Elastic Cloud <https://www.elastic.co/cloud/elasticsearch-service/signup>`_.
|
||||||
|
|
||||||
See https://www.elastic.co/guide/en/machine-learning/current/setup.html and other documentation for more detail.
|
See `Elasticsearch Machine Learning documentation <https://www.elastic.co/guide/en/machine-learning/current/setup.html>`_ for more details.
|
||||||
|
|
||||||
ImportedMLModel
|
ImportedMLModel
|
||||||
~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~
|
||||||
@ -28,10 +29,17 @@ Constructor
|
|||||||
|
|
||||||
ImportedMLModel
|
ImportedMLModel
|
||||||
|
|
||||||
Learning API
|
Predictions
|
||||||
^^^^^^^^^^^^
|
^^^^^^^^^^^
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
|
|
||||||
ImportedMLModel.predict
|
ImportedMLModel.predict
|
||||||
|
|
||||||
|
Manage Models
|
||||||
|
^^^^^^^^^^^^^
|
||||||
|
.. autosummary::
|
||||||
|
:toctree: api/
|
||||||
|
|
||||||
|
MLModel.exists_model
|
||||||
|
MLModel.delete_model
|
||||||
|
@ -14,15 +14,17 @@ Constructor
|
|||||||
|
|
||||||
Attributes and underlying data
|
Attributes and underlying data
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
**Axes**
|
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: api/
|
:toctree: api/
|
||||||
|
|
||||||
Series.index
|
Series.index
|
||||||
|
Series.dtype
|
||||||
|
Series.dtypes
|
||||||
Series.shape
|
Series.shape
|
||||||
Series.name
|
Series.name
|
||||||
Series.empty
|
Series.empty
|
||||||
|
Series.ndim
|
||||||
|
Series.size
|
||||||
|
|
||||||
Indexing, iteration
|
Indexing, iteration
|
||||||
~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~
|
||||||
|
144
setup.py
144
setup.py
@ -32,152 +32,34 @@ CLASSIFIERS = [
|
|||||||
"License :: OSI Approved :: Apache Software License",
|
"License :: OSI Approved :: Apache Software License",
|
||||||
"Environment :: Console",
|
"Environment :: Console",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
|
"Intended Audience :: Developers",
|
||||||
"Intended Audience :: Science/Research",
|
"Intended Audience :: Science/Research",
|
||||||
|
"Operating System :: OS Independent",
|
||||||
"Programming Language :: Python",
|
"Programming Language :: Python",
|
||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3 :: Only",
|
||||||
"Programming Language :: Python :: 3.6",
|
"Programming Language :: Python :: 3.6",
|
||||||
"Programming Language :: Python :: 3.7",
|
"Programming Language :: Python :: 3.7",
|
||||||
"Programming Language :: Python :: 3.8",
|
"Programming Language :: Python :: 3.8",
|
||||||
|
"Programming Language :: Python :: 3.9",
|
||||||
"Topic :: Scientific/Engineering",
|
"Topic :: Scientific/Engineering",
|
||||||
]
|
]
|
||||||
|
|
||||||
LONG_DESCRIPTION = """
|
# Remove all raw HTML from README for long description
|
||||||
eland is a Elasticsearch client Python package to analyse, explore and manipulate data that resides in Elasticsearch.
|
with open(path.join(here, "README.md"), "r", "utf-8") as f:
|
||||||
Where possible the package uses existing Python APIs and data structures to make it easy to switch between numpy,
|
lines = f.read().split("\n")
|
||||||
pandas, scikit-learn to their Elasticsearch powered equivalents. In general, the data resides in Elasticsearch and
|
last_html_index = 0
|
||||||
not in memory, which allows eland to access large datasets stored in Elasticsearch.
|
for i, line in enumerate(lines):
|
||||||
|
if line == "</p>":
|
||||||
|
last_html_index = i + 1
|
||||||
|
long_description = "\n".join(lines[last_html_index:])
|
||||||
|
|
||||||
For example, to explore data in a large Elasticsearch index, simply create an eland DataFrame from an Elasticsearch
|
|
||||||
index pattern, and explore using an API that mirrors a subset of the pandas.DataFrame API:
|
|
||||||
|
|
||||||
```
|
|
||||||
>>> import eland as ed
|
|
||||||
|
|
||||||
>>> # Connect to 'flights' index via localhost Elasticsearch node
|
|
||||||
>>> df = ed.DataFrame('localhost:9200', 'flights')
|
|
||||||
|
|
||||||
>>> df.head()
|
|
||||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
|
||||||
0 841.265642 False Kibana Airlines ... Sunny 0 2018-01-01 00:00:00
|
|
||||||
1 882.982662 False Logstash Airways ... Clear 0 2018-01-01 18:27:00
|
|
||||||
2 190.636904 False Logstash Airways ... Rain 0 2018-01-01 17:11:14
|
|
||||||
3 181.694216 True Kibana Airlines ... Thunder & Lightning 0 2018-01-01 10:33:28
|
|
||||||
4 730.041778 False Kibana Airlines ... Damaging Wind 0 2018-01-01 05:13:00
|
|
||||||
|
|
||||||
[5 rows x 27 columns]
|
|
||||||
|
|
||||||
>>> df.describe()
|
|
||||||
AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin FlightTimeHour FlightTimeMin dayOfWeek
|
|
||||||
count 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000
|
|
||||||
mean 628.253689 7092.142457 4406.853010 47.335171 8.518797 511.127842 2.835975
|
|
||||||
std 266.386661 4578.263193 2844.800855 96.743006 5.579019 334.741135 1.939365
|
|
||||||
min 100.020531 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
|
|
||||||
25% 410.008918 2470.545974 1535.126118 0.000000 4.194976 251.738513 1.000000
|
|
||||||
50% 640.362667 7612.072403 4729.922470 0.000000 8.385816 503.148975 3.000000
|
|
||||||
75% 842.254990 9735.082407 6049.459005 15.000000 12.009396 720.534532 4.141221
|
|
||||||
max 1199.729004 19881.482422 12353.780273 360.000000 31.715034 1902.901978 6.000000
|
|
||||||
|
|
||||||
>>> df[['Carrier', 'AvgTicketPrice', 'Cancelled']]
|
|
||||||
Carrier AvgTicketPrice Cancelled
|
|
||||||
0 Kibana Airlines 841.265642 False
|
|
||||||
1 Logstash Airways 882.982662 False
|
|
||||||
2 Logstash Airways 190.636904 False
|
|
||||||
3 Kibana Airlines 181.694216 True
|
|
||||||
4 Kibana Airlines 730.041778 False
|
|
||||||
... ... ... ...
|
|
||||||
13054 Logstash Airways 1080.446279 False
|
|
||||||
13055 Logstash Airways 646.612941 False
|
|
||||||
13056 Logstash Airways 997.751876 False
|
|
||||||
13057 JetBeats 1102.814465 False
|
|
||||||
13058 JetBeats 858.144337 False
|
|
||||||
|
|
||||||
[13059 rows x 3 columns]
|
|
||||||
|
|
||||||
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
|
|
||||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
|
||||||
8 960.869736 True Kibana Airlines ... Heavy Fog 0 2018-01-01 12:09:35
|
|
||||||
26 975.812632 True Kibana Airlines ... Rain 0 2018-01-01 15:38:32
|
|
||||||
311 946.358410 True Kibana Airlines ... Heavy Fog 0 2018-01-01 11:51:12
|
|
||||||
651 975.383864 True Kibana Airlines ... Rain 2 2018-01-03 21:13:17
|
|
||||||
950 907.836523 True Kibana Airlines ... Thunder & Lightning 2 2018-01-03 05:14:51
|
|
||||||
|
|
||||||
[5 rows x 27 columns]
|
|
||||||
|
|
||||||
>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
|
|
||||||
DistanceKilometers AvgTicketPrice
|
|
||||||
sum 9.261629e+07 8.204365e+06
|
|
||||||
min 0.000000e+00 1.000205e+02
|
|
||||||
std 4.578263e+03 2.663867e+02
|
|
||||||
|
|
||||||
>>> df[['Carrier', 'Origin', 'Dest']].nunique()
|
|
||||||
Carrier 4
|
|
||||||
Origin 156
|
|
||||||
Dest 156
|
|
||||||
dtype: int64
|
|
||||||
|
|
||||||
>>> s = df.AvgTicketPrice * 2 + df.DistanceKilometers - df.FlightDelayMin
|
|
||||||
>>> s
|
|
||||||
0 18174.857422
|
|
||||||
1 10589.365723
|
|
||||||
2 381.273804
|
|
||||||
3 739.126221
|
|
||||||
4 14818.327637
|
|
||||||
...
|
|
||||||
13054 10219.474121
|
|
||||||
13055 8381.823975
|
|
||||||
13056 12661.157104
|
|
||||||
13057 20819.488281
|
|
||||||
13058 18315.431274
|
|
||||||
Length: 13059, dtype: float64
|
|
||||||
|
|
||||||
>>> print(s.info_es())
|
|
||||||
index_pattern: flights
|
|
||||||
Index:
|
|
||||||
index_field: _id
|
|
||||||
is_source_field: False
|
|
||||||
Mappings:
|
|
||||||
capabilities:
|
|
||||||
es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name
|
|
||||||
NaN script_field_None False double None float64 True True True script_field_None
|
|
||||||
Operations:
|
|
||||||
tasks: []
|
|
||||||
size: None
|
|
||||||
sort_params: None
|
|
||||||
_source: ['script_field_None']
|
|
||||||
body: {'script_fields': {'script_field_None': {'script': {'source': "(((doc['AvgTicketPrice'].value * 2) + doc['DistanceKilometers'].value) - doc['FlightDelayMin'].value)"}}}}
|
|
||||||
post_processing: []
|
|
||||||
|
|
||||||
>>> pd_df = ed.eland_to_pandas(df)
|
|
||||||
>>> pd_df.head()
|
|
||||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
|
||||||
0 841.265642 False Kibana Airlines ... Sunny 0 2018-01-01 00:00:00
|
|
||||||
1 882.982662 False Logstash Airways ... Clear 0 2018-01-01 18:27:00
|
|
||||||
2 190.636904 False Logstash Airways ... Rain 0 2018-01-01 17:11:14
|
|
||||||
3 181.694216 True Kibana Airlines ... Thunder & Lightning 0 2018-01-01 10:33:28
|
|
||||||
4 730.041778 False Kibana Airlines ... Damaging Wind 0 2018-01-01 05:13:00
|
|
||||||
|
|
||||||
[5 rows x 27 columns]
|
|
||||||
```
|
|
||||||
|
|
||||||
See [docs](https://eland.readthedocs.io/en/latest) and [demo_notebook.ipynb](https://eland.readthedocs.io/en/latest/examples/demo_notebook.html) for more examples.
|
|
||||||
|
|
||||||
## Where to get it
|
|
||||||
The source code is currently hosted on GitHub at:
|
|
||||||
https://github.com/elastic/eland
|
|
||||||
|
|
||||||
Binary installers for the latest released version are available at the [Python
|
|
||||||
package index](https://pypi.org/project/eland).
|
|
||||||
|
|
||||||
```sh
|
|
||||||
pip install eland
|
|
||||||
```
|
|
||||||
"""
|
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name=about["__title__"],
|
name=about["__title__"],
|
||||||
version=about["__version__"],
|
version=about["__version__"],
|
||||||
description=about["__description__"],
|
description=about["__description__"],
|
||||||
long_description=LONG_DESCRIPTION,
|
long_description=long_description,
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
url=about["__url__"],
|
url=about["__url__"],
|
||||||
author=about["__author__"],
|
author=about["__author__"],
|
||||||
|
@ -21,8 +21,10 @@ import re
|
|||||||
import eland
|
import eland
|
||||||
import pandas
|
import pandas
|
||||||
import inspect
|
import inspect
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
api_docs_dir = Path(__file__).absolute().parent.parent / "docs/source/reference/api"
|
||||||
is_supported = []
|
is_supported = []
|
||||||
supported_attr = re.compile(
|
supported_attr = re.compile(
|
||||||
r"(?:[a-zA-Z0-9][a-zA-Z0-9_]*|__[a-zA-Z0-9][a-zA-Z0-9_]*__)"
|
r"(?:[a-zA-Z0-9][a-zA-Z0-9_]*|__[a-zA-Z0-9][a-zA-Z0-9_]*__)"
|
||||||
@ -68,6 +70,23 @@ def main():
|
|||||||
)
|
)
|
||||||
print(row_delimiter)
|
print(row_delimiter)
|
||||||
|
|
||||||
|
for attr, supported in is_supported:
|
||||||
|
if supported and "__" not in attr:
|
||||||
|
attr = attr.replace("ed.", "eland.").rstrip("()")
|
||||||
|
attr_doc_path = api_docs_dir / f"{attr}.rst"
|
||||||
|
if not attr_doc_path.exists():
|
||||||
|
with attr_doc_path.open(mode="w") as f:
|
||||||
|
f.truncate()
|
||||||
|
f.write(
|
||||||
|
f"""{attr}
|
||||||
|
{'=' * len(attr)}
|
||||||
|
|
||||||
|
.. currentmodule:: eland
|
||||||
|
|
||||||
|
.. automethod:: { attr.replace('eland.', '') }
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user