mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Improve docs (#113)
* Adding more examples * Adding more examples to README.md + pypi first page. * Updated README.md
This commit is contained in:
parent
86c51dc267
commit
1914644f93
181
README.md
181
README.md
@ -52,26 +52,110 @@ index pattern, and explore using an API that mirrors a subset of the pandas.Data
|
||||
```
|
||||
>>> import eland as ed
|
||||
|
||||
>>> df = ed.read_es('http://localhost:9200', 'reviews')
|
||||
>>> # Connect to 'flights' index via localhost Elasticsearch node
|
||||
>>> df = ed.DataFrame('localhost:9200', 'flights')
|
||||
|
||||
>>> df.head()
|
||||
reviewerId vendorId rating date
|
||||
0 0 0 5 2006-04-07 17:08
|
||||
1 1 1 5 2006-05-04 12:16
|
||||
2 2 2 4 2006-04-21 12:26
|
||||
3 3 3 5 2006-04-18 15:48
|
||||
4 3 4 5 2006-04-18 15:49
|
||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
||||
0 841.265642 False Kibana Airlines ... Sunny 0 2018-01-01 00:00:00
|
||||
1 882.982662 False Logstash Airways ... Clear 0 2018-01-01 18:27:00
|
||||
2 190.636904 False Logstash Airways ... Rain 0 2018-01-01 17:11:14
|
||||
3 181.694216 True Kibana Airlines ... Thunder & Lightning 0 2018-01-01 10:33:28
|
||||
4 730.041778 False Kibana Airlines ... Damaging Wind 0 2018-01-01 05:13:00
|
||||
|
||||
[5 rows x 27 columns]
|
||||
|
||||
>>> df.describe()
|
||||
reviewerId vendorId rating
|
||||
count 578805.000000 578805.000000 578805.000000
|
||||
mean 174124.098437 60.645267 4.679671
|
||||
std 116951.972209 54.488053 0.800891
|
||||
min 0.000000 0.000000 0.000000
|
||||
25% 70043.000000 20.000000 5.000000
|
||||
50% 161052.000000 44.000000 5.000000
|
||||
75% 272697.000000 83.000000 5.000000
|
||||
max 400140.000000 246.000000 5.000000
|
||||
AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin FlightTimeHour FlightTimeMin dayOfWeek
|
||||
count 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000
|
||||
mean 628.253689 7092.142457 4406.853010 47.335171 8.518797 511.127842 2.835975
|
||||
std 266.386661 4578.263193 2844.800855 96.743006 5.579019 334.741135 1.939365
|
||||
min 100.020531 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
|
||||
25% 410.008918 2470.545974 1535.126118 0.000000 4.194976 251.738513 1.000000
|
||||
50% 640.362667 7612.072403 4729.922470 0.000000 8.385816 503.148975 3.000000
|
||||
75% 842.254990 9735.082407 6049.459005 15.000000 12.009396 720.534532 4.141221
|
||||
max 1199.729004 19881.482422 12353.780273 360.000000 31.715034 1902.901978 6.000000
|
||||
|
||||
>>> df[['Carrier', 'AvgTicketPrice', 'Cancelled']]
|
||||
Carrier AvgTicketPrice Cancelled
|
||||
0 Kibana Airlines 841.265642 False
|
||||
1 Logstash Airways 882.982662 False
|
||||
2 Logstash Airways 190.636904 False
|
||||
3 Kibana Airlines 181.694216 True
|
||||
4 Kibana Airlines 730.041778 False
|
||||
... ... ... ...
|
||||
13054 Logstash Airways 1080.446279 False
|
||||
13055 Logstash Airways 646.612941 False
|
||||
13056 Logstash Airways 997.751876 False
|
||||
13057 JetBeats 1102.814465 False
|
||||
13058 JetBeats 858.144337 False
|
||||
|
||||
[13059 rows x 3 columns]
|
||||
|
||||
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
|
||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
||||
8 960.869736 True Kibana Airlines ... Heavy Fog 0 2018-01-01 12:09:35
|
||||
26 975.812632 True Kibana Airlines ... Rain 0 2018-01-01 15:38:32
|
||||
311 946.358410 True Kibana Airlines ... Heavy Fog 0 2018-01-01 11:51:12
|
||||
651 975.383864 True Kibana Airlines ... Rain 2 2018-01-03 21:13:17
|
||||
950 907.836523 True Kibana Airlines ... Thunder & Lightning 2 2018-01-03 05:14:51
|
||||
|
||||
[5 rows x 27 columns]
|
||||
|
||||
>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
|
||||
DistanceKilometers AvgTicketPrice
|
||||
sum 9.261629e+07 8.204365e+06
|
||||
min 0.000000e+00 1.000205e+02
|
||||
std 4.578263e+03 2.663867e+02
|
||||
|
||||
>>> df[['Carrier', 'Origin', 'Dest']].nunique()
|
||||
Carrier 4
|
||||
Origin 156
|
||||
Dest 156
|
||||
dtype: int64
|
||||
|
||||
>>> s = df.AvgTicketPrice * 2 + df.DistanceKilometers - df.FlightDelayMin
|
||||
>>> s
|
||||
0 18174.857422
|
||||
1 10589.365723
|
||||
2 381.273804
|
||||
3 739.126221
|
||||
4 14818.327637
|
||||
...
|
||||
13054 10219.474121
|
||||
13055 8381.823975
|
||||
13056 12661.157104
|
||||
13057 20819.488281
|
||||
13058 18315.431274
|
||||
Length: 13059, dtype: float64
|
||||
|
||||
>>> print(s.info_es())
|
||||
index_pattern: flights
|
||||
Index:
|
||||
index_field: _id
|
||||
is_source_field: False
|
||||
Mappings:
|
||||
capabilities:
|
||||
es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name
|
||||
NaN script_field_None False double None float64 True True True script_field_None
|
||||
Operations:
|
||||
tasks: []
|
||||
size: None
|
||||
sort_params: None
|
||||
_source: ['script_field_None']
|
||||
body: {'script_fields': {'script_field_None': {'script': {'source': "(((doc['AvgTicketPrice'].value * 2) + doc['DistanceKilometers'].value) - doc['FlightDelayMin'].value)"}}}}
|
||||
post_processing: []
|
||||
|
||||
>>> pd_df = ed.eland_to_pandas(df)
|
||||
>>> pd_df.head()
|
||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
||||
0 841.265642 False Kibana Airlines ... Sunny 0 2018-01-01 00:00:00
|
||||
1 882.982662 False Logstash Airways ... Clear 0 2018-01-01 18:27:00
|
||||
2 190.636904 False Logstash Airways ... Rain 0 2018-01-01 17:11:14
|
||||
3 181.694216 True Kibana Airlines ... Thunder & Lightning 0 2018-01-01 10:33:28
|
||||
4 730.041778 False Kibana Airlines ... Damaging Wind 0 2018-01-01 05:13:00
|
||||
|
||||
[5 rows x 27 columns]
|
||||
```
|
||||
|
||||
See [docs](https://eland.readthedocs.io/en/latest) and [demo_notebook.ipynb](https://eland.readthedocs.io/en/latest/examples/demo_notebook.html) for more examples.
|
||||
@ -87,28 +171,6 @@ package index](https://pypi.org/project/eland).
|
||||
pip install eland
|
||||
```
|
||||
|
||||
## Development Setup
|
||||
|
||||
1. Create a virtual environment in Python
|
||||
|
||||
For example,
|
||||
|
||||
```
|
||||
python3 -m venv env
|
||||
```
|
||||
|
||||
2. Activate the virtual environment
|
||||
|
||||
```
|
||||
source env/bin/activate
|
||||
```
|
||||
|
||||
3. Install dependencies from the `requirements.txt` file
|
||||
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Versions and Compatibility
|
||||
|
||||
### Python Version Support
|
||||
@ -127,7 +189,48 @@ No compatibility assurances are given between different major versions of the cl
|
||||
Major differences likely exist between major versions of Elasticsearch,
|
||||
particularly around request and response object formats, but also around API URLs and behaviour.
|
||||
|
||||
## Connecting to Elasticsearch Cloud
|
||||
## Connecting to Elasticsearch
|
||||
|
||||
eland uses the [Elasticsearch low level client](https://elasticsearch-py.readthedocs.io/) to connect to Elasticsearch.
|
||||
This client supports a range of [connection options and authentication mechanisms](https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch).
|
||||
|
||||
### Basic Connection Options
|
||||
|
||||
```
|
||||
>>> import eland as ed
|
||||
|
||||
>>> # Connect to flights index via localhost Elasticsearch node
|
||||
>>> ed.DataFrame('localhost', 'flights')
|
||||
|
||||
>>> # Connect to flights index via localhost Elasticsearch node on port 9200
|
||||
>>> ed.DataFrame('localhost:9200', 'flights')
|
||||
|
||||
>>> # Connect to flights index via localhost Elasticsearch node on port 9200 with <user>:<password> credentials
|
||||
>>> ed.DataFrame('http://<user>:<password>@localhost:9200', 'flights')
|
||||
|
||||
>>> # Connect to flights index via SSL (requires `from elasticsearch import Elasticsearch`)
|
||||
>>> es = Elasticsearch(
|
||||
'https://<user>:<password>@localhost:443',
|
||||
use_ssl=True,
|
||||
verify_certs=True,
|
||||
ca_certs='/path/to/ca.crt'
|
||||
)
|
||||
>>> ed.DataFrame(es, 'flights')
|
||||
|
||||
>>> # Connect to flights index via ssl using Urllib3HttpConnection options
|
||||
>>> es = Elasticsearch(
|
||||
['localhost:443', 'other_host:443'],
|
||||
use_ssl=True,
|
||||
verify_certs=True,
|
||||
ca_certs='/path/to/CA_certs',
|
||||
client_cert='/path/to/clientcert.pem',
|
||||
client_key='/path/to/clientkey.pem'
|
||||
)
|
||||
>>> ed.DataFrame(es, 'flights')
|
||||
```
|
||||
|
||||
### Connecting to an Elasticsearch Cloud Cluster
|
||||
|
||||
```
|
||||
>>> import eland as ed
|
||||
|
@ -753,7 +753,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<eland.index.Index at 0x1185c3f50>"
|
||||
"<eland.index.Index at 0x11a604f50>"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
@ -2707,24 +2707,24 @@
|
||||
" <td>410.008918</td>\n",
|
||||
" <td>2470.545974</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>251.942965</td>\n",
|
||||
" <td>251.944994</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>640.362667</td>\n",
|
||||
" <td>640.387285</td>\n",
|
||||
" <td>7612.072403</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>503.148975</td>\n",
|
||||
" <td>502.986750</td>\n",
|
||||
" <td>3.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>842.006180</td>\n",
|
||||
" <td>9735.660463</td>\n",
|
||||
" <td>842.272763</td>\n",
|
||||
" <td>9735.860651</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>720.569838</td>\n",
|
||||
" <td>4.243151</td>\n",
|
||||
" <td>720.505705</td>\n",
|
||||
" <td>4.246711</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>max</th>\n",
|
||||
@ -2745,9 +2745,9 @@
|
||||
"mean 628.253689 7092.142457 ... 511.127842 2.835975\n",
|
||||
"std 266.386661 4578.263193 ... 334.741135 1.939365\n",
|
||||
"min 100.020531 0.000000 ... 0.000000 0.000000\n",
|
||||
"25% 410.008918 2470.545974 ... 251.942965 1.000000\n",
|
||||
"50% 640.362667 7612.072403 ... 503.148975 3.000000\n",
|
||||
"75% 842.006180 9735.660463 ... 720.569838 4.243151\n",
|
||||
"25% 410.008918 2470.545974 ... 251.944994 1.000000\n",
|
||||
"50% 640.387285 7612.072403 ... 502.986750 3.000000\n",
|
||||
"75% 842.272763 9735.860651 ... 720.505705 4.246711\n",
|
||||
"max 1199.729004 19881.482422 ... 1902.901978 6.000000\n",
|
||||
"\n",
|
||||
"[8 rows x 7 columns]"
|
||||
|
@ -44,7 +44,8 @@
|
||||
" es_if_exists='replace', \n",
|
||||
" es_dropna=True,\n",
|
||||
" es_refresh=True,\n",
|
||||
" compression='gzip')"
|
||||
" compression='gzip',\n",
|
||||
" index_col=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -90,7 +91,7 @@
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Index(['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode',\n",
|
||||
" 'UnitPrice', 'Unnamed: 0'],\n",
|
||||
" 'UnitPrice'],\n",
|
||||
" dtype='object')"
|
||||
]
|
||||
},
|
||||
@ -126,7 +127,6 @@
|
||||
"Quantity int64\n",
|
||||
"StockCode object\n",
|
||||
"UnitPrice float64\n",
|
||||
"Unnamed: 0 int64\n",
|
||||
"dtype: object"
|
||||
]
|
||||
},
|
||||
@ -170,12 +170,11 @@
|
||||
"Quantity Quantity True long None int64 True True False Quantity\n",
|
||||
"StockCode StockCode True keyword None object True True False StockCode\n",
|
||||
"UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
|
||||
"Unnamed: 0 Unnamed: 0 True long None int64 True True False Unnamed: 0\n",
|
||||
"Operations:\n",
|
||||
" tasks: []\n",
|
||||
" size: None\n",
|
||||
" sort_params: None\n",
|
||||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice', 'Unnamed: 0']\n",
|
||||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
|
||||
" body: {}\n",
|
||||
" post_processing: []\n",
|
||||
"\n"
|
||||
@ -233,8 +232,8 @@
|
||||
" <th>Country</th>\n",
|
||||
" <th>CustomerID</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>StockCode</th>\n",
|
||||
" <th>UnitPrice</th>\n",
|
||||
" <th>Unnamed: 0</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
@ -243,28 +242,28 @@
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14729.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>21123</td>\n",
|
||||
" <td>1.25</td>\n",
|
||||
" <td>1000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1001</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14729.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>21124</td>\n",
|
||||
" <td>1.25</td>\n",
|
||||
" <td>1001</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
"<p>2 rows × 9 columns</p>"
|
||||
"<p>2 rows × 8 columns</p>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Country CustomerID ... UnitPrice Unnamed: 0\n",
|
||||
"1000 United Kingdom 14729.0 ... 1.25 1000\n",
|
||||
"1001 United Kingdom 14729.0 ... 1.25 1001\n",
|
||||
" Country CustomerID ... StockCode UnitPrice\n",
|
||||
"1000 United Kingdom 14729.0 ... 21123 1.25\n",
|
||||
"1001 United Kingdom 14729.0 ... 21124 1.25\n",
|
||||
"\n",
|
||||
"[2 rows x 9 columns]"
|
||||
"[2 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
@ -300,12 +299,11 @@
|
||||
"Quantity Quantity True long None int64 True True False Quantity\n",
|
||||
"StockCode StockCode True keyword None object True True False StockCode\n",
|
||||
"UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
|
||||
"Unnamed: 0 Unnamed: 0 True long None int64 True True False Unnamed: 0\n",
|
||||
"Operations:\n",
|
||||
" tasks: [('tail': ('sort_field': '_doc', 'count': 2)), ('head': ('sort_field': '_doc', 'count': 2)), ('tail': ('sort_field': '_doc', 'count': 2))]\n",
|
||||
" size: 2\n",
|
||||
" sort_params: _doc:desc\n",
|
||||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice', 'Unnamed: 0']\n",
|
||||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
|
||||
" body: {}\n",
|
||||
" post_processing: [('sort_index'), ('head': ('count': 2)), ('tail': ('count': 2))]\n",
|
||||
"\n"
|
||||
@ -345,8 +343,8 @@
|
||||
" <th>Country</th>\n",
|
||||
" <th>CustomerID</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>StockCode</th>\n",
|
||||
" <th>UnitPrice</th>\n",
|
||||
" <th>Unnamed: 0</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
@ -355,28 +353,28 @@
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>17419.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>21773</td>\n",
|
||||
" <td>1.25</td>\n",
|
||||
" <td>14998</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>14999</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>17419.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22149</td>\n",
|
||||
" <td>2.10</td>\n",
|
||||
" <td>14999</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
"<p>2 rows × 9 columns</p>"
|
||||
"<p>2 rows × 8 columns</p>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Country CustomerID ... UnitPrice Unnamed: 0\n",
|
||||
"14998 United Kingdom 17419.0 ... 1.25 14998\n",
|
||||
"14999 United Kingdom 17419.0 ... 2.10 14999\n",
|
||||
" Country CustomerID ... StockCode UnitPrice\n",
|
||||
"14998 United Kingdom 17419.0 ... 21773 1.25\n",
|
||||
"14999 United Kingdom 17419.0 ... 22149 2.10\n",
|
||||
"\n",
|
||||
"[2 rows x 9 columns]"
|
||||
"[2 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
@ -523,8 +521,8 @@
|
||||
" <th>Country</th>\n",
|
||||
" <th>CustomerID</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>StockCode</th>\n",
|
||||
" <th>UnitPrice</th>\n",
|
||||
" <th>Unnamed: 0</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
@ -533,55 +531,55 @@
|
||||
" <td>Germany</td>\n",
|
||||
" <td>12662.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22809</td>\n",
|
||||
" <td>2.95</td>\n",
|
||||
" <td>1109</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1110</th>\n",
|
||||
" <td>Germany</td>\n",
|
||||
" <td>12662.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>84347</td>\n",
|
||||
" <td>2.55</td>\n",
|
||||
" <td>1110</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1111</th>\n",
|
||||
" <td>Germany</td>\n",
|
||||
" <td>12662.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>84945</td>\n",
|
||||
" <td>0.85</td>\n",
|
||||
" <td>1111</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1112</th>\n",
|
||||
" <td>Germany</td>\n",
|
||||
" <td>12662.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22242</td>\n",
|
||||
" <td>1.65</td>\n",
|
||||
" <td>1112</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1113</th>\n",
|
||||
" <td>Germany</td>\n",
|
||||
" <td>12662.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22244</td>\n",
|
||||
" <td>1.95</td>\n",
|
||||
" <td>1113</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
"<p>5 rows × 9 columns</p>"
|
||||
"<p>5 rows × 8 columns</p>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Country CustomerID ... UnitPrice Unnamed: 0\n",
|
||||
"1109 Germany 12662.0 ... 2.95 1109\n",
|
||||
"1110 Germany 12662.0 ... 2.55 1110\n",
|
||||
"1111 Germany 12662.0 ... 0.85 1111\n",
|
||||
"1112 Germany 12662.0 ... 1.65 1112\n",
|
||||
"1113 Germany 12662.0 ... 1.95 1113\n",
|
||||
" Country CustomerID ... StockCode UnitPrice\n",
|
||||
"1109 Germany 12662.0 ... 22809 2.95\n",
|
||||
"1110 Germany 12662.0 ... 84347 2.55\n",
|
||||
"1111 Germany 12662.0 ... 84945 0.85\n",
|
||||
"1112 Germany 12662.0 ... 22242 1.65\n",
|
||||
"1113 Germany 12662.0 ... 22244 1.95\n",
|
||||
"\n",
|
||||
"[5 rows x 9 columns]"
|
||||
"[5 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
@ -638,8 +636,8 @@
|
||||
" <th>Country</th>\n",
|
||||
" <th>CustomerID</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>StockCode</th>\n",
|
||||
" <th>UnitPrice</th>\n",
|
||||
" <th>Unnamed: 0</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
@ -648,55 +646,55 @@
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14729.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>21123</td>\n",
|
||||
" <td>1.25</td>\n",
|
||||
" <td>1000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1001</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14729.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>21124</td>\n",
|
||||
" <td>1.25</td>\n",
|
||||
" <td>1001</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1002</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14729.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>21122</td>\n",
|
||||
" <td>1.25</td>\n",
|
||||
" <td>1002</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1003</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14729.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>84378</td>\n",
|
||||
" <td>1.25</td>\n",
|
||||
" <td>1003</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1004</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14729.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>21985</td>\n",
|
||||
" <td>0.29</td>\n",
|
||||
" <td>1004</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
"<p>5 rows × 9 columns</p>"
|
||||
"<p>5 rows × 8 columns</p>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Country CustomerID ... UnitPrice Unnamed: 0\n",
|
||||
"1000 United Kingdom 14729.0 ... 1.25 1000\n",
|
||||
"1001 United Kingdom 14729.0 ... 1.25 1001\n",
|
||||
"1002 United Kingdom 14729.0 ... 1.25 1002\n",
|
||||
"1003 United Kingdom 14729.0 ... 1.25 1003\n",
|
||||
"1004 United Kingdom 14729.0 ... 0.29 1004\n",
|
||||
" Country CustomerID ... StockCode UnitPrice\n",
|
||||
"1000 United Kingdom 14729.0 ... 21123 1.25\n",
|
||||
"1001 United Kingdom 14729.0 ... 21124 1.25\n",
|
||||
"1002 United Kingdom 14729.0 ... 21122 1.25\n",
|
||||
"1003 United Kingdom 14729.0 ... 84378 1.25\n",
|
||||
"1004 United Kingdom 14729.0 ... 21985 0.29\n",
|
||||
"\n",
|
||||
"[5 rows x 9 columns]"
|
||||
"[5 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
@ -745,22 +743,22 @@
|
||||
" <th>Country</th>\n",
|
||||
" <th>CustomerID</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>StockCode</th>\n",
|
||||
" <th>UnitPrice</th>\n",
|
||||
" <th>Unnamed: 0</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
"<p>0 rows × 9 columns</p>"
|
||||
"<p>0 rows × 8 columns</p>"
|
||||
],
|
||||
"text/plain": [
|
||||
"Empty DataFrame\n",
|
||||
"Columns: [Country, CustomerID, Description, InvoiceDate, InvoiceNo, Quantity, StockCode, UnitPrice, Unnamed: 0]\n",
|
||||
"Columns: [Country, CustomerID, Description, InvoiceDate, InvoiceNo, Quantity, StockCode, UnitPrice]\n",
|
||||
"Index: []\n",
|
||||
"\n",
|
||||
"[0 rows x 9 columns]"
|
||||
"[0 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
@ -803,12 +801,11 @@
|
||||
"Quantity Quantity True long None int64 True True False Quantity\n",
|
||||
"StockCode StockCode True keyword None object True True False StockCode\n",
|
||||
"UnitPrice UnitPrice True double None float64 True True False UnitPrice\n",
|
||||
"Unnamed: 0 Unnamed: 0 True long None int64 True True False Unnamed: 0\n",
|
||||
"Operations:\n",
|
||||
" tasks: [('boolean_filter': ('boolean_filter': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}))]\n",
|
||||
" size: None\n",
|
||||
" sort_params: None\n",
|
||||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice', 'Unnamed: 0']\n",
|
||||
" _source: ['Country', 'CustomerID', 'Description', 'InvoiceDate', 'InvoiceNo', 'Quantity', 'StockCode', 'UnitPrice']\n",
|
||||
" body: {'query': {'bool': {'must': [{'term': {'Country': 'Germany'}}, {'range': {'Quantity': {'gt': 90}}}]}}}\n",
|
||||
" post_processing: []\n",
|
||||
"\n"
|
||||
@ -997,7 +994,6 @@
|
||||
" <th>CustomerID</th>\n",
|
||||
" <th>Quantity</th>\n",
|
||||
" <th>UnitPrice</th>\n",
|
||||
" <th>Unnamed: 0</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
@ -1006,71 +1002,63 @@
|
||||
" <td>10729.000000</td>\n",
|
||||
" <td>15000.000000</td>\n",
|
||||
" <td>15000.000000</td>\n",
|
||||
" <td>15000.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>mean</th>\n",
|
||||
" <td>15590.776680</td>\n",
|
||||
" <td>7.464000</td>\n",
|
||||
" <td>4.103233</td>\n",
|
||||
" <td>7499.500000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>std</th>\n",
|
||||
" <td>1764.025160</td>\n",
|
||||
" <td>85.924387</td>\n",
|
||||
" <td>20.104873</td>\n",
|
||||
" <td>4330.127009</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>min</th>\n",
|
||||
" <td>12347.000000</td>\n",
|
||||
" <td>-9360.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25%</th>\n",
|
||||
" <td>14224.078193</td>\n",
|
||||
" <td>14225.075800</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" <td>1.250000</td>\n",
|
||||
" <td>3760.745049</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>15659.417515</td>\n",
|
||||
" <td>15667.359184</td>\n",
|
||||
" <td>2.000000</td>\n",
|
||||
" <td>2.510000</td>\n",
|
||||
" <td>7499.488310</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>17213.978376</td>\n",
|
||||
" <td>6.564935</td>\n",
|
||||
" <td>17212.690092</td>\n",
|
||||
" <td>6.552523</td>\n",
|
||||
" <td>4.210000</td>\n",
|
||||
" <td>11249.500000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>max</th>\n",
|
||||
" <td>18239.000000</td>\n",
|
||||
" <td>2880.000000</td>\n",
|
||||
" <td>950.990000</td>\n",
|
||||
" <td>14999.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" CustomerID Quantity UnitPrice Unnamed: 0\n",
|
||||
"count 10729.000000 15000.000000 15000.000000 15000.000000\n",
|
||||
"mean 15590.776680 7.464000 4.103233 7499.500000\n",
|
||||
"std 1764.025160 85.924387 20.104873 4330.127009\n",
|
||||
"min 12347.000000 -9360.000000 0.000000 0.000000\n",
|
||||
"25% 14224.078193 1.000000 1.250000 3760.745049\n",
|
||||
"50% 15659.417515 2.000000 2.510000 7499.488310\n",
|
||||
"75% 17213.978376 6.564935 4.210000 11249.500000\n",
|
||||
"max 18239.000000 2880.000000 950.990000 14999.000000"
|
||||
" CustomerID Quantity UnitPrice\n",
|
||||
"count 10729.000000 15000.000000 15000.000000\n",
|
||||
"mean 15590.776680 7.464000 4.103233\n",
|
||||
"std 1764.025160 85.924387 20.104873\n",
|
||||
"min 12347.000000 -9360.000000 0.000000\n",
|
||||
"25% 14225.075800 1.000000 1.250000\n",
|
||||
"50% 15667.359184 2.000000 2.510000\n",
|
||||
"75% 17212.690092 6.552523 4.210000\n",
|
||||
"max 18239.000000 2880.000000 950.990000"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
@ -1171,8 +1159,8 @@
|
||||
" <th>Country</th>\n",
|
||||
" <th>CustomerID</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>StockCode</th>\n",
|
||||
" <th>UnitPrice</th>\n",
|
||||
" <th>Unnamed: 0</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
@ -1181,40 +1169,40 @@
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>15485.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22086</td>\n",
|
||||
" <td>2.55</td>\n",
|
||||
" <td>1228</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1237</th>\n",
|
||||
" <td>Norway</td>\n",
|
||||
" <td>12433.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22444</td>\n",
|
||||
" <td>1.06</td>\n",
|
||||
" <td>1237</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1286</th>\n",
|
||||
" <td>Norway</td>\n",
|
||||
" <td>12433.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>84050</td>\n",
|
||||
" <td>1.25</td>\n",
|
||||
" <td>1286</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1293</th>\n",
|
||||
" <td>Norway</td>\n",
|
||||
" <td>12433.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22197</td>\n",
|
||||
" <td>0.85</td>\n",
|
||||
" <td>1293</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1333</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>18144.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>84879</td>\n",
|
||||
" <td>1.69</td>\n",
|
||||
" <td>1333</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
@ -1229,61 +1217,61 @@
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>15061.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22423</td>\n",
|
||||
" <td>10.95</td>\n",
|
||||
" <td>14784</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>14785</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>15061.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22075</td>\n",
|
||||
" <td>1.45</td>\n",
|
||||
" <td>14785</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>14788</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>15061.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>17038</td>\n",
|
||||
" <td>0.07</td>\n",
|
||||
" <td>14788</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>14974</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14739.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>21704</td>\n",
|
||||
" <td>0.72</td>\n",
|
||||
" <td>14974</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>14980</th>\n",
|
||||
" <td>United Kingdom</td>\n",
|
||||
" <td>14739.0</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>22178</td>\n",
|
||||
" <td>1.06</td>\n",
|
||||
" <td>14980</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>\n",
|
||||
"<p>258 rows × 9 columns</p>"
|
||||
"<p>258 rows × 8 columns</p>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Country CustomerID ... UnitPrice Unnamed: 0\n",
|
||||
"1228 United Kingdom 15485.0 ... 2.55 1228\n",
|
||||
"1237 Norway 12433.0 ... 1.06 1237\n",
|
||||
"1286 Norway 12433.0 ... 1.25 1286\n",
|
||||
"1293 Norway 12433.0 ... 0.85 1293\n",
|
||||
"1333 United Kingdom 18144.0 ... 1.69 1333\n",
|
||||
"... ... ... ... ... ...\n",
|
||||
"14784 United Kingdom 15061.0 ... 10.95 14784\n",
|
||||
"14785 United Kingdom 15061.0 ... 1.45 14785\n",
|
||||
"14788 United Kingdom 15061.0 ... 0.07 14788\n",
|
||||
"14974 United Kingdom 14739.0 ... 0.72 14974\n",
|
||||
"14980 United Kingdom 14739.0 ... 1.06 14980\n",
|
||||
" Country CustomerID ... StockCode UnitPrice\n",
|
||||
"1228 United Kingdom 15485.0 ... 22086 2.55\n",
|
||||
"1237 Norway 12433.0 ... 22444 1.06\n",
|
||||
"1286 Norway 12433.0 ... 84050 1.25\n",
|
||||
"1293 Norway 12433.0 ... 22197 0.85\n",
|
||||
"1333 United Kingdom 18144.0 ... 84879 1.69\n",
|
||||
"... ... ... ... ... ...\n",
|
||||
"14784 United Kingdom 15061.0 ... 22423 10.95\n",
|
||||
"14785 United Kingdom 15061.0 ... 22075 1.45\n",
|
||||
"14788 United Kingdom 15061.0 ... 17038 0.07\n",
|
||||
"14974 United Kingdom 14739.0 ... 21704 0.72\n",
|
||||
"14980 United Kingdom 14739.0 ... 22178 1.06\n",
|
||||
"\n",
|
||||
"[258 rows x 9 columns]"
|
||||
"[258 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
@ -1449,6 +1437,15 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.5"
|
||||
},
|
||||
"pycharm": {
|
||||
"stem_cell": {
|
||||
"cell_type": "raw",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"source": []
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -15,6 +15,6 @@
|
||||
__title__ = 'eland'
|
||||
__description__ = 'Python elasticsearch client to analyse, explore and manipulate data that resides in elasticsearch.'
|
||||
__url__ = 'https://github.com/elastic/eland'
|
||||
__version__ = '7.5.1a2'
|
||||
__version__ = '7.5.1a3'
|
||||
__maintainer__ = 'Elasticsearch B.V.'
|
||||
__maintainer_email__ = 'steve.dodson@elastic.co'
|
||||
|
117
setup.py
117
setup.py
@ -34,7 +34,6 @@ CLASSIFIERS = [
|
||||
"Programming Language :: Python :: 3.6",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Cython",
|
||||
"Topic :: Scientific/Engineering",
|
||||
]
|
||||
|
||||
@ -52,26 +51,110 @@ index pattern, and explore using an API that mirrors a subset of the pandas.Data
|
||||
```
|
||||
>>> import eland as ed
|
||||
|
||||
>>> df = ed.read_es('http://localhost:9200', 'reviews')
|
||||
>>> # Connect to 'flights' index via localhost Elasticsearch node
|
||||
>>> df = ed.DataFrame('localhost:9200', 'flights')
|
||||
|
||||
>>> df.head()
|
||||
reviewerId vendorId rating date
|
||||
0 0 0 5 2006-04-07 17:08
|
||||
1 1 1 5 2006-05-04 12:16
|
||||
2 2 2 4 2006-04-21 12:26
|
||||
3 3 3 5 2006-04-18 15:48
|
||||
4 3 4 5 2006-04-18 15:49
|
||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
||||
0 841.265642 False Kibana Airlines ... Sunny 0 2018-01-01 00:00:00
|
||||
1 882.982662 False Logstash Airways ... Clear 0 2018-01-01 18:27:00
|
||||
2 190.636904 False Logstash Airways ... Rain 0 2018-01-01 17:11:14
|
||||
3 181.694216 True Kibana Airlines ... Thunder & Lightning 0 2018-01-01 10:33:28
|
||||
4 730.041778 False Kibana Airlines ... Damaging Wind 0 2018-01-01 05:13:00
|
||||
|
||||
[5 rows x 27 columns]
|
||||
|
||||
>>> df.describe()
|
||||
reviewerId vendorId rating
|
||||
count 578805.000000 578805.000000 578805.000000
|
||||
mean 174124.098437 60.645267 4.679671
|
||||
std 116951.972209 54.488053 0.800891
|
||||
min 0.000000 0.000000 0.000000
|
||||
25% 70043.000000 20.000000 5.000000
|
||||
50% 161052.000000 44.000000 5.000000
|
||||
75% 272697.000000 83.000000 5.000000
|
||||
max 400140.000000 246.000000 5.000000
|
||||
AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin FlightTimeHour FlightTimeMin dayOfWeek
|
||||
count 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000 13059.000000
|
||||
mean 628.253689 7092.142457 4406.853010 47.335171 8.518797 511.127842 2.835975
|
||||
std 266.386661 4578.263193 2844.800855 96.743006 5.579019 334.741135 1.939365
|
||||
min 100.020531 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
|
||||
25% 410.008918 2470.545974 1535.126118 0.000000 4.194976 251.738513 1.000000
|
||||
50% 640.362667 7612.072403 4729.922470 0.000000 8.385816 503.148975 3.000000
|
||||
75% 842.254990 9735.082407 6049.459005 15.000000 12.009396 720.534532 4.141221
|
||||
max 1199.729004 19881.482422 12353.780273 360.000000 31.715034 1902.901978 6.000000
|
||||
|
||||
>>> df[['Carrier', 'AvgTicketPrice', 'Cancelled']]
|
||||
Carrier AvgTicketPrice Cancelled
|
||||
0 Kibana Airlines 841.265642 False
|
||||
1 Logstash Airways 882.982662 False
|
||||
2 Logstash Airways 190.636904 False
|
||||
3 Kibana Airlines 181.694216 True
|
||||
4 Kibana Airlines 730.041778 False
|
||||
... ... ... ...
|
||||
13054 Logstash Airways 1080.446279 False
|
||||
13055 Logstash Airways 646.612941 False
|
||||
13056 Logstash Airways 997.751876 False
|
||||
13057 JetBeats 1102.814465 False
|
||||
13058 JetBeats 858.144337 False
|
||||
|
||||
[13059 rows x 3 columns]
|
||||
|
||||
>>> df[(df.Carrier=="Kibana Airlines") & (df.AvgTicketPrice > 900.0) & (df.Cancelled == True)].head()
|
||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
||||
8 960.869736 True Kibana Airlines ... Heavy Fog 0 2018-01-01 12:09:35
|
||||
26 975.812632 True Kibana Airlines ... Rain 0 2018-01-01 15:38:32
|
||||
311 946.358410 True Kibana Airlines ... Heavy Fog 0 2018-01-01 11:51:12
|
||||
651 975.383864 True Kibana Airlines ... Rain 2 2018-01-03 21:13:17
|
||||
950 907.836523 True Kibana Airlines ... Thunder & Lightning 2 2018-01-03 05:14:51
|
||||
|
||||
[5 rows x 27 columns]
|
||||
|
||||
>>> df[['DistanceKilometers', 'AvgTicketPrice']].aggregate(['sum', 'min', 'std'])
|
||||
DistanceKilometers AvgTicketPrice
|
||||
sum 9.261629e+07 8.204365e+06
|
||||
min 0.000000e+00 1.000205e+02
|
||||
std 4.578263e+03 2.663867e+02
|
||||
|
||||
>>> df[['Carrier', 'Origin', 'Dest']].nunique()
|
||||
Carrier 4
|
||||
Origin 156
|
||||
Dest 156
|
||||
dtype: int64
|
||||
|
||||
>>> s = df.AvgTicketPrice * 2 + df.DistanceKilometers - df.FlightDelayMin
|
||||
>>> s
|
||||
0 18174.857422
|
||||
1 10589.365723
|
||||
2 381.273804
|
||||
3 739.126221
|
||||
4 14818.327637
|
||||
...
|
||||
13054 10219.474121
|
||||
13055 8381.823975
|
||||
13056 12661.157104
|
||||
13057 20819.488281
|
||||
13058 18315.431274
|
||||
Length: 13059, dtype: float64
|
||||
|
||||
>>> print(s.info_es())
|
||||
index_pattern: flights
|
||||
Index:
|
||||
index_field: _id
|
||||
is_source_field: False
|
||||
Mappings:
|
||||
capabilities:
|
||||
es_field_name is_source es_dtype es_date_format pd_dtype is_searchable is_aggregatable is_scripted aggregatable_es_field_name
|
||||
NaN script_field_None False double None float64 True True True script_field_None
|
||||
Operations:
|
||||
tasks: []
|
||||
size: None
|
||||
sort_params: None
|
||||
_source: ['script_field_None']
|
||||
body: {'script_fields': {'script_field_None': {'script': {'source': "(((doc['AvgTicketPrice'].value * 2) + doc['DistanceKilometers'].value) - doc['FlightDelayMin'].value)"}}}}
|
||||
post_processing: []
|
||||
|
||||
>>> pd_df = ed.eland_to_pandas(df)
|
||||
>>> pd_df.head()
|
||||
AvgTicketPrice Cancelled Carrier ... OriginWeather dayOfWeek timestamp
|
||||
0 841.265642 False Kibana Airlines ... Sunny 0 2018-01-01 00:00:00
|
||||
1 882.982662 False Logstash Airways ... Clear 0 2018-01-01 18:27:00
|
||||
2 190.636904 False Logstash Airways ... Rain 0 2018-01-01 17:11:14
|
||||
3 181.694216 True Kibana Airlines ... Thunder & Lightning 0 2018-01-01 10:33:28
|
||||
4 730.041778 False Kibana Airlines ... Damaging Wind 0 2018-01-01 05:13:00
|
||||
|
||||
[5 rows x 27 columns]
|
||||
```
|
||||
|
||||
See [docs](https://eland.readthedocs.io/en/latest) and [demo_notebook.ipynb](https://eland.readthedocs.io/en/latest/examples/demo_notebook.html) for more examples.
|
||||
|
Loading…
x
Reference in New Issue
Block a user