mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Added json file for pandas comparison
+ renamed from_es to read_es1
This commit is contained in:
parent
f1e27f1dda
commit
2b83edad69
@ -1,4 +1,4 @@
|
||||
import eland
|
||||
|
||||
def from_es(es_params, index_pattern):
|
||||
def read_es(es_params, index_pattern):
|
||||
return eland.DataFrame(es_params, index_pattern)
|
||||
|
BIN
flights.json.gz
Normal file
BIN
flights.json.gz
Normal file
Binary file not shown.
451
test.ipynb
451
test.ipynb
@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Eland"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@ -15,7 +22,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = ed.from_es('localhost', 'kibana_sample_data_flights')"
|
||||
"df = ed.read_es('localhost', 'kibana_sample_data_flights')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -339,12 +346,12 @@
|
||||
" <td>2470.545974</td>\n",
|
||||
" <td>1535.126118</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>251.773003</td>\n",
|
||||
" <td>251.682199</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>640.362667</td>\n",
|
||||
" <td>640.362374</td>\n",
|
||||
" <td>7612.072403</td>\n",
|
||||
" <td>4729.922470</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
@ -353,12 +360,12 @@
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>842.233478</td>\n",
|
||||
" <td>9735.887390</td>\n",
|
||||
" <td>842.260482</td>\n",
|
||||
" <td>9735.660463</td>\n",
|
||||
" <td>6049.459005</td>\n",
|
||||
" <td>15.000000</td>\n",
|
||||
" <td>720.534532</td>\n",
|
||||
" <td>4.095833</td>\n",
|
||||
" <td>14.102113</td>\n",
|
||||
" <td>720.569838</td>\n",
|
||||
" <td>4.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>max</th>\n",
|
||||
@ -380,8 +387,8 @@
|
||||
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
|
||||
"min 100.020531 0.000000 0.000000 0.000000 \n",
|
||||
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
|
||||
"50% 640.362667 7612.072403 4729.922470 0.000000 \n",
|
||||
"75% 842.233478 9735.887390 6049.459005 15.000000 \n",
|
||||
"50% 640.362374 7612.072403 4729.922470 0.000000 \n",
|
||||
"75% 842.260482 9735.660463 6049.459005 14.102113 \n",
|
||||
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
|
||||
"\n",
|
||||
" FlightTimeMin dayOfWeek \n",
|
||||
@ -389,9 +396,9 @@
|
||||
"mean 511.127842 2.835975 \n",
|
||||
"std 334.741135 1.939365 \n",
|
||||
"min 0.000000 0.000000 \n",
|
||||
"25% 251.773003 1.000000 \n",
|
||||
"25% 251.682199 1.000000 \n",
|
||||
"50% 503.148975 3.000000 \n",
|
||||
"75% 720.534532 4.095833 \n",
|
||||
"75% 720.569838 4.000000 \n",
|
||||
"max 1902.901978 6.000000 "
|
||||
]
|
||||
},
|
||||
@ -403,6 +410,426 @@
|
||||
"source": [
|
||||
"df.describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Pandas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pd_df = pd.read_json('flights.json.gz', lines=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>AvgTicketPrice</th>\n",
|
||||
" <th>Cancelled</th>\n",
|
||||
" <th>Carrier</th>\n",
|
||||
" <th>Dest</th>\n",
|
||||
" <th>DestAirportID</th>\n",
|
||||
" <th>DestCityName</th>\n",
|
||||
" <th>DestCountry</th>\n",
|
||||
" <th>DestLocation</th>\n",
|
||||
" <th>DestRegion</th>\n",
|
||||
" <th>DestWeather</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>FlightTimeMin</th>\n",
|
||||
" <th>Origin</th>\n",
|
||||
" <th>OriginAirportID</th>\n",
|
||||
" <th>OriginCityName</th>\n",
|
||||
" <th>OriginCountry</th>\n",
|
||||
" <th>OriginLocation</th>\n",
|
||||
" <th>OriginRegion</th>\n",
|
||||
" <th>OriginWeather</th>\n",
|
||||
" <th>dayOfWeek</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>841.265642</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Sydney Kingsford Smith International Airport</td>\n",
|
||||
" <td>SYD</td>\n",
|
||||
" <td>Sydney</td>\n",
|
||||
" <td>AU</td>\n",
|
||||
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Rain</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1030.770416</td>\n",
|
||||
" <td>Frankfurt am Main Airport</td>\n",
|
||||
" <td>FRA</td>\n",
|
||||
" <td>Frankfurt am Main</td>\n",
|
||||
" <td>DE</td>\n",
|
||||
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
|
||||
" <td>DE-HE</td>\n",
|
||||
" <td>Sunny</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 00:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>882.982662</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Sunny</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>464.389481</td>\n",
|
||||
" <td>Cape Town International Airport</td>\n",
|
||||
" <td>CPT</td>\n",
|
||||
" <td>Cape Town</td>\n",
|
||||
" <td>ZA</td>\n",
|
||||
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 18:27:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>190.636904</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Cloudy</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Rain</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 17:11:14</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>181.694216</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Treviso-Sant'Angelo Airport</td>\n",
|
||||
" <td>TV01</td>\n",
|
||||
" <td>Treviso</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>222.749059</td>\n",
|
||||
" <td>Naples International Airport</td>\n",
|
||||
" <td>NA01</td>\n",
|
||||
" <td>Naples</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
|
||||
" <td>IT-72</td>\n",
|
||||
" <td>Thunder & Lightning</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 10:33:28</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>730.041778</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Xi'an Xianyang International Airport</td>\n",
|
||||
" <td>XIY</td>\n",
|
||||
" <td>Xi'an</td>\n",
|
||||
" <td>CN</td>\n",
|
||||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>785.779071</td>\n",
|
||||
" <td>Licenciado Benito Juarez International Airport</td>\n",
|
||||
" <td>AICM</td>\n",
|
||||
" <td>Mexico City</td>\n",
|
||||
" <td>MX</td>\n",
|
||||
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
|
||||
" <td>MX-DIF</td>\n",
|
||||
" <td>Damaging Wind</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2018-01-01 05:13:00</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>5 rows × 27 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" AvgTicketPrice Cancelled Carrier \\\n",
|
||||
"0 841.265642 False Kibana Airlines \n",
|
||||
"1 882.982662 False Logstash Airways \n",
|
||||
"2 190.636904 False Logstash Airways \n",
|
||||
"3 181.694216 True Kibana Airlines \n",
|
||||
"4 730.041778 False Kibana Airlines \n",
|
||||
"\n",
|
||||
" Dest DestAirportID DestCityName \\\n",
|
||||
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
|
||||
"1 Venice Marco Polo Airport VE05 Venice \n",
|
||||
"2 Venice Marco Polo Airport VE05 Venice \n",
|
||||
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
|
||||
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
|
||||
"\n",
|
||||
" DestCountry DestLocation DestRegion \\\n",
|
||||
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
|
||||
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
|
||||
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
|
||||
"\n",
|
||||
" DestWeather ... FlightTimeMin \\\n",
|
||||
"0 Rain ... 1030.770416 \n",
|
||||
"1 Sunny ... 464.389481 \n",
|
||||
"2 Cloudy ... 0.000000 \n",
|
||||
"3 Clear ... 222.749059 \n",
|
||||
"4 Clear ... 785.779071 \n",
|
||||
"\n",
|
||||
" Origin OriginAirportID \\\n",
|
||||
"0 Frankfurt am Main Airport FRA \n",
|
||||
"1 Cape Town International Airport CPT \n",
|
||||
"2 Venice Marco Polo Airport VE05 \n",
|
||||
"3 Naples International Airport NA01 \n",
|
||||
"4 Licenciado Benito Juarez International Airport AICM \n",
|
||||
"\n",
|
||||
" OriginCityName OriginCountry \\\n",
|
||||
"0 Frankfurt am Main DE \n",
|
||||
"1 Cape Town ZA \n",
|
||||
"2 Venice IT \n",
|
||||
"3 Naples IT \n",
|
||||
"4 Mexico City MX \n",
|
||||
"\n",
|
||||
" OriginLocation OriginRegion \\\n",
|
||||
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
|
||||
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
|
||||
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
|
||||
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
|
||||
"\n",
|
||||
" OriginWeather dayOfWeek timestamp \n",
|
||||
"0 Sunny 0 2018-01-01 00:00:00 \n",
|
||||
"1 Clear 0 2018-01-01 18:27:00 \n",
|
||||
"2 Rain 0 2018-01-01 17:11:14 \n",
|
||||
"3 Thunder & Lightning 0 2018-01-01 10:33:28 \n",
|
||||
"4 Damaging Wind 0 2018-01-01 05:13:00 \n",
|
||||
"\n",
|
||||
"[5 rows x 27 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pd_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>AvgTicketPrice</th>\n",
|
||||
" <th>DistanceKilometers</th>\n",
|
||||
" <th>DistanceMiles</th>\n",
|
||||
" <th>FlightDelayMin</th>\n",
|
||||
" <th>FlightTimeHour</th>\n",
|
||||
" <th>FlightTimeMin</th>\n",
|
||||
" <th>dayOfWeek</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>count</th>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>mean</th>\n",
|
||||
" <td>628.253689</td>\n",
|
||||
" <td>7092.142455</td>\n",
|
||||
" <td>4406.853013</td>\n",
|
||||
" <td>47.335171</td>\n",
|
||||
" <td>8.518797</td>\n",
|
||||
" <td>511.127842</td>\n",
|
||||
" <td>2.835975</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>std</th>\n",
|
||||
" <td>266.396861</td>\n",
|
||||
" <td>4578.438497</td>\n",
|
||||
" <td>2844.909787</td>\n",
|
||||
" <td>96.746711</td>\n",
|
||||
" <td>5.579233</td>\n",
|
||||
" <td>334.753952</td>\n",
|
||||
" <td>1.939439</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>min</th>\n",
|
||||
" <td>100.020528</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25%</th>\n",
|
||||
" <td>409.893816</td>\n",
|
||||
" <td>2459.705673</td>\n",
|
||||
" <td>1528.390247</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>4.205553</td>\n",
|
||||
" <td>252.333192</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>640.556668</td>\n",
|
||||
" <td>7610.330866</td>\n",
|
||||
" <td>4728.840363</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>8.384086</td>\n",
|
||||
" <td>503.045170</td>\n",
|
||||
" <td>3.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>842.185470</td>\n",
|
||||
" <td>9736.637600</td>\n",
|
||||
" <td>6050.066114</td>\n",
|
||||
" <td>15.000000</td>\n",
|
||||
" <td>12.006934</td>\n",
|
||||
" <td>720.416036</td>\n",
|
||||
" <td>4.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>max</th>\n",
|
||||
" <td>1199.729053</td>\n",
|
||||
" <td>19881.482315</td>\n",
|
||||
" <td>12353.780369</td>\n",
|
||||
" <td>360.000000</td>\n",
|
||||
" <td>31.715034</td>\n",
|
||||
" <td>1902.902032</td>\n",
|
||||
" <td>6.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
|
||||
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
|
||||
"mean 628.253689 7092.142455 4406.853013 47.335171 \n",
|
||||
"std 266.396861 4578.438497 2844.909787 96.746711 \n",
|
||||
"min 100.020528 0.000000 0.000000 0.000000 \n",
|
||||
"25% 409.893816 2459.705673 1528.390247 0.000000 \n",
|
||||
"50% 640.556668 7610.330866 4728.840363 0.000000 \n",
|
||||
"75% 842.185470 9736.637600 6050.066114 15.000000 \n",
|
||||
"max 1199.729053 19881.482315 12353.780369 360.000000 \n",
|
||||
"\n",
|
||||
" FlightTimeHour FlightTimeMin dayOfWeek \n",
|
||||
"count 13059.000000 13059.000000 13059.000000 \n",
|
||||
"mean 8.518797 511.127842 2.835975 \n",
|
||||
"std 5.579233 334.753952 1.939439 \n",
|
||||
"min 0.000000 0.000000 0.000000 \n",
|
||||
"25% 4.205553 252.333192 1.000000 \n",
|
||||
"50% 8.384086 503.045170 3.000000 \n",
|
||||
"75% 12.006934 720.416036 4.000000 \n",
|
||||
"max 31.715034 1902.902032 6.000000 "
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pd_df.describe()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user