diff --git a/eland/utils.py b/eland/utils.py
index 28ee93e..5135bfe 100644
--- a/eland/utils.py
+++ b/eland/utils.py
@@ -1,4 +1,4 @@
import eland
-def from_es(es_params, index_pattern):
+def read_es(es_params, index_pattern):
return eland.DataFrame(es_params, index_pattern)
diff --git a/flights.json.gz b/flights.json.gz
new file mode 100644
index 0000000..85f344d
Binary files /dev/null and b/flights.json.gz differ
diff --git a/test.ipynb b/test.ipynb
index 2ba1dea..ebce3d0 100644
--- a/test.ipynb
+++ b/test.ipynb
@@ -1,5 +1,12 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Eland"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 1,
@@ -15,7 +22,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df = ed.from_es('localhost', 'kibana_sample_data_flights')"
+ "df = ed.read_es('localhost', 'kibana_sample_data_flights')"
]
},
{
@@ -339,12 +346,12 @@
"
2470.545974 | \n",
" 1535.126118 | \n",
" 0.000000 | \n",
- " 251.773003 | \n",
+ " 251.682199 | \n",
" 1.000000 | \n",
" \n",
" \n",
" 50% | \n",
- " 640.362667 | \n",
+ " 640.362374 | \n",
" 7612.072403 | \n",
" 4729.922470 | \n",
" 0.000000 | \n",
@@ -353,12 +360,12 @@
"
\n",
" \n",
" 75% | \n",
- " 842.233478 | \n",
- " 9735.887390 | \n",
+ " 842.260482 | \n",
+ " 9735.660463 | \n",
" 6049.459005 | \n",
- " 15.000000 | \n",
- " 720.534532 | \n",
- " 4.095833 | \n",
+ " 14.102113 | \n",
+ " 720.569838 | \n",
+ " 4.000000 | \n",
"
\n",
" \n",
" max | \n",
@@ -380,8 +387,8 @@
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
"min 100.020531 0.000000 0.000000 0.000000 \n",
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
- "50% 640.362667 7612.072403 4729.922470 0.000000 \n",
- "75% 842.233478 9735.887390 6049.459005 15.000000 \n",
+ "50% 640.362374 7612.072403 4729.922470 0.000000 \n",
+ "75% 842.260482 9735.660463 6049.459005 14.102113 \n",
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
"\n",
" FlightTimeMin dayOfWeek \n",
@@ -389,9 +396,9 @@
"mean 511.127842 2.835975 \n",
"std 334.741135 1.939365 \n",
"min 0.000000 0.000000 \n",
- "25% 251.773003 1.000000 \n",
+ "25% 251.682199 1.000000 \n",
"50% 503.148975 3.000000 \n",
- "75% 720.534532 4.095833 \n",
+ "75% 720.569838 4.000000 \n",
"max 1902.901978 6.000000 "
]
},
@@ -403,6 +410,426 @@
"source": [
"df.describe()"
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd_df = pd.read_json('flights.json.gz', lines=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " AvgTicketPrice | \n",
+ " Cancelled | \n",
+ " Carrier | \n",
+ " Dest | \n",
+ " DestAirportID | \n",
+ " DestCityName | \n",
+ " DestCountry | \n",
+ " DestLocation | \n",
+ " DestRegion | \n",
+ " DestWeather | \n",
+ " ... | \n",
+ " FlightTimeMin | \n",
+ " Origin | \n",
+ " OriginAirportID | \n",
+ " OriginCityName | \n",
+ " OriginCountry | \n",
+ " OriginLocation | \n",
+ " OriginRegion | \n",
+ " OriginWeather | \n",
+ " dayOfWeek | \n",
+ " timestamp | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 841.265642 | \n",
+ " False | \n",
+ " Kibana Airlines | \n",
+ " Sydney Kingsford Smith International Airport | \n",
+ " SYD | \n",
+ " Sydney | \n",
+ " AU | \n",
+ " {'lat': '-33.94609833', 'lon': '151.177002'} | \n",
+ " SE-BD | \n",
+ " Rain | \n",
+ " ... | \n",
+ " 1030.770416 | \n",
+ " Frankfurt am Main Airport | \n",
+ " FRA | \n",
+ " Frankfurt am Main | \n",
+ " DE | \n",
+ " {'lat': '50.033333', 'lon': '8.570556'} | \n",
+ " DE-HE | \n",
+ " Sunny | \n",
+ " 0 | \n",
+ " 2018-01-01 00:00:00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 882.982662 | \n",
+ " False | \n",
+ " Logstash Airways | \n",
+ " Venice Marco Polo Airport | \n",
+ " VE05 | \n",
+ " Venice | \n",
+ " IT | \n",
+ " {'lat': '45.505299', 'lon': '12.3519'} | \n",
+ " IT-34 | \n",
+ " Sunny | \n",
+ " ... | \n",
+ " 464.389481 | \n",
+ " Cape Town International Airport | \n",
+ " CPT | \n",
+ " Cape Town | \n",
+ " ZA | \n",
+ " {'lat': '-33.96480179', 'lon': '18.60169983'} | \n",
+ " SE-BD | \n",
+ " Clear | \n",
+ " 0 | \n",
+ " 2018-01-01 18:27:00 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 190.636904 | \n",
+ " False | \n",
+ " Logstash Airways | \n",
+ " Venice Marco Polo Airport | \n",
+ " VE05 | \n",
+ " Venice | \n",
+ " IT | \n",
+ " {'lat': '45.505299', 'lon': '12.3519'} | \n",
+ " IT-34 | \n",
+ " Cloudy | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " Venice Marco Polo Airport | \n",
+ " VE05 | \n",
+ " Venice | \n",
+ " IT | \n",
+ " {'lat': '45.505299', 'lon': '12.3519'} | \n",
+ " IT-34 | \n",
+ " Rain | \n",
+ " 0 | \n",
+ " 2018-01-01 17:11:14 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 181.694216 | \n",
+ " True | \n",
+ " Kibana Airlines | \n",
+ " Treviso-Sant'Angelo Airport | \n",
+ " TV01 | \n",
+ " Treviso | \n",
+ " IT | \n",
+ " {'lat': '45.648399', 'lon': '12.1944'} | \n",
+ " IT-34 | \n",
+ " Clear | \n",
+ " ... | \n",
+ " 222.749059 | \n",
+ " Naples International Airport | \n",
+ " NA01 | \n",
+ " Naples | \n",
+ " IT | \n",
+ " {'lat': '40.886002', 'lon': '14.2908'} | \n",
+ " IT-72 | \n",
+ " Thunder & Lightning | \n",
+ " 0 | \n",
+ " 2018-01-01 10:33:28 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 730.041778 | \n",
+ " False | \n",
+ " Kibana Airlines | \n",
+ " Xi'an Xianyang International Airport | \n",
+ " XIY | \n",
+ " Xi'an | \n",
+ " CN | \n",
+ " {'lat': '34.447102', 'lon': '108.751999'} | \n",
+ " SE-BD | \n",
+ " Clear | \n",
+ " ... | \n",
+ " 785.779071 | \n",
+ " Licenciado Benito Juarez International Airport | \n",
+ " AICM | \n",
+ " Mexico City | \n",
+ " MX | \n",
+ " {'lat': '19.4363', 'lon': '-99.072098'} | \n",
+ " MX-DIF | \n",
+ " Damaging Wind | \n",
+ " 0 | \n",
+ " 2018-01-01 05:13:00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 27 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " AvgTicketPrice Cancelled Carrier \\\n",
+ "0 841.265642 False Kibana Airlines \n",
+ "1 882.982662 False Logstash Airways \n",
+ "2 190.636904 False Logstash Airways \n",
+ "3 181.694216 True Kibana Airlines \n",
+ "4 730.041778 False Kibana Airlines \n",
+ "\n",
+ " Dest DestAirportID DestCityName \\\n",
+ "0 Sydney Kingsford Smith International Airport SYD Sydney \n",
+ "1 Venice Marco Polo Airport VE05 Venice \n",
+ "2 Venice Marco Polo Airport VE05 Venice \n",
+ "3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
+ "4 Xi'an Xianyang International Airport XIY Xi'an \n",
+ "\n",
+ " DestCountry DestLocation DestRegion \\\n",
+ "0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
+ "1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
+ "2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
+ "3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
+ "4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
+ "\n",
+ " DestWeather ... FlightTimeMin \\\n",
+ "0 Rain ... 1030.770416 \n",
+ "1 Sunny ... 464.389481 \n",
+ "2 Cloudy ... 0.000000 \n",
+ "3 Clear ... 222.749059 \n",
+ "4 Clear ... 785.779071 \n",
+ "\n",
+ " Origin OriginAirportID \\\n",
+ "0 Frankfurt am Main Airport FRA \n",
+ "1 Cape Town International Airport CPT \n",
+ "2 Venice Marco Polo Airport VE05 \n",
+ "3 Naples International Airport NA01 \n",
+ "4 Licenciado Benito Juarez International Airport AICM \n",
+ "\n",
+ " OriginCityName OriginCountry \\\n",
+ "0 Frankfurt am Main DE \n",
+ "1 Cape Town ZA \n",
+ "2 Venice IT \n",
+ "3 Naples IT \n",
+ "4 Mexico City MX \n",
+ "\n",
+ " OriginLocation OriginRegion \\\n",
+ "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
+ "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
+ "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
+ "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
+ "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
+ "\n",
+ " OriginWeather dayOfWeek timestamp \n",
+ "0 Sunny 0 2018-01-01 00:00:00 \n",
+ "1 Clear 0 2018-01-01 18:27:00 \n",
+ "2 Rain 0 2018-01-01 17:11:14 \n",
+ "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n",
+ "4 Damaging Wind 0 2018-01-01 05:13:00 \n",
+ "\n",
+ "[5 rows x 27 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd_df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " AvgTicketPrice | \n",
+ " DistanceKilometers | \n",
+ " DistanceMiles | \n",
+ " FlightDelayMin | \n",
+ " FlightTimeHour | \n",
+ " FlightTimeMin | \n",
+ " dayOfWeek | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 13059.000000 | \n",
+ " 13059.000000 | \n",
+ " 13059.000000 | \n",
+ " 13059.000000 | \n",
+ " 13059.000000 | \n",
+ " 13059.000000 | \n",
+ " 13059.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 628.253689 | \n",
+ " 7092.142455 | \n",
+ " 4406.853013 | \n",
+ " 47.335171 | \n",
+ " 8.518797 | \n",
+ " 511.127842 | \n",
+ " 2.835975 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 266.396861 | \n",
+ " 4578.438497 | \n",
+ " 2844.909787 | \n",
+ " 96.746711 | \n",
+ " 5.579233 | \n",
+ " 334.753952 | \n",
+ " 1.939439 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 100.020528 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 409.893816 | \n",
+ " 2459.705673 | \n",
+ " 1528.390247 | \n",
+ " 0.000000 | \n",
+ " 4.205553 | \n",
+ " 252.333192 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 640.556668 | \n",
+ " 7610.330866 | \n",
+ " 4728.840363 | \n",
+ " 0.000000 | \n",
+ " 8.384086 | \n",
+ " 503.045170 | \n",
+ " 3.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 842.185470 | \n",
+ " 9736.637600 | \n",
+ " 6050.066114 | \n",
+ " 15.000000 | \n",
+ " 12.006934 | \n",
+ " 720.416036 | \n",
+ " 4.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 1199.729053 | \n",
+ " 19881.482315 | \n",
+ " 12353.780369 | \n",
+ " 360.000000 | \n",
+ " 31.715034 | \n",
+ " 1902.902032 | \n",
+ " 6.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
+ "count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
+ "mean 628.253689 7092.142455 4406.853013 47.335171 \n",
+ "std 266.396861 4578.438497 2844.909787 96.746711 \n",
+ "min 100.020528 0.000000 0.000000 0.000000 \n",
+ "25% 409.893816 2459.705673 1528.390247 0.000000 \n",
+ "50% 640.556668 7610.330866 4728.840363 0.000000 \n",
+ "75% 842.185470 9736.637600 6050.066114 15.000000 \n",
+ "max 1199.729053 19881.482315 12353.780369 360.000000 \n",
+ "\n",
+ " FlightTimeHour FlightTimeMin dayOfWeek \n",
+ "count 13059.000000 13059.000000 13059.000000 \n",
+ "mean 8.518797 511.127842 2.835975 \n",
+ "std 5.579233 334.753952 1.939439 \n",
+ "min 0.000000 0.000000 0.000000 \n",
+ "25% 4.205553 252.333192 1.000000 \n",
+ "50% 8.384086 503.045170 3.000000 \n",
+ "75% 12.006934 720.416036 4.000000 \n",
+ "max 31.715034 1902.902032 6.000000 "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd_df.describe()"
+ ]
}
],
"metadata": {
diff --git a/test.py b/test.py
index 30f2cf9..e73163c 100644
--- a/test.py
+++ b/test.py
@@ -1,6 +1,6 @@
import eland as ed
-df = ed.from_es('localhost', 'kibana_sample_data_flights')
+df = ed.read_es('localhost', 'kibana_sample_data_flights')
print(df.head())