diff --git a/eland/utils.py b/eland/utils.py index 28ee93e..5135bfe 100644 --- a/eland/utils.py +++ b/eland/utils.py @@ -1,4 +1,4 @@ import eland -def from_es(es_params, index_pattern): +def read_es(es_params, index_pattern): return eland.DataFrame(es_params, index_pattern) diff --git a/flights.json.gz b/flights.json.gz new file mode 100644 index 0000000..85f344d Binary files /dev/null and b/flights.json.gz differ diff --git a/test.ipynb b/test.ipynb index 2ba1dea..ebce3d0 100644 --- a/test.ipynb +++ b/test.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Eland" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -15,7 +22,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = ed.from_es('localhost', 'kibana_sample_data_flights')" + "df = ed.read_es('localhost', 'kibana_sample_data_flights')" ] }, { @@ -339,12 +346,12 @@ " 2470.545974\n", " 1535.126118\n", " 0.000000\n", - " 251.773003\n", + " 251.682199\n", " 1.000000\n", " \n", " \n", " 50%\n", - " 640.362667\n", + " 640.362374\n", " 7612.072403\n", " 4729.922470\n", " 0.000000\n", @@ -353,12 +360,12 @@ " \n", " \n", " 75%\n", - " 842.233478\n", - " 9735.887390\n", + " 842.260482\n", + " 9735.660463\n", " 6049.459005\n", - " 15.000000\n", - " 720.534532\n", - " 4.095833\n", + " 14.102113\n", + " 720.569838\n", + " 4.000000\n", " \n", " \n", " max\n", @@ -380,8 +387,8 @@ "std 266.386661 4578.263193 2844.800855 96.743006 \n", "min 100.020531 0.000000 0.000000 0.000000 \n", "25% 410.008918 2470.545974 1535.126118 0.000000 \n", - "50% 640.362667 7612.072403 4729.922470 0.000000 \n", - "75% 842.233478 9735.887390 6049.459005 15.000000 \n", + "50% 640.362374 7612.072403 4729.922470 0.000000 \n", + "75% 842.260482 9735.660463 6049.459005 14.102113 \n", "max 1199.729004 19881.482422 12353.780273 360.000000 \n", "\n", " FlightTimeMin dayOfWeek \n", @@ -389,9 +396,9 @@ "mean 511.127842 2.835975 \n", "std 334.741135 1.939365 \n", "min 0.000000 0.000000 \n", - "25% 251.773003 1.000000 \n", + "25% 251.682199 1.000000 \n", "50% 503.148975 3.000000 \n", - "75% 720.534532 4.095833 \n", + "75% 720.569838 4.000000 \n", "max 1902.901978 6.000000 " ] }, @@ -403,6 +410,426 @@ "source": [ "df.describe()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "pd_df = pd.read_json('flights.json.gz', lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AvgTicketPriceCancelledCarrierDestDestAirportIDDestCityNameDestCountryDestLocationDestRegionDestWeather...FlightTimeMinOriginOriginAirportIDOriginCityNameOriginCountryOriginLocationOriginRegionOriginWeatherdayOfWeektimestamp
0841.265642FalseKibana AirlinesSydney Kingsford Smith International AirportSYDSydneyAU{'lat': '-33.94609833', 'lon': '151.177002'}SE-BDRain...1030.770416Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HESunny02018-01-01 00:00:00
1882.982662FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...464.389481Cape Town International AirportCPTCape TownZA{'lat': '-33.96480179', 'lon': '18.60169983'}SE-BDClear02018-01-01 18:27:00
2190.636904FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy...0.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 17:11:14
3181.694216TrueKibana AirlinesTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Clear...222.749059Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Thunder & Lightning02018-01-01 10:33:28
4730.041778FalseKibana AirlinesXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...785.779071Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFDamaging Wind02018-01-01 05:13:00
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " AvgTicketPrice Cancelled Carrier \\\n", + "0 841.265642 False Kibana Airlines \n", + "1 882.982662 False Logstash Airways \n", + "2 190.636904 False Logstash Airways \n", + "3 181.694216 True Kibana Airlines \n", + "4 730.041778 False Kibana Airlines \n", + "\n", + " Dest DestAirportID DestCityName \\\n", + "0 Sydney Kingsford Smith International Airport SYD Sydney \n", + "1 Venice Marco Polo Airport VE05 Venice \n", + "2 Venice Marco Polo Airport VE05 Venice \n", + "3 Treviso-Sant'Angelo Airport TV01 Treviso \n", + "4 Xi'an Xianyang International Airport XIY Xi'an \n", + "\n", + " DestCountry DestLocation DestRegion \\\n", + "0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n", + "1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", + "4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "\n", + " DestWeather ... FlightTimeMin \\\n", + "0 Rain ... 1030.770416 \n", + "1 Sunny ... 464.389481 \n", + "2 Cloudy ... 0.000000 \n", + "3 Clear ... 222.749059 \n", + "4 Clear ... 785.779071 \n", + "\n", + " Origin OriginAirportID \\\n", + "0 Frankfurt am Main Airport FRA \n", + "1 Cape Town International Airport CPT \n", + "2 Venice Marco Polo Airport VE05 \n", + "3 Naples International Airport NA01 \n", + "4 Licenciado Benito Juarez International Airport AICM \n", + "\n", + " OriginCityName OriginCountry \\\n", + "0 Frankfurt am Main DE \n", + "1 Cape Town ZA \n", + "2 Venice IT \n", + "3 Naples IT \n", + "4 Mexico City MX \n", + "\n", + " OriginLocation OriginRegion \\\n", + "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", + "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n", + "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", + "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "\n", + " OriginWeather dayOfWeek timestamp \n", + "0 Sunny 0 2018-01-01 00:00:00 \n", + "1 Clear 0 2018-01-01 18:27:00 \n", + "2 Rain 0 2018-01-01 17:11:14 \n", + "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n", + "4 Damaging Wind 0 2018-01-01 05:13:00 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AvgTicketPriceDistanceKilometersDistanceMilesFlightDelayMinFlightTimeHourFlightTimeMindayOfWeek
count13059.00000013059.00000013059.00000013059.00000013059.00000013059.00000013059.000000
mean628.2536897092.1424554406.85301347.3351718.518797511.1278422.835975
std266.3968614578.4384972844.90978796.7467115.579233334.7539521.939439
min100.0205280.0000000.0000000.0000000.0000000.0000000.000000
25%409.8938162459.7056731528.3902470.0000004.205553252.3331921.000000
50%640.5566687610.3308664728.8403630.0000008.384086503.0451703.000000
75%842.1854709736.6376006050.06611415.00000012.006934720.4160364.000000
max1199.72905319881.48231512353.780369360.00000031.7150341902.9020326.000000
\n", + "
" + ], + "text/plain": [ + " AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n", + "count 13059.000000 13059.000000 13059.000000 13059.000000 \n", + "mean 628.253689 7092.142455 4406.853013 47.335171 \n", + "std 266.396861 4578.438497 2844.909787 96.746711 \n", + "min 100.020528 0.000000 0.000000 0.000000 \n", + "25% 409.893816 2459.705673 1528.390247 0.000000 \n", + "50% 640.556668 7610.330866 4728.840363 0.000000 \n", + "75% 842.185470 9736.637600 6050.066114 15.000000 \n", + "max 1199.729053 19881.482315 12353.780369 360.000000 \n", + "\n", + " FlightTimeHour FlightTimeMin dayOfWeek \n", + "count 13059.000000 13059.000000 13059.000000 \n", + "mean 8.518797 511.127842 2.835975 \n", + "std 5.579233 334.753952 1.939439 \n", + "min 0.000000 0.000000 0.000000 \n", + "25% 4.205553 252.333192 1.000000 \n", + "50% 8.384086 503.045170 3.000000 \n", + "75% 12.006934 720.416036 4.000000 \n", + "max 31.715034 1902.902032 6.000000 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd_df.describe()" + ] } ], "metadata": { diff --git a/test.py b/test.py index 30f2cf9..e73163c 100644 --- a/test.py +++ b/test.py @@ -1,6 +1,6 @@ import eland as ed -df = ed.from_es('localhost', 'kibana_sample_data_flights') +df = ed.read_es('localhost', 'kibana_sample_data_flights') print(df.head())