eland/test.ipynb
Stephen Dodson f1e27f1dda First prototype code commit
Experimental prototype, for internal development use only!
2019-06-12 11:46:20 +00:00

430 lines
15 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import eland as ed"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = ed.from_es('localhost', 'kibana_sample_data_flights')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>Cancelled</th>\n",
" <th>Carrier</th>\n",
" <th>Dest</th>\n",
" <th>DestAirportID</th>\n",
" <th>DestCityName</th>\n",
" <th>DestCountry</th>\n",
" <th>DestLocation</th>\n",
" <th>DestRegion</th>\n",
" <th>DestWeather</th>\n",
" <th>...</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>Origin</th>\n",
" <th>OriginAirportID</th>\n",
" <th>OriginCityName</th>\n",
" <th>OriginCountry</th>\n",
" <th>OriginLocation</th>\n",
" <th>OriginRegion</th>\n",
" <th>OriginWeather</th>\n",
" <th>dayOfWeek</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>841.265642</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Sydney Kingsford Smith International Airport</td>\n",
" <td>SYD</td>\n",
" <td>Sydney</td>\n",
" <td>AU</td>\n",
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Rain</td>\n",
" <td>...</td>\n",
" <td>1030.770416</td>\n",
" <td>Frankfurt am Main Airport</td>\n",
" <td>FRA</td>\n",
" <td>Frankfurt am Main</td>\n",
" <td>DE</td>\n",
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
" <td>DE-HE</td>\n",
" <td>Sunny</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T00:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>882.982662</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Sunny</td>\n",
" <td>...</td>\n",
" <td>464.389481</td>\n",
" <td>Cape Town International Airport</td>\n",
" <td>CPT</td>\n",
" <td>Cape Town</td>\n",
" <td>ZA</td>\n",
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T18:27:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>190.636904</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Cloudy</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Rain</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T17:11:14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>181.694216</td>\n",
" <td>True</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Treviso-Sant'Angelo Airport</td>\n",
" <td>TV01</td>\n",
" <td>Treviso</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
" <td>IT-34</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>222.749059</td>\n",
" <td>Naples International Airport</td>\n",
" <td>NA01</td>\n",
" <td>Naples</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
" <td>IT-72</td>\n",
" <td>Thunder &amp; Lightning</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T10:33:28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>730.041778</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Xi'an Xianyang International Airport</td>\n",
" <td>XIY</td>\n",
" <td>Xi'an</td>\n",
" <td>CN</td>\n",
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>785.779071</td>\n",
" <td>Licenciado Benito Juarez International Airport</td>\n",
" <td>AICM</td>\n",
" <td>Mexico City</td>\n",
" <td>MX</td>\n",
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
" <td>MX-DIF</td>\n",
" <td>Damaging Wind</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T05:13:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice Cancelled Carrier \\\n",
"0 841.265642 False Kibana Airlines \n",
"1 882.982662 False Logstash Airways \n",
"2 190.636904 False Logstash Airways \n",
"3 181.694216 True Kibana Airlines \n",
"4 730.041778 False Kibana Airlines \n",
"\n",
" Dest DestAirportID DestCityName \\\n",
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
"1 Venice Marco Polo Airport VE05 Venice \n",
"2 Venice Marco Polo Airport VE05 Venice \n",
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
"\n",
" DestCountry DestLocation DestRegion \\\n",
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
"\n",
" DestWeather ... FlightTimeMin \\\n",
"0 Rain ... 1030.770416 \n",
"1 Sunny ... 464.389481 \n",
"2 Cloudy ... 0.000000 \n",
"3 Clear ... 222.749059 \n",
"4 Clear ... 785.779071 \n",
"\n",
" Origin OriginAirportID \\\n",
"0 Frankfurt am Main Airport FRA \n",
"1 Cape Town International Airport CPT \n",
"2 Venice Marco Polo Airport VE05 \n",
"3 Naples International Airport NA01 \n",
"4 Licenciado Benito Juarez International Airport AICM \n",
"\n",
" OriginCityName OriginCountry \\\n",
"0 Frankfurt am Main DE \n",
"1 Cape Town ZA \n",
"2 Venice IT \n",
"3 Naples IT \n",
"4 Mexico City MX \n",
"\n",
" OriginLocation OriginRegion \\\n",
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
"\n",
" OriginWeather dayOfWeek timestamp \n",
"0 Sunny 0 2019-05-27T00:00:00 \n",
"1 Clear 0 2019-05-27T18:27:00 \n",
"2 Rain 0 2019-05-27T17:11:14 \n",
"3 Thunder & Lightning 0 2019-05-27T10:33:28 \n",
"4 Damaging Wind 0 2019-05-27T05:13:00 \n",
"\n",
"[5 rows x 27 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>DistanceKilometers</th>\n",
" <th>DistanceMiles</th>\n",
" <th>FlightDelayMin</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>dayOfWeek</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>628.253689</td>\n",
" <td>7092.142457</td>\n",
" <td>4406.853010</td>\n",
" <td>47.335171</td>\n",
" <td>511.127842</td>\n",
" <td>2.835975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>266.386661</td>\n",
" <td>4578.263193</td>\n",
" <td>2844.800855</td>\n",
" <td>96.743006</td>\n",
" <td>334.741135</td>\n",
" <td>1.939365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>100.020531</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>410.008918</td>\n",
" <td>2470.545974</td>\n",
" <td>1535.126118</td>\n",
" <td>0.000000</td>\n",
" <td>251.773003</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>640.362667</td>\n",
" <td>7612.072403</td>\n",
" <td>4729.922470</td>\n",
" <td>0.000000</td>\n",
" <td>503.148975</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>842.233478</td>\n",
" <td>9735.887390</td>\n",
" <td>6049.459005</td>\n",
" <td>15.000000</td>\n",
" <td>720.534532</td>\n",
" <td>4.095833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1199.729004</td>\n",
" <td>19881.482422</td>\n",
" <td>12353.780273</td>\n",
" <td>360.000000</td>\n",
" <td>1902.901978</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
"mean 628.253689 7092.142457 4406.853010 47.335171 \n",
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
"min 100.020531 0.000000 0.000000 0.000000 \n",
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
"50% 640.362667 7612.072403 4729.922470 0.000000 \n",
"75% 842.233478 9735.887390 6049.459005 15.000000 \n",
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
"\n",
" FlightTimeMin dayOfWeek \n",
"count 13059.000000 13059.000000 \n",
"mean 511.127842 2.835975 \n",
"std 334.741135 1.939365 \n",
"min 0.000000 0.000000 \n",
"25% 251.773003 1.000000 \n",
"50% 503.148975 3.000000 \n",
"75% 720.534532 4.095833 \n",
"max 1902.901978 6.000000 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}