Minor update to test notebook

This commit is contained in:
Stephen Dodson 2019-06-12 12:51:43 +00:00
parent 3dd919e8b2
commit ded86d0b5d

View File

@ -1,416 +1,5 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Eland"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import eland as ed"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = ed.read_es('localhost', 'kibana_sample_data_flights')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>Cancelled</th>\n",
" <th>Carrier</th>\n",
" <th>Dest</th>\n",
" <th>DestAirportID</th>\n",
" <th>DestCityName</th>\n",
" <th>DestCountry</th>\n",
" <th>DestLocation</th>\n",
" <th>DestRegion</th>\n",
" <th>DestWeather</th>\n",
" <th>...</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>Origin</th>\n",
" <th>OriginAirportID</th>\n",
" <th>OriginCityName</th>\n",
" <th>OriginCountry</th>\n",
" <th>OriginLocation</th>\n",
" <th>OriginRegion</th>\n",
" <th>OriginWeather</th>\n",
" <th>dayOfWeek</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>841.265642</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Sydney Kingsford Smith International Airport</td>\n",
" <td>SYD</td>\n",
" <td>Sydney</td>\n",
" <td>AU</td>\n",
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Rain</td>\n",
" <td>...</td>\n",
" <td>1030.770416</td>\n",
" <td>Frankfurt am Main Airport</td>\n",
" <td>FRA</td>\n",
" <td>Frankfurt am Main</td>\n",
" <td>DE</td>\n",
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
" <td>DE-HE</td>\n",
" <td>Sunny</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T00:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>882.982662</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Sunny</td>\n",
" <td>...</td>\n",
" <td>464.389481</td>\n",
" <td>Cape Town International Airport</td>\n",
" <td>CPT</td>\n",
" <td>Cape Town</td>\n",
" <td>ZA</td>\n",
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T18:27:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>190.636904</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Cloudy</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Rain</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T17:11:14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>181.694216</td>\n",
" <td>True</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Treviso-Sant'Angelo Airport</td>\n",
" <td>TV01</td>\n",
" <td>Treviso</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
" <td>IT-34</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>222.749059</td>\n",
" <td>Naples International Airport</td>\n",
" <td>NA01</td>\n",
" <td>Naples</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
" <td>IT-72</td>\n",
" <td>Thunder &amp; Lightning</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T10:33:28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>730.041778</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Xi'an Xianyang International Airport</td>\n",
" <td>XIY</td>\n",
" <td>Xi'an</td>\n",
" <td>CN</td>\n",
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>785.779071</td>\n",
" <td>Licenciado Benito Juarez International Airport</td>\n",
" <td>AICM</td>\n",
" <td>Mexico City</td>\n",
" <td>MX</td>\n",
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
" <td>MX-DIF</td>\n",
" <td>Damaging Wind</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T05:13:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice Cancelled Carrier \\\n",
"0 841.265642 False Kibana Airlines \n",
"1 882.982662 False Logstash Airways \n",
"2 190.636904 False Logstash Airways \n",
"3 181.694216 True Kibana Airlines \n",
"4 730.041778 False Kibana Airlines \n",
"\n",
" Dest DestAirportID DestCityName \\\n",
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
"1 Venice Marco Polo Airport VE05 Venice \n",
"2 Venice Marco Polo Airport VE05 Venice \n",
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
"\n",
" DestCountry DestLocation DestRegion \\\n",
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
"\n",
" DestWeather ... FlightTimeMin \\\n",
"0 Rain ... 1030.770416 \n",
"1 Sunny ... 464.389481 \n",
"2 Cloudy ... 0.000000 \n",
"3 Clear ... 222.749059 \n",
"4 Clear ... 785.779071 \n",
"\n",
" Origin OriginAirportID \\\n",
"0 Frankfurt am Main Airport FRA \n",
"1 Cape Town International Airport CPT \n",
"2 Venice Marco Polo Airport VE05 \n",
"3 Naples International Airport NA01 \n",
"4 Licenciado Benito Juarez International Airport AICM \n",
"\n",
" OriginCityName OriginCountry \\\n",
"0 Frankfurt am Main DE \n",
"1 Cape Town ZA \n",
"2 Venice IT \n",
"3 Naples IT \n",
"4 Mexico City MX \n",
"\n",
" OriginLocation OriginRegion \\\n",
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
"\n",
" OriginWeather dayOfWeek timestamp \n",
"0 Sunny 0 2019-05-27T00:00:00 \n",
"1 Clear 0 2019-05-27T18:27:00 \n",
"2 Rain 0 2019-05-27T17:11:14 \n",
"3 Thunder & Lightning 0 2019-05-27T10:33:28 \n",
"4 Damaging Wind 0 2019-05-27T05:13:00 \n",
"\n",
"[5 rows x 27 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>DistanceKilometers</th>\n",
" <th>DistanceMiles</th>\n",
" <th>FlightDelayMin</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>dayOfWeek</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>628.253689</td>\n",
" <td>7092.142457</td>\n",
" <td>4406.853010</td>\n",
" <td>47.335171</td>\n",
" <td>511.127842</td>\n",
" <td>2.835975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>266.386661</td>\n",
" <td>4578.263193</td>\n",
" <td>2844.800855</td>\n",
" <td>96.743006</td>\n",
" <td>334.741135</td>\n",
" <td>1.939365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>100.020531</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>410.008918</td>\n",
" <td>2470.545974</td>\n",
" <td>1535.126118</td>\n",
" <td>0.000000</td>\n",
" <td>251.682199</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>640.362374</td>\n",
" <td>7612.072403</td>\n",
" <td>4729.922470</td>\n",
" <td>0.000000</td>\n",
" <td>503.148975</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>842.260482</td>\n",
" <td>9735.660463</td>\n",
" <td>6049.459005</td>\n",
" <td>14.102113</td>\n",
" <td>720.569838</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1199.729004</td>\n",
" <td>19881.482422</td>\n",
" <td>12353.780273</td>\n",
" <td>360.000000</td>\n",
" <td>1902.901978</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
"mean 628.253689 7092.142457 4406.853010 47.335171 \n",
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
"min 100.020531 0.000000 0.000000 0.000000 \n",
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
"50% 640.362374 7612.072403 4729.922470 0.000000 \n",
"75% 842.260482 9735.660463 6049.459005 14.102113 \n",
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
"\n",
" FlightTimeMin dayOfWeek \n",
"count 13059.000000 13059.000000 \n",
"mean 511.127842 2.835975 \n",
"std 334.741135 1.939365 \n",
"min 0.000000 0.000000 \n",
"25% 251.682199 1.000000 \n",
"50% 503.148975 3.000000 \n",
"75% 720.569838 4.000000 \n",
"max 1902.901978 6.000000 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -420,7 +9,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -429,7 +18,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -438,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -671,7 +260,7 @@
"[5 rows x 27 columns]"
]
},
"execution_count": 7,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -682,7 +271,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -822,7 +411,7 @@
"max 31.715034 1902.902032 6.000000 "
]
},
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@ -830,6 +419,419 @@
"source": [
"pd_df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Eland"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import eland as ed"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"ed_df = ed.read_es('localhost', 'kibana_sample_data_flights')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>Cancelled</th>\n",
" <th>Carrier</th>\n",
" <th>Dest</th>\n",
" <th>DestAirportID</th>\n",
" <th>DestCityName</th>\n",
" <th>DestCountry</th>\n",
" <th>DestLocation</th>\n",
" <th>DestRegion</th>\n",
" <th>DestWeather</th>\n",
" <th>...</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>Origin</th>\n",
" <th>OriginAirportID</th>\n",
" <th>OriginCityName</th>\n",
" <th>OriginCountry</th>\n",
" <th>OriginLocation</th>\n",
" <th>OriginRegion</th>\n",
" <th>OriginWeather</th>\n",
" <th>dayOfWeek</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>841.265642</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Sydney Kingsford Smith International Airport</td>\n",
" <td>SYD</td>\n",
" <td>Sydney</td>\n",
" <td>AU</td>\n",
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Rain</td>\n",
" <td>...</td>\n",
" <td>1030.770416</td>\n",
" <td>Frankfurt am Main Airport</td>\n",
" <td>FRA</td>\n",
" <td>Frankfurt am Main</td>\n",
" <td>DE</td>\n",
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
" <td>DE-HE</td>\n",
" <td>Sunny</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T00:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>882.982662</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Sunny</td>\n",
" <td>...</td>\n",
" <td>464.389481</td>\n",
" <td>Cape Town International Airport</td>\n",
" <td>CPT</td>\n",
" <td>Cape Town</td>\n",
" <td>ZA</td>\n",
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T18:27:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>190.636904</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Cloudy</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Rain</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T17:11:14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>181.694216</td>\n",
" <td>True</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Treviso-Sant'Angelo Airport</td>\n",
" <td>TV01</td>\n",
" <td>Treviso</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
" <td>IT-34</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>222.749059</td>\n",
" <td>Naples International Airport</td>\n",
" <td>NA01</td>\n",
" <td>Naples</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
" <td>IT-72</td>\n",
" <td>Thunder &amp; Lightning</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T10:33:28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>730.041778</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Xi'an Xianyang International Airport</td>\n",
" <td>XIY</td>\n",
" <td>Xi'an</td>\n",
" <td>CN</td>\n",
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>785.779071</td>\n",
" <td>Licenciado Benito Juarez International Airport</td>\n",
" <td>AICM</td>\n",
" <td>Mexico City</td>\n",
" <td>MX</td>\n",
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
" <td>MX-DIF</td>\n",
" <td>Damaging Wind</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T05:13:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice Cancelled Carrier \\\n",
"0 841.265642 False Kibana Airlines \n",
"1 882.982662 False Logstash Airways \n",
"2 190.636904 False Logstash Airways \n",
"3 181.694216 True Kibana Airlines \n",
"4 730.041778 False Kibana Airlines \n",
"\n",
" Dest DestAirportID DestCityName \\\n",
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
"1 Venice Marco Polo Airport VE05 Venice \n",
"2 Venice Marco Polo Airport VE05 Venice \n",
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
"\n",
" DestCountry DestLocation DestRegion \\\n",
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
"\n",
" DestWeather ... FlightTimeMin \\\n",
"0 Rain ... 1030.770416 \n",
"1 Sunny ... 464.389481 \n",
"2 Cloudy ... 0.000000 \n",
"3 Clear ... 222.749059 \n",
"4 Clear ... 785.779071 \n",
"\n",
" Origin OriginAirportID \\\n",
"0 Frankfurt am Main Airport FRA \n",
"1 Cape Town International Airport CPT \n",
"2 Venice Marco Polo Airport VE05 \n",
"3 Naples International Airport NA01 \n",
"4 Licenciado Benito Juarez International Airport AICM \n",
"\n",
" OriginCityName OriginCountry \\\n",
"0 Frankfurt am Main DE \n",
"1 Cape Town ZA \n",
"2 Venice IT \n",
"3 Naples IT \n",
"4 Mexico City MX \n",
"\n",
" OriginLocation OriginRegion \\\n",
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
"\n",
" OriginWeather dayOfWeek timestamp \n",
"0 Sunny 0 2019-05-27T00:00:00 \n",
"1 Clear 0 2019-05-27T18:27:00 \n",
"2 Rain 0 2019-05-27T17:11:14 \n",
"3 Thunder & Lightning 0 2019-05-27T10:33:28 \n",
"4 Damaging Wind 0 2019-05-27T05:13:00 \n",
"\n",
"[5 rows x 27 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>DistanceKilometers</th>\n",
" <th>DistanceMiles</th>\n",
" <th>FlightDelayMin</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>dayOfWeek</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>628.253689</td>\n",
" <td>7092.142457</td>\n",
" <td>4406.853010</td>\n",
" <td>47.335171</td>\n",
" <td>511.127842</td>\n",
" <td>2.835975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>266.386661</td>\n",
" <td>4578.263193</td>\n",
" <td>2844.800855</td>\n",
" <td>96.743006</td>\n",
" <td>334.741135</td>\n",
" <td>1.939365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>100.020531</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>410.008918</td>\n",
" <td>2470.545974</td>\n",
" <td>1535.126118</td>\n",
" <td>0.000000</td>\n",
" <td>252.064162</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>640.387285</td>\n",
" <td>7612.072403</td>\n",
" <td>4729.922470</td>\n",
" <td>0.000000</td>\n",
" <td>503.148975</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>842.259390</td>\n",
" <td>9735.660463</td>\n",
" <td>6049.583389</td>\n",
" <td>15.000000</td>\n",
" <td>720.505705</td>\n",
" <td>4.068000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1199.729004</td>\n",
" <td>19881.482422</td>\n",
" <td>12353.780273</td>\n",
" <td>360.000000</td>\n",
" <td>1902.901978</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
"mean 628.253689 7092.142457 4406.853010 47.335171 \n",
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
"min 100.020531 0.000000 0.000000 0.000000 \n",
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
"50% 640.387285 7612.072403 4729.922470 0.000000 \n",
"75% 842.259390 9735.660463 6049.583389 15.000000 \n",
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
"\n",
" FlightTimeMin dayOfWeek \n",
"count 13059.000000 13059.000000 \n",
"mean 511.127842 2.835975 \n",
"std 334.741135 1.939365 \n",
"min 0.000000 0.000000 \n",
"25% 252.064162 1.000000 \n",
"50% 503.148975 3.000000 \n",
"75% 720.505705 4.068000 \n",
"max 1902.901978 6.000000 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ed_df.describe()"
]
}
],
"metadata": {