mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
Minor update to test notebook
This commit is contained in:
parent
3dd919e8b2
commit
ded86d0b5d
836
test.ipynb
836
test.ipynb
@ -1,416 +1,5 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Eland"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import eland as ed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = ed.read_es('localhost', 'kibana_sample_data_flights')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>AvgTicketPrice</th>\n",
|
||||
" <th>Cancelled</th>\n",
|
||||
" <th>Carrier</th>\n",
|
||||
" <th>Dest</th>\n",
|
||||
" <th>DestAirportID</th>\n",
|
||||
" <th>DestCityName</th>\n",
|
||||
" <th>DestCountry</th>\n",
|
||||
" <th>DestLocation</th>\n",
|
||||
" <th>DestRegion</th>\n",
|
||||
" <th>DestWeather</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>FlightTimeMin</th>\n",
|
||||
" <th>Origin</th>\n",
|
||||
" <th>OriginAirportID</th>\n",
|
||||
" <th>OriginCityName</th>\n",
|
||||
" <th>OriginCountry</th>\n",
|
||||
" <th>OriginLocation</th>\n",
|
||||
" <th>OriginRegion</th>\n",
|
||||
" <th>OriginWeather</th>\n",
|
||||
" <th>dayOfWeek</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>841.265642</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Sydney Kingsford Smith International Airport</td>\n",
|
||||
" <td>SYD</td>\n",
|
||||
" <td>Sydney</td>\n",
|
||||
" <td>AU</td>\n",
|
||||
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Rain</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1030.770416</td>\n",
|
||||
" <td>Frankfurt am Main Airport</td>\n",
|
||||
" <td>FRA</td>\n",
|
||||
" <td>Frankfurt am Main</td>\n",
|
||||
" <td>DE</td>\n",
|
||||
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
|
||||
" <td>DE-HE</td>\n",
|
||||
" <td>Sunny</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T00:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>882.982662</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Sunny</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>464.389481</td>\n",
|
||||
" <td>Cape Town International Airport</td>\n",
|
||||
" <td>CPT</td>\n",
|
||||
" <td>Cape Town</td>\n",
|
||||
" <td>ZA</td>\n",
|
||||
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T18:27:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>190.636904</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Cloudy</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Rain</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T17:11:14</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>181.694216</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Treviso-Sant'Angelo Airport</td>\n",
|
||||
" <td>TV01</td>\n",
|
||||
" <td>Treviso</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>222.749059</td>\n",
|
||||
" <td>Naples International Airport</td>\n",
|
||||
" <td>NA01</td>\n",
|
||||
" <td>Naples</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
|
||||
" <td>IT-72</td>\n",
|
||||
" <td>Thunder & Lightning</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T10:33:28</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>730.041778</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Xi'an Xianyang International Airport</td>\n",
|
||||
" <td>XIY</td>\n",
|
||||
" <td>Xi'an</td>\n",
|
||||
" <td>CN</td>\n",
|
||||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>785.779071</td>\n",
|
||||
" <td>Licenciado Benito Juarez International Airport</td>\n",
|
||||
" <td>AICM</td>\n",
|
||||
" <td>Mexico City</td>\n",
|
||||
" <td>MX</td>\n",
|
||||
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
|
||||
" <td>MX-DIF</td>\n",
|
||||
" <td>Damaging Wind</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T05:13:00</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>5 rows × 27 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" AvgTicketPrice Cancelled Carrier \\\n",
|
||||
"0 841.265642 False Kibana Airlines \n",
|
||||
"1 882.982662 False Logstash Airways \n",
|
||||
"2 190.636904 False Logstash Airways \n",
|
||||
"3 181.694216 True Kibana Airlines \n",
|
||||
"4 730.041778 False Kibana Airlines \n",
|
||||
"\n",
|
||||
" Dest DestAirportID DestCityName \\\n",
|
||||
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
|
||||
"1 Venice Marco Polo Airport VE05 Venice \n",
|
||||
"2 Venice Marco Polo Airport VE05 Venice \n",
|
||||
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
|
||||
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
|
||||
"\n",
|
||||
" DestCountry DestLocation DestRegion \\\n",
|
||||
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
|
||||
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
|
||||
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
|
||||
"\n",
|
||||
" DestWeather ... FlightTimeMin \\\n",
|
||||
"0 Rain ... 1030.770416 \n",
|
||||
"1 Sunny ... 464.389481 \n",
|
||||
"2 Cloudy ... 0.000000 \n",
|
||||
"3 Clear ... 222.749059 \n",
|
||||
"4 Clear ... 785.779071 \n",
|
||||
"\n",
|
||||
" Origin OriginAirportID \\\n",
|
||||
"0 Frankfurt am Main Airport FRA \n",
|
||||
"1 Cape Town International Airport CPT \n",
|
||||
"2 Venice Marco Polo Airport VE05 \n",
|
||||
"3 Naples International Airport NA01 \n",
|
||||
"4 Licenciado Benito Juarez International Airport AICM \n",
|
||||
"\n",
|
||||
" OriginCityName OriginCountry \\\n",
|
||||
"0 Frankfurt am Main DE \n",
|
||||
"1 Cape Town ZA \n",
|
||||
"2 Venice IT \n",
|
||||
"3 Naples IT \n",
|
||||
"4 Mexico City MX \n",
|
||||
"\n",
|
||||
" OriginLocation OriginRegion \\\n",
|
||||
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
|
||||
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
|
||||
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
|
||||
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
|
||||
"\n",
|
||||
" OriginWeather dayOfWeek timestamp \n",
|
||||
"0 Sunny 0 2019-05-27T00:00:00 \n",
|
||||
"1 Clear 0 2019-05-27T18:27:00 \n",
|
||||
"2 Rain 0 2019-05-27T17:11:14 \n",
|
||||
"3 Thunder & Lightning 0 2019-05-27T10:33:28 \n",
|
||||
"4 Damaging Wind 0 2019-05-27T05:13:00 \n",
|
||||
"\n",
|
||||
"[5 rows x 27 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>AvgTicketPrice</th>\n",
|
||||
" <th>DistanceKilometers</th>\n",
|
||||
" <th>DistanceMiles</th>\n",
|
||||
" <th>FlightDelayMin</th>\n",
|
||||
" <th>FlightTimeMin</th>\n",
|
||||
" <th>dayOfWeek</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>count</th>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>mean</th>\n",
|
||||
" <td>628.253689</td>\n",
|
||||
" <td>7092.142457</td>\n",
|
||||
" <td>4406.853010</td>\n",
|
||||
" <td>47.335171</td>\n",
|
||||
" <td>511.127842</td>\n",
|
||||
" <td>2.835975</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>std</th>\n",
|
||||
" <td>266.386661</td>\n",
|
||||
" <td>4578.263193</td>\n",
|
||||
" <td>2844.800855</td>\n",
|
||||
" <td>96.743006</td>\n",
|
||||
" <td>334.741135</td>\n",
|
||||
" <td>1.939365</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>min</th>\n",
|
||||
" <td>100.020531</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25%</th>\n",
|
||||
" <td>410.008918</td>\n",
|
||||
" <td>2470.545974</td>\n",
|
||||
" <td>1535.126118</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>251.682199</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>640.362374</td>\n",
|
||||
" <td>7612.072403</td>\n",
|
||||
" <td>4729.922470</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>503.148975</td>\n",
|
||||
" <td>3.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>842.260482</td>\n",
|
||||
" <td>9735.660463</td>\n",
|
||||
" <td>6049.459005</td>\n",
|
||||
" <td>14.102113</td>\n",
|
||||
" <td>720.569838</td>\n",
|
||||
" <td>4.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>max</th>\n",
|
||||
" <td>1199.729004</td>\n",
|
||||
" <td>19881.482422</td>\n",
|
||||
" <td>12353.780273</td>\n",
|
||||
" <td>360.000000</td>\n",
|
||||
" <td>1902.901978</td>\n",
|
||||
" <td>6.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
|
||||
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
|
||||
"mean 628.253689 7092.142457 4406.853010 47.335171 \n",
|
||||
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
|
||||
"min 100.020531 0.000000 0.000000 0.000000 \n",
|
||||
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
|
||||
"50% 640.362374 7612.072403 4729.922470 0.000000 \n",
|
||||
"75% 842.260482 9735.660463 6049.459005 14.102113 \n",
|
||||
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
|
||||
"\n",
|
||||
" FlightTimeMin dayOfWeek \n",
|
||||
"count 13059.000000 13059.000000 \n",
|
||||
"mean 511.127842 2.835975 \n",
|
||||
"std 334.741135 1.939365 \n",
|
||||
"min 0.000000 0.000000 \n",
|
||||
"25% 251.682199 1.000000 \n",
|
||||
"50% 503.148975 3.000000 \n",
|
||||
"75% 720.569838 4.000000 \n",
|
||||
"max 1902.901978 6.000000 "
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -420,7 +9,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -429,7 +18,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -438,7 +27,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -671,7 +260,7 @@
|
||||
"[5 rows x 27 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -682,7 +271,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -822,7 +411,7 @@
|
||||
"max 31.715034 1902.902032 6.000000 "
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -830,6 +419,419 @@
|
||||
"source": [
|
||||
"pd_df.describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Eland"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import eland as ed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ed_df = ed.read_es('localhost', 'kibana_sample_data_flights')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>AvgTicketPrice</th>\n",
|
||||
" <th>Cancelled</th>\n",
|
||||
" <th>Carrier</th>\n",
|
||||
" <th>Dest</th>\n",
|
||||
" <th>DestAirportID</th>\n",
|
||||
" <th>DestCityName</th>\n",
|
||||
" <th>DestCountry</th>\n",
|
||||
" <th>DestLocation</th>\n",
|
||||
" <th>DestRegion</th>\n",
|
||||
" <th>DestWeather</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>FlightTimeMin</th>\n",
|
||||
" <th>Origin</th>\n",
|
||||
" <th>OriginAirportID</th>\n",
|
||||
" <th>OriginCityName</th>\n",
|
||||
" <th>OriginCountry</th>\n",
|
||||
" <th>OriginLocation</th>\n",
|
||||
" <th>OriginRegion</th>\n",
|
||||
" <th>OriginWeather</th>\n",
|
||||
" <th>dayOfWeek</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>841.265642</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Sydney Kingsford Smith International Airport</td>\n",
|
||||
" <td>SYD</td>\n",
|
||||
" <td>Sydney</td>\n",
|
||||
" <td>AU</td>\n",
|
||||
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Rain</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1030.770416</td>\n",
|
||||
" <td>Frankfurt am Main Airport</td>\n",
|
||||
" <td>FRA</td>\n",
|
||||
" <td>Frankfurt am Main</td>\n",
|
||||
" <td>DE</td>\n",
|
||||
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
|
||||
" <td>DE-HE</td>\n",
|
||||
" <td>Sunny</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T00:00:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>882.982662</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Sunny</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>464.389481</td>\n",
|
||||
" <td>Cape Town International Airport</td>\n",
|
||||
" <td>CPT</td>\n",
|
||||
" <td>Cape Town</td>\n",
|
||||
" <td>ZA</td>\n",
|
||||
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T18:27:00</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>190.636904</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Logstash Airways</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Cloudy</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>Venice Marco Polo Airport</td>\n",
|
||||
" <td>VE05</td>\n",
|
||||
" <td>Venice</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Rain</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T17:11:14</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>181.694216</td>\n",
|
||||
" <td>True</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Treviso-Sant'Angelo Airport</td>\n",
|
||||
" <td>TV01</td>\n",
|
||||
" <td>Treviso</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
|
||||
" <td>IT-34</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>222.749059</td>\n",
|
||||
" <td>Naples International Airport</td>\n",
|
||||
" <td>NA01</td>\n",
|
||||
" <td>Naples</td>\n",
|
||||
" <td>IT</td>\n",
|
||||
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
|
||||
" <td>IT-72</td>\n",
|
||||
" <td>Thunder & Lightning</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T10:33:28</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>730.041778</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>Kibana Airlines</td>\n",
|
||||
" <td>Xi'an Xianyang International Airport</td>\n",
|
||||
" <td>XIY</td>\n",
|
||||
" <td>Xi'an</td>\n",
|
||||
" <td>CN</td>\n",
|
||||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
||||
" <td>SE-BD</td>\n",
|
||||
" <td>Clear</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>785.779071</td>\n",
|
||||
" <td>Licenciado Benito Juarez International Airport</td>\n",
|
||||
" <td>AICM</td>\n",
|
||||
" <td>Mexico City</td>\n",
|
||||
" <td>MX</td>\n",
|
||||
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
|
||||
" <td>MX-DIF</td>\n",
|
||||
" <td>Damaging Wind</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2019-05-27T05:13:00</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>5 rows × 27 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" AvgTicketPrice Cancelled Carrier \\\n",
|
||||
"0 841.265642 False Kibana Airlines \n",
|
||||
"1 882.982662 False Logstash Airways \n",
|
||||
"2 190.636904 False Logstash Airways \n",
|
||||
"3 181.694216 True Kibana Airlines \n",
|
||||
"4 730.041778 False Kibana Airlines \n",
|
||||
"\n",
|
||||
" Dest DestAirportID DestCityName \\\n",
|
||||
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
|
||||
"1 Venice Marco Polo Airport VE05 Venice \n",
|
||||
"2 Venice Marco Polo Airport VE05 Venice \n",
|
||||
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
|
||||
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
|
||||
"\n",
|
||||
" DestCountry DestLocation DestRegion \\\n",
|
||||
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
|
||||
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
|
||||
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
|
||||
"\n",
|
||||
" DestWeather ... FlightTimeMin \\\n",
|
||||
"0 Rain ... 1030.770416 \n",
|
||||
"1 Sunny ... 464.389481 \n",
|
||||
"2 Cloudy ... 0.000000 \n",
|
||||
"3 Clear ... 222.749059 \n",
|
||||
"4 Clear ... 785.779071 \n",
|
||||
"\n",
|
||||
" Origin OriginAirportID \\\n",
|
||||
"0 Frankfurt am Main Airport FRA \n",
|
||||
"1 Cape Town International Airport CPT \n",
|
||||
"2 Venice Marco Polo Airport VE05 \n",
|
||||
"3 Naples International Airport NA01 \n",
|
||||
"4 Licenciado Benito Juarez International Airport AICM \n",
|
||||
"\n",
|
||||
" OriginCityName OriginCountry \\\n",
|
||||
"0 Frankfurt am Main DE \n",
|
||||
"1 Cape Town ZA \n",
|
||||
"2 Venice IT \n",
|
||||
"3 Naples IT \n",
|
||||
"4 Mexico City MX \n",
|
||||
"\n",
|
||||
" OriginLocation OriginRegion \\\n",
|
||||
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
|
||||
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
|
||||
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||||
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
|
||||
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
|
||||
"\n",
|
||||
" OriginWeather dayOfWeek timestamp \n",
|
||||
"0 Sunny 0 2019-05-27T00:00:00 \n",
|
||||
"1 Clear 0 2019-05-27T18:27:00 \n",
|
||||
"2 Rain 0 2019-05-27T17:11:14 \n",
|
||||
"3 Thunder & Lightning 0 2019-05-27T10:33:28 \n",
|
||||
"4 Damaging Wind 0 2019-05-27T05:13:00 \n",
|
||||
"\n",
|
||||
"[5 rows x 27 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ed_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>AvgTicketPrice</th>\n",
|
||||
" <th>DistanceKilometers</th>\n",
|
||||
" <th>DistanceMiles</th>\n",
|
||||
" <th>FlightDelayMin</th>\n",
|
||||
" <th>FlightTimeMin</th>\n",
|
||||
" <th>dayOfWeek</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>count</th>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" <td>13059.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>mean</th>\n",
|
||||
" <td>628.253689</td>\n",
|
||||
" <td>7092.142457</td>\n",
|
||||
" <td>4406.853010</td>\n",
|
||||
" <td>47.335171</td>\n",
|
||||
" <td>511.127842</td>\n",
|
||||
" <td>2.835975</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>std</th>\n",
|
||||
" <td>266.386661</td>\n",
|
||||
" <td>4578.263193</td>\n",
|
||||
" <td>2844.800855</td>\n",
|
||||
" <td>96.743006</td>\n",
|
||||
" <td>334.741135</td>\n",
|
||||
" <td>1.939365</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>min</th>\n",
|
||||
" <td>100.020531</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>25%</th>\n",
|
||||
" <td>410.008918</td>\n",
|
||||
" <td>2470.545974</td>\n",
|
||||
" <td>1535.126118</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>252.064162</td>\n",
|
||||
" <td>1.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>50%</th>\n",
|
||||
" <td>640.387285</td>\n",
|
||||
" <td>7612.072403</td>\n",
|
||||
" <td>4729.922470</td>\n",
|
||||
" <td>0.000000</td>\n",
|
||||
" <td>503.148975</td>\n",
|
||||
" <td>3.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>75%</th>\n",
|
||||
" <td>842.259390</td>\n",
|
||||
" <td>9735.660463</td>\n",
|
||||
" <td>6049.583389</td>\n",
|
||||
" <td>15.000000</td>\n",
|
||||
" <td>720.505705</td>\n",
|
||||
" <td>4.068000</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>max</th>\n",
|
||||
" <td>1199.729004</td>\n",
|
||||
" <td>19881.482422</td>\n",
|
||||
" <td>12353.780273</td>\n",
|
||||
" <td>360.000000</td>\n",
|
||||
" <td>1902.901978</td>\n",
|
||||
" <td>6.000000</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
|
||||
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
|
||||
"mean 628.253689 7092.142457 4406.853010 47.335171 \n",
|
||||
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
|
||||
"min 100.020531 0.000000 0.000000 0.000000 \n",
|
||||
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
|
||||
"50% 640.387285 7612.072403 4729.922470 0.000000 \n",
|
||||
"75% 842.259390 9735.660463 6049.583389 15.000000 \n",
|
||||
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
|
||||
"\n",
|
||||
" FlightTimeMin dayOfWeek \n",
|
||||
"count 13059.000000 13059.000000 \n",
|
||||
"mean 511.127842 2.835975 \n",
|
||||
"std 334.741135 1.939365 \n",
|
||||
"min 0.000000 0.000000 \n",
|
||||
"25% 252.064162 1.000000 \n",
|
||||
"50% 503.148975 3.000000 \n",
|
||||
"75% 720.505705 4.068000 \n",
|
||||
"max 1902.901978 6.000000 "
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ed_df.describe()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
Loading…
x
Reference in New Issue
Block a user