mirror of
https://github.com/elastic/eland.git
synced 2025-07-11 00:02:14 +08:00
859 lines
31 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Pandas"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"pd_df = pd.read_json('flights.json.gz', lines=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>AvgTicketPrice</th>\n",
|
||
" <th>Cancelled</th>\n",
|
||
" <th>Carrier</th>\n",
|
||
" <th>Dest</th>\n",
|
||
" <th>DestAirportID</th>\n",
|
||
" <th>DestCityName</th>\n",
|
||
" <th>DestCountry</th>\n",
|
||
" <th>DestLocation</th>\n",
|
||
" <th>DestRegion</th>\n",
|
||
" <th>DestWeather</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>FlightTimeMin</th>\n",
|
||
" <th>Origin</th>\n",
|
||
" <th>OriginAirportID</th>\n",
|
||
" <th>OriginCityName</th>\n",
|
||
" <th>OriginCountry</th>\n",
|
||
" <th>OriginLocation</th>\n",
|
||
" <th>OriginRegion</th>\n",
|
||
" <th>OriginWeather</th>\n",
|
||
" <th>dayOfWeek</th>\n",
|
||
" <th>timestamp</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>841.265642</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Kibana Airlines</td>\n",
|
||
" <td>Sydney Kingsford Smith International Airport</td>\n",
|
||
" <td>SYD</td>\n",
|
||
" <td>Sydney</td>\n",
|
||
" <td>AU</td>\n",
|
||
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
|
||
" <td>SE-BD</td>\n",
|
||
" <td>Rain</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1030.770416</td>\n",
|
||
" <td>Frankfurt am Main Airport</td>\n",
|
||
" <td>FRA</td>\n",
|
||
" <td>Frankfurt am Main</td>\n",
|
||
" <td>DE</td>\n",
|
||
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
|
||
" <td>DE-HE</td>\n",
|
||
" <td>Sunny</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2018-01-01 00:00:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>882.982662</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Logstash Airways</td>\n",
|
||
" <td>Venice Marco Polo Airport</td>\n",
|
||
" <td>VE05</td>\n",
|
||
" <td>Venice</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||
" <td>IT-34</td>\n",
|
||
" <td>Sunny</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>464.389481</td>\n",
|
||
" <td>Cape Town International Airport</td>\n",
|
||
" <td>CPT</td>\n",
|
||
" <td>Cape Town</td>\n",
|
||
" <td>ZA</td>\n",
|
||
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
|
||
" <td>SE-BD</td>\n",
|
||
" <td>Clear</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2018-01-01 18:27:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>190.636904</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Logstash Airways</td>\n",
|
||
" <td>Venice Marco Polo Airport</td>\n",
|
||
" <td>VE05</td>\n",
|
||
" <td>Venice</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||
" <td>IT-34</td>\n",
|
||
" <td>Cloudy</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>Venice Marco Polo Airport</td>\n",
|
||
" <td>VE05</td>\n",
|
||
" <td>Venice</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||
" <td>IT-34</td>\n",
|
||
" <td>Rain</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2018-01-01 17:11:14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>181.694216</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>Kibana Airlines</td>\n",
|
||
" <td>Treviso-Sant'Angelo Airport</td>\n",
|
||
" <td>TV01</td>\n",
|
||
" <td>Treviso</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
|
||
" <td>IT-34</td>\n",
|
||
" <td>Clear</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>222.749059</td>\n",
|
||
" <td>Naples International Airport</td>\n",
|
||
" <td>NA01</td>\n",
|
||
" <td>Naples</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
|
||
" <td>IT-72</td>\n",
|
||
" <td>Thunder &amp; Lightning</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2018-01-01 10:33:28</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>730.041778</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Kibana Airlines</td>\n",
|
||
" <td>Xi'an Xianyang International Airport</td>\n",
|
||
" <td>XIY</td>\n",
|
||
" <td>Xi'an</td>\n",
|
||
" <td>CN</td>\n",
|
||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
||
" <td>SE-BD</td>\n",
|
||
" <td>Clear</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>785.779071</td>\n",
|
||
" <td>Licenciado Benito Juarez International Airport</td>\n",
|
||
" <td>AICM</td>\n",
|
||
" <td>Mexico City</td>\n",
|
||
" <td>MX</td>\n",
|
||
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
|
||
" <td>MX-DIF</td>\n",
|
||
" <td>Damaging Wind</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2018-01-01 05:13:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 27 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" AvgTicketPrice Cancelled Carrier \\\n",
|
||
"0 841.265642 False Kibana Airlines \n",
|
||
"1 882.982662 False Logstash Airways \n",
|
||
"2 190.636904 False Logstash Airways \n",
|
||
"3 181.694216 True Kibana Airlines \n",
|
||
"4 730.041778 False Kibana Airlines \n",
|
||
"\n",
|
||
" Dest DestAirportID DestCityName \\\n",
|
||
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
|
||
"1 Venice Marco Polo Airport VE05 Venice \n",
|
||
"2 Venice Marco Polo Airport VE05 Venice \n",
|
||
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
|
||
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
|
||
"\n",
|
||
" DestCountry DestLocation DestRegion \\\n",
|
||
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
|
||
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
|
||
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
|
||
"\n",
|
||
" DestWeather ... FlightTimeMin \\\n",
|
||
"0 Rain ... 1030.770416 \n",
|
||
"1 Sunny ... 464.389481 \n",
|
||
"2 Cloudy ... 0.000000 \n",
|
||
"3 Clear ... 222.749059 \n",
|
||
"4 Clear ... 785.779071 \n",
|
||
"\n",
|
||
" Origin OriginAirportID \\\n",
|
||
"0 Frankfurt am Main Airport FRA \n",
|
||
"1 Cape Town International Airport CPT \n",
|
||
"2 Venice Marco Polo Airport VE05 \n",
|
||
"3 Naples International Airport NA01 \n",
|
||
"4 Licenciado Benito Juarez International Airport AICM \n",
|
||
"\n",
|
||
" OriginCityName OriginCountry \\\n",
|
||
"0 Frankfurt am Main DE \n",
|
||
"1 Cape Town ZA \n",
|
||
"2 Venice IT \n",
|
||
"3 Naples IT \n",
|
||
"4 Mexico City MX \n",
|
||
"\n",
|
||
" OriginLocation OriginRegion \\\n",
|
||
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
|
||
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
|
||
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
|
||
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
|
||
"\n",
|
||
" OriginWeather dayOfWeek timestamp \n",
|
||
"0 Sunny 0 2018-01-01 00:00:00 \n",
|
||
"1 Clear 0 2018-01-01 18:27:00 \n",
|
||
"2 Rain 0 2018-01-01 17:11:14 \n",
|
||
"3 Thunder & Lightning 0 2018-01-01 10:33:28 \n",
|
||
"4 Damaging Wind 0 2018-01-01 05:13:00 \n",
|
||
"\n",
|
||
"[5 rows x 27 columns]"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd_df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>AvgTicketPrice</th>\n",
|
||
" <th>DistanceKilometers</th>\n",
|
||
" <th>DistanceMiles</th>\n",
|
||
" <th>FlightDelayMin</th>\n",
|
||
" <th>FlightTimeHour</th>\n",
|
||
" <th>FlightTimeMin</th>\n",
|
||
" <th>dayOfWeek</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>628.253689</td>\n",
|
||
" <td>7092.142455</td>\n",
|
||
" <td>4406.853013</td>\n",
|
||
" <td>47.335171</td>\n",
|
||
" <td>8.518797</td>\n",
|
||
" <td>511.127842</td>\n",
|
||
" <td>2.835975</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>266.396861</td>\n",
|
||
" <td>4578.438497</td>\n",
|
||
" <td>2844.909787</td>\n",
|
||
" <td>96.746711</td>\n",
|
||
" <td>5.579233</td>\n",
|
||
" <td>334.753952</td>\n",
|
||
" <td>1.939439</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>100.020528</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>409.893816</td>\n",
|
||
" <td>2459.705673</td>\n",
|
||
" <td>1528.390247</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>4.205553</td>\n",
|
||
" <td>252.333192</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>640.556668</td>\n",
|
||
" <td>7610.330866</td>\n",
|
||
" <td>4728.840363</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>8.384086</td>\n",
|
||
" <td>503.045170</td>\n",
|
||
" <td>3.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>842.185470</td>\n",
|
||
" <td>9736.637600</td>\n",
|
||
" <td>6050.066114</td>\n",
|
||
" <td>15.000000</td>\n",
|
||
" <td>12.006934</td>\n",
|
||
" <td>720.416036</td>\n",
|
||
" <td>4.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>1199.729053</td>\n",
|
||
" <td>19881.482315</td>\n",
|
||
" <td>12353.780369</td>\n",
|
||
" <td>360.000000</td>\n",
|
||
" <td>31.715034</td>\n",
|
||
" <td>1902.902032</td>\n",
|
||
" <td>6.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
|
||
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
|
||
"mean 628.253689 7092.142455 4406.853013 47.335171 \n",
|
||
"std 266.396861 4578.438497 2844.909787 96.746711 \n",
|
||
"min 100.020528 0.000000 0.000000 0.000000 \n",
|
||
"25% 409.893816 2459.705673 1528.390247 0.000000 \n",
|
||
"50% 640.556668 7610.330866 4728.840363 0.000000 \n",
|
||
"75% 842.185470 9736.637600 6050.066114 15.000000 \n",
|
||
"max 1199.729053 19881.482315 12353.780369 360.000000 \n",
|
||
"\n",
|
||
" FlightTimeHour FlightTimeMin dayOfWeek \n",
|
||
"count 13059.000000 13059.000000 13059.000000 \n",
|
||
"mean 8.518797 511.127842 2.835975 \n",
|
||
"std 5.579233 334.753952 1.939439 \n",
|
||
"min 0.000000 0.000000 0.000000 \n",
|
||
"25% 4.205553 252.333192 1.000000 \n",
|
||
"50% 8.384086 503.045170 3.000000 \n",
|
||
"75% 12.006934 720.416036 4.000000 \n",
|
||
"max 31.715034 1902.902032 6.000000 "
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd_df.describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Eland"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import eland as ed"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ed_df = ed.read_es('localhost', 'kibana_sample_data_flights')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>AvgTicketPrice</th>\n",
|
||
" <th>Cancelled</th>\n",
|
||
" <th>Carrier</th>\n",
|
||
" <th>Dest</th>\n",
|
||
" <th>DestAirportID</th>\n",
|
||
" <th>DestCityName</th>\n",
|
||
" <th>DestCountry</th>\n",
|
||
" <th>DestLocation</th>\n",
|
||
" <th>DestRegion</th>\n",
|
||
" <th>DestWeather</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>FlightTimeMin</th>\n",
|
||
" <th>Origin</th>\n",
|
||
" <th>OriginAirportID</th>\n",
|
||
" <th>OriginCityName</th>\n",
|
||
" <th>OriginCountry</th>\n",
|
||
" <th>OriginLocation</th>\n",
|
||
" <th>OriginRegion</th>\n",
|
||
" <th>OriginWeather</th>\n",
|
||
" <th>dayOfWeek</th>\n",
|
||
" <th>timestamp</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>841.265642</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Kibana Airlines</td>\n",
|
||
" <td>Sydney Kingsford Smith International Airport</td>\n",
|
||
" <td>SYD</td>\n",
|
||
" <td>Sydney</td>\n",
|
||
" <td>AU</td>\n",
|
||
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
|
||
" <td>SE-BD</td>\n",
|
||
" <td>Rain</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1030.770416</td>\n",
|
||
" <td>Frankfurt am Main Airport</td>\n",
|
||
" <td>FRA</td>\n",
|
||
" <td>Frankfurt am Main</td>\n",
|
||
" <td>DE</td>\n",
|
||
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
|
||
" <td>DE-HE</td>\n",
|
||
" <td>Sunny</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2019-05-27T00:00:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>882.982662</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Logstash Airways</td>\n",
|
||
" <td>Venice Marco Polo Airport</td>\n",
|
||
" <td>VE05</td>\n",
|
||
" <td>Venice</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||
" <td>IT-34</td>\n",
|
||
" <td>Sunny</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>464.389481</td>\n",
|
||
" <td>Cape Town International Airport</td>\n",
|
||
" <td>CPT</td>\n",
|
||
" <td>Cape Town</td>\n",
|
||
" <td>ZA</td>\n",
|
||
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
|
||
" <td>SE-BD</td>\n",
|
||
" <td>Clear</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2019-05-27T18:27:00</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>190.636904</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Logstash Airways</td>\n",
|
||
" <td>Venice Marco Polo Airport</td>\n",
|
||
" <td>VE05</td>\n",
|
||
" <td>Venice</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||
" <td>IT-34</td>\n",
|
||
" <td>Cloudy</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>Venice Marco Polo Airport</td>\n",
|
||
" <td>VE05</td>\n",
|
||
" <td>Venice</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
|
||
" <td>IT-34</td>\n",
|
||
" <td>Rain</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2019-05-27T17:11:14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>181.694216</td>\n",
|
||
" <td>True</td>\n",
|
||
" <td>Kibana Airlines</td>\n",
|
||
" <td>Treviso-Sant'Angelo Airport</td>\n",
|
||
" <td>TV01</td>\n",
|
||
" <td>Treviso</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
|
||
" <td>IT-34</td>\n",
|
||
" <td>Clear</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>222.749059</td>\n",
|
||
" <td>Naples International Airport</td>\n",
|
||
" <td>NA01</td>\n",
|
||
" <td>Naples</td>\n",
|
||
" <td>IT</td>\n",
|
||
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
|
||
" <td>IT-72</td>\n",
|
||
" <td>Thunder &amp; Lightning</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2019-05-27T10:33:28</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>730.041778</td>\n",
|
||
" <td>False</td>\n",
|
||
" <td>Kibana Airlines</td>\n",
|
||
" <td>Xi'an Xianyang International Airport</td>\n",
|
||
" <td>XIY</td>\n",
|
||
" <td>Xi'an</td>\n",
|
||
" <td>CN</td>\n",
|
||
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
|
||
" <td>SE-BD</td>\n",
|
||
" <td>Clear</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>785.779071</td>\n",
|
||
" <td>Licenciado Benito Juarez International Airport</td>\n",
|
||
" <td>AICM</td>\n",
|
||
" <td>Mexico City</td>\n",
|
||
" <td>MX</td>\n",
|
||
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
|
||
" <td>MX-DIF</td>\n",
|
||
" <td>Damaging Wind</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2019-05-27T05:13:00</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 27 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" AvgTicketPrice Cancelled Carrier \\\n",
|
||
"0 841.265642 False Kibana Airlines \n",
|
||
"1 882.982662 False Logstash Airways \n",
|
||
"2 190.636904 False Logstash Airways \n",
|
||
"3 181.694216 True Kibana Airlines \n",
|
||
"4 730.041778 False Kibana Airlines \n",
|
||
"\n",
|
||
" Dest DestAirportID DestCityName \\\n",
|
||
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
|
||
"1 Venice Marco Polo Airport VE05 Venice \n",
|
||
"2 Venice Marco Polo Airport VE05 Venice \n",
|
||
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
|
||
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
|
||
"\n",
|
||
" DestCountry DestLocation DestRegion \\\n",
|
||
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
|
||
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
|
||
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
|
||
"\n",
|
||
" DestWeather ... FlightTimeMin \\\n",
|
||
"0 Rain ... 1030.770416 \n",
|
||
"1 Sunny ... 464.389481 \n",
|
||
"2 Cloudy ... 0.000000 \n",
|
||
"3 Clear ... 222.749059 \n",
|
||
"4 Clear ... 785.779071 \n",
|
||
"\n",
|
||
" Origin OriginAirportID \\\n",
|
||
"0 Frankfurt am Main Airport FRA \n",
|
||
"1 Cape Town International Airport CPT \n",
|
||
"2 Venice Marco Polo Airport VE05 \n",
|
||
"3 Naples International Airport NA01 \n",
|
||
"4 Licenciado Benito Juarez International Airport AICM \n",
|
||
"\n",
|
||
" OriginCityName OriginCountry \\\n",
|
||
"0 Frankfurt am Main DE \n",
|
||
"1 Cape Town ZA \n",
|
||
"2 Venice IT \n",
|
||
"3 Naples IT \n",
|
||
"4 Mexico City MX \n",
|
||
"\n",
|
||
" OriginLocation OriginRegion \\\n",
|
||
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
|
||
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
|
||
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
|
||
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
|
||
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
|
||
"\n",
|
||
" OriginWeather dayOfWeek timestamp \n",
|
||
"0 Sunny 0 2019-05-27T00:00:00 \n",
|
||
"1 Clear 0 2019-05-27T18:27:00 \n",
|
||
"2 Rain 0 2019-05-27T17:11:14 \n",
|
||
"3 Thunder & Lightning 0 2019-05-27T10:33:28 \n",
|
||
"4 Damaging Wind 0 2019-05-27T05:13:00 \n",
|
||
"\n",
|
||
"[5 rows x 27 columns]"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ed_df.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>AvgTicketPrice</th>\n",
|
||
" <th>DistanceKilometers</th>\n",
|
||
" <th>DistanceMiles</th>\n",
|
||
" <th>FlightDelayMin</th>\n",
|
||
" <th>FlightTimeMin</th>\n",
|
||
" <th>dayOfWeek</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" <td>13059.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>628.253689</td>\n",
|
||
" <td>7092.142457</td>\n",
|
||
" <td>4406.853010</td>\n",
|
||
" <td>47.335171</td>\n",
|
||
" <td>511.127842</td>\n",
|
||
" <td>2.835975</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>266.386661</td>\n",
|
||
" <td>4578.263193</td>\n",
|
||
" <td>2844.800855</td>\n",
|
||
" <td>96.743006</td>\n",
|
||
" <td>334.741135</td>\n",
|
||
" <td>1.939365</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>100.020531</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>410.008918</td>\n",
|
||
" <td>2470.545974</td>\n",
|
||
" <td>1535.126118</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>252.064162</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>640.387285</td>\n",
|
||
" <td>7612.072403</td>\n",
|
||
" <td>4729.922470</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>503.148975</td>\n",
|
||
" <td>3.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>842.259390</td>\n",
|
||
" <td>9735.660463</td>\n",
|
||
" <td>6049.583389</td>\n",
|
||
" <td>15.000000</td>\n",
|
||
" <td>720.505705</td>\n",
|
||
" <td>4.068000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>1199.729004</td>\n",
|
||
" <td>19881.482422</td>\n",
|
||
" <td>12353.780273</td>\n",
|
||
" <td>360.000000</td>\n",
|
||
" <td>1902.901978</td>\n",
|
||
" <td>6.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
|
||
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
|
||
"mean 628.253689 7092.142457 4406.853010 47.335171 \n",
|
||
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
|
||
"min 100.020531 0.000000 0.000000 0.000000 \n",
|
||
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
|
||
"50% 640.387285 7612.072403 4729.922470 0.000000 \n",
|
||
"75% 842.259390 9735.660463 6049.583389 15.000000 \n",
|
||
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
|
||
"\n",
|
||
" FlightTimeMin dayOfWeek \n",
|
||
"count 13059.000000 13059.000000 \n",
|
||
"mean 511.127842 2.835975 \n",
|
||
"std 334.741135 1.939365 \n",
|
||
"min 0.000000 0.000000 \n",
|
||
"25% 252.064162 1.000000 \n",
|
||
"50% 503.148975 3.000000 \n",
|
||
"75% 720.505705 4.068000 \n",
|
||
"max 1902.901978 6.000000 "
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ed_df.describe()"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.8"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|