eland/test.ipynb
Stephen Dodson 2b83edad69 Added json file for pandas comparison
+ renamed from_es to read_es1
2019-06-12 12:12:40 +00:00

857 lines
31 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
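{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Eland\n",
"\n",
"Read the `kibana_sample_data_flights` index from the Elasticsearch node at `localhost` with `ed.read_es`, then preview it with `head()` and summarise its numeric columns with `describe()`."
]
},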
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import eland as ed"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = ed.read_es('localhost', 'kibana_sample_data_flights')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>Cancelled</th>\n",
" <th>Carrier</th>\n",
" <th>Dest</th>\n",
" <th>DestAirportID</th>\n",
" <th>DestCityName</th>\n",
" <th>DestCountry</th>\n",
" <th>DestLocation</th>\n",
" <th>DestRegion</th>\n",
" <th>DestWeather</th>\n",
" <th>...</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>Origin</th>\n",
" <th>OriginAirportID</th>\n",
" <th>OriginCityName</th>\n",
" <th>OriginCountry</th>\n",
" <th>OriginLocation</th>\n",
" <th>OriginRegion</th>\n",
" <th>OriginWeather</th>\n",
" <th>dayOfWeek</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>841.265642</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Sydney Kingsford Smith International Airport</td>\n",
" <td>SYD</td>\n",
" <td>Sydney</td>\n",
" <td>AU</td>\n",
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Rain</td>\n",
" <td>...</td>\n",
" <td>1030.770416</td>\n",
" <td>Frankfurt am Main Airport</td>\n",
" <td>FRA</td>\n",
" <td>Frankfurt am Main</td>\n",
" <td>DE</td>\n",
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
" <td>DE-HE</td>\n",
" <td>Sunny</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T00:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>882.982662</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Sunny</td>\n",
" <td>...</td>\n",
" <td>464.389481</td>\n",
" <td>Cape Town International Airport</td>\n",
" <td>CPT</td>\n",
" <td>Cape Town</td>\n",
" <td>ZA</td>\n",
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T18:27:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>190.636904</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Cloudy</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Rain</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T17:11:14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>181.694216</td>\n",
" <td>True</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Treviso-Sant'Angelo Airport</td>\n",
" <td>TV01</td>\n",
" <td>Treviso</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
" <td>IT-34</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>222.749059</td>\n",
" <td>Naples International Airport</td>\n",
" <td>NA01</td>\n",
" <td>Naples</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
" <td>IT-72</td>\n",
" <td>Thunder &amp; Lightning</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T10:33:28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>730.041778</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Xi'an Xianyang International Airport</td>\n",
" <td>XIY</td>\n",
" <td>Xi'an</td>\n",
" <td>CN</td>\n",
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>785.779071</td>\n",
" <td>Licenciado Benito Juarez International Airport</td>\n",
" <td>AICM</td>\n",
" <td>Mexico City</td>\n",
" <td>MX</td>\n",
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
" <td>MX-DIF</td>\n",
" <td>Damaging Wind</td>\n",
" <td>0</td>\n",
" <td>2019-05-27T05:13:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice Cancelled Carrier \\\n",
"0 841.265642 False Kibana Airlines \n",
"1 882.982662 False Logstash Airways \n",
"2 190.636904 False Logstash Airways \n",
"3 181.694216 True Kibana Airlines \n",
"4 730.041778 False Kibana Airlines \n",
"\n",
" Dest DestAirportID DestCityName \\\n",
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
"1 Venice Marco Polo Airport VE05 Venice \n",
"2 Venice Marco Polo Airport VE05 Venice \n",
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
"\n",
" DestCountry DestLocation DestRegion \\\n",
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
"\n",
" DestWeather ... FlightTimeMin \\\n",
"0 Rain ... 1030.770416 \n",
"1 Sunny ... 464.389481 \n",
"2 Cloudy ... 0.000000 \n",
"3 Clear ... 222.749059 \n",
"4 Clear ... 785.779071 \n",
"\n",
" Origin OriginAirportID \\\n",
"0 Frankfurt am Main Airport FRA \n",
"1 Cape Town International Airport CPT \n",
"2 Venice Marco Polo Airport VE05 \n",
"3 Naples International Airport NA01 \n",
"4 Licenciado Benito Juarez International Airport AICM \n",
"\n",
" OriginCityName OriginCountry \\\n",
"0 Frankfurt am Main DE \n",
"1 Cape Town ZA \n",
"2 Venice IT \n",
"3 Naples IT \n",
"4 Mexico City MX \n",
"\n",
" OriginLocation OriginRegion \\\n",
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
"\n",
" OriginWeather dayOfWeek timestamp \n",
"0 Sunny 0 2019-05-27T00:00:00 \n",
"1 Clear 0 2019-05-27T18:27:00 \n",
"2 Rain 0 2019-05-27T17:11:14 \n",
"3 Thunder & Lightning 0 2019-05-27T10:33:28 \n",
"4 Damaging Wind 0 2019-05-27T05:13:00 \n",
"\n",
"[5 rows x 27 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>DistanceKilometers</th>\n",
" <th>DistanceMiles</th>\n",
" <th>FlightDelayMin</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>dayOfWeek</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>628.253689</td>\n",
" <td>7092.142457</td>\n",
" <td>4406.853010</td>\n",
" <td>47.335171</td>\n",
" <td>511.127842</td>\n",
" <td>2.835975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>266.386661</td>\n",
" <td>4578.263193</td>\n",
" <td>2844.800855</td>\n",
" <td>96.743006</td>\n",
" <td>334.741135</td>\n",
" <td>1.939365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>100.020531</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>410.008918</td>\n",
" <td>2470.545974</td>\n",
" <td>1535.126118</td>\n",
" <td>0.000000</td>\n",
" <td>251.682199</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>640.362374</td>\n",
" <td>7612.072403</td>\n",
" <td>4729.922470</td>\n",
" <td>0.000000</td>\n",
" <td>503.148975</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>842.260482</td>\n",
" <td>9735.660463</td>\n",
" <td>6049.459005</td>\n",
" <td>14.102113</td>\n",
" <td>720.569838</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1199.729004</td>\n",
" <td>19881.482422</td>\n",
" <td>12353.780273</td>\n",
" <td>360.000000</td>\n",
" <td>1902.901978</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
"mean 628.253689 7092.142457 4406.853010 47.335171 \n",
"std 266.386661 4578.263193 2844.800855 96.743006 \n",
"min 100.020531 0.000000 0.000000 0.000000 \n",
"25% 410.008918 2470.545974 1535.126118 0.000000 \n",
"50% 640.362374 7612.072403 4729.922470 0.000000 \n",
"75% 842.260482 9735.660463 6049.459005 14.102113 \n",
"max 1199.729004 19881.482422 12353.780273 360.000000 \n",
"\n",
" FlightTimeMin dayOfWeek \n",
"count 13059.000000 13059.000000 \n",
"mean 511.127842 2.835975 \n",
"std 334.741135 1.939365 \n",
"min 0.000000 0.000000 \n",
"25% 251.682199 1.000000 \n",
"50% 503.148975 3.000000 \n",
"75% 720.569838 4.000000 \n",
"max 1902.901978 6.000000 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
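{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Pandas\n",
"\n",
"Load the flights data from the local `flights.json.gz` file (one JSON document per line) with plain pandas and run the same `head()` / `describe()` calls, so the results can be compared with the Eland output above.\n",
"\n",
"Differences visible in the saved outputs: the `timestamp` values fall on 2018-01-01 here versus 2019-05-27 in the Eland section, the pandas `describe()` includes an extra `FlightTimeHour` column, and the `std` and percentile rows differ slightly between the two."
]
},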
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"pd_df = pd.read_json('flights.json.gz', lines=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>Cancelled</th>\n",
" <th>Carrier</th>\n",
" <th>Dest</th>\n",
" <th>DestAirportID</th>\n",
" <th>DestCityName</th>\n",
" <th>DestCountry</th>\n",
" <th>DestLocation</th>\n",
" <th>DestRegion</th>\n",
" <th>DestWeather</th>\n",
" <th>...</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>Origin</th>\n",
" <th>OriginAirportID</th>\n",
" <th>OriginCityName</th>\n",
" <th>OriginCountry</th>\n",
" <th>OriginLocation</th>\n",
" <th>OriginRegion</th>\n",
" <th>OriginWeather</th>\n",
" <th>dayOfWeek</th>\n",
" <th>timestamp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>841.265642</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Sydney Kingsford Smith International Airport</td>\n",
" <td>SYD</td>\n",
" <td>Sydney</td>\n",
" <td>AU</td>\n",
" <td>{'lat': '-33.94609833', 'lon': '151.177002'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Rain</td>\n",
" <td>...</td>\n",
" <td>1030.770416</td>\n",
" <td>Frankfurt am Main Airport</td>\n",
" <td>FRA</td>\n",
" <td>Frankfurt am Main</td>\n",
" <td>DE</td>\n",
" <td>{'lat': '50.033333', 'lon': '8.570556'}</td>\n",
" <td>DE-HE</td>\n",
" <td>Sunny</td>\n",
" <td>0</td>\n",
" <td>2018-01-01 00:00:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>882.982662</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Sunny</td>\n",
" <td>...</td>\n",
" <td>464.389481</td>\n",
" <td>Cape Town International Airport</td>\n",
" <td>CPT</td>\n",
" <td>Cape Town</td>\n",
" <td>ZA</td>\n",
" <td>{'lat': '-33.96480179', 'lon': '18.60169983'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>0</td>\n",
" <td>2018-01-01 18:27:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>190.636904</td>\n",
" <td>False</td>\n",
" <td>Logstash Airways</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Cloudy</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>Venice Marco Polo Airport</td>\n",
" <td>VE05</td>\n",
" <td>Venice</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.505299', 'lon': '12.3519'}</td>\n",
" <td>IT-34</td>\n",
" <td>Rain</td>\n",
" <td>0</td>\n",
" <td>2018-01-01 17:11:14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>181.694216</td>\n",
" <td>True</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Treviso-Sant'Angelo Airport</td>\n",
" <td>TV01</td>\n",
" <td>Treviso</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '45.648399', 'lon': '12.1944'}</td>\n",
" <td>IT-34</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>222.749059</td>\n",
" <td>Naples International Airport</td>\n",
" <td>NA01</td>\n",
" <td>Naples</td>\n",
" <td>IT</td>\n",
" <td>{'lat': '40.886002', 'lon': '14.2908'}</td>\n",
" <td>IT-72</td>\n",
" <td>Thunder &amp; Lightning</td>\n",
" <td>0</td>\n",
" <td>2018-01-01 10:33:28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>730.041778</td>\n",
" <td>False</td>\n",
" <td>Kibana Airlines</td>\n",
" <td>Xi'an Xianyang International Airport</td>\n",
" <td>XIY</td>\n",
" <td>Xi'an</td>\n",
" <td>CN</td>\n",
" <td>{'lat': '34.447102', 'lon': '108.751999'}</td>\n",
" <td>SE-BD</td>\n",
" <td>Clear</td>\n",
" <td>...</td>\n",
" <td>785.779071</td>\n",
" <td>Licenciado Benito Juarez International Airport</td>\n",
" <td>AICM</td>\n",
" <td>Mexico City</td>\n",
" <td>MX</td>\n",
" <td>{'lat': '19.4363', 'lon': '-99.072098'}</td>\n",
" <td>MX-DIF</td>\n",
" <td>Damaging Wind</td>\n",
" <td>0</td>\n",
" <td>2018-01-01 05:13:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 27 columns</p>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice Cancelled Carrier \\\n",
"0 841.265642 False Kibana Airlines \n",
"1 882.982662 False Logstash Airways \n",
"2 190.636904 False Logstash Airways \n",
"3 181.694216 True Kibana Airlines \n",
"4 730.041778 False Kibana Airlines \n",
"\n",
" Dest DestAirportID DestCityName \\\n",
"0 Sydney Kingsford Smith International Airport SYD Sydney \n",
"1 Venice Marco Polo Airport VE05 Venice \n",
"2 Venice Marco Polo Airport VE05 Venice \n",
"3 Treviso-Sant'Angelo Airport TV01 Treviso \n",
"4 Xi'an Xianyang International Airport XIY Xi'an \n",
"\n",
" DestCountry DestLocation DestRegion \\\n",
"0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n",
"1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n",
"4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n",
"\n",
" DestWeather ... FlightTimeMin \\\n",
"0 Rain ... 1030.770416 \n",
"1 Sunny ... 464.389481 \n",
"2 Cloudy ... 0.000000 \n",
"3 Clear ... 222.749059 \n",
"4 Clear ... 785.779071 \n",
"\n",
" Origin OriginAirportID \\\n",
"0 Frankfurt am Main Airport FRA \n",
"1 Cape Town International Airport CPT \n",
"2 Venice Marco Polo Airport VE05 \n",
"3 Naples International Airport NA01 \n",
"4 Licenciado Benito Juarez International Airport AICM \n",
"\n",
" OriginCityName OriginCountry \\\n",
"0 Frankfurt am Main DE \n",
"1 Cape Town ZA \n",
"2 Venice IT \n",
"3 Naples IT \n",
"4 Mexico City MX \n",
"\n",
" OriginLocation OriginRegion \\\n",
"0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n",
"1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n",
"2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n",
"3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n",
"4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n",
"\n",
" OriginWeather dayOfWeek timestamp \n",
"0 Sunny 0 2018-01-01 00:00:00 \n",
"1 Clear 0 2018-01-01 18:27:00 \n",
"2 Rain 0 2018-01-01 17:11:14 \n",
"3 Thunder & Lightning 0 2018-01-01 10:33:28 \n",
"4 Damaging Wind 0 2018-01-01 05:13:00 \n",
"\n",
"[5 rows x 27 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>AvgTicketPrice</th>\n",
" <th>DistanceKilometers</th>\n",
" <th>DistanceMiles</th>\n",
" <th>FlightDelayMin</th>\n",
" <th>FlightTimeHour</th>\n",
" <th>FlightTimeMin</th>\n",
" <th>dayOfWeek</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" <td>13059.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>628.253689</td>\n",
" <td>7092.142455</td>\n",
" <td>4406.853013</td>\n",
" <td>47.335171</td>\n",
" <td>8.518797</td>\n",
" <td>511.127842</td>\n",
" <td>2.835975</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>266.396861</td>\n",
" <td>4578.438497</td>\n",
" <td>2844.909787</td>\n",
" <td>96.746711</td>\n",
" <td>5.579233</td>\n",
" <td>334.753952</td>\n",
" <td>1.939439</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>100.020528</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>409.893816</td>\n",
" <td>2459.705673</td>\n",
" <td>1528.390247</td>\n",
" <td>0.000000</td>\n",
" <td>4.205553</td>\n",
" <td>252.333192</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>640.556668</td>\n",
" <td>7610.330866</td>\n",
" <td>4728.840363</td>\n",
" <td>0.000000</td>\n",
" <td>8.384086</td>\n",
" <td>503.045170</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>842.185470</td>\n",
" <td>9736.637600</td>\n",
" <td>6050.066114</td>\n",
" <td>15.000000</td>\n",
" <td>12.006934</td>\n",
" <td>720.416036</td>\n",
" <td>4.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1199.729053</td>\n",
" <td>19881.482315</td>\n",
" <td>12353.780369</td>\n",
" <td>360.000000</td>\n",
" <td>31.715034</td>\n",
" <td>1902.902032</td>\n",
" <td>6.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n",
"count 13059.000000 13059.000000 13059.000000 13059.000000 \n",
"mean 628.253689 7092.142455 4406.853013 47.335171 \n",
"std 266.396861 4578.438497 2844.909787 96.746711 \n",
"min 100.020528 0.000000 0.000000 0.000000 \n",
"25% 409.893816 2459.705673 1528.390247 0.000000 \n",
"50% 640.556668 7610.330866 4728.840363 0.000000 \n",
"75% 842.185470 9736.637600 6050.066114 15.000000 \n",
"max 1199.729053 19881.482315 12353.780369 360.000000 \n",
"\n",
" FlightTimeHour FlightTimeMin dayOfWeek \n",
"count 13059.000000 13059.000000 13059.000000 \n",
"mean 8.518797 511.127842 2.835975 \n",
"std 5.579233 334.753952 1.939439 \n",
"min 0.000000 0.000000 0.000000 \n",
"25% 4.205553 252.333192 1.000000 \n",
"50% 8.384086 503.045170 3.000000 \n",
"75% 12.006934 720.416036 4.000000 \n",
"max 31.715034 1902.902032 6.000000 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd_df.describe()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}