From 428f35f21f02cce3bca300a846cc1bf25a9f4182 Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Wed, 26 Jun 2019 12:25:08 +0000 Subject: [PATCH] Added DataFrame.info() + more methods --- eland/frame.py | 194 +- eland/mappings.py | 31 +- eland/tests/__init__.py | 6 +- eland/tests/client/test_mappings_pytest.py | 56 +- eland/tests/{frame => }/common.py | 0 eland/tests/frame/test_indexing_pytest.py | 78 +- eland/tests/test.ipynb | 3352 ++------------------ 7 files changed, 664 insertions(+), 3053 deletions(-) rename eland/tests/{frame => }/common.py (100%) diff --git a/eland/frame.py b/eland/frame.py index 6e42e0e..ac5063b 100644 --- a/eland/frame.py +++ b/eland/frame.py @@ -32,6 +32,13 @@ import pandas as pd from pandas.core.arrays.sparse import BlockIndex +from pandas.io.formats import format as fmt +from pandas.io.formats.printing import pprint_thing + +from io import StringIO + +import sys + class DataFrame(): """ pandas.DataFrame like API that proxies into Elasticsearch index(es). @@ -283,6 +290,121 @@ class DataFrame(): return df + def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, + null_counts=None): + """ + Print a concise summary of a DataFrame. + + This method prints information about a DataFrame including + the index dtype and column dtypes, non-null values and memory usage. + + This copies a lot of code from pandas.DataFrame.info as it is difficult + to split out the appropriate code or creating a SparseDataFrame gives + incorrect results on types and counts. + """ + if buf is None: # pragma: no cover + buf = sys.stdout + + fake_df = self.__fake_dataframe__() + + lines = [] + + lines.append(str(type(self))) + lines.append(fake_df.index._summary()) + + if len(self.columns) == 0: + lines.append('Empty {name}'.format(name=type(self).__name__)) + fmt.buffer_put_lines(buf, lines) + return + + cols = self.columns + + # hack + if max_cols is None: + max_cols = pd.get_option('display.max_info_columns', + len(self.columns) + 1) + + max_rows = pd.get_option('display.max_info_rows', len(self) + 1) + + if null_counts is None: + show_counts = ((len(self.columns) <= max_cols) and + (len(self) < max_rows)) + else: + show_counts = null_counts + exceeds_info_cols = len(self.columns) > max_cols + + def _verbose_repr(): + lines.append('Data columns (total %d columns):' % + len(self.columns)) + space = max(len(pprint_thing(k)) for k in self.columns) + 4 + counts = None + + tmpl = "{count}{dtype}" + if show_counts: + counts = self.count() + if len(cols) != len(counts): # pragma: no cover + raise AssertionError( + 'Columns must equal counts ' + '({cols:d} != {counts:d})'.format( + cols=len(cols), counts=len(counts))) + tmpl = "{count} non-null {dtype}" + + dtypes = self.dtypes + for i, col in enumerate(self.columns): + dtype = dtypes.iloc[i] + col = pprint_thing(col) + + count = "" + if show_counts: + count = counts.iloc[i] + + lines.append(_put_str(col, space) + tmpl.format(count=count, + dtype=dtype)) + + def _non_verbose_repr(): + lines.append(self.columns._summary(name='Columns')) + + def _sizeof_fmt(num, size_qualifier): + # returns size in human readable format + for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: + if num < 1024.0: + return ("{num:3.1f}{size_q} " + "{x}".format(num=num, size_q=size_qualifier, x=x)) + num /= 1024.0 + return "{num:3.1f}{size_q} {pb}".format(num=num, + size_q=size_qualifier, + pb='PB') + + if verbose: + _verbose_repr() + elif verbose is False: # specifically set to False, not nesc None + _non_verbose_repr() + else: + if exceeds_info_cols: + _non_verbose_repr() + else: + _verbose_repr() + + counts = self.get_dtype_counts() + dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k + in sorted(counts.items())] + lines.append('dtypes: {types}'.format(types=', '.join(dtypes))) + + if memory_usage is None: + memory_usage = pd.get_option('display.memory_usage') + if memory_usage: + # append memory usage of df to display + size_qualifier = '' + + # TODO - this is different from pd.DataFrame as we shouldn't + # really hold much in memory. For now just approximate with getsizeof + ignore deep + mem_usage = sys.getsizeof(self) + lines.append("memory usage: {mem}\n".format( + mem=_sizeof_fmt(mem_usage, size_qualifier))) + + fmt.buffer_put_lines(buf, lines) + + @property def shape(self): """ @@ -301,7 +423,38 @@ class DataFrame(): @property def columns(self): - return self.mappings.source_fields() + return pd.Index(self.mappings.source_fields()) + + @property + def dtypes(self): + return self.mappings.dtypes() + + def get_dtype_counts(self): + return self.mappings.get_dtype_counts() + + def count(self): + """ + Count non-NA cells for each column (TODO row) + + Counts are based on exists queries against ES + + This is inefficient, as it creates N queries (N is number of fields). + + An alternative approach is to use value_count aggregations. However, they have issues in that: + 1. They can only be used with aggregatable fields (e.g. keyword not text) + 2. For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2 + for a single document. + """ + counts = {} + for field in self.mappings.source_fields(): + exists_query = {"query":{"exists":{"field":field}}} + field_exists_count = self.client.count(index=self.index_pattern, body=exists_query) + counts[field] = field_exists_count + + count = pd.Series(data=counts, index=self.mappings.source_fields()) + + return count + def __getitem__(self, item): # df['a'] -> item == str @@ -313,6 +466,8 @@ class DataFrame(): columns.append(item) elif isinstance(item, tuple): columns.extend(list(item)) + elif isinstance(item, list): + columns.extend(item) if len(columns) > 0: # Return new eland.DataFrame with modified mappings @@ -337,13 +492,14 @@ class DataFrame(): # Rendering Methods def __repr__(self): + """ + Return a string representation for a particular DataFrame. + """ return self.to_string() - def to_string(self): - # The return for this is display.options.max_rows - max_rows = 60 - head_rows = max_rows / 2 + def __fake_dataframe__(self, max_rows=1): + head_rows = max_rows / 2 + 1 tail_rows = max_rows - head_rows head = self.head(max_rows) @@ -358,13 +514,33 @@ class DataFrame(): # to use the pandas IO methods. # TODO - if data is indexed by time series, return top/bottom of # time series, rather than first max_rows items + if tail_rows > 0: + locations = [0, num_rows-tail_rows] + lengths = [head_rows, tail_rows] + else: + locations = [0] + lengths = [head_rows] + sdf = pd.DataFrame({item: pd.SparseArray(data=head[item], sparse_index= BlockIndex( - num_rows, [0, num_rows-tail_rows], [head_rows, tail_rows])) + num_rows, locations, lengths)) for item in self.columns}) - # TODO - don't hard code max_rows - use pandas default/ES default - return sdf.to_string(max_rows=max_rows) + return sdf - return head.to_string(max_rows=max_rows) + return head + + + def to_string(self): + # TODO - this doesn't return 'notebook' friendly results yet.. + # TODO - don't hard code max_rows - use pandas default/ES default + max_rows = 60 + + df = self.__fake_dataframe__(max_rows=max_rows) + + return df.to_string(max_rows=max_rows, show_dimensions=True) + + +def _put_str(s, space): + return '{s}'.format(s=s)[:space].ljust(space) \ No newline at end of file diff --git a/eland/mappings.py b/eland/mappings.py index 41abcc9..4064f9c 100644 --- a/eland/mappings.py +++ b/eland/mappings.py @@ -1,6 +1,8 @@ import warnings + import pandas as pd + class Mappings(): """ General purpose to manage Elasticsearch to/from pandas mappings @@ -26,6 +28,7 @@ class Mappings(): origin_location.lat True text object True False """ + def __init__(self, client=None, index_pattern=None, @@ -63,14 +66,14 @@ class Mappings(): # field_name, es_dtype, pd_dtype, is_searchable, is_aggregtable, is_source self.mappings_capabilities = Mappings._create_capability_matrix(all_fields, source_fields, all_fields_caps) else: - # Copy object and restrict mapping columns + # Reference object and restrict mapping columns self.mappings_capabilities = mappings.mappings_capabilities.loc[columns] # Cache source field types for efficient lookup # (this massively improves performance of DataFrame.flatten) self.source_field_pd_dtypes = {} - for field_name in self.source_fields(): + for field_name in self.mappings_capabilities[self.mappings_capabilities._source == True].index: pd_dtype = self.mappings_capabilities.loc[field_name]['pd_dtype'] self.source_field_pd_dtypes[field_name] = pd_dtype @@ -336,7 +339,7 @@ class Mappings(): source_fields: list of str List of source fields """ - return self.mappings_capabilities[self.mappings_capabilities._source == True].index.tolist() + return self.source_field_pd_dtypes.keys() def count_source_fields(self): """ @@ -345,5 +348,25 @@ class Mappings(): count_source_fields: int Number of source fields in mapping """ - return len(self.mappings_capabilities[self.mappings_capabilities._source == True].index) + return len(self.source_fields()) + def dtypes(self): + """ + Returns + ------- + dtypes: pd.Series + Source field name + pd_dtype + """ + return pd.Series(self.source_field_pd_dtypes) + + def get_dtype_counts(self): + """ + Return counts of unique dtypes in this object. + + Returns + ------- + get_dtype_counts : Series + Series with the count of columns with each dtype. + """ + return pd.Series(self.mappings_capabilities[self.mappings_capabilities._source == True].groupby('pd_dtype')[ + '_source'].count().to_dict()) diff --git a/eland/tests/__init__.py b/eland/tests/__init__.py index 26d8423..c98fe4f 100644 --- a/eland/tests/__init__.py +++ b/eland/tests/__init__.py @@ -438,7 +438,11 @@ TEST_MAPPING1_EXPECTED = { } TEST_MAPPING1_EXPECTED_DF = pd.DataFrame.from_dict(data=TEST_MAPPING1_EXPECTED, orient='index', columns=['es_dtype']) -TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT = len(TEST_MAPPING1_EXPECTED_DF.index) - 4 +TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF = TEST_MAPPING1_EXPECTED_DF.drop(index=['city.raw', + 'origin_location.lat.keyword', + 'origin_location.lon.keyword', + 'text.english']) +TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT = len(TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF.index) TEST_NESTED_USER_GROUP_INDEX_NAME = 'nested_user_group' TEST_NESTED_USER_GROUP_MAPPING = { diff --git a/eland/tests/client/test_mappings_pytest.py b/eland/tests/client/test_mappings_pytest.py index 133da72..0163ba6 100644 --- a/eland/tests/client/test_mappings_pytest.py +++ b/eland/tests/client/test_mappings_pytest.py @@ -1,47 +1,71 @@ # File called _pytest for PyCharm compatability -import pytest - -from eland.tests import * from pandas.util.testing import ( - assert_almost_equal, assert_frame_equal, assert_series_equal) + assert_series_equal, assert_frame_equal) import eland as ed +from eland.tests import * +from eland.tests.common import TestData -class TestMapping(): + +class TestMapping(TestData): # Requires 'setup_tests.py' to be run prior to this - def test_mapping(self): + def test_fields(self): mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) - assert mappings.all_fields() == TEST_MAPPING1_EXPECTED_DF.index.tolist() + assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields() assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings.mappings_capabilities['es_dtype'])) - assert mappings.count_source_fields() == TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT + assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields() def test_copy(self): mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) - assert mappings.all_fields() == TEST_MAPPING1_EXPECTED_DF.index.tolist() + assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields() assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings.mappings_capabilities['es_dtype'])) - assert mappings.count_source_fields() == TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT + assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields() # Pick 1 source field columns = ['dest_location'] mappings_copy1 = ed.Mappings(mappings=mappings, columns=columns) - assert mappings_copy1.all_fields() == columns - assert mappings_copy1.count_source_fields() == len(columns) + assert columns == mappings_copy1.all_fields() + assert len(columns) == mappings_copy1.count_source_fields() # Pick 3 source fields (out of order) columns = ['dest_location', 'city', 'user_name'] mappings_copy2 = ed.Mappings(mappings=mappings, columns=columns) - assert mappings_copy2.all_fields() == columns - assert mappings_copy2.count_source_fields() == len(columns) + assert columns == mappings_copy2.all_fields() + assert len(columns) == mappings_copy2.count_source_fields() # Check original is still ok - assert mappings.all_fields() == TEST_MAPPING1_EXPECTED_DF.index.tolist() + assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields() assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings.mappings_capabilities['es_dtype'])) - assert mappings.count_source_fields() == TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT + assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields() + + def test_dtypes(self): + mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) + + expected_dtypes = pd.Series( + {'city': 'object', 'content': 'object', 'dest_location': 'object', 'email': 'object', + 'maps-telemetry.attributesPerMap.dataSourcesCount.avg': 'int64', + 'maps-telemetry.attributesPerMap.dataSourcesCount.max': 'int64', + 'maps-telemetry.attributesPerMap.dataSourcesCount.min': 'int64', + 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.avg': 'float64', + 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.max': 'int64', + 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.min': 'int64', + 'my_join_field': 'object', 'name': 'object', 'origin_location.lat': 'object', + 'origin_location.lon': 'object', 'text': 'object', 'tweeted_at': 'datetime64[ns]', + 'type': 'object', 'user_name': 'object'}) + + assert_series_equal(expected_dtypes, mappings.dtypes()) + + def test_get_dtype_counts(self): + mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) + + expected_get_dtype_counts = pd.Series({'datetime64[ns]': 1, 'float64': 1, 'int64': 5, 'object': 11}) + + assert_series_equal(expected_get_dtype_counts, mappings.get_dtype_counts()) diff --git a/eland/tests/frame/common.py b/eland/tests/common.py similarity index 100% rename from eland/tests/frame/common.py rename to eland/tests/common.py diff --git a/eland/tests/frame/test_indexing_pytest.py b/eland/tests/frame/test_indexing_pytest.py index 045c9be..d926850 100644 --- a/eland/tests/frame/test_indexing_pytest.py +++ b/eland/tests/frame/test_indexing_pytest.py @@ -1,12 +1,11 @@ # File called _pytest for PyCharm compatability -from eland.tests.frame.common import TestData -from eland.tests import * +from eland.tests.common import TestData -import eland as ed import pandas as pd +import io from pandas.util.testing import ( - assert_almost_equal, assert_frame_equal, assert_series_equal) + assert_series_equal, assert_frame_equal) class TestDataFrameIndexing(TestData): @@ -59,7 +58,7 @@ class TestDataFrameIndexing(TestData): def test_to_string(self): print(self.ed_flights()) - def test_get_item(self): + def test_getitem(self): # Test 1 attribute ed_carrier = self.ed_flights()['Carrier'] @@ -96,3 +95,72 @@ class TestDataFrameIndexing(TestData): #ed_3_items_to_string = ed_3_items.to_string() #print(ed_3_items_to_string) + + # Test numerics + numerics = ['DistanceMiles', 'AvgTicketPrice', 'FlightTimeMin'] + ed_numerics = self.ed_flights()[numerics] + + # just test headers + ed_numerics_describe = ed_numerics.describe() + assert ed_numerics_describe.columns.tolist() == numerics + + def test_info(self): + ed_flights_info_buf = io.StringIO() + pd_flights_info_buf = io.StringIO() + + self.ed_flights().info(buf=ed_flights_info_buf) + self.pd_flights().info(buf=pd_flights_info_buf) + + ed_flights_info = (ed_flights_info_buf.getvalue().splitlines()) + pd_flights_info = (pd_flights_info_buf.getvalue().splitlines()) + + flights_diff = set(ed_flights_info).symmetric_difference(set(pd_flights_info)) + + ed_ecommerce_info_buf = io.StringIO() + pd_ecommerce_info_buf = io.StringIO() + + self.ed_ecommerce().info(buf=ed_ecommerce_info_buf) + self.pd_ecommerce().info(buf=pd_ecommerce_info_buf) + + ed_ecommerce_info = (ed_ecommerce_info_buf.getvalue().splitlines()) + pd_ecommerce_info = (pd_ecommerce_info_buf.getvalue().splitlines()) + + # We don't compare ecommerce here as the default dtypes in pandas from read_json + # don't match the mapping types. This is mainly because the products field is + # nested and so can be treated as a multi-field in ES, but not in pandas + ecommerce_diff = set(ed_ecommerce_info).symmetric_difference(set(pd_ecommerce_info)) + + + def test_count(self): + pd_flights_count = self.pd_flights().count() + ed_flights_count = self.ed_flights().count() + + assert_series_equal(pd_flights_count, ed_flights_count) + + pd_ecommerce_count = self.pd_ecommerce().count() + ed_ecommerce_count = self.ed_ecommerce().count() + + assert_series_equal(pd_ecommerce_count, ed_ecommerce_count) + + def test_get_dtype_counts(self): + pd_flights_get_dtype_counts = self.pd_flights().get_dtype_counts().sort_index() + ed_flights_get_dtype_counts = self.ed_flights().get_dtype_counts().sort_index() + + assert_series_equal(pd_flights_get_dtype_counts, ed_flights_get_dtype_counts) + + def test_properties(self): + pd_flights_shape = self.pd_flights().shape + ed_flights_shape = self.ed_flights().shape + + assert pd_flights_shape == ed_flights_shape + + pd_flights_columns = self.pd_flights().columns + ed_flights_columns = self.ed_flights().columns + + assert pd_flights_columns.tolist() == ed_flights_columns.tolist() + + pd_flights_dtypes = self.pd_flights().dtypes + ed_flights_dtypes = self.ed_flights().dtypes + + assert_series_equal(pd_flights_dtypes, ed_flights_dtypes) + diff --git a/eland/tests/test.ipynb b/eland/tests/test.ipynb index 8f8e298..a34a9bb 100644 --- a/eland/tests/test.ipynb +++ b/eland/tests/test.ipynb @@ -273,2114 +273,6 @@ "pd_df.head()" ] }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "pd_sdf = pd_df.to_sparse()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AvgTicketPriceCancelledCarrierDestDestAirportIDDestCityNameDestCountryDestLocationDestRegionDestWeather...FlightTimeMinOriginOriginAirportIDOriginCityNameOriginCountryOriginLocationOriginRegionOriginWeatherdayOfWeektimestamp
0841.265642FalseKibana AirlinesSydney Kingsford Smith International AirportSYDSydneyAU{'lat': '-33.94609833', 'lon': '151.177002'}SE-BDRain...1030.770416Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HESunny02018-01-01 00:00:00
1882.982662FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...464.389481Cape Town International AirportCPTCape TownZA{'lat': '-33.96480179', 'lon': '18.60169983'}SE-BDClear02018-01-01 18:27:00
2190.636904FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy...0.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 17:11:14
3181.694216TrueKibana AirlinesTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Clear...222.749059Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Thunder & Lightning02018-01-01 10:33:28
4730.041778FalseKibana AirlinesXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...785.779071Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFDamaging Wind02018-01-01 05:13:00
5418.152089FalseJetBeatsGenoa Cristoforo Colombo AirportGE01GenovaIT{'lat': '44.4133', 'lon': '8.8375'}IT-42Thunder & Lightning...393.590441Edmonton International AirportCYEGEdmontonCA{'lat': '53.30970001', 'lon': '-113.5800018'}CA-ABRain02018-01-01 01:43:03
6180.246816FalseJetBeatsZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHHail...300.000000Zurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHClear02018-01-01 13:49:53
7585.184310FalseKibana AirlinesOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONClear...614.942480Ciampino___G. B. Pastine International AirportRM12RomeIT{'lat': '41.7994', 'lon': '12.5949'}IT-62Thunder & Lightning02018-01-01 04:54:59
8960.869736TrueKibana AirlinesRajiv Gandhi International AirportHYDHyderabadIN{'lat': '17.23131752', 'lon': '78.42985535'}SE-BDCloudy...602.030591Milano Linate AirportMI11MilanIT{'lat': '45.445099', 'lon': '9.27674'}IT-25Heavy Fog02018-01-01 12:09:35
9296.877773FalseLogstash AirwaysTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Rain...174.822216Sheremetyevo International AirportSVOMoscowRU{'lat': '55.972599', 'lon': '37.4146'}RU-MOSCloudy02018-01-01 12:09:35
10906.437948FalseJetBeatsHelsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESRain...503.045170Albuquerque International Sunport AirportABQAlbuquerqueUS{'lat': '35.040199', 'lon': '-106.609001'}US-NMRain02018-01-01 22:06:14
11704.463771FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Cloudy...36.075018Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 11:52:34
12922.499077TrueLogstash AirwaysShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDClear...679.768391Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFHeavy Fog02018-01-01 02:13:46
13374.959276FalseLogstash AirwaysOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONRain...330.418282Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Rain02018-01-01 14:21:13
14552.917371FalseLogstash AirwaysLuis Munoz Marin International AirportSJUSan JuanPR{'lat': '18.43939972', 'lon': '-66.00180054'}PR-U-AClear...407.145031Ciampino___G. B. Pastine International AirportRM12RomeIT{'lat': '41.7994', 'lon': '12.5949'}IT-62Cloudy02018-01-01 17:42:53
15566.487557TrueKibana AirlinesCologne Bonn AirportCGNCologneDE{'lat': '50.86589813', 'lon': '7.142739773'}DE-NWSunny...656.712658Chengdu Shuangliu International AirportCTUChengduCN{'lat': '30.57850075', 'lon': '103.9469986'}SE-BDThunder & Lightning02018-01-01 19:55:32
16989.952787TrueLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Damaging Wind...773.030334Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFThunder & Lightning02018-01-01 07:49:27
17569.613255FalseES-AirMinistro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}SE-BDCloudy...704.716920Cleveland Hopkins International AirportCLEClevelandUS{'lat': '41.4117012', 'lon': '-81.84980011'}US-OHRain02018-01-01 01:30:47
18277.429707FalseES-AirShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDClear...355.957996Olenya Air BaseXLMOOlenegorskRU{'lat': '68.15180206', 'lon': '33.46390152'}RU-MURHail02018-01-01 07:58:17
19772.100846FalseJetBeatsIndira Gandhi International AirportDELNew DelhiIN{'lat': '28.5665', 'lon': '77.103104'}SE-BDClear...875.114675Casper-Natrona County International AirportCPRCasperUS{'lat': '42.90800095', 'lon': '-106.4639969'}US-WYCloudy02018-01-01 00:02:06
20167.599922FalseJetBeatsWichita Mid Continent AirportICTWichitaUS{'lat': '37.64989853', 'lon': '-97.43309784'}US-KSClear...373.966883Erie International Tom Ridge FieldERIErieUS{'lat': '42.08312701', 'lon': '-80.17386675'}US-PACloudy02018-01-01 01:08:20
21253.210065FalseES-AirOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONHail...130.667700Newark Liberty International AirportEWRNewarkUS{'lat': '40.69250107', 'lon': '-74.16870117'}US-NJClear02018-01-01 01:08:20
22917.247620FalseJetBeatsItami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDDamaging Wind...574.495310Copenhagen Kastrup AirportCPHCopenhagenDK{'lat': '55.61790085', 'lon': '12.65600014'}DK-84Sunny02018-01-01 07:48:35
23451.591176FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Heavy Fog...579.728943Seattle Tacoma International AirportSEASeattleUS{'lat': '47.44900131', 'lon': '-122.3089981'}US-WAHeavy Fog02018-01-01 18:57:21
24307.067201FalseLogstash AirwaysCharles de Gaulle International AirportCDGParisFR{'lat': '49.01279831', 'lon': '2.549999952'}FR-JClear...50.157229Berlin-Tegel AirportTXLBerlinDE{'lat': '52.5597', 'lon': '13.2877'}DE-BERain02018-01-01 13:18:25
25268.241596FalseES-AirNarita International AirportNRTTokyoJP{'lat': '35.76470184', 'lon': '140.3860016'}SE-BDRain...527.567422Manchester AirportMANManchesterGB{'lat': '53.35369873', 'lon': '-2.274950027'}GB-ENGThunder & Lightning02018-01-01 08:20:35
26975.812632TrueKibana AirlinesItami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDHail...386.259764Helsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESRain02018-01-01 15:38:32
27134.214546FalseJetBeatsSan Diego International AirportSANSan DiegoUS{'lat': '32.73360062', 'lon': '-117.1900024'}US-CAClear...24.479650Phoenix Sky Harbor International AirportPHXPhoenixUS{'lat': '33.43429947', 'lon': '-112.012001'}US-AZClear02018-01-01 03:08:45
28988.897564FalseKibana AirlinesVerona Villafranca AirportVR10VeronaIT{'lat': '45.395699', 'lon': '10.8885'}IT-34Sunny...568.351033New Chitose AirportCTSChitose / TomakomaiJP{'lat': '42.77519989', 'lon': '141.6920013'}SE-BDDamaging Wind02018-01-01 01:16:59
29511.067220FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHRain...425.889194Tulsa International AirportTULTulsaUS{'lat': '36.19839859', 'lon': '-95.88809967'}US-OKRain02018-01-01 18:00:59
..................................................................
13029795.905278FalseKibana AirlinesMalpensa International AirportMI12MilanIT{'lat': '45.6306', 'lon': '8.72811'}IT-25Sunny...534.375826Itami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDSunny62018-02-11 20:10:13
13030863.388068FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDDamaging Wind...141.172633Tokyo Haneda International AirportHNDTokyoJP{'lat': '35.552299', 'lon': '139.779999'}SE-BDClear62018-02-11 18:59:53
13031575.183008FalseJetBeatsSavannah Hilton Head International AirportSAVSavannahUS{'lat': '32.12760162', 'lon': '-81.20210266'}US-GAThunder & Lightning...1113.137060OR Tambo International AirportJNBJohannesburgZA{'lat': '-26.1392', 'lon': '28.246'}SE-BDHail62018-02-11 00:57:48
13032817.368952FalseJetBeatsSyracuse Hancock International AirportSYRSyracuseUS{'lat': '43.11119843', 'lon': '-76.10630035'}US-NYRain...714.964864El Dorado International AirportBOGBogotaCO{'lat': '4.70159', 'lon': '-74.1469'}CO-CUNThunder & Lightning62018-02-11 12:02:49
13033579.582455FalseES-AirTampa International AirportTPATampaUS{'lat': '27.97550011', 'lon': '-82.53320313'}US-FLRain...234.929046Jorge Chavez International AirportLIMLimaPE{'lat': '-12.0219', 'lon': '-77.114304'}SE-BDThunder & Lightning62018-02-11 02:07:40
130341004.916638FalseJetBeatsOlenya Air BaseXLMOOlenegorskRU{'lat': '68.15180206', 'lon': '33.46390152'}RU-MURClear...526.895776Gimpo International AirportGMPSeoulKR{'lat': '37.5583', 'lon': '126.791'}SE-BDSunny62018-02-11 00:35:04
13035357.562842TrueLogstash AirwaysShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDThunder & Lightning...0.000000Shanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDThunder & Lightning62018-02-11 11:19:12
13036429.580539FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...150.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy62018-02-11 15:07:11
13037729.788171TrueES-AirVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Rain...691.944839Ukrainka Air BaseXHBUBelogorskRU{'lat': '51.169997', 'lon': '128.445007'}RU-AMUDamaging Wind62018-02-11 10:24:42
13038564.897695FalseES-AirPisa International AirportPI05PisaIT{'lat': '43.683899', 'lon': '10.3927'}IT-52Heavy Fog...567.387339OR Tambo International AirportJNBJohannesburgZA{'lat': '-26.1392', 'lon': '28.246'}SE-BDDamaging Wind62018-02-11 00:42:06
130391014.052787FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Thunder & Lightning...690.092327Montreal / Pierre Elliott Trudeau Internationa...YULMontrealCA{'lat': '45.47060013', 'lon': '-73.74079895'}CA-QCThunder & Lightning62018-02-11 10:56:31
13040455.243843FalseES-AirLondon Luton AirportLTNLondonGB{'lat': '51.87469864', 'lon': '-0.368333012'}GB-ENGCloudy...3.028293London Heathrow AirportLHRLondonGB{'lat': '51.4706', 'lon': '-0.461941'}GB-ENGClear62018-02-11 00:39:37
13041611.370232FalseLogstash AirwaysJorge Chavez International AirportLIMLimaPE{'lat': '-12.0219', 'lon': '-77.114304'}SE-BDSunny...338.875531Casper-Natrona County International AirportCPRCasperUS{'lat': '42.90800095', 'lon': '-106.4639969'}US-WYRain62018-02-11 10:24:30
13042595.961285FalseJetBeatsOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONClear...375.129587Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HEClear62018-02-11 09:02:07
13043782.747648FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...156.858481Tokyo Haneda International AirportHNDTokyoJP{'lat': '35.552299', 'lon': '139.779999'}SE-BDThunder & Lightning62018-02-11 04:45:06
13044891.117221FalseJetBeatsWinnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBClear...354.106457Vienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Thunder & Lightning62018-02-11 00:51:14
13045587.169921FalseLogstash AirwaysBrisbane International AirportBNEBrisbaneAU{'lat': '-27.38419914', 'lon': '153.1170044'}SE-BDRain...771.305442Amsterdam Airport SchipholAMSAmsterdamNL{'lat': '52.30860138', 'lon': '4.76388979'}NL-NHSunny62018-02-11 05:41:51
13046739.132165FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDRain...542.955572Winnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBHail62018-02-11 10:02:21
13047605.191876FalseJetBeatsPortland International Jetport AirportPWMPortlandUS{'lat': '43.64619827', 'lon': '-70.30930328'}US-METhunder & Lightning...564.599857Jeju International AirportCJUJeju CityKR{'lat': '33.51129913', 'lon': '126.4929962'}SE-BDCloudy62018-02-11 15:55:10
13048361.767659TrueLogstash AirwaysDubai International AirportDXBDubaiAE{'lat': '25.25279999', 'lon': '55.36439896'}SE-BDSunny...180.000000Dubai International AirportDXBDubaiAE{'lat': '25.25279999', 'lon': '55.36439896'}SE-BDHail62018-02-11 04:11:14
13049662.306992FalseES-AirWinnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBHeavy Fog...835.954429Ministro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}AR-BSunny62018-02-11 10:13:32
13050630.779526FalseJetBeatsHelsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESSunny...451.755639Beijing Capital International AirportPEKBeijingCN{'lat': '40.08010101', 'lon': '116.5849991'}SE-BDCloudy62018-02-11 11:23:23
13051937.771279TrueLogstash AirwaysLester B. Pearson International AirportYYZTorontoCA{'lat': '43.67720032', 'lon': '-79.63059998'}CA-ONSunny...507.451571Leonardo da Vinci___Fiumicino AirportRM11RomeIT{'lat': '41.8002778', 'lon': '12.2388889'}IT-62Hail62018-02-11 01:13:50
130521085.155339FalseLogstash AirwaysMelbourne International AirportMELMelbourneAU{'lat': '-37.673302', 'lon': '144.843002'}SE-BDCloudy...1044.451122Bologna Guglielmo Marconi AirportBO08BolognaIT{'lat': '44.5354', 'lon': '11.2887'}IT-45Cloudy62018-02-11 18:35:42
130531191.964104FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHHail...728.715904Portland International Jetport AirportPWMPortlandUS{'lat': '43.64619827', 'lon': '-70.30930328'}US-MEClear62018-02-11 19:02:10
130541080.446279FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDRain...402.929088Pisa International AirportPI05PisaIT{'lat': '43.683899', 'lon': '10.3927'}IT-52Sunny62018-02-11 20:42:25
13055646.612941FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHRain...644.418029Winnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBRain62018-02-11 01:41:57
13056997.751876FalseLogstash AirwaysUkrainka Air BaseXHBUBelogorskRU{'lat': '51.169997', 'lon': '128.445007'}RU-AMURain...937.540811Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFSunny62018-02-11 04:09:27
130571102.814465FalseJetBeatsMinistro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}SE-BDHail...1697.404971Itami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDHail62018-02-11 08:28:21
13058858.144337FalseJetBeatsWashington Dulles International AirportIADWashingtonUS{'lat': '38.94449997', 'lon': '-77.45580292'}US-DCHeavy Fog...1610.761827Adelaide International AirportADLAdelaideAU{'lat': '-34.945', 'lon': '138.531006'}SE-BDRain62018-02-11 14:54:34
\n", - "

13059 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " AvgTicketPrice Cancelled Carrier \\\n", - "0 841.265642 False Kibana Airlines \n", - "1 882.982662 False Logstash Airways \n", - "2 190.636904 False Logstash Airways \n", - "3 181.694216 True Kibana Airlines \n", - "4 730.041778 False Kibana Airlines \n", - "5 418.152089 False JetBeats \n", - "6 180.246816 False JetBeats \n", - "7 585.184310 False Kibana Airlines \n", - "8 960.869736 True Kibana Airlines \n", - "9 296.877773 False Logstash Airways \n", - "10 906.437948 False JetBeats \n", - "11 704.463771 False Logstash Airways \n", - "12 922.499077 True Logstash Airways \n", - "13 374.959276 False Logstash Airways \n", - "14 552.917371 False Logstash Airways \n", - "15 566.487557 True Kibana Airlines \n", - "16 989.952787 True Logstash Airways \n", - "17 569.613255 False ES-Air \n", - "18 277.429707 False ES-Air \n", - "19 772.100846 False JetBeats \n", - "20 167.599922 False JetBeats \n", - "21 253.210065 False ES-Air \n", - "22 917.247620 False JetBeats \n", - "23 451.591176 False Logstash Airways \n", - "24 307.067201 False Logstash Airways \n", - "25 268.241596 False ES-Air \n", - "26 975.812632 True Kibana Airlines \n", - "27 134.214546 False JetBeats \n", - "28 988.897564 False Kibana Airlines \n", - "29 511.067220 False Logstash Airways \n", - "... ... ... ... \n", - "13029 795.905278 False Kibana Airlines \n", - "13030 863.388068 False Logstash Airways \n", - "13031 575.183008 False JetBeats \n", - "13032 817.368952 False JetBeats \n", - "13033 579.582455 False ES-Air \n", - "13034 1004.916638 False JetBeats \n", - "13035 357.562842 True Logstash Airways \n", - "13036 429.580539 False Logstash Airways \n", - "13037 729.788171 True ES-Air \n", - "13038 564.897695 False ES-Air \n", - "13039 1014.052787 False Logstash Airways \n", - "13040 455.243843 False ES-Air \n", - "13041 611.370232 False Logstash Airways \n", - "13042 595.961285 False JetBeats \n", - "13043 782.747648 False Logstash Airways \n", - "13044 891.117221 False JetBeats \n", - "13045 587.169921 False Logstash Airways \n", - "13046 739.132165 False Logstash Airways \n", - "13047 605.191876 False JetBeats \n", - "13048 361.767659 True Logstash Airways \n", - "13049 662.306992 False ES-Air \n", - "13050 630.779526 False JetBeats \n", - "13051 937.771279 True Logstash Airways \n", - "13052 1085.155339 False Logstash Airways \n", - "13053 1191.964104 False Logstash Airways \n", - "13054 1080.446279 False Logstash Airways \n", - "13055 646.612941 False Logstash Airways \n", - "13056 997.751876 False Logstash Airways \n", - "13057 1102.814465 False JetBeats \n", - "13058 858.144337 False JetBeats \n", - "\n", - " Dest DestAirportID \\\n", - "0 Sydney Kingsford Smith International Airport SYD \n", - "1 Venice Marco Polo Airport VE05 \n", - "2 Venice Marco Polo Airport VE05 \n", - "3 Treviso-Sant'Angelo Airport TV01 \n", - "4 Xi'an Xianyang International Airport XIY \n", - "5 Genoa Cristoforo Colombo Airport GE01 \n", - "6 Zurich Airport ZRH \n", - "7 Ottawa Macdonald-Cartier International Airport YOW \n", - "8 Rajiv Gandhi International Airport HYD \n", - "9 Treviso-Sant'Angelo Airport TV01 \n", - "10 Helsinki Vantaa Airport HEL \n", - "11 Vienna International Airport VIE \n", - "12 Shanghai Pudong International Airport PVG \n", - "13 Ottawa Macdonald-Cartier International Airport YOW \n", - "14 Luis Munoz Marin International Airport SJU \n", - "15 Cologne Bonn Airport CGN \n", - "16 Venice Marco Polo Airport VE05 \n", - "17 Ministro Pistarini International Airport EZE \n", - "18 Shanghai Pudong International Airport PVG \n", - "19 Indira Gandhi International Airport DEL \n", - "20 Wichita Mid Continent Airport ICT \n", - "21 Ottawa Macdonald-Cartier International Airport YOW \n", - "22 Itami Airport ITM \n", - "23 Vienna International Airport VIE \n", - "24 Charles de Gaulle International Airport CDG \n", - "25 Narita International Airport NRT \n", - "26 Itami Airport ITM \n", - "27 San Diego International Airport SAN \n", - "28 Verona Villafranca Airport VR10 \n", - "29 Zurich Airport ZRH \n", - "... ... ... \n", - "13029 Malpensa International Airport MI12 \n", - "13030 Xi'an Xianyang International Airport XIY \n", - "13031 Savannah Hilton Head International Airport SAV \n", - "13032 Syracuse Hancock International Airport SYR \n", - "13033 Tampa International Airport TPA \n", - "13034 Olenya Air Base XLMO \n", - "13035 Shanghai Pudong International Airport PVG \n", - "13036 Venice Marco Polo Airport VE05 \n", - "13037 Vienna International Airport VIE \n", - "13038 Pisa International Airport PI05 \n", - "13039 Vienna International Airport VIE \n", - "13040 London Luton Airport LTN \n", - "13041 Jorge Chavez International Airport LIM \n", - "13042 Ottawa Macdonald-Cartier International Airport YOW \n", - "13043 Xi'an Xianyang International Airport XIY \n", - "13044 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13045 Brisbane International Airport BNE \n", - "13046 Xi'an Xianyang International Airport XIY \n", - "13047 Portland International Jetport Airport PWM \n", - "13048 Dubai International Airport DXB \n", - "13049 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13050 Helsinki Vantaa Airport HEL \n", - "13051 Lester B. Pearson International Airport YYZ \n", - "13052 Melbourne International Airport MEL \n", - "13053 Zurich Airport ZRH \n", - "13054 Xi'an Xianyang International Airport XIY \n", - "13055 Zurich Airport ZRH \n", - "13056 Ukrainka Air Base XHBU \n", - "13057 Ministro Pistarini International Airport EZE \n", - "13058 Washington Dulles International Airport IAD \n", - "\n", - " DestCityName DestCountry \\\n", - "0 Sydney AU \n", - "1 Venice IT \n", - "2 Venice IT \n", - "3 Treviso IT \n", - "4 Xi'an CN \n", - "5 Genova IT \n", - "6 Zurich CH \n", - "7 Ottawa CA \n", - "8 Hyderabad IN \n", - "9 Treviso IT \n", - "10 Helsinki FI \n", - "11 Vienna AT \n", - "12 Shanghai CN \n", - "13 Ottawa CA \n", - "14 San Juan PR \n", - "15 Cologne DE \n", - "16 Venice IT \n", - "17 Buenos Aires AR \n", - "18 Shanghai CN \n", - "19 New Delhi IN \n", - "20 Wichita US \n", - "21 Ottawa CA \n", - "22 Osaka JP \n", - "23 Vienna AT \n", - "24 Paris FR \n", - "25 Tokyo JP \n", - "26 Osaka JP \n", - "27 San Diego US \n", - "28 Verona IT \n", - "29 Zurich CH \n", - "... ... ... \n", - "13029 Milan IT \n", - "13030 Xi'an CN \n", - "13031 Savannah US \n", - "13032 Syracuse US \n", - "13033 Tampa US \n", - "13034 Olenegorsk RU \n", - "13035 Shanghai CN \n", - "13036 Venice IT \n", - "13037 Vienna AT \n", - "13038 Pisa IT \n", - "13039 Vienna AT \n", - "13040 London GB \n", - "13041 Lima PE \n", - "13042 Ottawa CA \n", - "13043 Xi'an CN \n", - "13044 Winnipeg CA \n", - "13045 Brisbane AU \n", - "13046 Xi'an CN \n", - "13047 Portland US \n", - "13048 Dubai AE \n", - "13049 Winnipeg CA \n", - "13050 Helsinki FI \n", - "13051 Toronto CA \n", - "13052 Melbourne AU \n", - "13053 Zurich CH \n", - "13054 Xi'an CN \n", - "13055 Zurich CH \n", - "13056 Belogorsk RU \n", - "13057 Buenos Aires AR \n", - "13058 Washington US \n", - "\n", - " DestLocation DestRegion \\\n", - "0 {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n", - "1 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "3 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", - "4 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "5 {'lat': '44.4133', 'lon': '8.8375'} IT-42 \n", - "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "7 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "8 {'lat': '17.23131752', 'lon': '78.42985535'} SE-BD \n", - "9 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", - "10 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "11 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "12 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "14 {'lat': '18.43939972', 'lon': '-66.00180054'} PR-U-A \n", - "15 {'lat': '50.86589813', 'lon': '7.142739773'} DE-NW \n", - "16 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "17 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", - "18 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "19 {'lat': '28.5665', 'lon': '77.103104'} SE-BD \n", - "20 {'lat': '37.64989853', 'lon': '-97.43309784'} US-KS \n", - "21 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "22 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "23 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "24 {'lat': '49.01279831', 'lon': '2.549999952'} FR-J \n", - "25 {'lat': '35.76470184', 'lon': '140.3860016'} SE-BD \n", - "26 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "27 {'lat': '32.73360062', 'lon': '-117.1900024'} US-CA \n", - "28 {'lat': '45.395699', 'lon': '10.8885'} IT-34 \n", - "29 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "... ... ... \n", - "13029 {'lat': '45.6306', 'lon': '8.72811'} IT-25 \n", - "13030 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13031 {'lat': '32.12760162', 'lon': '-81.20210266'} US-GA \n", - "13032 {'lat': '43.11119843', 'lon': '-76.10630035'} US-NY \n", - "13033 {'lat': '27.97550011', 'lon': '-82.53320313'} US-FL \n", - "13034 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", - "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "13037 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13038 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", - "13039 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13040 {'lat': '51.87469864', 'lon': '-0.368333012'} GB-ENG \n", - "13041 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", - "13042 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "13043 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13044 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13045 {'lat': '-27.38419914', 'lon': '153.1170044'} SE-BD \n", - "13046 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13047 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", - "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", - "13049 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13050 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "13051 {'lat': '43.67720032', 'lon': '-79.63059998'} CA-ON \n", - "13052 {'lat': '-37.673302', 'lon': '144.843002'} SE-BD \n", - "13053 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "13054 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13055 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "13056 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", - "13057 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", - "13058 {'lat': '38.94449997', 'lon': '-77.45580292'} US-DC \n", - "\n", - " DestWeather ... FlightTimeMin \\\n", - "0 Rain ... 1030.770416 \n", - "1 Sunny ... 464.389481 \n", - "2 Cloudy ... 0.000000 \n", - "3 Clear ... 222.749059 \n", - "4 Clear ... 785.779071 \n", - "5 Thunder & Lightning ... 393.590441 \n", - "6 Hail ... 300.000000 \n", - "7 Clear ... 614.942480 \n", - "8 Cloudy ... 602.030591 \n", - "9 Rain ... 174.822216 \n", - "10 Rain ... 503.045170 \n", - "11 Cloudy ... 36.075018 \n", - "12 Clear ... 679.768391 \n", - "13 Rain ... 330.418282 \n", - "14 Clear ... 407.145031 \n", - "15 Sunny ... 656.712658 \n", - "16 Damaging Wind ... 773.030334 \n", - "17 Cloudy ... 704.716920 \n", - "18 Clear ... 355.957996 \n", - "19 Clear ... 875.114675 \n", - "20 Clear ... 373.966883 \n", - "21 Hail ... 130.667700 \n", - "22 Damaging Wind ... 574.495310 \n", - "23 Heavy Fog ... 579.728943 \n", - "24 Clear ... 50.157229 \n", - "25 Rain ... 527.567422 \n", - "26 Hail ... 386.259764 \n", - "27 Clear ... 24.479650 \n", - "28 Sunny ... 568.351033 \n", - "29 Rain ... 425.889194 \n", - "... ... ... ... \n", - "13029 Sunny ... 534.375826 \n", - "13030 Damaging Wind ... 141.172633 \n", - "13031 Thunder & Lightning ... 1113.137060 \n", - "13032 Rain ... 714.964864 \n", - "13033 Rain ... 234.929046 \n", - "13034 Clear ... 526.895776 \n", - "13035 Thunder & Lightning ... 0.000000 \n", - "13036 Sunny ... 150.000000 \n", - "13037 Rain ... 691.944839 \n", - "13038 Heavy Fog ... 567.387339 \n", - "13039 Thunder & Lightning ... 690.092327 \n", - "13040 Cloudy ... 3.028293 \n", - "13041 Sunny ... 338.875531 \n", - "13042 Clear ... 375.129587 \n", - "13043 Clear ... 156.858481 \n", - "13044 Clear ... 354.106457 \n", - "13045 Rain ... 771.305442 \n", - "13046 Rain ... 542.955572 \n", - "13047 Thunder & Lightning ... 564.599857 \n", - "13048 Sunny ... 180.000000 \n", - "13049 Heavy Fog ... 835.954429 \n", - "13050 Sunny ... 451.755639 \n", - "13051 Sunny ... 507.451571 \n", - "13052 Cloudy ... 1044.451122 \n", - "13053 Hail ... 728.715904 \n", - "13054 Rain ... 402.929088 \n", - "13055 Rain ... 644.418029 \n", - "13056 Rain ... 937.540811 \n", - "13057 Hail ... 1697.404971 \n", - "13058 Heavy Fog ... 1610.761827 \n", - "\n", - " Origin OriginAirportID \\\n", - "0 Frankfurt am Main Airport FRA \n", - "1 Cape Town International Airport CPT \n", - "2 Venice Marco Polo Airport VE05 \n", - "3 Naples International Airport NA01 \n", - "4 Licenciado Benito Juarez International Airport AICM \n", - "5 Edmonton International Airport CYEG \n", - "6 Zurich Airport ZRH \n", - "7 Ciampino___G. B. Pastine International Airport RM12 \n", - "8 Milano Linate Airport MI11 \n", - "9 Sheremetyevo International Airport SVO \n", - "10 Albuquerque International Sunport Airport ABQ \n", - "11 Venice Marco Polo Airport VE05 \n", - "12 Licenciado Benito Juarez International Airport AICM \n", - "13 Naples International Airport NA01 \n", - "14 Ciampino___G. B. Pastine International Airport RM12 \n", - "15 Chengdu Shuangliu International Airport CTU \n", - "16 Licenciado Benito Juarez International Airport AICM \n", - "17 Cleveland Hopkins International Airport CLE \n", - "18 Olenya Air Base XLMO \n", - "19 Casper-Natrona County International Airport CPR \n", - "20 Erie International Tom Ridge Field ERI \n", - "21 Newark Liberty International Airport EWR \n", - "22 Copenhagen Kastrup Airport CPH \n", - "23 Seattle Tacoma International Airport SEA \n", - "24 Berlin-Tegel Airport TXL \n", - "25 Manchester Airport MAN \n", - "26 Helsinki Vantaa Airport HEL \n", - "27 Phoenix Sky Harbor International Airport PHX \n", - "28 New Chitose Airport CTS \n", - "29 Tulsa International Airport TUL \n", - "... ... ... \n", - "13029 Itami Airport ITM \n", - "13030 Tokyo Haneda International Airport HND \n", - "13031 OR Tambo International Airport JNB \n", - "13032 El Dorado International Airport BOG \n", - "13033 Jorge Chavez International Airport LIM \n", - "13034 Gimpo International Airport GMP \n", - "13035 Shanghai Pudong International Airport PVG \n", - "13036 Venice Marco Polo Airport VE05 \n", - "13037 Ukrainka Air Base XHBU \n", - "13038 OR Tambo International Airport JNB \n", - "13039 Montreal / Pierre Elliott Trudeau Internationa... YUL \n", - "13040 London Heathrow Airport LHR \n", - "13041 Casper-Natrona County International Airport CPR \n", - "13042 Frankfurt am Main Airport FRA \n", - "13043 Tokyo Haneda International Airport HND \n", - "13044 Vienna International Airport VIE \n", - "13045 Amsterdam Airport Schiphol AMS \n", - "13046 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13047 Jeju International Airport CJU \n", - "13048 Dubai International Airport DXB \n", - "13049 Ministro Pistarini International Airport EZE \n", - "13050 Beijing Capital International Airport PEK \n", - "13051 Leonardo da Vinci___Fiumicino Airport RM11 \n", - "13052 Bologna Guglielmo Marconi Airport BO08 \n", - "13053 Portland International Jetport Airport PWM \n", - "13054 Pisa International Airport PI05 \n", - "13055 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13056 Licenciado Benito Juarez International Airport AICM \n", - "13057 Itami Airport ITM \n", - "13058 Adelaide International Airport ADL \n", - "\n", - " OriginCityName OriginCountry \\\n", - "0 Frankfurt am Main DE \n", - "1 Cape Town ZA \n", - "2 Venice IT \n", - "3 Naples IT \n", - "4 Mexico City MX \n", - "5 Edmonton CA \n", - "6 Zurich CH \n", - "7 Rome IT \n", - "8 Milan IT \n", - "9 Moscow RU \n", - "10 Albuquerque US \n", - "11 Venice IT \n", - "12 Mexico City MX \n", - "13 Naples IT \n", - "14 Rome IT \n", - "15 Chengdu CN \n", - "16 Mexico City MX \n", - "17 Cleveland US \n", - "18 Olenegorsk RU \n", - "19 Casper US \n", - "20 Erie US \n", - "21 Newark US \n", - "22 Copenhagen DK \n", - "23 Seattle US \n", - "24 Berlin DE \n", - "25 Manchester GB \n", - "26 Helsinki FI \n", - "27 Phoenix US \n", - "28 Chitose / Tomakomai JP \n", - "29 Tulsa US \n", - "... ... ... \n", - "13029 Osaka JP \n", - "13030 Tokyo JP \n", - "13031 Johannesburg ZA \n", - "13032 Bogota CO \n", - "13033 Lima PE \n", - "13034 Seoul KR \n", - "13035 Shanghai CN \n", - "13036 Venice IT \n", - "13037 Belogorsk RU \n", - "13038 Johannesburg ZA \n", - "13039 Montreal CA \n", - "13040 London GB \n", - "13041 Casper US \n", - "13042 Frankfurt am Main DE \n", - "13043 Tokyo JP \n", - "13044 Vienna AT \n", - "13045 Amsterdam NL \n", - "13046 Winnipeg CA \n", - "13047 Jeju City KR \n", - "13048 Dubai AE \n", - "13049 Buenos Aires AR \n", - "13050 Beijing CN \n", - "13051 Rome IT \n", - "13052 Bologna IT \n", - "13053 Portland US \n", - "13054 Pisa IT \n", - "13055 Winnipeg CA \n", - "13056 Mexico City MX \n", - "13057 Osaka JP \n", - "13058 Adelaide AU \n", - "\n", - " OriginLocation OriginRegion \\\n", - "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", - "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n", - "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", - "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "5 {'lat': '53.30970001', 'lon': '-113.5800018'} CA-AB \n", - "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "7 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", - "8 {'lat': '45.445099', 'lon': '9.27674'} IT-25 \n", - "9 {'lat': '55.972599', 'lon': '37.4146'} RU-MOS \n", - "10 {'lat': '35.040199', 'lon': '-106.609001'} US-NM \n", - "11 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "12 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "13 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", - "14 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", - "15 {'lat': '30.57850075', 'lon': '103.9469986'} SE-BD \n", - "16 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "17 {'lat': '41.4117012', 'lon': '-81.84980011'} US-OH \n", - "18 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", - "19 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", - "20 {'lat': '42.08312701', 'lon': '-80.17386675'} US-PA \n", - "21 {'lat': '40.69250107', 'lon': '-74.16870117'} US-NJ \n", - "22 {'lat': '55.61790085', 'lon': '12.65600014'} DK-84 \n", - "23 {'lat': '47.44900131', 'lon': '-122.3089981'} US-WA \n", - "24 {'lat': '52.5597', 'lon': '13.2877'} DE-BE \n", - "25 {'lat': '53.35369873', 'lon': '-2.274950027'} GB-ENG \n", - "26 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "27 {'lat': '33.43429947', 'lon': '-112.012001'} US-AZ \n", - "28 {'lat': '42.77519989', 'lon': '141.6920013'} SE-BD \n", - "29 {'lat': '36.19839859', 'lon': '-95.88809967'} US-OK \n", - "... ... ... \n", - "13029 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "13030 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", - "13031 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", - "13032 {'lat': '4.70159', 'lon': '-74.1469'} CO-CUN \n", - "13033 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", - "13034 {'lat': '37.5583', 'lon': '126.791'} SE-BD \n", - "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "13037 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", - "13038 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", - "13039 {'lat': '45.47060013', 'lon': '-73.74079895'} CA-QC \n", - "13040 {'lat': '51.4706', 'lon': '-0.461941'} GB-ENG \n", - "13041 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", - "13042 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", - "13043 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", - "13044 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13045 {'lat': '52.30860138', 'lon': '4.76388979'} NL-NH \n", - "13046 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13047 {'lat': '33.51129913', 'lon': '126.4929962'} SE-BD \n", - "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", - "13049 {'lat': '-34.8222', 'lon': '-58.5358'} AR-B \n", - "13050 {'lat': '40.08010101', 'lon': '116.5849991'} SE-BD \n", - "13051 {'lat': '41.8002778', 'lon': '12.2388889'} IT-62 \n", - "13052 {'lat': '44.5354', 'lon': '11.2887'} IT-45 \n", - "13053 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", - "13054 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", - "13055 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13056 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "13057 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "13058 {'lat': '-34.945', 'lon': '138.531006'} SE-BD \n", - "\n", - " OriginWeather dayOfWeek timestamp \n", - "0 Sunny 0 2018-01-01 00:00:00 \n", - "1 Clear 0 2018-01-01 18:27:00 \n", - "2 Rain 0 2018-01-01 17:11:14 \n", - "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n", - "4 Damaging Wind 0 2018-01-01 05:13:00 \n", - "5 Rain 0 2018-01-01 01:43:03 \n", - "6 Clear 0 2018-01-01 13:49:53 \n", - "7 Thunder & Lightning 0 2018-01-01 04:54:59 \n", - "8 Heavy Fog 0 2018-01-01 12:09:35 \n", - "9 Cloudy 0 2018-01-01 12:09:35 \n", - "10 Rain 0 2018-01-01 22:06:14 \n", - "11 Rain 0 2018-01-01 11:52:34 \n", - "12 Heavy Fog 0 2018-01-01 02:13:46 \n", - "13 Rain 0 2018-01-01 14:21:13 \n", - "14 Cloudy 0 2018-01-01 17:42:53 \n", - "15 Thunder & Lightning 0 2018-01-01 19:55:32 \n", - "16 Thunder & Lightning 0 2018-01-01 07:49:27 \n", - "17 Rain 0 2018-01-01 01:30:47 \n", - "18 Hail 0 2018-01-01 07:58:17 \n", - "19 Cloudy 0 2018-01-01 00:02:06 \n", - "20 Cloudy 0 2018-01-01 01:08:20 \n", - "21 Clear 0 2018-01-01 01:08:20 \n", - "22 Sunny 0 2018-01-01 07:48:35 \n", - "23 Heavy Fog 0 2018-01-01 18:57:21 \n", - "24 Rain 0 2018-01-01 13:18:25 \n", - "25 Thunder & Lightning 0 2018-01-01 08:20:35 \n", - "26 Rain 0 2018-01-01 15:38:32 \n", - "27 Clear 0 2018-01-01 03:08:45 \n", - "28 Damaging Wind 0 2018-01-01 01:16:59 \n", - "29 Rain 0 2018-01-01 18:00:59 \n", - "... ... ... ... \n", - "13029 Sunny 6 2018-02-11 20:10:13 \n", - "13030 Clear 6 2018-02-11 18:59:53 \n", - "13031 Hail 6 2018-02-11 00:57:48 \n", - "13032 Thunder & Lightning 6 2018-02-11 12:02:49 \n", - "13033 Thunder & Lightning 6 2018-02-11 02:07:40 \n", - "13034 Sunny 6 2018-02-11 00:35:04 \n", - "13035 Thunder & Lightning 6 2018-02-11 11:19:12 \n", - "13036 Cloudy 6 2018-02-11 15:07:11 \n", - "13037 Damaging Wind 6 2018-02-11 10:24:42 \n", - "13038 Damaging Wind 6 2018-02-11 00:42:06 \n", - "13039 Thunder & Lightning 6 2018-02-11 10:56:31 \n", - "13040 Clear 6 2018-02-11 00:39:37 \n", - "13041 Rain 6 2018-02-11 10:24:30 \n", - "13042 Clear 6 2018-02-11 09:02:07 \n", - "13043 Thunder & Lightning 6 2018-02-11 04:45:06 \n", - "13044 Thunder & Lightning 6 2018-02-11 00:51:14 \n", - "13045 Sunny 6 2018-02-11 05:41:51 \n", - "13046 Hail 6 2018-02-11 10:02:21 \n", - "13047 Cloudy 6 2018-02-11 15:55:10 \n", - "13048 Hail 6 2018-02-11 04:11:14 \n", - "13049 Sunny 6 2018-02-11 10:13:32 \n", - "13050 Cloudy 6 2018-02-11 11:23:23 \n", - "13051 Hail 6 2018-02-11 01:13:50 \n", - "13052 Cloudy 6 2018-02-11 18:35:42 \n", - "13053 Clear 6 2018-02-11 19:02:10 \n", - "13054 Sunny 6 2018-02-11 20:42:25 \n", - "13055 Rain 6 2018-02-11 01:41:57 \n", - "13056 Sunny 6 2018-02-11 04:09:27 \n", - "13057 Hail 6 2018-02-11 08:28:21 \n", - "13058 Rain 6 2018-02-11 14:54:34 \n", - "\n", - "[13059 rows x 27 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd_sdf" - ] - }, { "cell_type": "code", "execution_count": 4, @@ -2529,7 +421,7 @@ } ], "source": [ - "pd_sdf.describe()" + "pd_df.describe()" ] }, { @@ -2833,6 +725,7 @@ " DistanceKilometers\n", " DistanceMiles\n", " FlightDelayMin\n", + " FlightTimeHour\n", " FlightTimeMin\n", " dayOfWeek\n", " \n", @@ -2846,6 +739,7 @@ " 13059.000000\n", " 13059.000000\n", " 13059.000000\n", + " 13059.000000\n", " \n", " \n", " mean\n", @@ -2853,6 +747,7 @@ " 7092.142457\n", " 4406.853010\n", " 47.335171\n", + " 8.518797\n", " 511.127842\n", " 2.835975\n", " \n", @@ -2862,6 +757,7 @@ " 4578.263193\n", " 2844.800855\n", " 96.743006\n", + " 5.579019\n", " 334.741135\n", " 1.939365\n", " \n", @@ -2873,14 +769,16 @@ " 0.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " \n", " \n", " 25%\n", " 410.008918\n", " 2470.545974\n", - " 1535.126118\n", + " 1535.293728\n", " 0.000000\n", - " 251.834931\n", + " 4.196217\n", + " 251.938710\n", " 1.000000\n", " \n", " \n", @@ -2889,17 +787,19 @@ " 7612.072403\n", " 4729.922470\n", " 0.000000\n", + " 8.385816\n", " 503.148975\n", " 3.000000\n", " \n", " \n", " 75%\n", - " 842.262193\n", - " 9735.210895\n", - " 6049.600045\n", - " 12.521186\n", - " 720.505705\n", - " 4.109848\n", + " 842.254990\n", + " 9735.660463\n", + " 6049.459005\n", + " 13.112245\n", + " 12.009396\n", + " 720.534532\n", + " 4.000000\n", " \n", " \n", " max\n", @@ -2907,6 +807,7 @@ " 19881.482422\n", " 12353.780273\n", " 360.000000\n", + " 31.715034\n", " 1902.901978\n", " 6.000000\n", " \n", @@ -2920,20 +821,20 @@ "mean 628.253689 7092.142457 4406.853010 47.335171 \n", "std 266.386661 4578.263193 2844.800855 96.743006 \n", "min 100.020531 0.000000 0.000000 0.000000 \n", - "25% 410.008918 2470.545974 1535.126118 0.000000 \n", + "25% 410.008918 2470.545974 1535.293728 0.000000 \n", "50% 640.362667 7612.072403 4729.922470 0.000000 \n", - "75% 842.262193 9735.210895 6049.600045 12.521186 \n", + "75% 842.254990 9735.660463 6049.459005 13.112245 \n", "max 1199.729004 19881.482422 12353.780273 360.000000 \n", "\n", - " FlightTimeMin dayOfWeek \n", - "count 13059.000000 13059.000000 \n", - "mean 511.127842 2.835975 \n", - "std 334.741135 1.939365 \n", - "min 0.000000 0.000000 \n", - "25% 251.834931 1.000000 \n", - "50% 503.148975 3.000000 \n", - "75% 720.505705 4.109848 \n", - "max 1902.901978 6.000000 " + " FlightTimeHour FlightTimeMin dayOfWeek \n", + "count 13059.000000 13059.000000 13059.000000 \n", + "mean 8.518797 511.127842 2.835975 \n", + "std 5.579019 334.741135 1.939365 \n", + "min 0.000000 0.000000 0.000000 \n", + "25% 4.196217 251.938710 1.000000 \n", + "50% 8.385816 503.148975 3.000000 \n", + "75% 12.009396 720.534532 4.000000 \n", + "max 31.715034 1902.901978 6.000000 " ] }, "execution_count": 8, @@ -2947,20 +848,49 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(13059, 27)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from eland.tests.frame.common import TestData" + "ed_df.shape" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID',\n", + " 'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion',\n", + " 'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay',\n", + " 'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour',\n", + " 'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',\n", + " 'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather',\n", + " 'dayOfWeek', 'timestamp'],\n", + " dtype='object')" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "td = TestData()" + "ed_df.columns" ] }, { @@ -2971,12 +901,34 @@ { "data": { "text/plain": [ - "Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8,\n", - " 9,\n", - " ...\n", - " 13049, 13050, 13051, 13052, 13053, 13054, 13055, 13056, 13057,\n", - " 13058],\n", - " dtype='int64', length=13059)" + "AvgTicketPrice float64\n", + "Cancelled bool\n", + "Carrier object\n", + "Dest object\n", + "DestAirportID object\n", + "DestCityName object\n", + "DestCountry object\n", + "DestLocation object\n", + "DestRegion object\n", + "DestWeather object\n", + "DistanceKilometers float64\n", + "DistanceMiles float64\n", + "FlightDelay bool\n", + "FlightDelayMin int64\n", + "FlightDelayType object\n", + "FlightNum object\n", + "FlightTimeHour float64\n", + "FlightTimeMin float64\n", + "Origin object\n", + "OriginAirportID object\n", + "OriginCityName object\n", + "OriginCountry object\n", + "OriginLocation object\n", + "OriginRegion object\n", + "OriginWeather object\n", + "dayOfWeek int64\n", + "timestamp datetime64[ns]\n", + "dtype: object" ] }, "execution_count": 17, @@ -2985,20 +937,20 @@ } ], "source": [ - "td.pd_flights().index" + "ed_df.dtypes" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "Int64Index: 13059 entries, 0 to 13058\n", + "\n", + "RangeIndex: 13059 entries, 0 to 13058\n", "Data columns (total 27 columns):\n", "AvgTicketPrice 13059 non-null float64\n", "Cancelled 13059 non-null bool\n", @@ -3028,732 +980,12 @@ "dayOfWeek 13059 non-null int64\n", "timestamp 13059 non-null datetime64[ns]\n", "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n", - "memory usage: 2.6+ MB\n" + "memory usage: 56.0 bytes\n" ] } ], "source": [ - "td.pd_flights().info()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "df = pd.DataFrame(np.random.randn(100000, 4))" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 100000 entries, 0 to 99999\n", - "Data columns (total 4 columns):\n", - "0 100000 non-null float64\n", - "1 100000 non-null float64\n", - "2 100000 non-null float64\n", - "3 100000 non-null float64\n", - "dtypes: float64(4)\n", - "memory usage: 3.1 MB\n" - ] - } - ], - "source": [ - "df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "df.iloc[:999998] = np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 100000 entries, 0 to 99999\n", - "Data columns (total 4 columns):\n", - "0 0 non-null float64\n", - "1 0 non-null float64\n", - "2 0 non-null float64\n", - "3 0 non-null float64\n", - "dtypes: float64(4)\n", - "memory usage: 3.1 MB\n" - ] - } - ], - "source": [ - "df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "sdf = df.to_sparse()" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index 80\n", - "0 800000\n", - "1 800000\n", - "2 800000\n", - "3 800000\n", - "dtype: int64" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.memory_usage()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "pandas.core.sparse.frame.SparseDataFrame" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(sdf)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "cols = ['string', 'int', 'float', 'object']\n", - "\n", - "string_series = pd.SparseSeries(['a', 'b', 'c'])\n", - "int_series = pd.SparseSeries([1, 2, 3])\n", - "float_series = pd.SparseSeries([1.1, 1.2, 1.3])\n", - "object_series = pd.SparseSeries([[], {}, set()])\n", - "sdf = pd.SparseDataFrame({\n", - " 'string': string_series,\n", - " 'int': int_series,\n", - " 'float': float_series,\n", - " 'object': object_series,\n", - "})\n", - "sdf = sdf[cols]" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
stringintfloatobject
0a11.1[]
1b21.2{}
2c31.3{}
\n", - "
" - ], - "text/plain": [ - " string int float object\n", - "0 a 1 1.1 []\n", - "1 b 2 1.2 {}\n", - "2 c 3 1.3 {}" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sdf" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [], - "source": [ - "from pandas._libs.sparse import IntIndex\n", - "\n", - "arr = pd.SparseArray(data=['a', 'b', 'c'], sparse_index=IntIndex(10, [5, 6, 8]).to_block_index(), \n", - " dtype='Sparse[object, nan]')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[nan, nan, nan, nan, nan, a, b, nan, c, nan]\n", - "Fill: nan\n", - "BlockIndex\n", - "Block locations: array([5, 8], dtype=int32)\n", - "Block lengths: array([2, 1], dtype=int32)\n" - ] - } - ], - "source": [ - "print(arr)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [], - "source": [ - "max_rows = 60\n", - "head_rows = max_rows / 2\n", - "\n", - "arr = pd.SparseArray(data=['a', 'b', 'c'], sparse_index=BlockIndex(1000000000,[0,10],[1,2]))" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[a, nan, nan, nan, nan, nan, nan, nan, nan, nan, b, c, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, ...]\n", - "Fill: nan\n", - "BlockIndex\n", - "Block locations: array([ 0, 10], dtype=int32)\n", - "Block lengths: array([1, 2], dtype=int32)\n" - ] - } - ], - "source": [ - "print(arr)" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'SparseArray' object has no attribute 'info'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m: 'SparseArray' object has no attribute 'info'" - ] - } - ], - "source": [ - "prin" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "ename": "AssertionError", - "evalue": "Non array-like type must have the same length as the index", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m pd.SparseArray(100, kind='block', \n\u001b[1;32m 5\u001b[0m \u001b[0msparse_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBlockIndex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m fill_value=0)\n\u001b[0m", - "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/pandas/core/arrays/sparse.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, sparse_index, index, fill_value, kind, dtype, copy)\u001b[0m\n\u001b[1;32m 666\u001b[0m raise AssertionError(\"Non array-like type {type} must \"\n\u001b[1;32m 667\u001b[0m \u001b[0;34m\"have the same length as the index\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 668\u001b[0;31m .format(type=type(sparse_values)))\n\u001b[0m\u001b[1;32m 669\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sparse_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msparse_index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 670\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sparse_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msparse_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAssertionError\u001b[0m: Non array-like type must have the same length as the index" - ] - } - ], - "source": [ - "from pandas.core.arrays.sparse import BlockIndex, IntIndex, _make_index\n", - "import numpy as np\n", - "\n", - "pd.SparseArray(100, kind='block', \n", - " sparse_index=BlockIndex(100, [8,10], [1,1]), \n", - " fill_value=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", - "Fill: 0\n", - "BlockIndex\n", - "Block locations: array([1, 5], dtype=int32)\n", - "Block lengths: array([2, 3], dtype=int32)" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.SparseArray(1, index= [None, 3, 2, 7, np.inf], kind='block',\n", - " sparse_index= BlockIndex(20, [1,5], [2,3]),\n", - " fill_value=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "range(0, 1)\n" - ] - } - ], - "source": [ - "print(range(1))" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 1]), 'b': [1, 2, 3]})" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
01.01
1NaN2
21.03
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1.0 1\n", - "1 NaN 2\n", - "2 1.0 3" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "a True\n", - "b False\n", - "dtype: bool" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.dtypes.apply(pd.api.types.is_sparse)" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 3 entries, 0 to 2\n", - "Data columns (total 2 columns):\n", - "a -4 non-null Sparse[float64, nan]\n", - "b -3 non-null int64\n", - "dtypes: Sparse[float64, nan](1), int64(1)\n", - "memory usage: 128.0 bytes\n" - ] - } - ], - "source": [ - "df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "440000.00000000006" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "400000 * 1.1" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "480000.0" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "400000 * 1.2" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "class Dog:\n", - "\n", - " kind = 'canine' # class variable shared by all instances\n", - "\n", - " def __init__(self, name):\n", - " self.name = name # instance variable unique to each instance\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "a = Dog('fred')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "b = Dog('kim')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "fred\n" - ] - } - ], - "source": [ - "print(a.name)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "buddy\n" - ] - } - ], - "source": [ - "print(b.name)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "b.name = 'buddy'" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'Dog' object has no attribute 'copy'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m: 'Dog' object has no attribute 'copy'" - ] - } - ], - "source": [ - "a = b.copy()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "buddy\n" - ] - } - ], - "source": [ - "print(a.name)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "b.name = 'tom'" + "ed_df.info()" ] }, { @@ -3762,74 +994,167 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "tom\n" - ] + "data": { + "text/plain": [ + "AvgTicketPrice 13059\n", + "Cancelled 13059\n", + "Carrier 13059\n", + "Dest 13059\n", + "DestAirportID 13059\n", + "DestCityName 13059\n", + "DestCountry 13059\n", + "DestLocation 13059\n", + "DestRegion 13059\n", + "DestWeather 13059\n", + "DistanceKilometers 13059\n", + "DistanceMiles 13059\n", + "FlightDelay 13059\n", + "FlightDelayMin 13059\n", + "FlightDelayType 13059\n", + "FlightNum 13059\n", + "FlightTimeHour 13059\n", + "FlightTimeMin 13059\n", + "Origin 13059\n", + "OriginAirportID 13059\n", + "OriginCityName 13059\n", + "OriginCountry 13059\n", + "OriginLocation 13059\n", + "OriginRegion 13059\n", + "OriginWeather 13059\n", + "dayOfWeek 13059\n", + "timestamp 13059\n", + "dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(a.name)" + "ed_df.count()" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "[('a', 123), ('c', '-'), ('b', 'test')]\n", - "{'a': 123, 'b': 'test', 'c': '-'}\n" - ] - }, - { - "ename": "TypeError", - "evalue": "'zip' object is not subscriptable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msorted_pair_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict_to_sort\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mvalue_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0msorted_pair_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m: 'zip' object is not subscriptable" - ] + "data": { + "text/plain": [ + "bool 2\n", + "datetime64[ns] 1\n", + "float64 5\n", + "int64 2\n", + "object 17\n", + "dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "dict_to_sort = {'a': 123, 'b': 'test', 'c': '-'}\n", - " \n", - "dict_key = {'a': 1, 'b': 3, 'c': 2} # The order should be \"a c b\"\n", - " \n", - "# sort dict_to_sort by using dict_key\n", - "sorted_pair_list = sorted(dict_to_sort.items(), key=lambda x: dict_key.get(x[0]))\n", - " \n", - "# the list of values\n", - "print(sorted_pair_list)\n", - "print(dict_to_sort)\n", - "value_list = zip(*sorted_pair_list)[1]" + "ed_df.get_dtype_counts()" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " Carrier Dest OriginRegion\n", + "0 Kibana Airlines Sydney Kingsford Smith International Airport DE-HE\n", + "1 Logstash Airways Venice Marco Polo Airport SE-BD\n", + "2 Logstash Airways Venice Marco Polo Airport IT-34\n", + "3 Kibana Airlines Treviso-Sant'Angelo Airport IT-72\n", + "4 Kibana Airlines Xi'an Xianyang International Airport MX-DIF\n", + "5 JetBeats Genoa Cristoforo Colombo Airport CA-AB\n", + "6 JetBeats Zurich Airport CH-ZH\n", + "7 Kibana Airlines Ottawa Macdonald-Cartier International Airport IT-62\n", + "8 Kibana Airlines Rajiv Gandhi International Airport IT-25\n", + "9 Logstash Airways Treviso-Sant'Angelo Airport RU-MOS\n", + "10 JetBeats Helsinki Vantaa Airport US-NM\n", + "11 Logstash Airways Vienna International Airport IT-34\n", + "12 Logstash Airways Shanghai Pudong International Airport MX-DIF\n", + "13 Logstash Airways Ottawa Macdonald-Cartier International Airport IT-72\n", + "14 Logstash Airways Luis Munoz Marin International Airport IT-62\n", + "15 Kibana Airlines Cologne Bonn Airport SE-BD\n", + "16 Logstash Airways Venice Marco Polo Airport MX-DIF\n", + "17 ES-Air Ministro Pistarini International Airport US-OH\n", + "18 ES-Air Shanghai Pudong International Airport RU-MUR\n", + "19 JetBeats Indira Gandhi International Airport US-WY\n", + "20 JetBeats Wichita Mid Continent Airport US-PA\n", + "21 ES-Air Ottawa Macdonald-Cartier International Airport US-NJ\n", + "22 JetBeats Itami Airport DK-84\n", + "23 Logstash Airways Vienna International Airport US-WA\n", + "24 Logstash Airways Charles de Gaulle International Airport DE-BE\n", + "25 ES-Air Narita International Airport GB-ENG\n", + "26 Kibana Airlines Itami Airport FI-ES\n", + "27 JetBeats San Diego International Airport US-AZ\n", + "28 Kibana Airlines Verona Villafranca Airport SE-BD\n", + "29 Logstash Airways Zurich Airport US-OK\n", + "... ... ... ...\n", + "13029 NaN NaN NaN\n", + "13030 ES-Air Narita International Airport IT-82\n", + "13031 Kibana Airlines Narita International Airport US-KY\n", + "13032 JetBeats Wichita Mid Continent Airport US-WA\n", + "13033 Logstash Airways Sheremetyevo International Airport US-OR\n", + "13034 ES-Air El Dorado International Airport SE-BD\n", + "13035 JetBeats Turin Airport US-NC\n", + "13036 Kibana Airlines Winnipeg / James Armstrong Richardson Internat... IT-34\n", + "13037 ES-Air Chengdu Shuangliu International Airport IT-82\n", + "13038 JetBeats San Diego International Airport US-NY\n", + "13039 ES-Air Ministro Pistarini International Airport SE-BD\n", + "13040 JetBeats Vienna International Airport CA-ON\n", + "13041 ES-Air Louisville International Standiford Field IT-25\n", + "13042 Logstash Airways Ottawa Macdonald-Cartier International Airport GB-ENG\n", + "13043 Kibana Airlines Shanghai Pudong International Airport SE-BD\n", + "13044 Kibana Airlines Zurich Airport US-FL\n", + "13045 Kibana Airlines London Heathrow Airport SE-BD\n", + "13046 Kibana Airlines Ottawa Macdonald-Cartier International Airport MX-DIF\n", + "13047 ES-Air Manchester Airport SE-BD\n", + "13048 Logstash Airways Comodoro Arturo Merino Benitez International A... SE-BD\n", + "13049 Kibana Airlines Olenya Air Base SE-BD\n", + "13050 Logstash Airways Sheremetyevo International Airport IT-52\n", + "13051 JetBeats Indira Gandhi International Airport IT-88\n", + "13052 Logstash Airways Stockholm-Arlanda Airport GB-ENG\n", + "13053 Kibana Airlines Rochester International Airport SE-BD\n", + "13054 Logstash Airways New Chitose Airport SE-BD\n", + "13055 Logstash Airways San Antonio International Airport SE-BD\n", + "13056 JetBeats Zurich Airport CH-ZH\n", + "13057 ES-Air Vienna International Airport RU-AMU\n", + "13058 Kibana Airlines Rajiv Gandhi International Airport SE-BD\n", + "\n", + "[13059 rows x 3 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ed_df['Carrier', 'Dest', 'OriginRegion']" + ] + }, + { + "cell_type": "code", + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "a = {'ground': 'obj1', 'floor 1': 'obj2', 'basement': 'obj3'}\n", - "a_list = ['floor 1', 'ground', 'basement']\n", - "index_map = {v: i for i, v in enumerate(a_list)}\n", - "b = sorted(a.items(), key=lambda pair: index_map[pair[0]])\n", - "\n", - "import pandas as pd\n", - "df = pd.DataFrame(data=[a])" + "ed_df2 = ed_df['Carrier', 'Dest', 'OriginRegion']" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -3853,144 +1178,135 @@ " \n", " \n", " \n", - " basement\n", - " floor 1\n", - " ground\n", + " Carrier\n", + " Dest\n", + " OriginRegion\n", " \n", " \n", " \n", " \n", " 0\n", - " obj3\n", - " obj2\n", - " obj1\n", + " Kibana Airlines\n", + " Sydney Kingsford Smith International Airport\n", + " DE-HE\n", + " \n", + " \n", + " 1\n", + " Logstash Airways\n", + " Venice Marco Polo Airport\n", + " SE-BD\n", + " \n", + " \n", + " 2\n", + " Logstash Airways\n", + " Venice Marco Polo Airport\n", + " IT-34\n", + " \n", + " \n", + " 3\n", + " Kibana Airlines\n", + " Treviso-Sant'Angelo Airport\n", + " IT-72\n", + " \n", + " \n", + " 4\n", + " Kibana Airlines\n", + " Xi'an Xianyang International Airport\n", + " MX-DIF\n", + " \n", + " \n", + " 5\n", + " JetBeats\n", + " Genoa Cristoforo Colombo Airport\n", + " CA-AB\n", + " \n", + " \n", + " 6\n", + " JetBeats\n", + " Zurich Airport\n", + " CH-ZH\n", + " \n", + " \n", + " 7\n", + " Kibana Airlines\n", + " Ottawa Macdonald-Cartier International Airport\n", + " IT-62\n", + " \n", + " \n", + " 8\n", + " Kibana Airlines\n", + " Rajiv Gandhi International Airport\n", + " IT-25\n", + " \n", + " \n", + " 9\n", + " Logstash Airways\n", + " Treviso-Sant'Angelo Airport\n", + " RU-MOS\n", " \n", " \n", "\n", "" ], "text/plain": [ - " basement floor 1 ground\n", - "0 obj3 obj2 obj1" + " Carrier Dest \\\n", + "0 Kibana Airlines Sydney Kingsford Smith International Airport \n", + "1 Logstash Airways Venice Marco Polo Airport \n", + "2 Logstash Airways Venice Marco Polo Airport \n", + "3 Kibana Airlines Treviso-Sant'Angelo Airport \n", + "4 Kibana Airlines Xi'an Xianyang International Airport \n", + "5 JetBeats Genoa Cristoforo Colombo Airport \n", + "6 JetBeats Zurich Airport \n", + "7 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n", + "8 Kibana Airlines Rajiv Gandhi International Airport \n", + "9 Logstash Airways Treviso-Sant'Angelo Airport \n", + "\n", + " OriginRegion \n", + "0 DE-HE \n", + "1 SE-BD \n", + "2 IT-34 \n", + "3 IT-72 \n", + "4 MX-DIF \n", + "5 CA-AB \n", + "6 CH-ZH \n", + "7 IT-62 \n", + "8 IT-25 \n", + "9 RU-MOS " ] }, - "execution_count": 31, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df" + "ed_df2.head(10)" ] }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "cols = ['ground', 'floor 1','basement']\n", - "df = df[cols] " - ] - }, - { - "cell_type": "code", - "execution_count": 34, + "execution_count": 20, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
groundfloor 1basement
0obj1obj2obj3
\n", - "
" - ], - "text/plain": [ - " ground floor 1 basement\n", - "0 obj1 obj2 obj3" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 13059 entries, 0 to 13058\n", + "Data columns (total 3 columns):\n", + "Carrier 13059 non-null object\n", + "Dest 13059 non-null object\n", + "OriginRegion 13059 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 56.0 bytes\n" + ] } ], "source": [ - "df" + "ed_df2.info()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null,