From 9030f84f4c8ba1836f403c44c8d5a507771daad0 Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Tue, 25 Jun 2019 08:41:25 +0000 Subject: [PATCH] Added __getitem__ Implementation copies DataFrame and changes underlying mappings object. --- eland/client.py | 2 + eland/frame.py | 63 +- eland/mappings.py | 63 +- eland/tests/client/test_mappings_pytest.py | 23 + eland/tests/frame/common.py | 2 + eland/tests/frame/test_indexing_pytest.py | 37 + eland/tests/test.ipynb | 3120 +++++++++++++++++++- eland/utils.py | 4 +- 8 files changed, 3267 insertions(+), 47 deletions(-) diff --git a/eland/client.py b/eland/client.py index 5359e15..aa1a6a1 100644 --- a/eland/client.py +++ b/eland/client.py @@ -7,6 +7,8 @@ class Client(): def __init__(self, es=None): if isinstance(es, Elasticsearch): self.es = es + elif isinstance(es, Client): + self.es = es.es else: self.es = Elasticsearch(es) diff --git a/eland/frame.py b/eland/frame.py index 6631781..6e42e0e 100644 --- a/eland/frame.py +++ b/eland/frame.py @@ -44,6 +44,9 @@ class DataFrame(): index_pattern : str An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*). + operations: list of operation + A list of Elasticsearch analytics operations e.g. filter, aggregations etc. + See Also -------- @@ -69,13 +72,26 @@ class DataFrame(): object is created, the object is not rebuilt and so inconsistencies can occur. """ - def __init__(self, client, index_pattern): + def __init__(self, + client, + index_pattern, + mappings=None, + operations=None): self.client = ed.Client(client) self.index_pattern = index_pattern # Get and persist mappings, this allows us to correctly # map returned types from Elasticsearch to pandas datatypes - self.mappings = ed.Mappings(self.client, self.index_pattern) + if mappings is None: + self.mappings = ed.Mappings(self.client, self.index_pattern) + else: + self.mappings = mappings + + # Initialise a list of 'operations' + # these are filters + self.operations = [] + if operations is not None: + self.operations.extend(operations) def _es_results_to_pandas(self, results): """ @@ -174,7 +190,7 @@ class DataFrame(): is_source_field = False pd_dtype = 'object' else: - is_source_field, pd_dtype = self.mappings.is_source_field(name[:-1]) + is_source_field, pd_dtype = self.mappings.source_field_pd_dtype(name[:-1]) if not is_source_field and type(x) is dict: for a in x: @@ -182,7 +198,7 @@ class DataFrame(): elif not is_source_field and type(x) is list: for a in x: flatten(a, name) - else: + elif is_source_field == True: # only print source fields from mappings (TODO - not so efficient for large number of fields and filtered mapping) field_name = name[:-1] # Coerce types - for now just datetime @@ -213,6 +229,19 @@ class DataFrame(): # Create pandas DataFrame df = pd.DataFrame(data=rows) + # _source may not contain all columns in the mapping + # therefore, fill in missing columns + # (note this returns self.columns NOT IN df.columns) + missing_columns = list(set(self.columns) - set(df.columns)) + + for missing in missing_columns: + is_source_field, pd_dtype = self.mappings.source_field_pd_dtype(missing) + df[missing] = None + df[missing].astype(pd_dtype) + + # Sort columns in mapping order + df = df[self.columns] + return df def head(self, n=5): @@ -266,7 +295,7 @@ class DataFrame(): 1 - number of columns """ num_rows = len(self) - num_columns = self.columns + num_columns = len(self.columns) return num_rows, num_columns @@ -275,15 +304,28 @@ class DataFrame(): return self.mappings.source_fields() def __getitem__(self, item): + # df['a'] -> item == str + # df['a', 'b'] -> item == (str, str) tuple + columns = [] if isinstance(item, str): - if item not in self.mappings.is_source_field(item): + if not self.mappings.is_source_field(item): raise TypeError('Column does not exist: [{0}]'.format(item)) - return Column(item) + columns.append(item) + elif isinstance(item, tuple): + columns.extend(list(item)) + + if len(columns) > 0: + # Return new eland.DataFrame with modified mappings + mappings = ed.Mappings(mappings=self.mappings, columns=columns) + + return DataFrame(self.client, self.index_pattern, mappings=mappings) + """ elif isinstance(item, BooleanFilter): self._filter = item.build() return self else: raise TypeError('Unsupported expr: [{0}]'.format(item)) + """ def __len__(self): """ @@ -295,6 +337,10 @@ class DataFrame(): # Rendering Methods def __repr__(self): + return self.to_string() + + + def to_string(self): # The return for this is display.options.max_rows max_rows = 60 head_rows = max_rows / 2 @@ -310,6 +356,8 @@ class DataFrame(): # NOTE: this sparse DataFrame can't be used as the middle # section is all NaNs. However, it gives us potentially a nice way # to use the pandas IO methods. + # TODO - if data is indexed by time series, return top/bottom of + # time series, rather than first max_rows items sdf = pd.DataFrame({item: pd.SparseArray(data=head[item], sparse_index= BlockIndex( @@ -320,4 +368,3 @@ class DataFrame(): return sdf.to_string(max_rows=max_rows) return head.to_string(max_rows=max_rows) - diff --git a/eland/mappings.py b/eland/mappings.py index a884307..41abcc9 100644 --- a/eland/mappings.py +++ b/eland/mappings.py @@ -26,7 +26,11 @@ class Mappings(): origin_location.lat True text object True False """ - def __init__(self, client, index_pattern): + def __init__(self, + client=None, + index_pattern=None, + mappings=None, + columns=None): """ Parameters ---------- @@ -35,29 +39,38 @@ class Mappings(): index_pattern: str Elasticsearch index pattern + + Copy constructor arguments + + mappings: Mappings + Object to copy + + columns: list of str + Columns to copy """ - # persist index_pattern for debugging - self.index_pattern = index_pattern + if (client is not None) and (index_pattern is not None): + get_mapping = client.indices().get_mapping(index=index_pattern) - mappings = client.indices().get_mapping(index=index_pattern) + # Get all fields (including all nested) and then field_caps + # for these names (fields=* doesn't appear to work effectively...) + all_fields = Mappings._extract_fields_from_mapping(get_mapping) + all_fields_caps = client.field_caps(index=index_pattern, fields=list(all_fields.keys())) - # Get all fields (including all nested) and then field_caps - # for these names (fields=* doesn't appear to work effectively...) - all_fields = Mappings._extract_fields_from_mapping(mappings) - all_fields_caps = client.field_caps(index=index_pattern, fields=list(all_fields.keys())) + # Get top level (not sub-field multifield) mappings + source_fields = Mappings._extract_fields_from_mapping(get_mapping, source_only=True) - # Get top level (not sub-field multifield) mappings - source_fields = Mappings._extract_fields_from_mapping(mappings, source_only=True) - - # Populate capability matrix of fields - # field_name, es_dtype, pd_dtype, is_searchable, is_aggregtable, is_source - self.mappings_capabilities = Mappings._create_capability_matrix(all_fields, source_fields, all_fields_caps) + # Populate capability matrix of fields + # field_name, es_dtype, pd_dtype, is_searchable, is_aggregtable, is_source + self.mappings_capabilities = Mappings._create_capability_matrix(all_fields, source_fields, all_fields_caps) + else: + # Copy object and restrict mapping columns + self.mappings_capabilities = mappings.mappings_capabilities.loc[columns] # Cache source field types for efficient lookup # (this massively improves performance of DataFrame.flatten) self.source_field_pd_dtypes = {} - for field_name in source_fields: + for field_name in self.source_fields(): pd_dtype = self.mappings_capabilities.loc[field_name]['pd_dtype'] self.source_field_pd_dtypes[field_name] = pd_dtype @@ -265,7 +278,7 @@ class Mappings(): def pd_dtype """ - def is_source_field(self, field_name): + def source_field_pd_dtype(self, field_name): """ Parameters ---------- @@ -287,6 +300,24 @@ class Mappings(): return is_source_field, pd_dtype + def is_source_field(self, field_name): + """ + Parameters + ---------- + field_name: str + + Returns + ------- + is_source_field: bool + Is this field name a top-level source field? + """ + is_source_field = False + + if field_name in self.source_field_pd_dtypes: + is_source_field = True + + return is_source_field + def numeric_source_fields(self): """ Returns diff --git a/eland/tests/client/test_mappings_pytest.py b/eland/tests/client/test_mappings_pytest.py index 34aac5b..133da72 100644 --- a/eland/tests/client/test_mappings_pytest.py +++ b/eland/tests/client/test_mappings_pytest.py @@ -20,5 +20,28 @@ class TestMapping(): assert mappings.count_source_fields() == TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT + def test_copy(self): + mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) + assert mappings.all_fields() == TEST_MAPPING1_EXPECTED_DF.index.tolist() + assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings.mappings_capabilities['es_dtype'])) + assert mappings.count_source_fields() == TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT + # Pick 1 source field + columns = ['dest_location'] + mappings_copy1 = ed.Mappings(mappings=mappings, columns=columns) + + assert mappings_copy1.all_fields() == columns + assert mappings_copy1.count_source_fields() == len(columns) + + # Pick 3 source fields (out of order) + columns = ['dest_location', 'city', 'user_name'] + mappings_copy2 = ed.Mappings(mappings=mappings, columns=columns) + + assert mappings_copy2.all_fields() == columns + assert mappings_copy2.count_source_fields() == len(columns) + + # Check original is still ok + assert mappings.all_fields() == TEST_MAPPING1_EXPECTED_DF.index.tolist() + assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings.mappings_capabilities['es_dtype'])) + assert mappings.count_source_fields() == TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT diff --git a/eland/tests/frame/common.py b/eland/tests/frame/common.py index 6b9068c..da3d5ce 100644 --- a/eland/tests/frame/common.py +++ b/eland/tests/frame/common.py @@ -23,6 +23,8 @@ _pd_ecommerce['order_date'] = \ pd.to_datetime(_pd_ecommerce['order_date']) _pd_ecommerce['products.created_on'] = \ _pd_ecommerce['products.created_on'].apply(lambda x: pd.to_datetime(x)) +_pd_ecommerce.insert(2, 'customer_birth_date', None) +_pd_ecommerce['customer_birth_date'].astype('datetime64') _ed_ecommerce = ed.read_es(ELASTICSEARCH_HOST, ECOMMERCE_INDEX_NAME) class TestData: diff --git a/eland/tests/frame/test_indexing_pytest.py b/eland/tests/frame/test_indexing_pytest.py index 9548f13..045c9be 100644 --- a/eland/tests/frame/test_indexing_pytest.py +++ b/eland/tests/frame/test_indexing_pytest.py @@ -59,3 +59,40 @@ class TestDataFrameIndexing(TestData): def test_to_string(self): print(self.ed_flights()) + def test_get_item(self): + # Test 1 attribute + ed_carrier = self.ed_flights()['Carrier'] + + carrier_head = ed_carrier.head(5) + + carrier_head_expected = pd.DataFrame( + {'Carrier':[ + 'Kibana Airlines', + 'Logstash Airways', + 'Logstash Airways', + 'Kibana Airlines', + 'Kibana Airlines' + ]}) + + assert_frame_equal(carrier_head_expected, carrier_head) + + #carrier_to_string = ed_carrier.to_string() + #print(carrier_to_string) + + # Test multiple attributes (out of order) + ed_3_items = self.ed_flights()['Dest','Carrier','FlightDelay'] + + ed_3_items_head = ed_3_items.head(5) + + ed_3_items_expected = pd.DataFrame(dict( + Dest={0: 'Sydney Kingsford Smith International Airport', 1: 'Venice Marco Polo Airport', + 2: 'Venice Marco Polo Airport', 3: "Treviso-Sant'Angelo Airport", + 4: "Xi'an Xianyang International Airport"}, + Carrier={0: 'Kibana Airlines', 1: 'Logstash Airways', 2: 'Logstash Airways', 3: 'Kibana Airlines', + 4: 'Kibana Airlines'}, + FlightDelay={0: False, 1: False, 2: False, 3: True, 4: False})) + + assert_frame_equal(ed_3_items_expected, ed_3_items_head) + + #ed_3_items_to_string = ed_3_items.to_string() + #print(ed_3_items_to_string) diff --git a/eland/tests/test.ipynb b/eland/tests/test.ipynb index 85de534..8f8e298 100644 --- a/eland/tests/test.ipynb +++ b/eland/tests/test.ipynb @@ -273,6 +273,2114 @@ "pd_df.head()" ] }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "pd_sdf = pd_df.to_sparse()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AvgTicketPriceCancelledCarrierDestDestAirportIDDestCityNameDestCountryDestLocationDestRegionDestWeather...FlightTimeMinOriginOriginAirportIDOriginCityNameOriginCountryOriginLocationOriginRegionOriginWeatherdayOfWeektimestamp
0841.265642FalseKibana AirlinesSydney Kingsford Smith International AirportSYDSydneyAU{'lat': '-33.94609833', 'lon': '151.177002'}SE-BDRain...1030.770416Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HESunny02018-01-01 00:00:00
1882.982662FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...464.389481Cape Town International AirportCPTCape TownZA{'lat': '-33.96480179', 'lon': '18.60169983'}SE-BDClear02018-01-01 18:27:00
2190.636904FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy...0.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 17:11:14
3181.694216TrueKibana AirlinesTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Clear...222.749059Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Thunder & Lightning02018-01-01 10:33:28
4730.041778FalseKibana AirlinesXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...785.779071Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFDamaging Wind02018-01-01 05:13:00
5418.152089FalseJetBeatsGenoa Cristoforo Colombo AirportGE01GenovaIT{'lat': '44.4133', 'lon': '8.8375'}IT-42Thunder & Lightning...393.590441Edmonton International AirportCYEGEdmontonCA{'lat': '53.30970001', 'lon': '-113.5800018'}CA-ABRain02018-01-01 01:43:03
6180.246816FalseJetBeatsZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHHail...300.000000Zurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHClear02018-01-01 13:49:53
7585.184310FalseKibana AirlinesOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONClear...614.942480Ciampino___G. B. Pastine International AirportRM12RomeIT{'lat': '41.7994', 'lon': '12.5949'}IT-62Thunder & Lightning02018-01-01 04:54:59
8960.869736TrueKibana AirlinesRajiv Gandhi International AirportHYDHyderabadIN{'lat': '17.23131752', 'lon': '78.42985535'}SE-BDCloudy...602.030591Milano Linate AirportMI11MilanIT{'lat': '45.445099', 'lon': '9.27674'}IT-25Heavy Fog02018-01-01 12:09:35
9296.877773FalseLogstash AirwaysTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Rain...174.822216Sheremetyevo International AirportSVOMoscowRU{'lat': '55.972599', 'lon': '37.4146'}RU-MOSCloudy02018-01-01 12:09:35
10906.437948FalseJetBeatsHelsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESRain...503.045170Albuquerque International Sunport AirportABQAlbuquerqueUS{'lat': '35.040199', 'lon': '-106.609001'}US-NMRain02018-01-01 22:06:14
11704.463771FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Cloudy...36.075018Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 11:52:34
12922.499077TrueLogstash AirwaysShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDClear...679.768391Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFHeavy Fog02018-01-01 02:13:46
13374.959276FalseLogstash AirwaysOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONRain...330.418282Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Rain02018-01-01 14:21:13
14552.917371FalseLogstash AirwaysLuis Munoz Marin International AirportSJUSan JuanPR{'lat': '18.43939972', 'lon': '-66.00180054'}PR-U-AClear...407.145031Ciampino___G. B. Pastine International AirportRM12RomeIT{'lat': '41.7994', 'lon': '12.5949'}IT-62Cloudy02018-01-01 17:42:53
15566.487557TrueKibana AirlinesCologne Bonn AirportCGNCologneDE{'lat': '50.86589813', 'lon': '7.142739773'}DE-NWSunny...656.712658Chengdu Shuangliu International AirportCTUChengduCN{'lat': '30.57850075', 'lon': '103.9469986'}SE-BDThunder & Lightning02018-01-01 19:55:32
16989.952787TrueLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Damaging Wind...773.030334Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFThunder & Lightning02018-01-01 07:49:27
17569.613255FalseES-AirMinistro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}SE-BDCloudy...704.716920Cleveland Hopkins International AirportCLEClevelandUS{'lat': '41.4117012', 'lon': '-81.84980011'}US-OHRain02018-01-01 01:30:47
18277.429707FalseES-AirShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDClear...355.957996Olenya Air BaseXLMOOlenegorskRU{'lat': '68.15180206', 'lon': '33.46390152'}RU-MURHail02018-01-01 07:58:17
19772.100846FalseJetBeatsIndira Gandhi International AirportDELNew DelhiIN{'lat': '28.5665', 'lon': '77.103104'}SE-BDClear...875.114675Casper-Natrona County International AirportCPRCasperUS{'lat': '42.90800095', 'lon': '-106.4639969'}US-WYCloudy02018-01-01 00:02:06
20167.599922FalseJetBeatsWichita Mid Continent AirportICTWichitaUS{'lat': '37.64989853', 'lon': '-97.43309784'}US-KSClear...373.966883Erie International Tom Ridge FieldERIErieUS{'lat': '42.08312701', 'lon': '-80.17386675'}US-PACloudy02018-01-01 01:08:20
21253.210065FalseES-AirOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONHail...130.667700Newark Liberty International AirportEWRNewarkUS{'lat': '40.69250107', 'lon': '-74.16870117'}US-NJClear02018-01-01 01:08:20
22917.247620FalseJetBeatsItami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDDamaging Wind...574.495310Copenhagen Kastrup AirportCPHCopenhagenDK{'lat': '55.61790085', 'lon': '12.65600014'}DK-84Sunny02018-01-01 07:48:35
23451.591176FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Heavy Fog...579.728943Seattle Tacoma International AirportSEASeattleUS{'lat': '47.44900131', 'lon': '-122.3089981'}US-WAHeavy Fog02018-01-01 18:57:21
24307.067201FalseLogstash AirwaysCharles de Gaulle International AirportCDGParisFR{'lat': '49.01279831', 'lon': '2.549999952'}FR-JClear...50.157229Berlin-Tegel AirportTXLBerlinDE{'lat': '52.5597', 'lon': '13.2877'}DE-BERain02018-01-01 13:18:25
25268.241596FalseES-AirNarita International AirportNRTTokyoJP{'lat': '35.76470184', 'lon': '140.3860016'}SE-BDRain...527.567422Manchester AirportMANManchesterGB{'lat': '53.35369873', 'lon': '-2.274950027'}GB-ENGThunder & Lightning02018-01-01 08:20:35
26975.812632TrueKibana AirlinesItami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDHail...386.259764Helsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESRain02018-01-01 15:38:32
27134.214546FalseJetBeatsSan Diego International AirportSANSan DiegoUS{'lat': '32.73360062', 'lon': '-117.1900024'}US-CAClear...24.479650Phoenix Sky Harbor International AirportPHXPhoenixUS{'lat': '33.43429947', 'lon': '-112.012001'}US-AZClear02018-01-01 03:08:45
28988.897564FalseKibana AirlinesVerona Villafranca AirportVR10VeronaIT{'lat': '45.395699', 'lon': '10.8885'}IT-34Sunny...568.351033New Chitose AirportCTSChitose / TomakomaiJP{'lat': '42.77519989', 'lon': '141.6920013'}SE-BDDamaging Wind02018-01-01 01:16:59
29511.067220FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHRain...425.889194Tulsa International AirportTULTulsaUS{'lat': '36.19839859', 'lon': '-95.88809967'}US-OKRain02018-01-01 18:00:59
..................................................................
13029795.905278FalseKibana AirlinesMalpensa International AirportMI12MilanIT{'lat': '45.6306', 'lon': '8.72811'}IT-25Sunny...534.375826Itami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDSunny62018-02-11 20:10:13
13030863.388068FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDDamaging Wind...141.172633Tokyo Haneda International AirportHNDTokyoJP{'lat': '35.552299', 'lon': '139.779999'}SE-BDClear62018-02-11 18:59:53
13031575.183008FalseJetBeatsSavannah Hilton Head International AirportSAVSavannahUS{'lat': '32.12760162', 'lon': '-81.20210266'}US-GAThunder & Lightning...1113.137060OR Tambo International AirportJNBJohannesburgZA{'lat': '-26.1392', 'lon': '28.246'}SE-BDHail62018-02-11 00:57:48
13032817.368952FalseJetBeatsSyracuse Hancock International AirportSYRSyracuseUS{'lat': '43.11119843', 'lon': '-76.10630035'}US-NYRain...714.964864El Dorado International AirportBOGBogotaCO{'lat': '4.70159', 'lon': '-74.1469'}CO-CUNThunder & Lightning62018-02-11 12:02:49
13033579.582455FalseES-AirTampa International AirportTPATampaUS{'lat': '27.97550011', 'lon': '-82.53320313'}US-FLRain...234.929046Jorge Chavez International AirportLIMLimaPE{'lat': '-12.0219', 'lon': '-77.114304'}SE-BDThunder & Lightning62018-02-11 02:07:40
130341004.916638FalseJetBeatsOlenya Air BaseXLMOOlenegorskRU{'lat': '68.15180206', 'lon': '33.46390152'}RU-MURClear...526.895776Gimpo International AirportGMPSeoulKR{'lat': '37.5583', 'lon': '126.791'}SE-BDSunny62018-02-11 00:35:04
13035357.562842TrueLogstash AirwaysShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDThunder & Lightning...0.000000Shanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDThunder & Lightning62018-02-11 11:19:12
13036429.580539FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...150.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy62018-02-11 15:07:11
13037729.788171TrueES-AirVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Rain...691.944839Ukrainka Air BaseXHBUBelogorskRU{'lat': '51.169997', 'lon': '128.445007'}RU-AMUDamaging Wind62018-02-11 10:24:42
13038564.897695FalseES-AirPisa International AirportPI05PisaIT{'lat': '43.683899', 'lon': '10.3927'}IT-52Heavy Fog...567.387339OR Tambo International AirportJNBJohannesburgZA{'lat': '-26.1392', 'lon': '28.246'}SE-BDDamaging Wind62018-02-11 00:42:06
130391014.052787FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Thunder & Lightning...690.092327Montreal / Pierre Elliott Trudeau Internationa...YULMontrealCA{'lat': '45.47060013', 'lon': '-73.74079895'}CA-QCThunder & Lightning62018-02-11 10:56:31
13040455.243843FalseES-AirLondon Luton AirportLTNLondonGB{'lat': '51.87469864', 'lon': '-0.368333012'}GB-ENGCloudy...3.028293London Heathrow AirportLHRLondonGB{'lat': '51.4706', 'lon': '-0.461941'}GB-ENGClear62018-02-11 00:39:37
13041611.370232FalseLogstash AirwaysJorge Chavez International AirportLIMLimaPE{'lat': '-12.0219', 'lon': '-77.114304'}SE-BDSunny...338.875531Casper-Natrona County International AirportCPRCasperUS{'lat': '42.90800095', 'lon': '-106.4639969'}US-WYRain62018-02-11 10:24:30
13042595.961285FalseJetBeatsOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONClear...375.129587Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HEClear62018-02-11 09:02:07
13043782.747648FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...156.858481Tokyo Haneda International AirportHNDTokyoJP{'lat': '35.552299', 'lon': '139.779999'}SE-BDThunder & Lightning62018-02-11 04:45:06
13044891.117221FalseJetBeatsWinnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBClear...354.106457Vienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Thunder & Lightning62018-02-11 00:51:14
13045587.169921FalseLogstash AirwaysBrisbane International AirportBNEBrisbaneAU{'lat': '-27.38419914', 'lon': '153.1170044'}SE-BDRain...771.305442Amsterdam Airport SchipholAMSAmsterdamNL{'lat': '52.30860138', 'lon': '4.76388979'}NL-NHSunny62018-02-11 05:41:51
13046739.132165FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDRain...542.955572Winnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBHail62018-02-11 10:02:21
13047605.191876FalseJetBeatsPortland International Jetport AirportPWMPortlandUS{'lat': '43.64619827', 'lon': '-70.30930328'}US-METhunder & Lightning...564.599857Jeju International AirportCJUJeju CityKR{'lat': '33.51129913', 'lon': '126.4929962'}SE-BDCloudy62018-02-11 15:55:10
13048361.767659TrueLogstash AirwaysDubai International AirportDXBDubaiAE{'lat': '25.25279999', 'lon': '55.36439896'}SE-BDSunny...180.000000Dubai International AirportDXBDubaiAE{'lat': '25.25279999', 'lon': '55.36439896'}SE-BDHail62018-02-11 04:11:14
13049662.306992FalseES-AirWinnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBHeavy Fog...835.954429Ministro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}AR-BSunny62018-02-11 10:13:32
13050630.779526FalseJetBeatsHelsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESSunny...451.755639Beijing Capital International AirportPEKBeijingCN{'lat': '40.08010101', 'lon': '116.5849991'}SE-BDCloudy62018-02-11 11:23:23
13051937.771279TrueLogstash AirwaysLester B. Pearson International AirportYYZTorontoCA{'lat': '43.67720032', 'lon': '-79.63059998'}CA-ONSunny...507.451571Leonardo da Vinci___Fiumicino AirportRM11RomeIT{'lat': '41.8002778', 'lon': '12.2388889'}IT-62Hail62018-02-11 01:13:50
130521085.155339FalseLogstash AirwaysMelbourne International AirportMELMelbourneAU{'lat': '-37.673302', 'lon': '144.843002'}SE-BDCloudy...1044.451122Bologna Guglielmo Marconi AirportBO08BolognaIT{'lat': '44.5354', 'lon': '11.2887'}IT-45Cloudy62018-02-11 18:35:42
130531191.964104FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHHail...728.715904Portland International Jetport AirportPWMPortlandUS{'lat': '43.64619827', 'lon': '-70.30930328'}US-MEClear62018-02-11 19:02:10
130541080.446279FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDRain...402.929088Pisa International AirportPI05PisaIT{'lat': '43.683899', 'lon': '10.3927'}IT-52Sunny62018-02-11 20:42:25
13055646.612941FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHRain...644.418029Winnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBRain62018-02-11 01:41:57
13056997.751876FalseLogstash AirwaysUkrainka Air BaseXHBUBelogorskRU{'lat': '51.169997', 'lon': '128.445007'}RU-AMURain...937.540811Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFSunny62018-02-11 04:09:27
130571102.814465FalseJetBeatsMinistro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}SE-BDHail...1697.404971Itami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDHail62018-02-11 08:28:21
13058858.144337FalseJetBeatsWashington Dulles International AirportIADWashingtonUS{'lat': '38.94449997', 'lon': '-77.45580292'}US-DCHeavy Fog...1610.761827Adelaide International AirportADLAdelaideAU{'lat': '-34.945', 'lon': '138.531006'}SE-BDRain62018-02-11 14:54:34
\n", + "

13059 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " AvgTicketPrice Cancelled Carrier \\\n", + "0 841.265642 False Kibana Airlines \n", + "1 882.982662 False Logstash Airways \n", + "2 190.636904 False Logstash Airways \n", + "3 181.694216 True Kibana Airlines \n", + "4 730.041778 False Kibana Airlines \n", + "5 418.152089 False JetBeats \n", + "6 180.246816 False JetBeats \n", + "7 585.184310 False Kibana Airlines \n", + "8 960.869736 True Kibana Airlines \n", + "9 296.877773 False Logstash Airways \n", + "10 906.437948 False JetBeats \n", + "11 704.463771 False Logstash Airways \n", + "12 922.499077 True Logstash Airways \n", + "13 374.959276 False Logstash Airways \n", + "14 552.917371 False Logstash Airways \n", + "15 566.487557 True Kibana Airlines \n", + "16 989.952787 True Logstash Airways \n", + "17 569.613255 False ES-Air \n", + "18 277.429707 False ES-Air \n", + "19 772.100846 False JetBeats \n", + "20 167.599922 False JetBeats \n", + "21 253.210065 False ES-Air \n", + "22 917.247620 False JetBeats \n", + "23 451.591176 False Logstash Airways \n", + "24 307.067201 False Logstash Airways \n", + "25 268.241596 False ES-Air \n", + "26 975.812632 True Kibana Airlines \n", + "27 134.214546 False JetBeats \n", + "28 988.897564 False Kibana Airlines \n", + "29 511.067220 False Logstash Airways \n", + "... ... ... ... \n", + "13029 795.905278 False Kibana Airlines \n", + "13030 863.388068 False Logstash Airways \n", + "13031 575.183008 False JetBeats \n", + "13032 817.368952 False JetBeats \n", + "13033 579.582455 False ES-Air \n", + "13034 1004.916638 False JetBeats \n", + "13035 357.562842 True Logstash Airways \n", + "13036 429.580539 False Logstash Airways \n", + "13037 729.788171 True ES-Air \n", + "13038 564.897695 False ES-Air \n", + "13039 1014.052787 False Logstash Airways \n", + "13040 455.243843 False ES-Air \n", + "13041 611.370232 False Logstash Airways \n", + "13042 595.961285 False JetBeats \n", + "13043 782.747648 False Logstash Airways \n", + "13044 891.117221 False JetBeats \n", + "13045 587.169921 False Logstash Airways \n", + "13046 739.132165 False Logstash Airways \n", + "13047 605.191876 False JetBeats \n", + "13048 361.767659 True Logstash Airways \n", + "13049 662.306992 False ES-Air \n", + "13050 630.779526 False JetBeats \n", + "13051 937.771279 True Logstash Airways \n", + "13052 1085.155339 False Logstash Airways \n", + "13053 1191.964104 False Logstash Airways \n", + "13054 1080.446279 False Logstash Airways \n", + "13055 646.612941 False Logstash Airways \n", + "13056 997.751876 False Logstash Airways \n", + "13057 1102.814465 False JetBeats \n", + "13058 858.144337 False JetBeats \n", + "\n", + " Dest DestAirportID \\\n", + "0 Sydney Kingsford Smith International Airport SYD \n", + "1 Venice Marco Polo Airport VE05 \n", + "2 Venice Marco Polo Airport VE05 \n", + "3 Treviso-Sant'Angelo Airport TV01 \n", + "4 Xi'an Xianyang International Airport XIY \n", + "5 Genoa Cristoforo Colombo Airport GE01 \n", + "6 Zurich Airport ZRH \n", + "7 Ottawa Macdonald-Cartier International Airport YOW \n", + "8 Rajiv Gandhi International Airport HYD \n", + "9 Treviso-Sant'Angelo Airport TV01 \n", + "10 Helsinki Vantaa Airport HEL \n", + "11 Vienna International Airport VIE \n", + "12 Shanghai Pudong International Airport PVG \n", + "13 Ottawa Macdonald-Cartier International Airport YOW \n", + "14 Luis Munoz Marin International Airport SJU \n", + "15 Cologne Bonn Airport CGN \n", + "16 Venice Marco Polo Airport VE05 \n", + "17 Ministro Pistarini International Airport EZE \n", + "18 Shanghai Pudong International Airport PVG \n", + "19 Indira Gandhi International Airport DEL \n", + "20 Wichita Mid Continent Airport ICT \n", + "21 Ottawa Macdonald-Cartier International Airport YOW \n", + "22 Itami Airport ITM \n", + "23 Vienna International Airport VIE \n", + "24 Charles de Gaulle International Airport CDG \n", + "25 Narita International Airport NRT \n", + "26 Itami Airport ITM \n", + "27 San Diego International Airport SAN \n", + "28 Verona Villafranca Airport VR10 \n", + "29 Zurich Airport ZRH \n", + "... ... ... \n", + "13029 Malpensa International Airport MI12 \n", + "13030 Xi'an Xianyang International Airport XIY \n", + "13031 Savannah Hilton Head International Airport SAV \n", + "13032 Syracuse Hancock International Airport SYR \n", + "13033 Tampa International Airport TPA \n", + "13034 Olenya Air Base XLMO \n", + "13035 Shanghai Pudong International Airport PVG \n", + "13036 Venice Marco Polo Airport VE05 \n", + "13037 Vienna International Airport VIE \n", + "13038 Pisa International Airport PI05 \n", + "13039 Vienna International Airport VIE \n", + "13040 London Luton Airport LTN \n", + "13041 Jorge Chavez International Airport LIM \n", + "13042 Ottawa Macdonald-Cartier International Airport YOW \n", + "13043 Xi'an Xianyang International Airport XIY \n", + "13044 Winnipeg / James Armstrong Richardson Internat... YWG \n", + "13045 Brisbane International Airport BNE \n", + "13046 Xi'an Xianyang International Airport XIY \n", + "13047 Portland International Jetport Airport PWM \n", + "13048 Dubai International Airport DXB \n", + "13049 Winnipeg / James Armstrong Richardson Internat... YWG \n", + "13050 Helsinki Vantaa Airport HEL \n", + "13051 Lester B. Pearson International Airport YYZ \n", + "13052 Melbourne International Airport MEL \n", + "13053 Zurich Airport ZRH \n", + "13054 Xi'an Xianyang International Airport XIY \n", + "13055 Zurich Airport ZRH \n", + "13056 Ukrainka Air Base XHBU \n", + "13057 Ministro Pistarini International Airport EZE \n", + "13058 Washington Dulles International Airport IAD \n", + "\n", + " DestCityName DestCountry \\\n", + "0 Sydney AU \n", + "1 Venice IT \n", + "2 Venice IT \n", + "3 Treviso IT \n", + "4 Xi'an CN \n", + "5 Genova IT \n", + "6 Zurich CH \n", + "7 Ottawa CA \n", + "8 Hyderabad IN \n", + "9 Treviso IT \n", + "10 Helsinki FI \n", + "11 Vienna AT \n", + "12 Shanghai CN \n", + "13 Ottawa CA \n", + "14 San Juan PR \n", + "15 Cologne DE \n", + "16 Venice IT \n", + "17 Buenos Aires AR \n", + "18 Shanghai CN \n", + "19 New Delhi IN \n", + "20 Wichita US \n", + "21 Ottawa CA \n", + "22 Osaka JP \n", + "23 Vienna AT \n", + "24 Paris FR \n", + "25 Tokyo JP \n", + "26 Osaka JP \n", + "27 San Diego US \n", + "28 Verona IT \n", + "29 Zurich CH \n", + "... ... ... \n", + "13029 Milan IT \n", + "13030 Xi'an CN \n", + "13031 Savannah US \n", + "13032 Syracuse US \n", + "13033 Tampa US \n", + "13034 Olenegorsk RU \n", + "13035 Shanghai CN \n", + "13036 Venice IT \n", + "13037 Vienna AT \n", + "13038 Pisa IT \n", + "13039 Vienna AT \n", + "13040 London GB \n", + "13041 Lima PE \n", + "13042 Ottawa CA \n", + "13043 Xi'an CN \n", + "13044 Winnipeg CA \n", + "13045 Brisbane AU \n", + "13046 Xi'an CN \n", + "13047 Portland US \n", + "13048 Dubai AE \n", + "13049 Winnipeg CA \n", + "13050 Helsinki FI \n", + "13051 Toronto CA \n", + "13052 Melbourne AU \n", + "13053 Zurich CH \n", + "13054 Xi'an CN \n", + "13055 Zurich CH \n", + "13056 Belogorsk RU \n", + "13057 Buenos Aires AR \n", + "13058 Washington US \n", + "\n", + " DestLocation DestRegion \\\n", + "0 {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n", + "1 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "3 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", + "4 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "5 {'lat': '44.4133', 'lon': '8.8375'} IT-42 \n", + "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "7 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", + "8 {'lat': '17.23131752', 'lon': '78.42985535'} SE-BD \n", + "9 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", + "10 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", + "11 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "12 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", + "13 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", + "14 {'lat': '18.43939972', 'lon': '-66.00180054'} PR-U-A \n", + "15 {'lat': '50.86589813', 'lon': '7.142739773'} DE-NW \n", + "16 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "17 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", + "18 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", + "19 {'lat': '28.5665', 'lon': '77.103104'} SE-BD \n", + "20 {'lat': '37.64989853', 'lon': '-97.43309784'} US-KS \n", + "21 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", + "22 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", + "23 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "24 {'lat': '49.01279831', 'lon': '2.549999952'} FR-J \n", + "25 {'lat': '35.76470184', 'lon': '140.3860016'} SE-BD \n", + "26 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", + "27 {'lat': '32.73360062', 'lon': '-117.1900024'} US-CA \n", + "28 {'lat': '45.395699', 'lon': '10.8885'} IT-34 \n", + "29 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "... ... ... \n", + "13029 {'lat': '45.6306', 'lon': '8.72811'} IT-25 \n", + "13030 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "13031 {'lat': '32.12760162', 'lon': '-81.20210266'} US-GA \n", + "13032 {'lat': '43.11119843', 'lon': '-76.10630035'} US-NY \n", + "13033 {'lat': '27.97550011', 'lon': '-82.53320313'} US-FL \n", + "13034 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", + "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", + "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "13037 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "13038 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", + "13039 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "13040 {'lat': '51.87469864', 'lon': '-0.368333012'} GB-ENG \n", + "13041 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", + "13042 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", + "13043 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "13044 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", + "13045 {'lat': '-27.38419914', 'lon': '153.1170044'} SE-BD \n", + "13046 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "13047 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", + "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", + "13049 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", + "13050 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", + "13051 {'lat': '43.67720032', 'lon': '-79.63059998'} CA-ON \n", + "13052 {'lat': '-37.673302', 'lon': '144.843002'} SE-BD \n", + "13053 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "13054 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "13055 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "13056 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", + "13057 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", + "13058 {'lat': '38.94449997', 'lon': '-77.45580292'} US-DC \n", + "\n", + " DestWeather ... FlightTimeMin \\\n", + "0 Rain ... 1030.770416 \n", + "1 Sunny ... 464.389481 \n", + "2 Cloudy ... 0.000000 \n", + "3 Clear ... 222.749059 \n", + "4 Clear ... 785.779071 \n", + "5 Thunder & Lightning ... 393.590441 \n", + "6 Hail ... 300.000000 \n", + "7 Clear ... 614.942480 \n", + "8 Cloudy ... 602.030591 \n", + "9 Rain ... 174.822216 \n", + "10 Rain ... 503.045170 \n", + "11 Cloudy ... 36.075018 \n", + "12 Clear ... 679.768391 \n", + "13 Rain ... 330.418282 \n", + "14 Clear ... 407.145031 \n", + "15 Sunny ... 656.712658 \n", + "16 Damaging Wind ... 773.030334 \n", + "17 Cloudy ... 704.716920 \n", + "18 Clear ... 355.957996 \n", + "19 Clear ... 875.114675 \n", + "20 Clear ... 373.966883 \n", + "21 Hail ... 130.667700 \n", + "22 Damaging Wind ... 574.495310 \n", + "23 Heavy Fog ... 579.728943 \n", + "24 Clear ... 50.157229 \n", + "25 Rain ... 527.567422 \n", + "26 Hail ... 386.259764 \n", + "27 Clear ... 24.479650 \n", + "28 Sunny ... 568.351033 \n", + "29 Rain ... 425.889194 \n", + "... ... ... ... \n", + "13029 Sunny ... 534.375826 \n", + "13030 Damaging Wind ... 141.172633 \n", + "13031 Thunder & Lightning ... 1113.137060 \n", + "13032 Rain ... 714.964864 \n", + "13033 Rain ... 234.929046 \n", + "13034 Clear ... 526.895776 \n", + "13035 Thunder & Lightning ... 0.000000 \n", + "13036 Sunny ... 150.000000 \n", + "13037 Rain ... 691.944839 \n", + "13038 Heavy Fog ... 567.387339 \n", + "13039 Thunder & Lightning ... 690.092327 \n", + "13040 Cloudy ... 3.028293 \n", + "13041 Sunny ... 338.875531 \n", + "13042 Clear ... 375.129587 \n", + "13043 Clear ... 156.858481 \n", + "13044 Clear ... 354.106457 \n", + "13045 Rain ... 771.305442 \n", + "13046 Rain ... 542.955572 \n", + "13047 Thunder & Lightning ... 564.599857 \n", + "13048 Sunny ... 180.000000 \n", + "13049 Heavy Fog ... 835.954429 \n", + "13050 Sunny ... 451.755639 \n", + "13051 Sunny ... 507.451571 \n", + "13052 Cloudy ... 1044.451122 \n", + "13053 Hail ... 728.715904 \n", + "13054 Rain ... 402.929088 \n", + "13055 Rain ... 644.418029 \n", + "13056 Rain ... 937.540811 \n", + "13057 Hail ... 1697.404971 \n", + "13058 Heavy Fog ... 1610.761827 \n", + "\n", + " Origin OriginAirportID \\\n", + "0 Frankfurt am Main Airport FRA \n", + "1 Cape Town International Airport CPT \n", + "2 Venice Marco Polo Airport VE05 \n", + "3 Naples International Airport NA01 \n", + "4 Licenciado Benito Juarez International Airport AICM \n", + "5 Edmonton International Airport CYEG \n", + "6 Zurich Airport ZRH \n", + "7 Ciampino___G. B. Pastine International Airport RM12 \n", + "8 Milano Linate Airport MI11 \n", + "9 Sheremetyevo International Airport SVO \n", + "10 Albuquerque International Sunport Airport ABQ \n", + "11 Venice Marco Polo Airport VE05 \n", + "12 Licenciado Benito Juarez International Airport AICM \n", + "13 Naples International Airport NA01 \n", + "14 Ciampino___G. B. Pastine International Airport RM12 \n", + "15 Chengdu Shuangliu International Airport CTU \n", + "16 Licenciado Benito Juarez International Airport AICM \n", + "17 Cleveland Hopkins International Airport CLE \n", + "18 Olenya Air Base XLMO \n", + "19 Casper-Natrona County International Airport CPR \n", + "20 Erie International Tom Ridge Field ERI \n", + "21 Newark Liberty International Airport EWR \n", + "22 Copenhagen Kastrup Airport CPH \n", + "23 Seattle Tacoma International Airport SEA \n", + "24 Berlin-Tegel Airport TXL \n", + "25 Manchester Airport MAN \n", + "26 Helsinki Vantaa Airport HEL \n", + "27 Phoenix Sky Harbor International Airport PHX \n", + "28 New Chitose Airport CTS \n", + "29 Tulsa International Airport TUL \n", + "... ... ... \n", + "13029 Itami Airport ITM \n", + "13030 Tokyo Haneda International Airport HND \n", + "13031 OR Tambo International Airport JNB \n", + "13032 El Dorado International Airport BOG \n", + "13033 Jorge Chavez International Airport LIM \n", + "13034 Gimpo International Airport GMP \n", + "13035 Shanghai Pudong International Airport PVG \n", + "13036 Venice Marco Polo Airport VE05 \n", + "13037 Ukrainka Air Base XHBU \n", + "13038 OR Tambo International Airport JNB \n", + "13039 Montreal / Pierre Elliott Trudeau Internationa... YUL \n", + "13040 London Heathrow Airport LHR \n", + "13041 Casper-Natrona County International Airport CPR \n", + "13042 Frankfurt am Main Airport FRA \n", + "13043 Tokyo Haneda International Airport HND \n", + "13044 Vienna International Airport VIE \n", + "13045 Amsterdam Airport Schiphol AMS \n", + "13046 Winnipeg / James Armstrong Richardson Internat... YWG \n", + "13047 Jeju International Airport CJU \n", + "13048 Dubai International Airport DXB \n", + "13049 Ministro Pistarini International Airport EZE \n", + "13050 Beijing Capital International Airport PEK \n", + "13051 Leonardo da Vinci___Fiumicino Airport RM11 \n", + "13052 Bologna Guglielmo Marconi Airport BO08 \n", + "13053 Portland International Jetport Airport PWM \n", + "13054 Pisa International Airport PI05 \n", + "13055 Winnipeg / James Armstrong Richardson Internat... YWG \n", + "13056 Licenciado Benito Juarez International Airport AICM \n", + "13057 Itami Airport ITM \n", + "13058 Adelaide International Airport ADL \n", + "\n", + " OriginCityName OriginCountry \\\n", + "0 Frankfurt am Main DE \n", + "1 Cape Town ZA \n", + "2 Venice IT \n", + "3 Naples IT \n", + "4 Mexico City MX \n", + "5 Edmonton CA \n", + "6 Zurich CH \n", + "7 Rome IT \n", + "8 Milan IT \n", + "9 Moscow RU \n", + "10 Albuquerque US \n", + "11 Venice IT \n", + "12 Mexico City MX \n", + "13 Naples IT \n", + "14 Rome IT \n", + "15 Chengdu CN \n", + "16 Mexico City MX \n", + "17 Cleveland US \n", + "18 Olenegorsk RU \n", + "19 Casper US \n", + "20 Erie US \n", + "21 Newark US \n", + "22 Copenhagen DK \n", + "23 Seattle US \n", + "24 Berlin DE \n", + "25 Manchester GB \n", + "26 Helsinki FI \n", + "27 Phoenix US \n", + "28 Chitose / Tomakomai JP \n", + "29 Tulsa US \n", + "... ... ... \n", + "13029 Osaka JP \n", + "13030 Tokyo JP \n", + "13031 Johannesburg ZA \n", + "13032 Bogota CO \n", + "13033 Lima PE \n", + "13034 Seoul KR \n", + "13035 Shanghai CN \n", + "13036 Venice IT \n", + "13037 Belogorsk RU \n", + "13038 Johannesburg ZA \n", + "13039 Montreal CA \n", + "13040 London GB \n", + "13041 Casper US \n", + "13042 Frankfurt am Main DE \n", + "13043 Tokyo JP \n", + "13044 Vienna AT \n", + "13045 Amsterdam NL \n", + "13046 Winnipeg CA \n", + "13047 Jeju City KR \n", + "13048 Dubai AE \n", + "13049 Buenos Aires AR \n", + "13050 Beijing CN \n", + "13051 Rome IT \n", + "13052 Bologna IT \n", + "13053 Portland US \n", + "13054 Pisa IT \n", + "13055 Winnipeg CA \n", + "13056 Mexico City MX \n", + "13057 Osaka JP \n", + "13058 Adelaide AU \n", + "\n", + " OriginLocation OriginRegion \\\n", + "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", + "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n", + "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", + "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "5 {'lat': '53.30970001', 'lon': '-113.5800018'} CA-AB \n", + "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "7 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", + "8 {'lat': '45.445099', 'lon': '9.27674'} IT-25 \n", + "9 {'lat': '55.972599', 'lon': '37.4146'} RU-MOS \n", + "10 {'lat': '35.040199', 'lon': '-106.609001'} US-NM \n", + "11 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "12 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "13 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", + "14 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", + "15 {'lat': '30.57850075', 'lon': '103.9469986'} SE-BD \n", + "16 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "17 {'lat': '41.4117012', 'lon': '-81.84980011'} US-OH \n", + "18 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", + "19 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", + "20 {'lat': '42.08312701', 'lon': '-80.17386675'} US-PA \n", + "21 {'lat': '40.69250107', 'lon': '-74.16870117'} US-NJ \n", + "22 {'lat': '55.61790085', 'lon': '12.65600014'} DK-84 \n", + "23 {'lat': '47.44900131', 'lon': '-122.3089981'} US-WA \n", + "24 {'lat': '52.5597', 'lon': '13.2877'} DE-BE \n", + "25 {'lat': '53.35369873', 'lon': '-2.274950027'} GB-ENG \n", + "26 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", + "27 {'lat': '33.43429947', 'lon': '-112.012001'} US-AZ \n", + "28 {'lat': '42.77519989', 'lon': '141.6920013'} SE-BD \n", + "29 {'lat': '36.19839859', 'lon': '-95.88809967'} US-OK \n", + "... ... ... \n", + "13029 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", + "13030 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", + "13031 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", + "13032 {'lat': '4.70159', 'lon': '-74.1469'} CO-CUN \n", + "13033 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", + "13034 {'lat': '37.5583', 'lon': '126.791'} SE-BD \n", + "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", + "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "13037 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", + "13038 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", + "13039 {'lat': '45.47060013', 'lon': '-73.74079895'} CA-QC \n", + "13040 {'lat': '51.4706', 'lon': '-0.461941'} GB-ENG \n", + "13041 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", + "13042 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", + "13043 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", + "13044 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "13045 {'lat': '52.30860138', 'lon': '4.76388979'} NL-NH \n", + "13046 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", + "13047 {'lat': '33.51129913', 'lon': '126.4929962'} SE-BD \n", + "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", + "13049 {'lat': '-34.8222', 'lon': '-58.5358'} AR-B \n", + "13050 {'lat': '40.08010101', 'lon': '116.5849991'} SE-BD \n", + "13051 {'lat': '41.8002778', 'lon': '12.2388889'} IT-62 \n", + "13052 {'lat': '44.5354', 'lon': '11.2887'} IT-45 \n", + "13053 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", + "13054 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", + "13055 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", + "13056 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "13057 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", + "13058 {'lat': '-34.945', 'lon': '138.531006'} SE-BD \n", + "\n", + " OriginWeather dayOfWeek timestamp \n", + "0 Sunny 0 2018-01-01 00:00:00 \n", + "1 Clear 0 2018-01-01 18:27:00 \n", + "2 Rain 0 2018-01-01 17:11:14 \n", + "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n", + "4 Damaging Wind 0 2018-01-01 05:13:00 \n", + "5 Rain 0 2018-01-01 01:43:03 \n", + "6 Clear 0 2018-01-01 13:49:53 \n", + "7 Thunder & Lightning 0 2018-01-01 04:54:59 \n", + "8 Heavy Fog 0 2018-01-01 12:09:35 \n", + "9 Cloudy 0 2018-01-01 12:09:35 \n", + "10 Rain 0 2018-01-01 22:06:14 \n", + "11 Rain 0 2018-01-01 11:52:34 \n", + "12 Heavy Fog 0 2018-01-01 02:13:46 \n", + "13 Rain 0 2018-01-01 14:21:13 \n", + "14 Cloudy 0 2018-01-01 17:42:53 \n", + "15 Thunder & Lightning 0 2018-01-01 19:55:32 \n", + "16 Thunder & Lightning 0 2018-01-01 07:49:27 \n", + "17 Rain 0 2018-01-01 01:30:47 \n", + "18 Hail 0 2018-01-01 07:58:17 \n", + "19 Cloudy 0 2018-01-01 00:02:06 \n", + "20 Cloudy 0 2018-01-01 01:08:20 \n", + "21 Clear 0 2018-01-01 01:08:20 \n", + "22 Sunny 0 2018-01-01 07:48:35 \n", + "23 Heavy Fog 0 2018-01-01 18:57:21 \n", + "24 Rain 0 2018-01-01 13:18:25 \n", + "25 Thunder & Lightning 0 2018-01-01 08:20:35 \n", + "26 Rain 0 2018-01-01 15:38:32 \n", + "27 Clear 0 2018-01-01 03:08:45 \n", + "28 Damaging Wind 0 2018-01-01 01:16:59 \n", + "29 Rain 0 2018-01-01 18:00:59 \n", + "... ... ... ... \n", + "13029 Sunny 6 2018-02-11 20:10:13 \n", + "13030 Clear 6 2018-02-11 18:59:53 \n", + "13031 Hail 6 2018-02-11 00:57:48 \n", + "13032 Thunder & Lightning 6 2018-02-11 12:02:49 \n", + "13033 Thunder & Lightning 6 2018-02-11 02:07:40 \n", + "13034 Sunny 6 2018-02-11 00:35:04 \n", + "13035 Thunder & Lightning 6 2018-02-11 11:19:12 \n", + "13036 Cloudy 6 2018-02-11 15:07:11 \n", + "13037 Damaging Wind 6 2018-02-11 10:24:42 \n", + "13038 Damaging Wind 6 2018-02-11 00:42:06 \n", + "13039 Thunder & Lightning 6 2018-02-11 10:56:31 \n", + "13040 Clear 6 2018-02-11 00:39:37 \n", + "13041 Rain 6 2018-02-11 10:24:30 \n", + "13042 Clear 6 2018-02-11 09:02:07 \n", + "13043 Thunder & Lightning 6 2018-02-11 04:45:06 \n", + "13044 Thunder & Lightning 6 2018-02-11 00:51:14 \n", + "13045 Sunny 6 2018-02-11 05:41:51 \n", + "13046 Hail 6 2018-02-11 10:02:21 \n", + "13047 Cloudy 6 2018-02-11 15:55:10 \n", + "13048 Hail 6 2018-02-11 04:11:14 \n", + "13049 Sunny 6 2018-02-11 10:13:32 \n", + "13050 Cloudy 6 2018-02-11 11:23:23 \n", + "13051 Hail 6 2018-02-11 01:13:50 \n", + "13052 Cloudy 6 2018-02-11 18:35:42 \n", + "13053 Clear 6 2018-02-11 19:02:10 \n", + "13054 Sunny 6 2018-02-11 20:42:25 \n", + "13055 Rain 6 2018-02-11 01:41:57 \n", + "13056 Sunny 6 2018-02-11 04:09:27 \n", + "13057 Hail 6 2018-02-11 08:28:21 \n", + "13058 Rain 6 2018-02-11 14:54:34 \n", + "\n", + "[13059 rows x 27 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd_sdf" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -421,7 +2529,7 @@ } ], "source": [ - "pd_df.describe()" + "pd_sdf.describe()" ] }, { @@ -839,17 +2947,242 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "d = {'col1': [1.2, 20], 'col2': [int(1), int(30)], 'col3': ['2019-02-01 03:04:05', '2018-02-01 01:03:04'], 'col4': ['2019-02-01 03:04:05', '2018-02-01 01:03:04']}\n", - "df = pd.DataFrame(data=d)" + "from eland.tests.frame.common import TestData" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "td = TestData()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8,\n", + " 9,\n", + " ...\n", + " 13049, 13050, 13051, 13052, 13053, 13054, 13055, 13056, 13057,\n", + " 13058],\n", + " dtype='int64', length=13059)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "td.pd_flights().index" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 13059 entries, 0 to 13058\n", + "Data columns (total 27 columns):\n", + "AvgTicketPrice 13059 non-null float64\n", + "Cancelled 13059 non-null bool\n", + "Carrier 13059 non-null object\n", + "Dest 13059 non-null object\n", + "DestAirportID 13059 non-null object\n", + "DestCityName 13059 non-null object\n", + "DestCountry 13059 non-null object\n", + "DestLocation 13059 non-null object\n", + "DestRegion 13059 non-null object\n", + "DestWeather 13059 non-null object\n", + "DistanceKilometers 13059 non-null float64\n", + "DistanceMiles 13059 non-null float64\n", + "FlightDelay 13059 non-null bool\n", + "FlightDelayMin 13059 non-null int64\n", + "FlightDelayType 13059 non-null object\n", + "FlightNum 13059 non-null object\n", + "FlightTimeHour 13059 non-null float64\n", + "FlightTimeMin 13059 non-null float64\n", + "Origin 13059 non-null object\n", + "OriginAirportID 13059 non-null object\n", + "OriginCityName 13059 non-null object\n", + "OriginCountry 13059 non-null object\n", + "OriginLocation 13059 non-null object\n", + "OriginRegion 13059 non-null object\n", + "OriginWeather 13059 non-null object\n", + "dayOfWeek 13059 non-null int64\n", + "timestamp 13059 non-null datetime64[ns]\n", + "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n", + "memory usage: 2.6+ MB\n" + ] + } + ], + "source": [ + "td.pd_flights().info()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "df = pd.DataFrame(np.random.randn(100000, 4))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 100000 entries, 0 to 99999\n", + "Data columns (total 4 columns):\n", + "0 100000 non-null float64\n", + "1 100000 non-null float64\n", + "2 100000 non-null float64\n", + "3 100000 non-null float64\n", + "dtypes: float64(4)\n", + "memory usage: 3.1 MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "df.iloc[:999998] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 100000 entries, 0 to 99999\n", + "Data columns (total 4 columns):\n", + "0 0 non-null float64\n", + "1 0 non-null float64\n", + "2 0 non-null float64\n", + "3 0 non-null float64\n", + "dtypes: float64(4)\n", + "memory usage: 3.1 MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "sdf = df.to_sparse()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index 80\n", + "0 800000\n", + "1 800000\n", + "2 800000\n", + "3 800000\n", + "dtype: int64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.memory_usage()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.sparse.frame.SparseDataFrame" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(sdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "cols = ['string', 'int', 'float', 'object']\n", + "\n", + "string_series = pd.SparseSeries(['a', 'b', 'c'])\n", + "int_series = pd.SparseSeries([1, 2, 3])\n", + "float_series = pd.SparseSeries([1.1, 1.2, 1.3])\n", + "object_series = pd.SparseSeries([[], {}, set()])\n", + "sdf = pd.SparseDataFrame({\n", + " 'string': string_series,\n", + " 'int': int_series,\n", + " 'float': float_series,\n", + " 'object': object_series,\n", + "})\n", + "sdf = sdf[cols]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -873,38 +3206,334 @@ " \n", " \n", " \n", - " col1\n", - " col2\n", - " col3\n", - " col4\n", + " string\n", + " int\n", + " float\n", + " object\n", " \n", " \n", " \n", " \n", " 0\n", - " 1.2\n", + " a\n", " 1\n", - " 2019-02-01 03:04:05\n", - " 2019-02-01 03:04:05\n", + " 1.1\n", + " []\n", " \n", " \n", " 1\n", - " 20.0\n", - " 30\n", - " 2018-02-01 01:03:04\n", - " 2018-02-01 01:03:04\n", + " b\n", + " 2\n", + " 1.2\n", + " {}\n", + " \n", + " \n", + " 2\n", + " c\n", + " 3\n", + " 1.3\n", + " {}\n", " \n", " \n", "\n", "" ], "text/plain": [ - " col1 col2 col3 col4\n", - "0 1.2 1 2019-02-01 03:04:05 2019-02-01 03:04:05\n", - "1 20.0 30 2018-02-01 01:03:04 2018-02-01 01:03:04" + " string int float object\n", + "0 a 1 1.1 []\n", + "1 b 2 1.2 {}\n", + "2 c 3 1.3 {}" ] }, - "execution_count": 10, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sdf" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas._libs.sparse import IntIndex\n", + "\n", + "arr = pd.SparseArray(data=['a', 'b', 'c'], sparse_index=IntIndex(10, [5, 6, 8]).to_block_index(), \n", + " dtype='Sparse[object, nan]')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[nan, nan, nan, nan, nan, a, b, nan, c, nan]\n", + "Fill: nan\n", + "BlockIndex\n", + "Block locations: array([5, 8], dtype=int32)\n", + "Block lengths: array([2, 1], dtype=int32)\n" + ] + } + ], + "source": [ + "print(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "max_rows = 60\n", + "head_rows = max_rows / 2\n", + "\n", + "arr = pd.SparseArray(data=['a', 'b', 'c'], sparse_index=BlockIndex(1000000000,[0,10],[1,2]))" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[a, nan, nan, nan, nan, nan, nan, nan, nan, nan, b, c, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, ...]\n", + "Fill: nan\n", + "BlockIndex\n", + "Block locations: array([ 0, 10], dtype=int32)\n", + "Block lengths: array([1, 2], dtype=int32)\n" + ] + } + ], + "source": [ + "print(arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'SparseArray' object has no attribute 'info'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'SparseArray' object has no attribute 'info'" + ] + } + ], + "source": [ + "prin" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "Non array-like type must have the same length as the index", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m pd.SparseArray(100, kind='block', \n\u001b[1;32m 5\u001b[0m \u001b[0msparse_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mBlockIndex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m fill_value=0)\n\u001b[0m", + "\u001b[0;32m/anaconda3/lib/python3.6/site-packages/pandas/core/arrays/sparse.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, sparse_index, index, fill_value, kind, dtype, copy)\u001b[0m\n\u001b[1;32m 666\u001b[0m raise AssertionError(\"Non array-like type {type} must \"\n\u001b[1;32m 667\u001b[0m \u001b[0;34m\"have the same length as the index\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 668\u001b[0;31m .format(type=type(sparse_values)))\n\u001b[0m\u001b[1;32m 669\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sparse_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msparse_index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 670\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sparse_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msparse_values\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAssertionError\u001b[0m: Non array-like type must have the same length as the index" + ] + } + ], + "source": [ + "from pandas.core.arrays.sparse import BlockIndex, IntIndex, _make_index\n", + "import numpy as np\n", + "\n", + "pd.SparseArray(100, kind='block', \n", + " sparse_index=BlockIndex(100, [8,10], [1,1]), \n", + " fill_value=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", + "Fill: 0\n", + "BlockIndex\n", + "Block locations: array([1, 5], dtype=int32)\n", + "Block lengths: array([2, 3], dtype=int32)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.SparseArray(1, index= [None, 3, 2, 7, np.inf], kind='block',\n", + " sparse_index= BlockIndex(20, [1,5], [2,3]),\n", + " fill_value=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "range(0, 1)\n" + ] + } + ], + "source": [ + "print(range(1))" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame({'a': pd.SparseArray([1, np.nan, 1]), 'b': [1, 2, 3]})" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
01.01
1NaN2
21.03
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1.0 1\n", + "1 NaN 2\n", + "2 1.0 3" + ] + }, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -913,6 +3542,455 @@ "df" ] }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a True\n", + "b False\n", + "dtype: bool" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes.apply(pd.api.types.is_sparse)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 3 entries, 0 to 2\n", + "Data columns (total 2 columns):\n", + "a -4 non-null Sparse[float64, nan]\n", + "b -3 non-null int64\n", + "dtypes: Sparse[float64, nan](1), int64(1)\n", + "memory usage: 128.0 bytes\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "440000.00000000006" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "400000 * 1.1" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "480000.0" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "400000 * 1.2" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "class Dog:\n", + "\n", + " kind = 'canine' # class variable shared by all instances\n", + "\n", + " def __init__(self, name):\n", + " self.name = name # instance variable unique to each instance\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "a = Dog('fred')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "b = Dog('kim')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fred\n" + ] + } + ], + "source": [ + "print(a.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "buddy\n" + ] + } + ], + "source": [ + "print(b.name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "b.name = 'buddy'" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Dog' object has no attribute 'copy'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mb\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m: 'Dog' object has no attribute 'copy'" + ] + } + ], + "source": [ + "a = b.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "buddy\n" + ] + } + ], + "source": [ + "print(a.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "b.name = 'tom'" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tom\n" + ] + } + ], + "source": [ + "print(a.name)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('a', 123), ('c', '-'), ('b', 'test')]\n", + "{'a': 123, 'b': 'test', 'c': '-'}\n" + ] + }, + { + "ename": "TypeError", + "evalue": "'zip' object is not subscriptable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msorted_pair_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdict_to_sort\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mvalue_list\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0msorted_pair_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: 'zip' object is not subscriptable" + ] + } + ], + "source": [ + "dict_to_sort = {'a': 123, 'b': 'test', 'c': '-'}\n", + " \n", + "dict_key = {'a': 1, 'b': 3, 'c': 2} # The order should be \"a c b\"\n", + " \n", + "# sort dict_to_sort by using dict_key\n", + "sorted_pair_list = sorted(dict_to_sort.items(), key=lambda x: dict_key.get(x[0]))\n", + " \n", + "# the list of values\n", + "print(sorted_pair_list)\n", + "print(dict_to_sort)\n", + "value_list = zip(*sorted_pair_list)[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "a = {'ground': 'obj1', 'floor 1': 'obj2', 'basement': 'obj3'}\n", + "a_list = ['floor 1', 'ground', 'basement']\n", + "index_map = {v: i for i, v in enumerate(a_list)}\n", + "b = sorted(a.items(), key=lambda pair: index_map[pair[0]])\n", + "\n", + "import pandas as pd\n", + "df = pd.DataFrame(data=[a])" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
basementfloor 1ground
0obj3obj2obj1
\n", + "
" + ], + "text/plain": [ + " basement floor 1 ground\n", + "0 obj3 obj2 obj1" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "cols = ['ground', 'floor 1','basement']\n", + "df = df[cols] " + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
groundfloor 1basement
0obj1obj2obj3
\n", + "
" + ], + "text/plain": [ + " ground floor 1 basement\n", + "0 obj1 obj2 obj3" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -937,7 +4015,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.6.8" } }, "nbformat": 4, diff --git a/eland/utils.py b/eland/utils.py index 5135bfe..99f597c 100644 --- a/eland/utils.py +++ b/eland/utils.py @@ -1,4 +1,4 @@ -import eland +import eland as ed def read_es(es_params, index_pattern): - return eland.DataFrame(es_params, index_pattern) + return ed.DataFrame(client=es_params, index_pattern=index_pattern)