diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..6b71e1a --- /dev/null +++ b/NOTES.md @@ -0,0 +1,58 @@ +# Implementation Notes + +The goal of an `eland.DataFrame` is to enable users who are familiar with `pandas.DataFrame` +to access, explore and manipulate data that resides in Elasticsearch. + +Ideally, all data should reside in Elasticsearch and not reside in memory. +This restricts the API, but allows access to huge data sets that do not fit into memory, and allows +use of powerful Elasticsearch features such as aggregations. + +## Implementation Details + +### 3rd Party System Access + +Generally, integrations with [3rd party storage systems](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html) +(SQL, Google BigQuery etc.) involve accessing these systems and reading all external data into an +in-core pandas data structure. This also applies to [Apache Arrow](https://arrow.apache.org/docs/python/pandas.html) +structures. + +Whilst this provides access to data in these systems, for large datasets this can require significant +in-core memory, and for systems such as Elasticsearch, bulk export of data can be an inefficient way +of exploring the data. + +An alternative option is to create an API that proxies `pandas.DataFrame`-like calls to Elasticsearch +queries and operations. This could allow the Elasticsearch cluster to perform operations such as +aggregations rather than exporting all the data and performing this operation in-core. + +### Implementation Options + +An option would be to replace the `pandas.DataFrame` backend in-core memory structures with Elasticsearch +accessors. This would allow full access to the `pandas.DataFrame` APIs. However, this has issues: + +* If a `pandas.DataFrame` instance maps to an index, typical manipulation of a `pandas.DataFrame` +may involve creating many derived `pandas.DataFrame` instances. 
Constructing an index per +`pandas.DataFrame` may result in many Elasticsearch indices and a significant load on Elasticsearch. +For example, `df_a = df['a']` should not require Elasticsearch indices `df` and `df_a` + +* Not all `pandas.DataFrame` APIs map to things we may want to do in Elasticsearch. In particular, +API calls that involve exporting all data from Elasticsearch into memory e.g. `df.to_dict()`. + +* The backend `pandas.DataFrame` structures are not easily abstractable and are deeply embedded in +the implementation. + +Another option is to create an `eland.DataFrame` API that mimics appropriate aspects of +the `pandas.DataFrame` API. This resolves some of the issues above as: + +* `df_a = df['a']` could be implemented as a change to the Elasticsearch query used, rather +than a new index + +* Instead of supporting the entire `pandas.DataFrame` API we can support a subset appropriate for +Elasticsearch. If additional calls are required, we could create an `eland.DataFrame.to_pandas()` +method which would explicitly export all data to a `pandas.DataFrame` + +* Creating a new `eland.DataFrame` API gives us full flexibility in terms of implementation. However, +it does create a large amount of work which may duplicate a lot of the `pandas` code - for example, +printing objects etc. - this creates maintenance issues etc. 
+ + + diff --git a/eland/__init__.py b/eland/__init__.py index 094154d..d8305a6 100644 --- a/eland/__init__.py +++ b/eland/__init__.py @@ -1,4 +1,5 @@ from .utils import * -from .frame import * +from .dataframe import * from .client import * from .mappings import * +from .index import * diff --git a/eland/frame.py b/eland/dataframe.py similarity index 64% rename from eland/frame.py rename to eland/dataframe.py index ac5063b..bf2532f 100644 --- a/eland/frame.py +++ b/eland/dataframe.py @@ -23,21 +23,19 @@ Similarly, only Elasticsearch searchable fields can be searched or filtered, and only Elasticsearch aggregatable fields can be aggregated or grouped. """ -import eland as ed - -from elasticsearch import Elasticsearch -from elasticsearch_dsl import Search +import sys import pandas as pd - -from pandas.core.arrays.sparse import BlockIndex - +from elasticsearch_dsl import Search +from pandas.compat import StringIO +from pandas.core import common as com +from pandas.io.common import _expand_user, _stringify_path from pandas.io.formats import format as fmt from pandas.io.formats.printing import pprint_thing +from pandas.io.formats import console -from io import StringIO +import eland as ed -import sys class DataFrame(): """ @@ -79,26 +77,24 @@ class DataFrame(): object is created, the object is not rebuilt and so inconsistencies can occur. 
""" + def __init__(self, client, index_pattern, mappings=None, - operations=None): - self.client = ed.Client(client) - self.index_pattern = index_pattern + index_field=None): + + self._client = ed.Client(client) + self._index_pattern = index_pattern # Get and persist mappings, this allows us to correctly # map returned types from Elasticsearch to pandas datatypes if mappings is None: - self.mappings = ed.Mappings(self.client, self.index_pattern) + self._mappings = ed.Mappings(self._client, self._index_pattern) else: - self.mappings = mappings + self._mappings = mappings - # Initialise a list of 'operations' - # these are filters - self.operations = [] - if operations is not None: - self.operations.extend(operations) + self._index = ed.Index(index_field) def _es_results_to_pandas(self, results): """ @@ -187,6 +183,7 @@ class DataFrame(): TODO - an option here is to use Elasticsearch's multi-field matching instead of pandas treatment of lists (which isn't great) NOTE - using this lists is generally not a good way to use this API """ + def flatten_dict(y): out = {} @@ -197,7 +194,7 @@ class DataFrame(): is_source_field = False pd_dtype = 'object' else: - is_source_field, pd_dtype = self.mappings.source_field_pd_dtype(name[:-1]) + is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(name[:-1]) if not is_source_field and type(x) is dict: for a in x: @@ -205,7 +202,7 @@ class DataFrame(): elif not is_source_field and type(x) is list: for a in x: flatten(a, name) - elif is_source_field == True: # only print source fields from mappings (TODO - not so efficient for large number of fields and filtered mapping) + elif is_source_field == True: # only print source fields from mappings (TODO - not so efficient for large number of fields and filtered mapping) field_name = name[:-1] # Coerce types - for now just datetime @@ -227,14 +224,22 @@ class DataFrame(): return out rows = [] + index = [] for hit in results['hits']['hits']: row = hit['_source'] + # get index 
value - can be _id or can be field value in source + if self._index.is_source_field: + index_field = row[self._index.index_field] + else: + index_field = hit[self._index.index_field] + index.append(index_field) + # flatten row to map correctly to 2D DataFrame rows.append(flatten_dict(row)) # Create pandas DataFrame - df = pd.DataFrame(data=rows) + df = pd.DataFrame(data=rows, index=index) # _source may not contain all columns in the mapping # therefore, fill in missing columns @@ -242,7 +247,7 @@ class DataFrame(): missing_columns = list(set(self.columns) - set(df.columns)) for missing in missing_columns: - is_source_field, pd_dtype = self.mappings.source_field_pd_dtype(missing) + is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(missing) df[missing] = None df[missing].astype(pd_dtype) @@ -252,20 +257,32 @@ class DataFrame(): return df def head(self, n=5): - results = self.client.search(index=self.index_pattern, size=n) + sort_params = self._index.sort_field + ":asc" + + results = self._client.search(index=self._index_pattern, size=n, sort=sort_params) return self._es_results_to_pandas(results) - + + def tail(self, n=5): + sort_params = self._index.sort_field + ":desc" + + results = self._client.search(index=self._index_pattern, size=n, sort=sort_params) + + df = self._es_results_to_pandas(results) + + # reverse order (index ascending) + return df.sort_index() + def describe(self): - numeric_source_fields = self.mappings.numeric_source_fields() + numeric_source_fields = self._mappings.numeric_source_fields() # for each field we compute: # count, mean, std, min, 25%, 50%, 75%, max - search = Search(using=self.client, index=self.index_pattern).extra(size=0) + search = Search(using=self._client, index=self._index_pattern).extra(size=0) for field in numeric_source_fields: - search.aggs.metric('extended_stats_'+field, 'extended_stats', field=field) - search.aggs.metric('percentiles_'+field, 'percentiles', field=field) + 
search.aggs.metric('extended_stats_' + field, 'extended_stats', field=field) + search.aggs.metric('percentiles_' + field, 'percentiles', field=field) response = search.execute() @@ -273,21 +290,21 @@ class DataFrame(): for field in numeric_source_fields: values = [] - values.append(response.aggregations['extended_stats_'+field]['count']) - values.append(response.aggregations['extended_stats_'+field]['avg']) - values.append(response.aggregations['extended_stats_'+field]['std_deviation']) - values.append(response.aggregations['extended_stats_'+field]['min']) - values.append(response.aggregations['percentiles_'+field]['values']['25.0']) - values.append(response.aggregations['percentiles_'+field]['values']['50.0']) - values.append(response.aggregations['percentiles_'+field]['values']['75.0']) - values.append(response.aggregations['extended_stats_'+field]['max']) - + values.append(response.aggregations['extended_stats_' + field]['count']) + values.append(response.aggregations['extended_stats_' + field]['avg']) + values.append(response.aggregations['extended_stats_' + field]['std_deviation']) + values.append(response.aggregations['extended_stats_' + field]['min']) + values.append(response.aggregations['percentiles_' + field]['values']['25.0']) + values.append(response.aggregations['percentiles_' + field]['values']['50.0']) + values.append(response.aggregations['percentiles_' + field]['values']['75.0']) + values.append(response.aggregations['extended_stats_' + field]['max']) + # if not None if (values.count(None) < len(values)): results[field] = values df = pd.DataFrame(data=results, index=['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']) - + return df def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, @@ -305,12 +322,10 @@ class DataFrame(): if buf is None: # pragma: no cover buf = sys.stdout - fake_df = self.__fake_dataframe__() - lines = [] lines.append(str(type(self))) - lines.append(fake_df.index._summary()) + 
lines.append(self.index_summary()) if len(self.columns) == 0: lines.append('Empty {name}'.format(name=type(self).__name__)) @@ -322,7 +337,7 @@ class DataFrame(): # hack if max_cols is None: max_cols = pd.get_option('display.max_info_columns', - len(self.columns) + 1) + len(self.columns) + 1) max_rows = pd.get_option('display.max_info_rows', len(self) + 1) @@ -404,7 +419,6 @@ class DataFrame(): fmt.buffer_put_lines(buf, lines) - @property def shape(self): """ @@ -423,14 +437,32 @@ class DataFrame(): @property def columns(self): - return pd.Index(self.mappings.source_fields()) + return pd.Index(self._mappings.source_fields()) + + @property + def index(self): + return self._index + + def set_index(self, index_field): + copy = self.copy() + copy._index = ed.Index(index_field) + return copy + + def index_summary(self): + head = self.head(1).index[0] + tail = self.tail(1).index[0] + index_summary = ', %s to %s' % (pprint_thing(head), + pprint_thing(tail)) + + name = "Index" + return '%s: %s entries%s' % (name, len(self), index_summary) @property def dtypes(self): - return self.mappings.dtypes() + return self._mappings.dtypes() def get_dtype_counts(self): - return self.mappings.get_dtype_counts() + return self._mappings.get_dtype_counts() def count(self): """ @@ -446,63 +478,155 @@ class DataFrame(): for a single document. 
""" counts = {} - for field in self.mappings.source_fields(): - exists_query = {"query":{"exists":{"field":field}}} - field_exists_count = self.client.count(index=self.index_pattern, body=exists_query) + for field in self._mappings.source_fields(): + exists_query = {"query": {"exists": {"field": field}}} + field_exists_count = self._client.count(index=self._index_pattern, body=exists_query) counts[field] = field_exists_count - count = pd.Series(data=counts, index=self.mappings.source_fields()) + count = pd.Series(data=counts, index=self._mappings.source_fields()) return count + def index_count(self): + """ + Returns + ------- + index_count: int + Count of docs where index_field exists + """ + exists_query = {"query": {"exists": {"field": self._index.index_field}}} - def __getitem__(self, item): - # df['a'] -> item == str - # df['a', 'b'] -> item == (str, str) tuple + index_count = self._client.count(index=self._index_pattern, body=exists_query) + + return index_count + + def _filter_by_columns(self, columns): + # Return new eland.DataFrame with modified mappings + mappings = ed.Mappings(mappings=self._mappings, columns=columns) + + return DataFrame(self._client, self._index_pattern, mappings=mappings) + + def __getitem__(self, key): + # NOTE: there is a difference between pandas here. + # e.g. df['a'] returns pd.Series, df[['a','b']] return pd.DataFrame + # we always return DataFrame - TODO maybe create eland.Series at some point... + + # Implementation mainly copied from pandas v0.24.2 + # (https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html) + key = com.apply_if_callable(key, self) + + # TODO - add slice capabilities - need to add index features first + # e.g. set index etc. + # Do we have a slicer (on rows)? + """ + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + return self._slice(indexer, axis=0) + # Do we have a (boolean) DataFrame? 
+ if isinstance(key, DataFrame): + return self._getitem_frame(key) + """ + + # Do we have a (boolean) 1d indexer? + """ + if com.is_bool_indexer(key): + return self._getitem_bool_array(key) + """ + + # We are left with two options: a single key, and a collection of keys, columns = [] - if isinstance(item, str): - if not self.mappings.is_source_field(item): - raise TypeError('Column does not exist: [{0}]'.format(item)) - columns.append(item) - elif isinstance(item, tuple): - columns.extend(list(item)) - elif isinstance(item, list): - columns.extend(item) - - if len(columns) > 0: - # Return new eland.DataFrame with modified mappings - mappings = ed.Mappings(mappings=self.mappings, columns=columns) - - return DataFrame(self.client, self.index_pattern, mappings=mappings) - """ - elif isinstance(item, BooleanFilter): - self._filter = item.build() - return self + if isinstance(key, str): + if not self._mappings.is_source_field(key): + raise TypeError('Column does not exist: [{0}]'.format(key)) + columns.append(key) + elif isinstance(key, list): + columns.extend(key) else: - raise TypeError('Unsupported expr: [{0}]'.format(item)) - """ + raise TypeError('__getitem__ arguments invalid: [{0}]'.format(key)) + + return self._filter_by_columns(columns) def __len__(self): """ Returns length of info axis, but here we use the index. """ - return self.client.count(index=self.index_pattern) + return self._client.count(index=self._index_pattern) + + def copy(self): + # TODO - test and validate...may need deep copying + return ed.DataFrame(self._client, + self._index_pattern, + self._mappings, + self._index) # ---------------------------------------------------------------------- # Rendering Methods - def __repr__(self): """ - Return a string representation for a particular DataFrame. 
+ From pandas """ - return self.to_string() + buf = StringIO() + + max_rows = pd.get_option("display.max_rows") + max_cols = pd.get_option("display.max_columns") + show_dimensions = pd.get_option("display.show_dimensions") + if pd.get_option("display.expand_frame_repr"): + width, _ = console.get_console_size() + else: + width = None + self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols, + line_width=width, show_dimensions=show_dimensions) + + return buf.getvalue() + + def to_string(self, buf=None, columns=None, col_space=None, header=True, + index=True, na_rep='NaN', formatters=None, float_format=None, + sparsify=None, index_names=True, justify=None, + max_rows=None, max_cols=None, show_dimensions=True, + decimal='.', line_width=None): + """ + From pandas + """ + if max_rows == None: + max_rows = pd.get_option('display.max_rows') + + sdf = self.__fake_dataframe__(max_rows=max_rows+1) + + _show_dimensions = show_dimensions + + if buf is not None: + _buf = _expand_user(_stringify_path(buf)) + else: + _buf = StringIO() + + sdf.to_string(buf=_buf, columns=columns, + col_space=col_space, na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, justify=justify, + index_names=index_names, + header=header, index=index, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=False, # print this outside of this call + decimal=decimal, + line_width=line_width) + + if _show_dimensions: + _buf.write("\n\n[{nrows} rows x {ncols} columns]" + .format(nrows=self.index_count(), ncols=len(self.columns))) + + if buf is None: + result = _buf.getvalue() + return result def __fake_dataframe__(self, max_rows=1): - head_rows = max_rows / 2 + 1 + head_rows = int(max_rows / 2) + max_rows % 2 tail_rows = max_rows - head_rows - head = self.head(max_rows) + head = self.head(head_rows) + tail = self.tail(tail_rows) num_rows = len(self) @@ -514,8 +638,9 @@ class DataFrame(): # to use the pandas IO methods. 
# TODO - if data is indexed by time series, return top/bottom of # time series, rather than first max_rows items + """ if tail_rows > 0: - locations = [0, num_rows-tail_rows] + locations = [0, num_rows - tail_rows] lengths = [head_rows, tail_rows] else: locations = [0] @@ -526,21 +651,13 @@ class DataFrame(): BlockIndex( num_rows, locations, lengths)) for item in self.columns}) - - return sdf - - return head + """ + return pd.concat([head, tail]) - def to_string(self): - # TODO - this doesn't return 'notebook' friendly results yet.. - # TODO - don't hard code max_rows - use pandas default/ES default - max_rows = 60 - - df = self.__fake_dataframe__(max_rows=max_rows) - - return df.to_string(max_rows=max_rows, show_dimensions=True) + return pd.concat([head, tail]) +# From pandas.DataFrame def _put_str(s, space): - return '{s}'.format(s=s)[:space].ljust(space) \ No newline at end of file + return '{s}'.format(s=s)[:space].ljust(space) diff --git a/eland/index.py b/eland/index.py new file mode 100644 index 0000000..be0425f --- /dev/null +++ b/eland/index.py @@ -0,0 +1,46 @@ +""" +class Index + +The index for an eland.DataFrame. + +Currently, the index is a field that exists in every document in an Elasticsearch index. +For slicing and sorting operations it must be a docvalues field. By default _id is used, +which can't be used for range queries and is inefficient for sorting: + +https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html +(The value of the _id field is also accessible in aggregations or for sorting, +but doing so is discouraged as it requires to load a lot of data in memory. +In case sorting or aggregating on the _id field is required, it is advised to duplicate +the content of the _id field in another field that has doc_values enabled.) 
+ +""" +class Index: + ID_INDEX_FIELD = '_id' + ID_SORT_FIELD = '_doc' # if index field is _id, sort by _doc + + def __init__(self, index_field=None): + # Calls setter + self.index_field = index_field + + @property + def sort_field(self): + if self._index_field == self.ID_INDEX_FIELD: + return self.ID_SORT_FIELD + return self._index_field + + @property + def is_source_field(self): + return self._is_source_field + + @property + def index_field(self): + return self._index_field + + @index_field.setter + def index_field(self, index_field): + if index_field == None: + self._index_field = Index.ID_INDEX_FIELD + self._is_source_field = False + else: + self._index_field = index_field + self._is_source_field = True diff --git a/eland/mappings.py b/eland/mappings.py index 4064f9c..b9e31e7 100644 --- a/eland/mappings.py +++ b/eland/mappings.py @@ -64,18 +64,22 @@ class Mappings(): # Populate capability matrix of fields # field_name, es_dtype, pd_dtype, is_searchable, is_aggregtable, is_source - self.mappings_capabilities = Mappings._create_capability_matrix(all_fields, source_fields, all_fields_caps) + self._mappings_capabilities = Mappings._create_capability_matrix(all_fields, source_fields, all_fields_caps) else: - # Reference object and restrict mapping columns - self.mappings_capabilities = mappings.mappings_capabilities.loc[columns] + if columns is not None: + # Reference object and restrict mapping columns + self._mappings_capabilities = mappings._mappings_capabilities.loc[columns] + else: + # straight copy + self._mappings_capabilities = mappings._mappings_capabilities.copy() # Cache source field types for efficient lookup # (this massively improves performance of DataFrame.flatten) - self.source_field_pd_dtypes = {} + self._source_field_pd_dtypes = {} - for field_name in self.mappings_capabilities[self.mappings_capabilities._source == True].index: - pd_dtype = self.mappings_capabilities.loc[field_name]['pd_dtype'] - self.source_field_pd_dtypes[field_name] = pd_dtype 
+ for field_name in self._mappings_capabilities[self._mappings_capabilities._source == True].index: + pd_dtype = self._mappings_capabilities.loc[field_name]['pd_dtype'] + self._source_field_pd_dtypes[field_name] = pd_dtype def _extract_fields_from_mapping(mappings, source_only=False): """ @@ -262,24 +266,29 @@ class Mappings(): all_fields: list All typed fields in the index mapping """ - return self.mappings_capabilities.index.tolist() + return self._mappings_capabilities.index.tolist() + + def field_capabilities(self, field_name): + """ + Parameters + ---------- + field_name: str - """ - def pd_dtypes_groupby_source_fields(self): Returns ------- - groups: dict - Calls pandas.core.groupby.GroupBy.groups for _source fields - E.g. - { - 'bool': Index(['Cancelled', 'FlightDelay'], dtype='object'), - 'datetime64[ns]': Index(['timestamp'], dtype='object'), - 'float64': Index(['AvgTicketPrice', 'DistanceKilometers', 'DistanceMiles',... - } - return self.mappings_capabilities[self.mappings_capabilities._source == True].groupby('pd_dtype').groups - - def pd_dtype - """ + mappings_capabilities: pd.Series with index values: + _source: bool + Is this field name a top-level source field? + ed_dtype: str + The Elasticsearch data type + pd_dtype: str + The pandas data type + searchable: bool + Is the field searchable in Elasticsearch? + aggregatable: bool + Is the field aggregatable in Elasticsearch? 
+ """ + return self._mappings_capabilities.loc[field_name] def source_field_pd_dtype(self, field_name): """ @@ -297,9 +306,9 @@ class Mappings(): pd_dtype = 'object' is_source_field = False - if field_name in self.source_field_pd_dtypes: + if field_name in self._source_field_pd_dtypes: is_source_field = True - pd_dtype = self.source_field_pd_dtypes[field_name] + pd_dtype = self._source_field_pd_dtypes[field_name] return is_source_field, pd_dtype @@ -316,7 +325,7 @@ class Mappings(): """ is_source_field = False - if field_name in self.source_field_pd_dtypes: + if field_name in self._source_field_pd_dtypes: is_source_field = True return is_source_field @@ -328,9 +337,9 @@ class Mappings(): numeric_source_fields: list of str List of source fields where pd_dtype == (int64 or float64) """ - return self.mappings_capabilities[(self.mappings_capabilities._source == True) & - ((self.mappings_capabilities.pd_dtype == 'int64') | - (self.mappings_capabilities.pd_dtype == 'float64'))].index.tolist() + return self._mappings_capabilities[(self._mappings_capabilities._source == True) & + ((self._mappings_capabilities.pd_dtype == 'int64') | + (self._mappings_capabilities.pd_dtype == 'float64'))].index.tolist() def source_fields(self): """ @@ -339,7 +348,7 @@ class Mappings(): source_fields: list of str List of source fields """ - return self.source_field_pd_dtypes.keys() + return self._source_field_pd_dtypes.keys() def count_source_fields(self): """ @@ -357,7 +366,7 @@ class Mappings(): dtypes: pd.Series Source field name + pd_dtype """ - return pd.Series(self.source_field_pd_dtypes) + return pd.Series(self._source_field_pd_dtypes) def get_dtype_counts(self): """ @@ -368,5 +377,5 @@ class Mappings(): get_dtype_counts : Series Series with the count of columns with each dtype. 
""" - return pd.Series(self.mappings_capabilities[self.mappings_capabilities._source == True].groupby('pd_dtype')[ + return pd.Series(self._mappings_capabilities[self._mappings_capabilities._source == True].groupby('pd_dtype')[ '_source'].count().to_dict()) diff --git a/eland/tests/client/test_mappings_pytest.py b/eland/tests/client/test_mappings_pytest.py index 0163ba6..c56994f 100644 --- a/eland/tests/client/test_mappings_pytest.py +++ b/eland/tests/client/test_mappings_pytest.py @@ -69,3 +69,22 @@ class TestMapping(TestData): expected_get_dtype_counts = pd.Series({'datetime64[ns]': 1, 'float64': 1, 'int64': 5, 'object': 11}) assert_series_equal(expected_get_dtype_counts, mappings.get_dtype_counts()) + + def test_mapping_capabilities(self): + mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) + + field_capabilities = mappings.field_capabilities('city') + + assert True == field_capabilities['_source'] + assert 'text' == field_capabilities['es_dtype'] + assert 'object' == field_capabilities['pd_dtype'] + assert True == field_capabilities['searchable'] + assert False == field_capabilities['aggregatable'] + + field_capabilities = mappings.field_capabilities('city.raw') + + assert False == field_capabilities['_source'] + assert 'keyword' == field_capabilities['es_dtype'] + assert 'object' == field_capabilities['pd_dtype'] + assert True == field_capabilities['searchable'] + assert True == field_capabilities['aggregatable'] diff --git a/eland/tests/common.py b/eland/tests/common.py index da3d5ce..25d1ad7 100644 --- a/eland/tests/common.py +++ b/eland/tests/common.py @@ -16,6 +16,7 @@ from eland.tests import FLIGHTS_DF_FILE_NAME, FLIGHTS_INDEX_NAME,\ _pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index() _pd_flights['timestamp'] = \ pd.to_datetime(_pd_flights['timestamp']) +_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int _ed_flights = ed.read_es(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME) _pd_ecommerce = 
pd.read_json(ECOMMERCE_DF_FILE_NAME).sort_index() @@ -24,6 +25,7 @@ _pd_ecommerce['order_date'] = \ _pd_ecommerce['products.created_on'] = \ _pd_ecommerce['products.created_on'].apply(lambda x: pd.to_datetime(x)) _pd_ecommerce.insert(2, 'customer_birth_date', None) +_pd_ecommerce.index = _pd_ecommerce.index.map(str) # make index 'object' not int _pd_ecommerce['customer_birth_date'].astype('datetime64') _ed_ecommerce = ed.read_es(ELASTICSEARCH_HOST, ECOMMERCE_INDEX_NAME) diff --git a/eland/tests/frame/__init__.py b/eland/tests/dataframe/__init__.py similarity index 100% rename from eland/tests/frame/__init__.py rename to eland/tests/dataframe/__init__.py diff --git a/eland/tests/dataframe/test_getitem_pytest.py b/eland/tests/dataframe/test_getitem_pytest.py new file mode 100644 index 0000000..0b6a5bf --- /dev/null +++ b/eland/tests/dataframe/test_getitem_pytest.py @@ -0,0 +1,63 @@ +# File called _pytest for PyCharm compatability +from eland.tests.common import TestData + +import pandas as pd +import io + +from pandas.util.testing import ( + assert_series_equal, assert_frame_equal) + +class TestDataFrameGetItem(TestData): + + def test_getitem_basic(self): + # Test 1 attribute + pd_carrier = self.pd_flights()['Carrier'] + ed_carrier = self.ed_flights()['Carrier'] + + # pandas returns a Series here + assert_frame_equal(pd.DataFrame(pd_carrier.head(100)), ed_carrier.head(100)) + + pd_3_items = self.pd_flights()[['Dest','Carrier','FlightDelay']] + ed_3_items = self.ed_flights()[['Dest','Carrier','FlightDelay']] + + assert_frame_equal(pd_3_items.head(100), ed_3_items.head(100)) + + # Test numerics + numerics = ['DistanceMiles', 'AvgTicketPrice', 'FlightTimeMin'] + ed_numerics = self.ed_flights()[numerics] + pd_numerics = self.pd_flights()[numerics] + + assert_frame_equal(pd_numerics.head(100), ed_numerics.head(100)) + + # just test headers + ed_numerics_describe = ed_numerics.describe() + assert ed_numerics_describe.columns.tolist() == numerics + + def 
test_getattr_basic(self): + # Test 1 attribute + pd_carrier = self.pd_flights().Carrier + #ed_carrier = self.ed_flights().Carrier + + print(type(pd_carrier)) + print(pd_carrier) + + def test_boolean(self): + # Test 1 attribute + pd_carrier = self.pd_flights()['Carrier == "Kibana Airlines"'] + #ed_carrier = self.ed_flights().Carrier + + print(type(pd_carrier)) + print(pd_carrier) + + + def test_loc(self): + pd = self.pd_flights().loc[10:15, ['Dest', 'Carrier']] + + print(type(pd)) + print(pd) + + pd = self.pd_flights().loc[10] + + print(type(pd)) + print(pd) + diff --git a/eland/tests/frame/test_indexing_pytest.py b/eland/tests/dataframe/test_indexing_pytest.py similarity index 70% rename from eland/tests/frame/test_indexing_pytest.py rename to eland/tests/dataframe/test_indexing_pytest.py index d926850..d3565d7 100644 --- a/eland/tests/frame/test_indexing_pytest.py +++ b/eland/tests/dataframe/test_indexing_pytest.py @@ -10,8 +10,8 @@ from pandas.util.testing import ( class TestDataFrameIndexing(TestData): def test_mapping(self): - ed_flights_mappings = pd.DataFrame(self.ed_flights().mappings.mappings_capabilities - [self.ed_flights().mappings.mappings_capabilities._source==True] + ed_flights_mappings = pd.DataFrame(self.ed_flights()._mappings._mappings_capabilities + [self.ed_flights()._mappings._mappings_capabilities._source==True] ['pd_dtype']) pd_flights_mappings = pd.DataFrame(self.pd_flights().dtypes, columns = ['pd_dtype']) @@ -25,6 +25,8 @@ class TestDataFrameIndexing(TestData): pd_flights_head = self.pd_flights().head() ed_flights_head = self.ed_flights().head() + print(ed_flights_head) + assert_frame_equal(pd_flights_head, ed_flights_head) pd_ecommerce_head = self.pd_ecommerce().head() @@ -32,10 +34,25 @@ class TestDataFrameIndexing(TestData): assert_frame_equal(pd_ecommerce_head, ed_ecommerce_head) + def test_tail(self): + pd_flights_tail = self.pd_flights().tail() + ed_flights_tail = self.ed_flights().tail() + + print(ed_flights_tail) + + 
assert_frame_equal(pd_flights_tail, ed_flights_tail) + + pd_ecommerce_tail = self.pd_ecommerce().tail() + ed_ecommerce_tail = self.ed_ecommerce().tail() + + assert_frame_equal(pd_ecommerce_tail, ed_ecommerce_tail) + def test_describe(self): pd_flights_describe = self.pd_flights().describe() ed_flights_describe = self.ed_flights().describe() + print(ed_flights_describe) + # TODO - this fails now as ES aggregations are approximate # if ES percentile agg uses # "hdr": { @@ -47,6 +64,8 @@ class TestDataFrameIndexing(TestData): pd_ecommerce_describe = self.pd_ecommerce().describe() ed_ecommerce_describe = self.ed_ecommerce().describe() + print(ed_ecommerce_describe) + # We don't compare ecommerce here as the default dtypes in pandas from read_json # don't match the mapping types. This is mainly because the products field is # nested and so can be treated as a multi-field in ES, but not in pandas @@ -57,52 +76,7 @@ class TestDataFrameIndexing(TestData): def test_to_string(self): print(self.ed_flights()) - - def test_getitem(self): - # Test 1 attribute - ed_carrier = self.ed_flights()['Carrier'] - - carrier_head = ed_carrier.head(5) - - carrier_head_expected = pd.DataFrame( - {'Carrier':[ - 'Kibana Airlines', - 'Logstash Airways', - 'Logstash Airways', - 'Kibana Airlines', - 'Kibana Airlines' - ]}) - - assert_frame_equal(carrier_head_expected, carrier_head) - - #carrier_to_string = ed_carrier.to_string() - #print(carrier_to_string) - - # Test multiple attributes (out of order) - ed_3_items = self.ed_flights()['Dest','Carrier','FlightDelay'] - - ed_3_items_head = ed_3_items.head(5) - - ed_3_items_expected = pd.DataFrame(dict( - Dest={0: 'Sydney Kingsford Smith International Airport', 1: 'Venice Marco Polo Airport', - 2: 'Venice Marco Polo Airport', 3: "Treviso-Sant'Angelo Airport", - 4: "Xi'an Xianyang International Airport"}, - Carrier={0: 'Kibana Airlines', 1: 'Logstash Airways', 2: 'Logstash Airways', 3: 'Kibana Airlines', - 4: 'Kibana Airlines'}, - FlightDelay={0: 
False, 1: False, 2: False, 3: True, 4: False})) - - assert_frame_equal(ed_3_items_expected, ed_3_items_head) - - #ed_3_items_to_string = ed_3_items.to_string() - #print(ed_3_items_to_string) - - # Test numerics - numerics = ['DistanceMiles', 'AvgTicketPrice', 'FlightTimeMin'] - ed_numerics = self.ed_flights()[numerics] - - # just test headers - ed_numerics_describe = ed_numerics.describe() - assert ed_numerics_describe.columns.tolist() == numerics + print(self.ed_flights().to_string()) def test_info(self): ed_flights_info_buf = io.StringIO() @@ -111,6 +85,8 @@ class TestDataFrameIndexing(TestData): self.ed_flights().info(buf=ed_flights_info_buf) self.pd_flights().info(buf=pd_flights_info_buf) + print(ed_flights_info_buf.getvalue()) + ed_flights_info = (ed_flights_info_buf.getvalue().splitlines()) pd_flights_info = (pd_flights_info_buf.getvalue().splitlines()) @@ -148,7 +124,7 @@ class TestDataFrameIndexing(TestData): assert_series_equal(pd_flights_get_dtype_counts, ed_flights_get_dtype_counts) - def test_properties(self): + def test_get_properties(self): pd_flights_shape = self.pd_flights().shape ed_flights_shape = self.ed_flights().shape @@ -164,3 +140,16 @@ class TestDataFrameIndexing(TestData): assert_series_equal(pd_flights_dtypes, ed_flights_dtypes) + def test_index(self): + pd_flights = self.pd_flights() + pd_flights_timestamp = pd_flights.set_index('timestamp') + pd_flights.info() + pd_flights_timestamp.info() + pd_flights.info() + + ed_flights = self.ed_flights() + ed_flights_timestamp = ed_flights.set_index('timestamp') + ed_flights.info() + ed_flights_timestamp.info() + ed_flights.info() + diff --git a/eland/tests/setup_tests.py b/eland/tests/setup_tests.py index b758a26..b60fa1e 100644 --- a/eland/tests/setup_tests.py +++ b/eland/tests/setup_tests.py @@ -33,7 +33,8 @@ def _setup_data(es): # make timestamp datetime 2018-01-01T12:09:35 #values['timestamp'] = datetime.strptime(values['timestamp'], '%Y-%m-%dT%H:%M:%S') - action = {'_index': index_name, 
'_source': values} + # Use integer as id field for repeatable results + action = {'_index': index_name, '_source': values, '_id': str(n)} actions.append(action) diff --git a/eland/tests/test.ipynb b/eland/tests/test.ipynb index a34a9bb..84e0121 100644 --- a/eland/tests/test.ipynb +++ b/eland/tests/test.ipynb @@ -424,37 +424,10 @@ "pd_df.describe()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Eland" - ] - }, { "cell_type": "code", "execution_count": 5, "metadata": {}, - "outputs": [], - "source": [ - "import eland as ed" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "ed_df = ed.read_es('localhost', 'flights')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": true - }, "outputs": [ { "data": { @@ -621,72 +594,4311 @@ "
13059 rows × 27 columns
\n", + "" + ], + "text/plain": [ + " AvgTicketPrice Cancelled Carrier \\\n", + "0 841.265642 False Kibana Airlines \n", + "1 882.982662 False Logstash Airways \n", + "2 190.636904 False Logstash Airways \n", + "3 181.694216 True Kibana Airlines \n", + "4 730.041778 False Kibana Airlines \n", + "5 418.152089 False JetBeats \n", + "6 180.246816 False JetBeats \n", + "7 585.184310 False Kibana Airlines \n", + "8 960.869736 True Kibana Airlines \n", + "9 296.877773 False Logstash Airways \n", + "10 906.437948 False JetBeats \n", + "11 704.463771 False Logstash Airways \n", + "12 922.499077 True Logstash Airways \n", + "13 374.959276 False Logstash Airways \n", + "14 552.917371 False Logstash Airways \n", + "15 566.487557 True Kibana Airlines \n", + "16 989.952787 True Logstash Airways \n", + "17 569.613255 False ES-Air \n", + "18 277.429707 False ES-Air \n", + "19 772.100846 False JetBeats \n", + "20 167.599922 False JetBeats \n", + "21 253.210065 False ES-Air \n", + "22 917.247620 False JetBeats \n", + "23 451.591176 False Logstash Airways \n", + "24 307.067201 False Logstash Airways \n", + "25 268.241596 False ES-Air \n", + "26 975.812632 True Kibana Airlines \n", + "27 134.214546 False JetBeats \n", + "28 988.897564 False Kibana Airlines \n", + "29 511.067220 False Logstash Airways \n", + "... ... ... ... 
\n", + "13029 795.905278 False Kibana Airlines \n", + "13030 863.388068 False Logstash Airways \n", + "13031 575.183008 False JetBeats \n", + "13032 817.368952 False JetBeats \n", + "13033 579.582455 False ES-Air \n", + "13034 1004.916638 False JetBeats \n", + "13035 357.562842 True Logstash Airways \n", + "13036 429.580539 False Logstash Airways \n", + "13037 729.788171 True ES-Air \n", + "13038 564.897695 False ES-Air \n", + "13039 1014.052787 False Logstash Airways \n", + "13040 455.243843 False ES-Air \n", + "13041 611.370232 False Logstash Airways \n", + "13042 595.961285 False JetBeats \n", + "13043 782.747648 False Logstash Airways \n", + "13044 891.117221 False JetBeats \n", + "13045 587.169921 False Logstash Airways \n", + "13046 739.132165 False Logstash Airways \n", + "13047 605.191876 False JetBeats \n", + "13048 361.767659 True Logstash Airways \n", + "13049 662.306992 False ES-Air \n", + "13050 630.779526 False JetBeats \n", + "13051 937.771279 True Logstash Airways \n", + "13052 1085.155339 False Logstash Airways \n", + "13053 1191.964104 False Logstash Airways \n", + "13054 1080.446279 False Logstash Airways \n", + "13055 646.612941 False Logstash Airways \n", + "13056 997.751876 False Logstash Airways \n", + "13057 1102.814465 False JetBeats \n", + "13058 858.144337 False JetBeats \n", + "\n", + " Dest DestAirportID \\\n", + "0 Sydney Kingsford Smith International Airport SYD \n", + "1 Venice Marco Polo Airport VE05 \n", + "2 Venice Marco Polo Airport VE05 \n", + "3 Treviso-Sant'Angelo Airport TV01 \n", + "4 Xi'an Xianyang International Airport XIY \n", + "5 Genoa Cristoforo Colombo Airport GE01 \n", + "6 Zurich Airport ZRH \n", + "7 Ottawa Macdonald-Cartier International Airport YOW \n", + "8 Rajiv Gandhi International Airport HYD \n", + "9 Treviso-Sant'Angelo Airport TV01 \n", + "10 Helsinki Vantaa Airport HEL \n", + "11 Vienna International Airport VIE \n", + "12 Shanghai Pudong International Airport PVG \n", + "13 Ottawa Macdonald-Cartier 
International Airport YOW \n", + "14 Luis Munoz Marin International Airport SJU \n", + "15 Cologne Bonn Airport CGN \n", + "16 Venice Marco Polo Airport VE05 \n", + "17 Ministro Pistarini International Airport EZE \n", + "18 Shanghai Pudong International Airport PVG \n", + "19 Indira Gandhi International Airport DEL \n", + "20 Wichita Mid Continent Airport ICT \n", + "21 Ottawa Macdonald-Cartier International Airport YOW \n", + "22 Itami Airport ITM \n", + "23 Vienna International Airport VIE \n", + "24 Charles de Gaulle International Airport CDG \n", + "25 Narita International Airport NRT \n", + "26 Itami Airport ITM \n", + "27 San Diego International Airport SAN \n", + "28 Verona Villafranca Airport VR10 \n", + "29 Zurich Airport ZRH \n", + "... ... ... \n", + "13029 Malpensa International Airport MI12 \n", + "13030 Xi'an Xianyang International Airport XIY \n", + "13031 Savannah Hilton Head International Airport SAV \n", + "13032 Syracuse Hancock International Airport SYR \n", + "13033 Tampa International Airport TPA \n", + "13034 Olenya Air Base XLMO \n", + "13035 Shanghai Pudong International Airport PVG \n", + "13036 Venice Marco Polo Airport VE05 \n", + "13037 Vienna International Airport VIE \n", + "13038 Pisa International Airport PI05 \n", + "13039 Vienna International Airport VIE \n", + "13040 London Luton Airport LTN \n", + "13041 Jorge Chavez International Airport LIM \n", + "13042 Ottawa Macdonald-Cartier International Airport YOW \n", + "13043 Xi'an Xianyang International Airport XIY \n", + "13044 Winnipeg / James Armstrong Richardson Internat... YWG \n", + "13045 Brisbane International Airport BNE \n", + "13046 Xi'an Xianyang International Airport XIY \n", + "13047 Portland International Jetport Airport PWM \n", + "13048 Dubai International Airport DXB \n", + "13049 Winnipeg / James Armstrong Richardson Internat... YWG \n", + "13050 Helsinki Vantaa Airport HEL \n", + "13051 Lester B. 
Pearson International Airport YYZ \n", + "13052 Melbourne International Airport MEL \n", + "13053 Zurich Airport ZRH \n", + "13054 Xi'an Xianyang International Airport XIY \n", + "13055 Zurich Airport ZRH \n", + "13056 Ukrainka Air Base XHBU \n", + "13057 Ministro Pistarini International Airport EZE \n", + "13058 Washington Dulles International Airport IAD \n", + "\n", + " DestCityName DestCountry \\\n", + "0 Sydney AU \n", + "1 Venice IT \n", + "2 Venice IT \n", + "3 Treviso IT \n", + "4 Xi'an CN \n", + "5 Genova IT \n", + "6 Zurich CH \n", + "7 Ottawa CA \n", + "8 Hyderabad IN \n", + "9 Treviso IT \n", + "10 Helsinki FI \n", + "11 Vienna AT \n", + "12 Shanghai CN \n", + "13 Ottawa CA \n", + "14 San Juan PR \n", + "15 Cologne DE \n", + "16 Venice IT \n", + "17 Buenos Aires AR \n", + "18 Shanghai CN \n", + "19 New Delhi IN \n", + "20 Wichita US \n", + "21 Ottawa CA \n", + "22 Osaka JP \n", + "23 Vienna AT \n", + "24 Paris FR \n", + "25 Tokyo JP \n", + "26 Osaka JP \n", + "27 San Diego US \n", + "28 Verona IT \n", + "29 Zurich CH \n", + "... ... ... 
\n", + "13029 Milan IT \n", + "13030 Xi'an CN \n", + "13031 Savannah US \n", + "13032 Syracuse US \n", + "13033 Tampa US \n", + "13034 Olenegorsk RU \n", + "13035 Shanghai CN \n", + "13036 Venice IT \n", + "13037 Vienna AT \n", + "13038 Pisa IT \n", + "13039 Vienna AT \n", + "13040 London GB \n", + "13041 Lima PE \n", + "13042 Ottawa CA \n", + "13043 Xi'an CN \n", + "13044 Winnipeg CA \n", + "13045 Brisbane AU \n", + "13046 Xi'an CN \n", + "13047 Portland US \n", + "13048 Dubai AE \n", + "13049 Winnipeg CA \n", + "13050 Helsinki FI \n", + "13051 Toronto CA \n", + "13052 Melbourne AU \n", + "13053 Zurich CH \n", + "13054 Xi'an CN \n", + "13055 Zurich CH \n", + "13056 Belogorsk RU \n", + "13057 Buenos Aires AR \n", + "13058 Washington US \n", + "\n", + " DestLocation DestRegion \\\n", + "0 {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n", + "1 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "3 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", + "4 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "5 {'lat': '44.4133', 'lon': '8.8375'} IT-42 \n", + "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "7 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", + "8 {'lat': '17.23131752', 'lon': '78.42985535'} SE-BD \n", + "9 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", + "10 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", + "11 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "12 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", + "13 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", + "14 {'lat': '18.43939972', 'lon': '-66.00180054'} PR-U-A \n", + "15 {'lat': '50.86589813', 'lon': '7.142739773'} DE-NW \n", + "16 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "17 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", + "18 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", + "19 {'lat': '28.5665', 'lon': '77.103104'} SE-BD \n", + "20 {'lat': '37.64989853', 
'lon': '-97.43309784'} US-KS \n", + "21 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", + "22 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", + "23 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "24 {'lat': '49.01279831', 'lon': '2.549999952'} FR-J \n", + "25 {'lat': '35.76470184', 'lon': '140.3860016'} SE-BD \n", + "26 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", + "27 {'lat': '32.73360062', 'lon': '-117.1900024'} US-CA \n", + "28 {'lat': '45.395699', 'lon': '10.8885'} IT-34 \n", + "29 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "... ... ... \n", + "13029 {'lat': '45.6306', 'lon': '8.72811'} IT-25 \n", + "13030 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "13031 {'lat': '32.12760162', 'lon': '-81.20210266'} US-GA \n", + "13032 {'lat': '43.11119843', 'lon': '-76.10630035'} US-NY \n", + "13033 {'lat': '27.97550011', 'lon': '-82.53320313'} US-FL \n", + "13034 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", + "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", + "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "13037 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "13038 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", + "13039 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "13040 {'lat': '51.87469864', 'lon': '-0.368333012'} GB-ENG \n", + "13041 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", + "13042 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", + "13043 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "13044 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", + "13045 {'lat': '-27.38419914', 'lon': '153.1170044'} SE-BD \n", + "13046 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "13047 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", + "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", + "13049 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", + "13050 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES 
\n", + "13051 {'lat': '43.67720032', 'lon': '-79.63059998'} CA-ON \n", + "13052 {'lat': '-37.673302', 'lon': '144.843002'} SE-BD \n", + "13053 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "13054 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", + "13055 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "13056 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", + "13057 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", + "13058 {'lat': '38.94449997', 'lon': '-77.45580292'} US-DC \n", + "\n", + " DestWeather ... FlightTimeMin \\\n", + "0 Rain ... 1030.770416 \n", + "1 Sunny ... 464.389481 \n", + "2 Cloudy ... 0.000000 \n", + "3 Clear ... 222.749059 \n", + "4 Clear ... 785.779071 \n", + "5 Thunder & Lightning ... 393.590441 \n", + "6 Hail ... 300.000000 \n", + "7 Clear ... 614.942480 \n", + "8 Cloudy ... 602.030591 \n", + "9 Rain ... 174.822216 \n", + "10 Rain ... 503.045170 \n", + "11 Cloudy ... 36.075018 \n", + "12 Clear ... 679.768391 \n", + "13 Rain ... 330.418282 \n", + "14 Clear ... 407.145031 \n", + "15 Sunny ... 656.712658 \n", + "16 Damaging Wind ... 773.030334 \n", + "17 Cloudy ... 704.716920 \n", + "18 Clear ... 355.957996 \n", + "19 Clear ... 875.114675 \n", + "20 Clear ... 373.966883 \n", + "21 Hail ... 130.667700 \n", + "22 Damaging Wind ... 574.495310 \n", + "23 Heavy Fog ... 579.728943 \n", + "24 Clear ... 50.157229 \n", + "25 Rain ... 527.567422 \n", + "26 Hail ... 386.259764 \n", + "27 Clear ... 24.479650 \n", + "28 Sunny ... 568.351033 \n", + "29 Rain ... 425.889194 \n", + "... ... ... ... \n", + "13029 Sunny ... 534.375826 \n", + "13030 Damaging Wind ... 141.172633 \n", + "13031 Thunder & Lightning ... 1113.137060 \n", + "13032 Rain ... 714.964864 \n", + "13033 Rain ... 234.929046 \n", + "13034 Clear ... 526.895776 \n", + "13035 Thunder & Lightning ... 0.000000 \n", + "13036 Sunny ... 150.000000 \n", + "13037 Rain ... 691.944839 \n", + "13038 Heavy Fog ... 567.387339 \n", + "13039 Thunder & Lightning ... 
690.092327 \n", + "13040 Cloudy ... 3.028293 \n", + "13041 Sunny ... 338.875531 \n", + "13042 Clear ... 375.129587 \n", + "13043 Clear ... 156.858481 \n", + "13044 Clear ... 354.106457 \n", + "13045 Rain ... 771.305442 \n", + "13046 Rain ... 542.955572 \n", + "13047 Thunder & Lightning ... 564.599857 \n", + "13048 Sunny ... 180.000000 \n", + "13049 Heavy Fog ... 835.954429 \n", + "13050 Sunny ... 451.755639 \n", + "13051 Sunny ... 507.451571 \n", + "13052 Cloudy ... 1044.451122 \n", + "13053 Hail ... 728.715904 \n", + "13054 Rain ... 402.929088 \n", + "13055 Rain ... 644.418029 \n", + "13056 Rain ... 937.540811 \n", + "13057 Hail ... 1697.404971 \n", + "13058 Heavy Fog ... 1610.761827 \n", + "\n", + " Origin OriginAirportID \\\n", + "0 Frankfurt am Main Airport FRA \n", + "1 Cape Town International Airport CPT \n", + "2 Venice Marco Polo Airport VE05 \n", + "3 Naples International Airport NA01 \n", + "4 Licenciado Benito Juarez International Airport AICM \n", + "5 Edmonton International Airport CYEG \n", + "6 Zurich Airport ZRH \n", + "7 Ciampino___G. B. Pastine International Airport RM12 \n", + "8 Milano Linate Airport MI11 \n", + "9 Sheremetyevo International Airport SVO \n", + "10 Albuquerque International Sunport Airport ABQ \n", + "11 Venice Marco Polo Airport VE05 \n", + "12 Licenciado Benito Juarez International Airport AICM \n", + "13 Naples International Airport NA01 \n", + "14 Ciampino___G. B. 
Pastine International Airport RM12 \n", + "15 Chengdu Shuangliu International Airport CTU \n", + "16 Licenciado Benito Juarez International Airport AICM \n", + "17 Cleveland Hopkins International Airport CLE \n", + "18 Olenya Air Base XLMO \n", + "19 Casper-Natrona County International Airport CPR \n", + "20 Erie International Tom Ridge Field ERI \n", + "21 Newark Liberty International Airport EWR \n", + "22 Copenhagen Kastrup Airport CPH \n", + "23 Seattle Tacoma International Airport SEA \n", + "24 Berlin-Tegel Airport TXL \n", + "25 Manchester Airport MAN \n", + "26 Helsinki Vantaa Airport HEL \n", + "27 Phoenix Sky Harbor International Airport PHX \n", + "28 New Chitose Airport CTS \n", + "29 Tulsa International Airport TUL \n", + "... ... ... \n", + "13029 Itami Airport ITM \n", + "13030 Tokyo Haneda International Airport HND \n", + "13031 OR Tambo International Airport JNB \n", + "13032 El Dorado International Airport BOG \n", + "13033 Jorge Chavez International Airport LIM \n", + "13034 Gimpo International Airport GMP \n", + "13035 Shanghai Pudong International Airport PVG \n", + "13036 Venice Marco Polo Airport VE05 \n", + "13037 Ukrainka Air Base XHBU \n", + "13038 OR Tambo International Airport JNB \n", + "13039 Montreal / Pierre Elliott Trudeau Internationa... YUL \n", + "13040 London Heathrow Airport LHR \n", + "13041 Casper-Natrona County International Airport CPR \n", + "13042 Frankfurt am Main Airport FRA \n", + "13043 Tokyo Haneda International Airport HND \n", + "13044 Vienna International Airport VIE \n", + "13045 Amsterdam Airport Schiphol AMS \n", + "13046 Winnipeg / James Armstrong Richardson Internat... 
YWG \n", + "13047 Jeju International Airport CJU \n", + "13048 Dubai International Airport DXB \n", + "13049 Ministro Pistarini International Airport EZE \n", + "13050 Beijing Capital International Airport PEK \n", + "13051 Leonardo da Vinci___Fiumicino Airport RM11 \n", + "13052 Bologna Guglielmo Marconi Airport BO08 \n", + "13053 Portland International Jetport Airport PWM \n", + "13054 Pisa International Airport PI05 \n", + "13055 Winnipeg / James Armstrong Richardson Internat... YWG \n", + "13056 Licenciado Benito Juarez International Airport AICM \n", + "13057 Itami Airport ITM \n", + "13058 Adelaide International Airport ADL \n", + "\n", + " OriginCityName OriginCountry \\\n", + "0 Frankfurt am Main DE \n", + "1 Cape Town ZA \n", + "2 Venice IT \n", + "3 Naples IT \n", + "4 Mexico City MX \n", + "5 Edmonton CA \n", + "6 Zurich CH \n", + "7 Rome IT \n", + "8 Milan IT \n", + "9 Moscow RU \n", + "10 Albuquerque US \n", + "11 Venice IT \n", + "12 Mexico City MX \n", + "13 Naples IT \n", + "14 Rome IT \n", + "15 Chengdu CN \n", + "16 Mexico City MX \n", + "17 Cleveland US \n", + "18 Olenegorsk RU \n", + "19 Casper US \n", + "20 Erie US \n", + "21 Newark US \n", + "22 Copenhagen DK \n", + "23 Seattle US \n", + "24 Berlin DE \n", + "25 Manchester GB \n", + "26 Helsinki FI \n", + "27 Phoenix US \n", + "28 Chitose / Tomakomai JP \n", + "29 Tulsa US \n", + "... ... ... 
\n", + "13029 Osaka JP \n", + "13030 Tokyo JP \n", + "13031 Johannesburg ZA \n", + "13032 Bogota CO \n", + "13033 Lima PE \n", + "13034 Seoul KR \n", + "13035 Shanghai CN \n", + "13036 Venice IT \n", + "13037 Belogorsk RU \n", + "13038 Johannesburg ZA \n", + "13039 Montreal CA \n", + "13040 London GB \n", + "13041 Casper US \n", + "13042 Frankfurt am Main DE \n", + "13043 Tokyo JP \n", + "13044 Vienna AT \n", + "13045 Amsterdam NL \n", + "13046 Winnipeg CA \n", + "13047 Jeju City KR \n", + "13048 Dubai AE \n", + "13049 Buenos Aires AR \n", + "13050 Beijing CN \n", + "13051 Rome IT \n", + "13052 Bologna IT \n", + "13053 Portland US \n", + "13054 Pisa IT \n", + "13055 Winnipeg CA \n", + "13056 Mexico City MX \n", + "13057 Osaka JP \n", + "13058 Adelaide AU \n", + "\n", + " OriginLocation OriginRegion \\\n", + "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", + "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n", + "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", + "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "5 {'lat': '53.30970001', 'lon': '-113.5800018'} CA-AB \n", + "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", + "7 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", + "8 {'lat': '45.445099', 'lon': '9.27674'} IT-25 \n", + "9 {'lat': '55.972599', 'lon': '37.4146'} RU-MOS \n", + "10 {'lat': '35.040199', 'lon': '-106.609001'} US-NM \n", + "11 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "12 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "13 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", + "14 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", + "15 {'lat': '30.57850075', 'lon': '103.9469986'} SE-BD \n", + "16 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "17 {'lat': '41.4117012', 'lon': '-81.84980011'} US-OH \n", + "18 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", + "19 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", + "20 {'lat': 
'42.08312701', 'lon': '-80.17386675'} US-PA \n", + "21 {'lat': '40.69250107', 'lon': '-74.16870117'} US-NJ \n", + "22 {'lat': '55.61790085', 'lon': '12.65600014'} DK-84 \n", + "23 {'lat': '47.44900131', 'lon': '-122.3089981'} US-WA \n", + "24 {'lat': '52.5597', 'lon': '13.2877'} DE-BE \n", + "25 {'lat': '53.35369873', 'lon': '-2.274950027'} GB-ENG \n", + "26 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", + "27 {'lat': '33.43429947', 'lon': '-112.012001'} US-AZ \n", + "28 {'lat': '42.77519989', 'lon': '141.6920013'} SE-BD \n", + "29 {'lat': '36.19839859', 'lon': '-95.88809967'} US-OK \n", + "... ... ... \n", + "13029 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", + "13030 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", + "13031 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", + "13032 {'lat': '4.70159', 'lon': '-74.1469'} CO-CUN \n", + "13033 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", + "13034 {'lat': '37.5583', 'lon': '126.791'} SE-BD \n", + "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", + "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", + "13037 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", + "13038 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", + "13039 {'lat': '45.47060013', 'lon': '-73.74079895'} CA-QC \n", + "13040 {'lat': '51.4706', 'lon': '-0.461941'} GB-ENG \n", + "13041 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", + "13042 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", + "13043 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", + "13044 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", + "13045 {'lat': '52.30860138', 'lon': '4.76388979'} NL-NH \n", + "13046 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", + "13047 {'lat': '33.51129913', 'lon': '126.4929962'} SE-BD \n", + "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", + "13049 {'lat': '-34.8222', 'lon': '-58.5358'} AR-B \n", + "13050 {'lat': '40.08010101', 'lon': '116.5849991'} SE-BD \n", + "13051 
{'lat': '41.8002778', 'lon': '12.2388889'} IT-62 \n", + "13052 {'lat': '44.5354', 'lon': '11.2887'} IT-45 \n", + "13053 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", + "13054 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", + "13055 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", + "13056 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", + "13057 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", + "13058 {'lat': '-34.945', 'lon': '138.531006'} SE-BD \n", + "\n", + " OriginWeather dayOfWeek timestamp \n", + "0 Sunny 0 2018-01-01 00:00:00 \n", + "1 Clear 0 2018-01-01 18:27:00 \n", + "2 Rain 0 2018-01-01 17:11:14 \n", + "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n", + "4 Damaging Wind 0 2018-01-01 05:13:00 \n", + "5 Rain 0 2018-01-01 01:43:03 \n", + "6 Clear 0 2018-01-01 13:49:53 \n", + "7 Thunder & Lightning 0 2018-01-01 04:54:59 \n", + "8 Heavy Fog 0 2018-01-01 12:09:35 \n", + "9 Cloudy 0 2018-01-01 12:09:35 \n", + "10 Rain 0 2018-01-01 22:06:14 \n", + "11 Rain 0 2018-01-01 11:52:34 \n", + "12 Heavy Fog 0 2018-01-01 02:13:46 \n", + "13 Rain 0 2018-01-01 14:21:13 \n", + "14 Cloudy 0 2018-01-01 17:42:53 \n", + "15 Thunder & Lightning 0 2018-01-01 19:55:32 \n", + "16 Thunder & Lightning 0 2018-01-01 07:49:27 \n", + "17 Rain 0 2018-01-01 01:30:47 \n", + "18 Hail 0 2018-01-01 07:58:17 \n", + "19 Cloudy 0 2018-01-01 00:02:06 \n", + "20 Cloudy 0 2018-01-01 01:08:20 \n", + "21 Clear 0 2018-01-01 01:08:20 \n", + "22 Sunny 0 2018-01-01 07:48:35 \n", + "23 Heavy Fog 0 2018-01-01 18:57:21 \n", + "24 Rain 0 2018-01-01 13:18:25 \n", + "25 Thunder & Lightning 0 2018-01-01 08:20:35 \n", + "26 Rain 0 2018-01-01 15:38:32 \n", + "27 Clear 0 2018-01-01 03:08:45 \n", + "28 Damaging Wind 0 2018-01-01 01:16:59 \n", + "29 Rain 0 2018-01-01 18:00:59 \n", + "... ... ... ... 
\n", + "13029 Sunny 6 2018-02-11 20:10:13 \n", + "13030 Clear 6 2018-02-11 18:59:53 \n", + "13031 Hail 6 2018-02-11 00:57:48 \n", + "13032 Thunder & Lightning 6 2018-02-11 12:02:49 \n", + "13033 Thunder & Lightning 6 2018-02-11 02:07:40 \n", + "13034 Sunny 6 2018-02-11 00:35:04 \n", + "13035 Thunder & Lightning 6 2018-02-11 11:19:12 \n", + "13036 Cloudy 6 2018-02-11 15:07:11 \n", + "13037 Damaging Wind 6 2018-02-11 10:24:42 \n", + "13038 Damaging Wind 6 2018-02-11 00:42:06 \n", + "13039 Thunder & Lightning 6 2018-02-11 10:56:31 \n", + "13040 Clear 6 2018-02-11 00:39:37 \n", + "13041 Rain 6 2018-02-11 10:24:30 \n", + "13042 Clear 6 2018-02-11 09:02:07 \n", + "13043 Thunder & Lightning 6 2018-02-11 04:45:06 \n", + "13044 Thunder & Lightning 6 2018-02-11 00:51:14 \n", + "13045 Sunny 6 2018-02-11 05:41:51 \n", + "13046 Hail 6 2018-02-11 10:02:21 \n", + "13047 Cloudy 6 2018-02-11 15:55:10 \n", + "13048 Hail 6 2018-02-11 04:11:14 \n", + "13049 Sunny 6 2018-02-11 10:13:32 \n", + "13050 Cloudy 6 2018-02-11 11:23:23 \n", + "13051 Hail 6 2018-02-11 01:13:50 \n", + "13052 Cloudy 6 2018-02-11 18:35:42 \n", + "13053 Clear 6 2018-02-11 19:02:10 \n", + "13054 Sunny 6 2018-02-11 20:42:25 \n", + "13055 Rain 6 2018-02-11 01:41:57 \n", + "13056 Sunny 6 2018-02-11 04:09:27 \n", + "13057 Hail 6 2018-02-11 08:28:21 \n", + "13058 Rain 6 2018-02-11 14:54:34 \n", + "\n", + "[13059 rows x 27 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd_df" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " | AvgTicketPrice | \n", + "Cancelled | \n", + "Carrier | \n", + "Dest | \n", + "DestAirportID | \n", + "DestCityName | \n", + "DestCountry | \n", + "DestLocation | \n", + "DestRegion | \n", + "DestWeather | \n", + "... 
| \n", + "FlightTimeMin | \n", + "Origin | \n", + "OriginAirportID | \n", + "OriginCityName | \n", + "OriginCountry | \n", + "OriginLocation | \n", + "OriginRegion | \n", + "OriginWeather | \n", + "dayOfWeek | \n", + "timestamp | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "841.265642 | \n", + "False | \n", + "Kibana Airlines | \n", + "Sydney Kingsford Smith International Airport | \n", + "SYD | \n", + "Sydney | \n", + "AU | \n", + "{'lat': '-33.94609833', 'lon': '151.177002'} | \n", + "SE-BD | \n", + "Rain | \n", + "... | \n", + "1030.770416 | \n", + "Frankfurt am Main Airport | \n", + "FRA | \n", + "Frankfurt am Main | \n", + "DE | \n", + "{'lat': '50.033333', 'lon': '8.570556'} | \n", + "DE-HE | \n", + "Sunny | \n", + "0 | \n", + "2018-01-01 00:00:00 | \n", + "
1 | \n", + "882.982662 | \n", + "False | \n", + "Logstash Airways | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Sunny | \n", + "... | \n", + "464.389481 | \n", + "Cape Town International Airport | \n", + "CPT | \n", + "Cape Town | \n", + "ZA | \n", + "{'lat': '-33.96480179', 'lon': '18.60169983'} | \n", + "SE-BD | \n", + "Clear | \n", + "0 | \n", + "2018-01-01 18:27:00 | \n", + "
2 | \n", + "190.636904 | \n", + "False | \n", + "Logstash Airways | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Cloudy | \n", + "... | \n", + "0.000000 | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 17:11:14 | \n", + "
3 | \n", + "181.694216 | \n", + "True | \n", + "Kibana Airlines | \n", + "Treviso-Sant'Angelo Airport | \n", + "TV01 | \n", + "Treviso | \n", + "IT | \n", + "{'lat': '45.648399', 'lon': '12.1944'} | \n", + "IT-34 | \n", + "Clear | \n", + "... | \n", + "222.749059 | \n", + "Naples International Airport | \n", + "NA01 | \n", + "Naples | \n", + "IT | \n", + "{'lat': '40.886002', 'lon': '14.2908'} | \n", + "IT-72 | \n", + "Thunder & Lightning | \n", + "0 | \n", + "2018-01-01 10:33:28 | \n", + "
4 | \n", + "730.041778 | \n", + "False | \n", + "Kibana Airlines | \n", + "Xi'an Xianyang International Airport | \n", + "XIY | \n", + "Xi'an | \n", + "CN | \n", + "{'lat': '34.447102', 'lon': '108.751999'} | \n", + "SE-BD | \n", + "Clear | \n", + "... | \n", + "785.779071 | \n", + "Licenciado Benito Juarez International Airport | \n", + "AICM | \n", + "Mexico City | \n", + "MX | \n", + "{'lat': '19.4363', 'lon': '-99.072098'} | \n", + "MX-DIF | \n", + "Damaging Wind | \n", + "0 | \n", + "2018-01-01 05:13:00 | \n", + "
5 | \n", + "418.152089 | \n", + "False | \n", + "JetBeats | \n", + "Genoa Cristoforo Colombo Airport | \n", + "GE01 | \n", + "Genova | \n", + "IT | \n", + "{'lat': '44.4133', 'lon': '8.8375'} | \n", + "IT-42 | \n", + "Thunder & Lightning | \n", + "... | \n", + "393.590441 | \n", + "Edmonton International Airport | \n", + "CYEG | \n", + "Edmonton | \n", + "CA | \n", + "{'lat': '53.30970001', 'lon': '-113.5800018'} | \n", + "CA-AB | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 01:43:03 | \n", + "
6 | \n", + "180.246816 | \n", + "False | \n", + "JetBeats | \n", + "Zurich Airport | \n", + "ZRH | \n", + "Zurich | \n", + "CH | \n", + "{'lat': '47.464699', 'lon': '8.54917'} | \n", + "CH-ZH | \n", + "Hail | \n", + "... | \n", + "300.000000 | \n", + "Zurich Airport | \n", + "ZRH | \n", + "Zurich | \n", + "CH | \n", + "{'lat': '47.464699', 'lon': '8.54917'} | \n", + "CH-ZH | \n", + "Clear | \n", + "0 | \n", + "2018-01-01 13:49:53 | \n", + "
7 | \n", + "585.184310 | \n", + "False | \n", + "Kibana Airlines | \n", + "Ottawa Macdonald-Cartier International Airport | \n", + "YOW | \n", + "Ottawa | \n", + "CA | \n", + "{'lat': '45.32249832', 'lon': '-75.66919708'} | \n", + "CA-ON | \n", + "Clear | \n", + "... | \n", + "614.942480 | \n", + "Ciampino___G. B. Pastine International Airport | \n", + "RM12 | \n", + "Rome | \n", + "IT | \n", + "{'lat': '41.7994', 'lon': '12.5949'} | \n", + "IT-62 | \n", + "Thunder & Lightning | \n", + "0 | \n", + "2018-01-01 04:54:59 | \n", + "
8 | \n", + "960.869736 | \n", + "True | \n", + "Kibana Airlines | \n", + "Rajiv Gandhi International Airport | \n", + "HYD | \n", + "Hyderabad | \n", + "IN | \n", + "{'lat': '17.23131752', 'lon': '78.42985535'} | \n", + "SE-BD | \n", + "Cloudy | \n", + "... | \n", + "602.030591 | \n", + "Milano Linate Airport | \n", + "MI11 | \n", + "Milan | \n", + "IT | \n", + "{'lat': '45.445099', 'lon': '9.27674'} | \n", + "IT-25 | \n", + "Heavy Fog | \n", + "0 | \n", + "2018-01-01 12:09:35 | \n", + "
9 | \n", + "296.877773 | \n", + "False | \n", + "Logstash Airways | \n", + "Treviso-Sant'Angelo Airport | \n", + "TV01 | \n", + "Treviso | \n", + "IT | \n", + "{'lat': '45.648399', 'lon': '12.1944'} | \n", + "IT-34 | \n", + "Rain | \n", + "... | \n", + "174.822216 | \n", + "Sheremetyevo International Airport | \n", + "SVO | \n", + "Moscow | \n", + "RU | \n", + "{'lat': '55.972599', 'lon': '37.4146'} | \n", + "RU-MOS | \n", + "Cloudy | \n", + "0 | \n", + "2018-01-01 12:09:35 | \n", + "
10 | \n", + "906.437948 | \n", + "False | \n", + "JetBeats | \n", + "Helsinki Vantaa Airport | \n", + "HEL | \n", + "Helsinki | \n", + "FI | \n", + "{'lat': '60.31719971', 'lon': '24.9633007'} | \n", + "FI-ES | \n", + "Rain | \n", + "... | \n", + "503.045170 | \n", + "Albuquerque International Sunport Airport | \n", + "ABQ | \n", + "Albuquerque | \n", + "US | \n", + "{'lat': '35.040199', 'lon': '-106.609001'} | \n", + "US-NM | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 22:06:14 | \n", + "
11 | \n", + "704.463771 | \n", + "False | \n", + "Logstash Airways | \n", + "Vienna International Airport | \n", + "VIE | \n", + "Vienna | \n", + "AT | \n", + "{'lat': '48.11029816', 'lon': '16.56970024'} | \n", + "AT-9 | \n", + "Cloudy | \n", + "... | \n", + "36.075018 | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 11:52:34 | \n", + "
12 | \n", + "922.499077 | \n", + "True | \n", + "Logstash Airways | \n", + "Shanghai Pudong International Airport | \n", + "PVG | \n", + "Shanghai | \n", + "CN | \n", + "{'lat': '31.14340019', 'lon': '121.8050003'} | \n", + "SE-BD | \n", + "Clear | \n", + "... | \n", + "679.768391 | \n", + "Licenciado Benito Juarez International Airport | \n", + "AICM | \n", + "Mexico City | \n", + "MX | \n", + "{'lat': '19.4363', 'lon': '-99.072098'} | \n", + "MX-DIF | \n", + "Heavy Fog | \n", + "0 | \n", + "2018-01-01 02:13:46 | \n", + "
13 | \n", + "374.959276 | \n", + "False | \n", + "Logstash Airways | \n", + "Ottawa Macdonald-Cartier International Airport | \n", + "YOW | \n", + "Ottawa | \n", + "CA | \n", + "{'lat': '45.32249832', 'lon': '-75.66919708'} | \n", + "CA-ON | \n", + "Rain | \n", + "... | \n", + "330.418282 | \n", + "Naples International Airport | \n", + "NA01 | \n", + "Naples | \n", + "IT | \n", + "{'lat': '40.886002', 'lon': '14.2908'} | \n", + "IT-72 | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 14:21:13 | \n", + "
14 | \n", + "552.917371 | \n", + "False | \n", + "Logstash Airways | \n", + "Luis Munoz Marin International Airport | \n", + "SJU | \n", + "San Juan | \n", + "PR | \n", + "{'lat': '18.43939972', 'lon': '-66.00180054'} | \n", + "PR-U-A | \n", + "Clear | \n", + "... | \n", + "407.145031 | \n", + "Ciampino___G. B. Pastine International Airport | \n", + "RM12 | \n", + "Rome | \n", + "IT | \n", + "{'lat': '41.7994', 'lon': '12.5949'} | \n", + "IT-62 | \n", + "Cloudy | \n", + "0 | \n", + "2018-01-01 17:42:53 | \n", + "
15 | \n", + "566.487557 | \n", + "True | \n", + "Kibana Airlines | \n", + "Cologne Bonn Airport | \n", + "CGN | \n", + "Cologne | \n", + "DE | \n", + "{'lat': '50.86589813', 'lon': '7.142739773'} | \n", + "DE-NW | \n", + "Sunny | \n", + "... | \n", + "656.712658 | \n", + "Chengdu Shuangliu International Airport | \n", + "CTU | \n", + "Chengdu | \n", + "CN | \n", + "{'lat': '30.57850075', 'lon': '103.9469986'} | \n", + "SE-BD | \n", + "Thunder & Lightning | \n", + "0 | \n", + "2018-01-01 19:55:32 | \n", + "
16 | \n", + "989.952787 | \n", + "True | \n", + "Logstash Airways | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Damaging Wind | \n", + "... | \n", + "773.030334 | \n", + "Licenciado Benito Juarez International Airport | \n", + "AICM | \n", + "Mexico City | \n", + "MX | \n", + "{'lat': '19.4363', 'lon': '-99.072098'} | \n", + "MX-DIF | \n", + "Thunder & Lightning | \n", + "0 | \n", + "2018-01-01 07:49:27 | \n", + "
17 | \n", + "569.613255 | \n", + "False | \n", + "ES-Air | \n", + "Ministro Pistarini International Airport | \n", + "EZE | \n", + "Buenos Aires | \n", + "AR | \n", + "{'lat': '-34.8222', 'lon': '-58.5358'} | \n", + "SE-BD | \n", + "Cloudy | \n", + "... | \n", + "704.716920 | \n", + "Cleveland Hopkins International Airport | \n", + "CLE | \n", + "Cleveland | \n", + "US | \n", + "{'lat': '41.4117012', 'lon': '-81.84980011'} | \n", + "US-OH | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 01:30:47 | \n", + "
18 | \n", + "277.429707 | \n", + "False | \n", + "ES-Air | \n", + "Shanghai Pudong International Airport | \n", + "PVG | \n", + "Shanghai | \n", + "CN | \n", + "{'lat': '31.14340019', 'lon': '121.8050003'} | \n", + "SE-BD | \n", + "Clear | \n", + "... | \n", + "355.957996 | \n", + "Olenya Air Base | \n", + "XLMO | \n", + "Olenegorsk | \n", + "RU | \n", + "{'lat': '68.15180206', 'lon': '33.46390152'} | \n", + "RU-MUR | \n", + "Hail | \n", + "0 | \n", + "2018-01-01 07:58:17 | \n", + "
19 | \n", + "772.100846 | \n", + "False | \n", + "JetBeats | \n", + "Indira Gandhi International Airport | \n", + "DEL | \n", + "New Delhi | \n", + "IN | \n", + "{'lat': '28.5665', 'lon': '77.103104'} | \n", + "SE-BD | \n", + "Clear | \n", + "... | \n", + "875.114675 | \n", + "Casper-Natrona County International Airport | \n", + "CPR | \n", + "Casper | \n", + "US | \n", + "{'lat': '42.90800095', 'lon': '-106.4639969'} | \n", + "US-WY | \n", + "Cloudy | \n", + "0 | \n", + "2018-01-01 00:02:06 | \n", + "
20 | \n", + "167.599922 | \n", + "False | \n", + "JetBeats | \n", + "Wichita Mid Continent Airport | \n", + "ICT | \n", + "Wichita | \n", + "US | \n", + "{'lat': '37.64989853', 'lon': '-97.43309784'} | \n", + "US-KS | \n", + "Clear | \n", + "... | \n", + "373.966883 | \n", + "Erie International Tom Ridge Field | \n", + "ERI | \n", + "Erie | \n", + "US | \n", + "{'lat': '42.08312701', 'lon': '-80.17386675'} | \n", + "US-PA | \n", + "Cloudy | \n", + "0 | \n", + "2018-01-01 01:08:20 | \n", + "
21 | \n", + "253.210065 | \n", + "False | \n", + "ES-Air | \n", + "Ottawa Macdonald-Cartier International Airport | \n", + "YOW | \n", + "Ottawa | \n", + "CA | \n", + "{'lat': '45.32249832', 'lon': '-75.66919708'} | \n", + "CA-ON | \n", + "Hail | \n", + "... | \n", + "130.667700 | \n", + "Newark Liberty International Airport | \n", + "EWR | \n", + "Newark | \n", + "US | \n", + "{'lat': '40.69250107', 'lon': '-74.16870117'} | \n", + "US-NJ | \n", + "Clear | \n", + "0 | \n", + "2018-01-01 01:08:20 | \n", + "
22 | \n", + "917.247620 | \n", + "False | \n", + "JetBeats | \n", + "Itami Airport | \n", + "ITM | \n", + "Osaka | \n", + "JP | \n", + "{'lat': '34.78549957', 'lon': '135.4380035'} | \n", + "SE-BD | \n", + "Damaging Wind | \n", + "... | \n", + "574.495310 | \n", + "Copenhagen Kastrup Airport | \n", + "CPH | \n", + "Copenhagen | \n", + "DK | \n", + "{'lat': '55.61790085', 'lon': '12.65600014'} | \n", + "DK-84 | \n", + "Sunny | \n", + "0 | \n", + "2018-01-01 07:48:35 | \n", + "
23 | \n", + "451.591176 | \n", + "False | \n", + "Logstash Airways | \n", + "Vienna International Airport | \n", + "VIE | \n", + "Vienna | \n", + "AT | \n", + "{'lat': '48.11029816', 'lon': '16.56970024'} | \n", + "AT-9 | \n", + "Heavy Fog | \n", + "... | \n", + "579.728943 | \n", + "Seattle Tacoma International Airport | \n", + "SEA | \n", + "Seattle | \n", + "US | \n", + "{'lat': '47.44900131', 'lon': '-122.3089981'} | \n", + "US-WA | \n", + "Heavy Fog | \n", + "0 | \n", + "2018-01-01 18:57:21 | \n", + "
24 | \n", + "307.067201 | \n", + "False | \n", + "Logstash Airways | \n", + "Charles de Gaulle International Airport | \n", + "CDG | \n", + "Paris | \n", + "FR | \n", + "{'lat': '49.01279831', 'lon': '2.549999952'} | \n", + "FR-J | \n", + "Clear | \n", + "... | \n", + "50.157229 | \n", + "Berlin-Tegel Airport | \n", + "TXL | \n", + "Berlin | \n", + "DE | \n", + "{'lat': '52.5597', 'lon': '13.2877'} | \n", + "DE-BE | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 13:18:25 | \n", + "
25 | \n", + "268.241596 | \n", + "False | \n", + "ES-Air | \n", + "Narita International Airport | \n", + "NRT | \n", + "Tokyo | \n", + "JP | \n", + "{'lat': '35.76470184', 'lon': '140.3860016'} | \n", + "SE-BD | \n", + "Rain | \n", + "... | \n", + "527.567422 | \n", + "Manchester Airport | \n", + "MAN | \n", + "Manchester | \n", + "GB | \n", + "{'lat': '53.35369873', 'lon': '-2.274950027'} | \n", + "GB-ENG | \n", + "Thunder & Lightning | \n", + "0 | \n", + "2018-01-01 08:20:35 | \n", + "
26 | \n", + "975.812632 | \n", + "True | \n", + "Kibana Airlines | \n", + "Itami Airport | \n", + "ITM | \n", + "Osaka | \n", + "JP | \n", + "{'lat': '34.78549957', 'lon': '135.4380035'} | \n", + "SE-BD | \n", + "Hail | \n", + "... | \n", + "386.259764 | \n", + "Helsinki Vantaa Airport | \n", + "HEL | \n", + "Helsinki | \n", + "FI | \n", + "{'lat': '60.31719971', 'lon': '24.9633007'} | \n", + "FI-ES | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 15:38:32 | \n", + "
27 | \n", + "134.214546 | \n", + "False | \n", + "JetBeats | \n", + "San Diego International Airport | \n", + "SAN | \n", + "San Diego | \n", + "US | \n", + "{'lat': '32.73360062', 'lon': '-117.1900024'} | \n", + "US-CA | \n", + "Clear | \n", + "... | \n", + "24.479650 | \n", + "Phoenix Sky Harbor International Airport | \n", + "PHX | \n", + "Phoenix | \n", + "US | \n", + "{'lat': '33.43429947', 'lon': '-112.012001'} | \n", + "US-AZ | \n", + "Clear | \n", + "0 | \n", + "2018-01-01 03:08:45 | \n", + "
28 | \n", + "988.897564 | \n", + "False | \n", + "Kibana Airlines | \n", + "Verona Villafranca Airport | \n", + "VR10 | \n", + "Verona | \n", + "IT | \n", + "{'lat': '45.395699', 'lon': '10.8885'} | \n", + "IT-34 | \n", + "Sunny | \n", + "... | \n", + "568.351033 | \n", + "New Chitose Airport | \n", + "CTS | \n", + "Chitose / Tomakomai | \n", + "JP | \n", + "{'lat': '42.77519989', 'lon': '141.6920013'} | \n", + "SE-BD | \n", + "Damaging Wind | \n", + "0 | \n", + "2018-01-01 01:16:59 | \n", + "
29 | \n", + "511.067220 | \n", + "False | \n", + "Logstash Airways | \n", + "Zurich Airport | \n", + "ZRH | \n", + "Zurich | \n", + "CH | \n", + "{'lat': '47.464699', 'lon': '8.54917'} | \n", + "CH-ZH | \n", + "Rain | \n", + "... | \n", + "425.889194 | \n", + "Tulsa International Airport | \n", + "TUL | \n", + "Tulsa | \n", + "US | \n", + "{'lat': '36.19839859', 'lon': '-95.88809967'} | \n", + "US-OK | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 18:00:59 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
13029 | \n", + "795.905278 | \n", + "False | \n", + "Kibana Airlines | \n", + "Malpensa International Airport | \n", + "MI12 | \n", + "Milan | \n", + "IT | \n", + "{'lat': '45.6306', 'lon': '8.72811'} | \n", + "IT-25 | \n", + "Sunny | \n", + "... | \n", + "534.375826 | \n", + "Itami Airport | \n", + "ITM | \n", + "Osaka | \n", + "JP | \n", + "{'lat': '34.78549957', 'lon': '135.4380035'} | \n", + "SE-BD | \n", + "Sunny | \n", + "6 | \n", + "2018-02-11 20:10:13 | \n", + "
13030 | \n", + "863.388068 | \n", + "False | \n", + "Logstash Airways | \n", + "Xi'an Xianyang International Airport | \n", + "XIY | \n", + "Xi'an | \n", + "CN | \n", + "{'lat': '34.447102', 'lon': '108.751999'} | \n", + "SE-BD | \n", + "Damaging Wind | \n", + "... | \n", + "141.172633 | \n", + "Tokyo Haneda International Airport | \n", + "HND | \n", + "Tokyo | \n", + "JP | \n", + "{'lat': '35.552299', 'lon': '139.779999'} | \n", + "SE-BD | \n", + "Clear | \n", + "6 | \n", + "2018-02-11 18:59:53 | \n", + "
13031 | \n", + "575.183008 | \n", + "False | \n", + "JetBeats | \n", + "Savannah Hilton Head International Airport | \n", + "SAV | \n", + "Savannah | \n", + "US | \n", + "{'lat': '32.12760162', 'lon': '-81.20210266'} | \n", + "US-GA | \n", + "Thunder & Lightning | \n", + "... | \n", + "1113.137060 | \n", + "OR Tambo International Airport | \n", + "JNB | \n", + "Johannesburg | \n", + "ZA | \n", + "{'lat': '-26.1392', 'lon': '28.246'} | \n", + "SE-BD | \n", + "Hail | \n", + "6 | \n", + "2018-02-11 00:57:48 | \n", + "
13032 | \n", + "817.368952 | \n", + "False | \n", + "JetBeats | \n", + "Syracuse Hancock International Airport | \n", + "SYR | \n", + "Syracuse | \n", + "US | \n", + "{'lat': '43.11119843', 'lon': '-76.10630035'} | \n", + "US-NY | \n", + "Rain | \n", + "... | \n", + "714.964864 | \n", + "El Dorado International Airport | \n", + "BOG | \n", + "Bogota | \n", + "CO | \n", + "{'lat': '4.70159', 'lon': '-74.1469'} | \n", + "CO-CUN | \n", + "Thunder & Lightning | \n", + "6 | \n", + "2018-02-11 12:02:49 | \n", + "
13033 | \n", + "579.582455 | \n", + "False | \n", + "ES-Air | \n", + "Tampa International Airport | \n", + "TPA | \n", + "Tampa | \n", + "US | \n", + "{'lat': '27.97550011', 'lon': '-82.53320313'} | \n", + "US-FL | \n", + "Rain | \n", + "... | \n", + "234.929046 | \n", + "Jorge Chavez International Airport | \n", + "LIM | \n", + "Lima | \n", + "PE | \n", + "{'lat': '-12.0219', 'lon': '-77.114304'} | \n", + "SE-BD | \n", + "Thunder & Lightning | \n", + "6 | \n", + "2018-02-11 02:07:40 | \n", + "
13034 | \n", + "1004.916638 | \n", + "False | \n", + "JetBeats | \n", + "Olenya Air Base | \n", + "XLMO | \n", + "Olenegorsk | \n", + "RU | \n", + "{'lat': '68.15180206', 'lon': '33.46390152'} | \n", + "RU-MUR | \n", + "Clear | \n", + "... | \n", + "526.895776 | \n", + "Gimpo International Airport | \n", + "GMP | \n", + "Seoul | \n", + "KR | \n", + "{'lat': '37.5583', 'lon': '126.791'} | \n", + "SE-BD | \n", + "Sunny | \n", + "6 | \n", + "2018-02-11 00:35:04 | \n", + "
13035 | \n", + "357.562842 | \n", + "True | \n", + "Logstash Airways | \n", + "Shanghai Pudong International Airport | \n", + "PVG | \n", + "Shanghai | \n", + "CN | \n", + "{'lat': '31.14340019', 'lon': '121.8050003'} | \n", + "SE-BD | \n", + "Thunder & Lightning | \n", + "... | \n", + "0.000000 | \n", + "Shanghai Pudong International Airport | \n", + "PVG | \n", + "Shanghai | \n", + "CN | \n", + "{'lat': '31.14340019', 'lon': '121.8050003'} | \n", + "SE-BD | \n", + "Thunder & Lightning | \n", + "6 | \n", + "2018-02-11 11:19:12 | \n", + "
13036 | \n", + "429.580539 | \n", + "False | \n", + "Logstash Airways | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Sunny | \n", + "... | \n", + "150.000000 | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Cloudy | \n", + "6 | \n", + "2018-02-11 15:07:11 | \n", + "
13037 | \n", + "729.788171 | \n", + "True | \n", + "ES-Air | \n", + "Vienna International Airport | \n", + "VIE | \n", + "Vienna | \n", + "AT | \n", + "{'lat': '48.11029816', 'lon': '16.56970024'} | \n", + "AT-9 | \n", + "Rain | \n", + "... | \n", + "691.944839 | \n", + "Ukrainka Air Base | \n", + "XHBU | \n", + "Belogorsk | \n", + "RU | \n", + "{'lat': '51.169997', 'lon': '128.445007'} | \n", + "RU-AMU | \n", + "Damaging Wind | \n", + "6 | \n", + "2018-02-11 10:24:42 | \n", + "
13038 | \n", + "564.897695 | \n", + "False | \n", + "ES-Air | \n", + "Pisa International Airport | \n", + "PI05 | \n", + "Pisa | \n", + "IT | \n", + "{'lat': '43.683899', 'lon': '10.3927'} | \n", + "IT-52 | \n", + "Heavy Fog | \n", + "... | \n", + "567.387339 | \n", + "OR Tambo International Airport | \n", + "JNB | \n", + "Johannesburg | \n", + "ZA | \n", + "{'lat': '-26.1392', 'lon': '28.246'} | \n", + "SE-BD | \n", + "Damaging Wind | \n", + "6 | \n", + "2018-02-11 00:42:06 | \n", + "
13039 | \n", + "1014.052787 | \n", + "False | \n", + "Logstash Airways | \n", + "Vienna International Airport | \n", + "VIE | \n", + "Vienna | \n", + "AT | \n", + "{'lat': '48.11029816', 'lon': '16.56970024'} | \n", + "AT-9 | \n", + "Thunder & Lightning | \n", + "... | \n", + "690.092327 | \n", + "Montreal / Pierre Elliott Trudeau Internationa... | \n", + "YUL | \n", + "Montreal | \n", + "CA | \n", + "{'lat': '45.47060013', 'lon': '-73.74079895'} | \n", + "CA-QC | \n", + "Thunder & Lightning | \n", + "6 | \n", + "2018-02-11 10:56:31 | \n", + "
13040 | \n", + "455.243843 | \n", + "False | \n", + "ES-Air | \n", + "London Luton Airport | \n", + "LTN | \n", + "London | \n", + "GB | \n", + "{'lat': '51.87469864', 'lon': '-0.368333012'} | \n", + "GB-ENG | \n", + "Cloudy | \n", + "... | \n", + "3.028293 | \n", + "London Heathrow Airport | \n", + "LHR | \n", + "London | \n", + "GB | \n", + "{'lat': '51.4706', 'lon': '-0.461941'} | \n", + "GB-ENG | \n", + "Clear | \n", + "6 | \n", + "2018-02-11 00:39:37 | \n", + "
13041 | \n", + "611.370232 | \n", + "False | \n", + "Logstash Airways | \n", + "Jorge Chavez International Airport | \n", + "LIM | \n", + "Lima | \n", + "PE | \n", + "{'lat': '-12.0219', 'lon': '-77.114304'} | \n", + "SE-BD | \n", + "Sunny | \n", + "... | \n", + "338.875531 | \n", + "Casper-Natrona County International Airport | \n", + "CPR | \n", + "Casper | \n", + "US | \n", + "{'lat': '42.90800095', 'lon': '-106.4639969'} | \n", + "US-WY | \n", + "Rain | \n", + "6 | \n", + "2018-02-11 10:24:30 | \n", + "
13042 | \n", + "595.961285 | \n", + "False | \n", + "JetBeats | \n", + "Ottawa Macdonald-Cartier International Airport | \n", + "YOW | \n", + "Ottawa | \n", + "CA | \n", + "{'lat': '45.32249832', 'lon': '-75.66919708'} | \n", + "CA-ON | \n", + "Clear | \n", + "... | \n", + "375.129587 | \n", + "Frankfurt am Main Airport | \n", + "FRA | \n", + "Frankfurt am Main | \n", + "DE | \n", + "{'lat': '50.033333', 'lon': '8.570556'} | \n", + "DE-HE | \n", + "Clear | \n", + "6 | \n", + "2018-02-11 09:02:07 | \n", + "
13043 | \n", + "782.747648 | \n", + "False | \n", + "Logstash Airways | \n", + "Xi'an Xianyang International Airport | \n", + "XIY | \n", + "Xi'an | \n", + "CN | \n", + "{'lat': '34.447102', 'lon': '108.751999'} | \n", + "SE-BD | \n", + "Clear | \n", + "... | \n", + "156.858481 | \n", + "Tokyo Haneda International Airport | \n", + "HND | \n", + "Tokyo | \n", + "JP | \n", + "{'lat': '35.552299', 'lon': '139.779999'} | \n", + "SE-BD | \n", + "Thunder & Lightning | \n", + "6 | \n", + "2018-02-11 04:45:06 | \n", + "
13044 | \n", + "891.117221 | \n", + "False | \n", + "JetBeats | \n", + "Winnipeg / James Armstrong Richardson Internat... | \n", + "YWG | \n", + "Winnipeg | \n", + "CA | \n", + "{'lat': '49.90999985', 'lon': '-97.23989868'} | \n", + "CA-MB | \n", + "Clear | \n", + "... | \n", + "354.106457 | \n", + "Vienna International Airport | \n", + "VIE | \n", + "Vienna | \n", + "AT | \n", + "{'lat': '48.11029816', 'lon': '16.56970024'} | \n", + "AT-9 | \n", + "Thunder & Lightning | \n", + "6 | \n", + "2018-02-11 00:51:14 | \n", + "
13045 | \n", + "587.169921 | \n", + "False | \n", + "Logstash Airways | \n", + "Brisbane International Airport | \n", + "BNE | \n", + "Brisbane | \n", + "AU | \n", + "{'lat': '-27.38419914', 'lon': '153.1170044'} | \n", + "SE-BD | \n", + "Rain | \n", + "... | \n", + "771.305442 | \n", + "Amsterdam Airport Schiphol | \n", + "AMS | \n", + "Amsterdam | \n", + "NL | \n", + "{'lat': '52.30860138', 'lon': '4.76388979'} | \n", + "NL-NH | \n", + "Sunny | \n", + "6 | \n", + "2018-02-11 05:41:51 | \n", + "
13046 | \n", + "739.132165 | \n", + "False | \n", + "Logstash Airways | \n", + "Xi'an Xianyang International Airport | \n", + "XIY | \n", + "Xi'an | \n", + "CN | \n", + "{'lat': '34.447102', 'lon': '108.751999'} | \n", + "SE-BD | \n", + "Rain | \n", + "... | \n", + "542.955572 | \n", + "Winnipeg / James Armstrong Richardson Internat... | \n", + "YWG | \n", + "Winnipeg | \n", + "CA | \n", + "{'lat': '49.90999985', 'lon': '-97.23989868'} | \n", + "CA-MB | \n", + "Hail | \n", + "6 | \n", + "2018-02-11 10:02:21 | \n", + "
13047 | \n", + "605.191876 | \n", + "False | \n", + "JetBeats | \n", + "Portland International Jetport Airport | \n", + "PWM | \n", + "Portland | \n", + "US | \n", + "{'lat': '43.64619827', 'lon': '-70.30930328'} | \n", + "US-ME | \n", + "Thunder & Lightning | \n", + "... | \n", + "564.599857 | \n", + "Jeju International Airport | \n", + "CJU | \n", + "Jeju City | \n", + "KR | \n", + "{'lat': '33.51129913', 'lon': '126.4929962'} | \n", + "SE-BD | \n", + "Cloudy | \n", + "6 | \n", + "2018-02-11 15:55:10 | \n", + "
13048 | \n", + "361.767659 | \n", + "True | \n", + "Logstash Airways | \n", + "Dubai International Airport | \n", + "DXB | \n", + "Dubai | \n", + "AE | \n", + "{'lat': '25.25279999', 'lon': '55.36439896'} | \n", + "SE-BD | \n", + "Sunny | \n", + "... | \n", + "180.000000 | \n", + "Dubai International Airport | \n", + "DXB | \n", + "Dubai | \n", + "AE | \n", + "{'lat': '25.25279999', 'lon': '55.36439896'} | \n", + "SE-BD | \n", + "Hail | \n", + "6 | \n", + "2018-02-11 04:11:14 | \n", + "
13049 | \n", + "662.306992 | \n", + "False | \n", + "ES-Air | \n", + "Winnipeg / James Armstrong Richardson Internat... | \n", + "YWG | \n", + "Winnipeg | \n", + "CA | \n", + "{'lat': '49.90999985', 'lon': '-97.23989868'} | \n", + "CA-MB | \n", + "Heavy Fog | \n", + "... | \n", + "835.954429 | \n", + "Ministro Pistarini International Airport | \n", + "EZE | \n", + "Buenos Aires | \n", + "AR | \n", + "{'lat': '-34.8222', 'lon': '-58.5358'} | \n", + "AR-B | \n", + "Sunny | \n", + "6 | \n", + "2018-02-11 10:13:32 | \n", + "
13050 | \n", + "630.779526 | \n", + "False | \n", + "JetBeats | \n", + "Helsinki Vantaa Airport | \n", + "HEL | \n", + "Helsinki | \n", + "FI | \n", + "{'lat': '60.31719971', 'lon': '24.9633007'} | \n", + "FI-ES | \n", + "Sunny | \n", + "... | \n", + "451.755639 | \n", + "Beijing Capital International Airport | \n", + "PEK | \n", + "Beijing | \n", + "CN | \n", + "{'lat': '40.08010101', 'lon': '116.5849991'} | \n", + "SE-BD | \n", + "Cloudy | \n", + "6 | \n", + "2018-02-11 11:23:23 | \n", + "
13051 | \n", + "937.771279 | \n", + "True | \n", + "Logstash Airways | \n", + "Lester B. Pearson International Airport | \n", + "YYZ | \n", + "Toronto | \n", + "CA | \n", + "{'lat': '43.67720032', 'lon': '-79.63059998'} | \n", + "CA-ON | \n", + "Sunny | \n", + "... | \n", + "507.451571 | \n", + "Leonardo da Vinci___Fiumicino Airport | \n", + "RM11 | \n", + "Rome | \n", + "IT | \n", + "{'lat': '41.8002778', 'lon': '12.2388889'} | \n", + "IT-62 | \n", + "Hail | \n", + "6 | \n", + "2018-02-11 01:13:50 | \n", + "
13052 | \n", + "1085.155339 | \n", + "False | \n", + "Logstash Airways | \n", + "Melbourne International Airport | \n", + "MEL | \n", + "Melbourne | \n", + "AU | \n", + "{'lat': '-37.673302', 'lon': '144.843002'} | \n", + "SE-BD | \n", + "Cloudy | \n", + "... | \n", + "1044.451122 | \n", + "Bologna Guglielmo Marconi Airport | \n", + "BO08 | \n", + "Bologna | \n", + "IT | \n", + "{'lat': '44.5354', 'lon': '11.2887'} | \n", + "IT-45 | \n", + "Cloudy | \n", + "6 | \n", + "2018-02-11 18:35:42 | \n", + "
13053 | \n", + "1191.964104 | \n", + "False | \n", + "Logstash Airways | \n", + "Zurich Airport | \n", + "ZRH | \n", + "Zurich | \n", + "CH | \n", + "{'lat': '47.464699', 'lon': '8.54917'} | \n", + "CH-ZH | \n", + "Hail | \n", + "... | \n", + "728.715904 | \n", + "Portland International Jetport Airport | \n", + "PWM | \n", + "Portland | \n", + "US | \n", + "{'lat': '43.64619827', 'lon': '-70.30930328'} | \n", + "US-ME | \n", + "Clear | \n", + "6 | \n", + "2018-02-11 19:02:10 | \n", + "
13054 | \n", + "1080.446279 | \n", + "False | \n", + "Logstash Airways | \n", + "Xi'an Xianyang International Airport | \n", + "XIY | \n", + "Xi'an | \n", + "CN | \n", + "{'lat': '34.447102', 'lon': '108.751999'} | \n", + "SE-BD | \n", + "Rain | \n", + "... | \n", + "402.929088 | \n", + "Pisa International Airport | \n", + "PI05 | \n", + "Pisa | \n", + "IT | \n", + "{'lat': '43.683899', 'lon': '10.3927'} | \n", + "IT-52 | \n", + "Sunny | \n", + "6 | \n", + "2018-02-11 20:42:25 | \n", + "
13055 | \n", + "646.612941 | \n", + "False | \n", + "Logstash Airways | \n", + "Zurich Airport | \n", + "ZRH | \n", + "Zurich | \n", + "CH | \n", + "{'lat': '47.464699', 'lon': '8.54917'} | \n", + "CH-ZH | \n", + "Rain | \n", + "... | \n", + "644.418029 | \n", + "Winnipeg / James Armstrong Richardson Internat... | \n", + "YWG | \n", + "Winnipeg | \n", + "CA | \n", + "{'lat': '49.90999985', 'lon': '-97.23989868'} | \n", + "CA-MB | \n", + "Rain | \n", + "6 | \n", + "2018-02-11 01:41:57 | \n", + "
13056 | \n", + "997.751876 | \n", + "False | \n", + "Logstash Airways | \n", + "Ukrainka Air Base | \n", + "XHBU | \n", + "Belogorsk | \n", + "RU | \n", + "{'lat': '51.169997', 'lon': '128.445007'} | \n", + "RU-AMU | \n", + "Rain | \n", + "... | \n", + "937.540811 | \n", + "Licenciado Benito Juarez International Airport | \n", + "AICM | \n", + "Mexico City | \n", + "MX | \n", + "{'lat': '19.4363', 'lon': '-99.072098'} | \n", + "MX-DIF | \n", + "Sunny | \n", + "6 | \n", + "2018-02-11 04:09:27 | \n", + "
13057 | \n", + "1102.814465 | \n", + "False | \n", + "JetBeats | \n", + "Ministro Pistarini International Airport | \n", + "EZE | \n", + "Buenos Aires | \n", + "AR | \n", + "{'lat': '-34.8222', 'lon': '-58.5358'} | \n", + "SE-BD | \n", + "Hail | \n", + "... | \n", + "1697.404971 | \n", + "Itami Airport | \n", + "ITM | \n", + "Osaka | \n", + "JP | \n", + "{'lat': '34.78549957', 'lon': '135.4380035'} | \n", + "SE-BD | \n", + "Hail | \n", + "6 | \n", + "2018-02-11 08:28:21 | \n", + "
13058 | \n", + "858.144337 | \n", + "False | \n", + "JetBeats | \n", + "Washington Dulles International Airport | \n", + "IAD | \n", + "Washington | \n", + "US | \n", + "{'lat': '38.94449997', 'lon': '-77.45580292'} | \n", + "US-DC | \n", + "Heavy Fog | \n", + "... | \n", + "1610.761827 | \n", + "Adelaide International Airport | \n", + "ADL | \n", + "Adelaide | \n", + "AU | \n", + "{'lat': '-34.945', 'lon': '138.531006'} | \n", + "SE-BD | \n", + "Rain | \n", + "6 | \n", + "2018-02-11 14:54:34 | \n", + "
13059 rows × 27 columns
\n", + "\n", + " | AvgTicketPrice | \n", + "Cancelled | \n", + "Carrier | \n", + "Dest | \n", + "DestAirportID | \n", + "DestCityName | \n", + "DestCountry | \n", + "DestLocation | \n", + "DestRegion | \n", + "DestWeather | \n", + "... | \n", + "FlightTimeMin | \n", + "Origin | \n", + "OriginAirportID | \n", + "OriginCityName | \n", + "OriginCountry | \n", + "OriginLocation | \n", + "OriginRegion | \n", + "OriginWeather | \n", + "dayOfWeek | \n", + "timestamp | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
PNr3fWsBVUchcQJXWbLE | \n", + "841.265642 | \n", + "False | \n", + "Kibana Airlines | \n", + "Sydney Kingsford Smith International Airport | \n", + "SYD | \n", + "Sydney | \n", + "AU | \n", + "{'lat': '-33.94609833', 'lon': '151.177002'} | \n", + "SE-BD | \n", + "Rain | \n", + "... | \n", + "1030.770416 | \n", + "Frankfurt am Main Airport | \n", + "FRA | \n", + "Frankfurt am Main | \n", + "DE | \n", + "{'lat': '50.033333', 'lon': '8.570556'} | \n", + "DE-HE | \n", + "Sunny | \n", + "0 | \n", + "2018-01-01 00:00:00 | \n", + "
Pdr3fWsBVUchcQJXWbLE | \n", + "882.982662 | \n", + "False | \n", + "Logstash Airways | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Sunny | \n", + "... | \n", + "464.389481 | \n", + "Cape Town International Airport | \n", + "CPT | \n", + "Cape Town | \n", + "ZA | \n", + "{'lat': '-33.96480179', 'lon': '18.60169983'} | \n", + "SE-BD | \n", + "Clear | \n", + "0 | \n", + "2018-01-01 18:27:00 | \n", + "
Ptr3fWsBVUchcQJXWbLE | \n", + "190.636904 | \n", + "False | \n", + "Logstash Airways | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Cloudy | \n", + "... | \n", + "0.000000 | \n", + "Venice Marco Polo Airport | \n", + "VE05 | \n", + "Venice | \n", + "IT | \n", + "{'lat': '45.505299', 'lon': '12.3519'} | \n", + "IT-34 | \n", + "Rain | \n", + "0 | \n", + "2018-01-01 17:11:14 | \n", + "
P9r3fWsBVUchcQJXWbLE | \n", + "181.694216 | \n", + "True | \n", + "Kibana Airlines | \n", + "Treviso-Sant'Angelo Airport | \n", + "TV01 | \n", + "Treviso | \n", + "IT | \n", + "{'lat': '45.648399', 'lon': '12.1944'} | \n", + "IT-34 | \n", + "Clear | \n", + "... | \n", + "222.749059 | \n", + "Naples International Airport | \n", + "NA01 | \n", + "Naples | \n", + "IT | \n", + "{'lat': '40.886002', 'lon': '14.2908'} | \n", + "IT-72 | \n", + "Thunder & Lightning | \n", + "0 | \n", + "2018-01-01 10:33:28 | \n", + "
QNr3fWsBVUchcQJXWbLE | \n", + "730.041778 | \n", + "False | \n", + "Kibana Airlines | \n", + "Xi'an Xianyang International Airport | \n", + "XIY | \n", + "Xi'an | \n", + "CN | \n", + "{'lat': '34.447102', 'lon': '108.751999'} | \n", + "SE-BD | \n", + "Clear | \n", + "... | \n", + "785.779071 | \n", + "Licenciado Benito Juarez International Airport | \n", + "AICM | \n", + "Mexico City | \n", + "MX | \n", + "{'lat': '19.4363', 'lon': '-99.072098'} | \n", + "MX-DIF | \n", + "Damaging Wind | \n", + "0 | \n", + "2018-01-01 05:13:00 | \n", + "
5 rows × 27 columns
\n", "\n", - " | Carrier | \n", - "Dest | \n", - "OriginRegion | \n", - "
---|---|---|---|
0 | \n", - "Kibana Airlines | \n", - "Sydney Kingsford Smith International Airport | \n", - "DE-HE | \n", - "
1 | \n", - "Logstash Airways | \n", - "Venice Marco Polo Airport | \n", - "SE-BD | \n", - "
2 | \n", - "Logstash Airways | \n", - "Venice Marco Polo Airport | \n", - "IT-34 | \n", - "
3 | \n", - "Kibana Airlines | \n", - "Treviso-Sant'Angelo Airport | \n", - "IT-72 | \n", - "
4 | \n", - "Kibana Airlines | \n", - "Xi'an Xianyang International Airport | \n", - "MX-DIF | \n", - "
5 | \n", - "JetBeats | \n", - "Genoa Cristoforo Colombo Airport | \n", - "CA-AB | \n", - "
6 | \n", - "JetBeats | \n", - "Zurich Airport | \n", - "CH-ZH | \n", - "
7 | \n", - "Kibana Airlines | \n", - "Ottawa Macdonald-Cartier International Airport | \n", - "IT-62 | \n", - "
8 | \n", - "Kibana Airlines | \n", - "Rajiv Gandhi International Airport | \n", - "IT-25 | \n", - "
9 | \n", - "Logstash Airways | \n", - "Treviso-Sant'Angelo Airport | \n", - "RU-MOS | \n", - "