diff --git a/eland/__init__.py b/eland/__init__.py index eee58ae..bf309ea 100644 --- a/eland/__init__.py +++ b/eland/__init__.py @@ -1,7 +1,15 @@ +import os + +# Set modin to pandas to avoid starting ray or other +os.environ["MODIN_ENGINE"] = 'python' +os.environ["MODIN_BACKEND"] = 'pandas' + from .client import * -from .ndframe import * from .index import * from .mappings import * +from .operations import * +from .query_compiler import * +from .ndframe import * from .series import * from .dataframe import * from .utils import * diff --git a/eland/client.py b/eland/client.py index 3b1231f..5806481 100644 --- a/eland/client.py +++ b/eland/client.py @@ -1,37 +1,34 @@ from elasticsearch import Elasticsearch from elasticsearch import helpers -class Client(): +class Client: """ eland client - implemented as facade to control access to Elasticsearch methods """ def __init__(self, es=None): if isinstance(es, Elasticsearch): - self.es = es + self._es = es elif isinstance(es, Client): - self.es = es.es + self._es = es._es else: - self.es = Elasticsearch(es) + self._es = Elasticsearch(es) - def info(self): - return self.es.info() - - def indices(self): - return self.es.indices + def get_mapping(self, **kwargs): + return self._es.indices.get_mapping(**kwargs) def bulk(self, actions, refresh=False): - return helpers.bulk(self.es, actions, refresh=refresh) + return helpers.bulk(self._es, actions, refresh=refresh) def scan(self, **kwargs): - return helpers.scan(self.es, **kwargs) + return helpers.scan(self._es, **kwargs) def search(self, **kwargs): - return self.es.search(**kwargs) + return self._es.search(**kwargs) def field_caps(self, **kwargs): - return self.es.field_caps(**kwargs) + return self._es.field_caps(**kwargs) def count(self, **kwargs): - count_json = self.es.count(**kwargs) + count_json = self._es.count(**kwargs) return count_json['count'] diff --git a/eland/dataframe.py b/eland/dataframe.py index b4ed1c4..a7e45c4 100644 --- a/eland/dataframe.py +++ b/eland/dataframe.py @@ -1,394 +1,58 @@ -""" -DataFrame ---------- -An efficient 2D container for potentially mixed-type time series or other -labeled data series. - -The underlying data resides in Elasticsearch and the API aligns as much as -possible with pandas.DataFrame API. - -This allows the eland.DataFrame to access large datasets stored in Elasticsearch, -without storing the dataset in local memory. - -Implementation Details ----------------------- - -Elasticsearch indexes can be configured in many different ways, and these indexes -utilise different data structures to pandas.DataFrame. - -eland.DataFrame operations that return individual rows (e.g. df.head()) return -_source data. If _source is not enabled, this data is not accessible. - -Similarly, only Elasticsearch searchable fields can be searched or filtered, and -only Elasticsearch aggregatable fields can be aggregated or grouped. - -""" -import sys +from eland import NDFrame import pandas as pd -from pandas.io.formats import format as fmt -from pandas.io.formats.printing import pprint_thing -from pandas.compat import StringIO -from pandas.io.common import _expand_user, _stringify_path -from pandas.io.formats import console -from pandas.core import common as com - -from eland import NDFrame -from eland import Index -from eland import Series - - - - class DataFrame(NDFrame): - """ - pandas.DataFrame like API that proxies into Elasticsearch index(es). - - Parameters - ---------- - client : eland.Client - A reference to a Elasticsearch python client - - index_pattern : str - An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*). - - See Also - -------- - - Examples - -------- - - import eland as ed - client = ed.Client(Elasticsearch()) - df = ed.DataFrame(client, 'reviews') - df.head() - reviewerId vendorId rating date - 0 0 0 5 2006-04-07 17:08 - 1 1 1 5 2006-05-04 12:16 - 2 2 2 4 2006-04-21 12:26 - 3 3 3 5 2006-04-18 15:48 - 4 3 4 5 2006-04-18 15:49 - - Notice that the types are based on Elasticsearch mappings - - Notes - ----- - If the Elasticsearch index is deleted or index mappings are changed after this - object is created, the object is not rebuilt and so inconsistencies can occur. - - """ - + # TODO create effectively 2 constructors + # 1. client, index_pattern, columns, index_field + # 2. query_compiler def __init__(self, - client, - index_pattern, - mappings=None, - index_field=None): + client=None, + index_pattern=None, + columns=None, + index_field=None, + query_compiler=None): # python 3 syntax - super().__init__(client, index_pattern, mappings=mappings, index_field=index_field) + super().__init__( + client=client, + index_pattern=index_pattern, + columns=columns, + index_field=index_field, + query_compiler=query_compiler) - def head(self, n=5): - return super()._head(n) + def _get_columns(self): + return self._query_compiler.columns - def tail(self, n=5): - return super()._tail(n) - - def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, - null_counts=None): - """ - Print a concise summary of a DataFrame. - - This method prints information about a DataFrame including - the index dtype and column dtypes, non-null values and memory usage. - - This copies a lot of code from pandas.DataFrame.info as it is difficult - to split out the appropriate code or creating a SparseDataFrame gives - incorrect results on types and counts. - """ - if buf is None: # pragma: no cover - buf = sys.stdout - - lines = [] - - lines.append(str(type(self))) - lines.append(self._index_summary()) - - if len(self.columns) == 0: - lines.append('Empty {name}'.format(name=type(self).__name__)) - fmt.buffer_put_lines(buf, lines) - return - - cols = self.columns - - # hack - if max_cols is None: - max_cols = pd.get_option('display.max_info_columns', - len(self.columns) + 1) - - max_rows = pd.get_option('display.max_info_rows', len(self) + 1) - - if null_counts is None: - show_counts = ((len(self.columns) <= max_cols) and - (len(self) < max_rows)) - else: - show_counts = null_counts - exceeds_info_cols = len(self.columns) > max_cols - - def _verbose_repr(): - lines.append('Data columns (total %d columns):' % - len(self.columns)) - space = max(len(pprint_thing(k)) for k in self.columns) + 4 - counts = None - - tmpl = "{count}{dtype}" - if show_counts: - counts = self.count() - if len(cols) != len(counts): # pragma: no cover - raise AssertionError( - 'Columns must equal counts ' - '({cols:d} != {counts:d})'.format( - cols=len(cols), counts=len(counts))) - tmpl = "{count} non-null {dtype}" - - dtypes = self.dtypes - for i, col in enumerate(self.columns): - dtype = dtypes.iloc[i] - col = pprint_thing(col) - - count = "" - if show_counts: - count = counts.iloc[i] - - lines.append(_put_str(col, space) + tmpl.format(count=count, - dtype=dtype)) - - def _non_verbose_repr(): - lines.append(self.columns._summary(name='Columns')) - - def _sizeof_fmt(num, size_qualifier): - # returns size in human readable format - for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: - if num < 1024.0: - return ("{num:3.1f}{size_q} " - "{x}".format(num=num, size_q=size_qualifier, x=x)) - num /= 1024.0 - return "{num:3.1f}{size_q} {pb}".format(num=num, - size_q=size_qualifier, - pb='PB') - - if verbose: - _verbose_repr() - elif verbose is False: # specifically set to False, not nesc None - _non_verbose_repr() - else: - if exceeds_info_cols: - _non_verbose_repr() - else: - _verbose_repr() - - counts = self.get_dtype_counts() - dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k - in sorted(counts.items())] - lines.append('dtypes: {types}'.format(types=', '.join(dtypes))) - - if memory_usage is None: - memory_usage = pd.get_option('display.memory_usage') - if memory_usage: - # append memory usage of df to display - size_qualifier = '' - - # TODO - this is different from pd.DataFrame as we shouldn't - # really hold much in memory. For now just approximate with getsizeof + ignore deep - mem_usage = sys.getsizeof(self) - lines.append("memory usage: {mem}\n".format( - mem=_sizeof_fmt(mem_usage, size_qualifier))) - - fmt.buffer_put_lines(buf, lines) + columns = property(_get_columns) @property - def shape(self): + def empty(self): + """Determines if the DataFrame is empty. + + Returns: + True if the DataFrame is empty. + False otherwise. """ - Return a tuple representing the dimensionality of the DataFrame. + # TODO - this is called on every attribute get (most methods) from modin/pandas/base.py:3337 + # (as Index.__len__ performs an query) we may want to cache self.index.empty() + return len(self.columns) == 0 or len(self.index) == 0 - Returns - ------- - shape: tuple - 0 - number of rows - 1 - number of columns - """ - num_rows = len(self) - num_columns = len(self.columns) + def head(self, n=5): + return super().head(n) - return num_rows, num_columns + def tail(self, n=5): + return super().tail(n) - def set_index(self, index_field): - copy = self.copy() - copy._index = Index(index_field) - return copy - - def _index_summary(self): - head = self.head(1).index[0] - tail = self.tail(1).index[0] - index_summary = ', %s to %s' % (pprint_thing(head), - pprint_thing(tail)) - - name = "Index" - return '%s: %s entries%s' % (name, len(self), index_summary) - - def count(self): - """ - Count non-NA cells for each column (TODO row) - - Counts are based on exists queries against ES - - This is inefficient, as it creates N queries (N is number of fields). - - An alternative approach is to use value_count aggregations. However, they have issues in that: - 1. They can only be used with aggregatable fields (e.g. keyword not text) - 2. For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2 - for a single document. - """ - counts = {} - for field in self._mappings.source_fields(): - exists_query = {"query": {"exists": {"field": field}}} - field_exists_count = self._client.count(index=self._index_pattern, body=exists_query) - counts[field] = field_exists_count - - count = pd.Series(data=counts, index=self._mappings.source_fields()) - - return count - - def describe(self): - return super()._describe() - - - def __getitem__(self, key): - # NOTE: there is a difference between pandas here. - # e.g. df['a'] returns pd.Series, df[['a','b']] return pd.DataFrame - - # Implementation mainly copied from pandas v0.24.2 - # (https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html) - key = com.apply_if_callable(key, self) - - # TODO - add slice capabilities - need to add index features first - # e.g. set index etc. - # Do we have a slicer (on rows)? - """ - indexer = convert_to_index_sliceable(self, key) - if indexer is not None: - return self._slice(indexer, axis=0) - # Do we have a (boolean) DataFrame? - if isinstance(key, DataFrame): - return self._getitem_frame(key) - """ - - # Do we have a (boolean) 1d indexer? - """ - if com.is_bool_indexer(key): - return self._getitem_bool_array(key) - """ - - # We are left with two options: a single key, and a collection of keys, - columns = [] - is_single_key = False - if isinstance(key, str): - if not self._mappings.is_source_field(key): - raise TypeError('Column does not exist: [{0}]'.format(key)) - columns.append(key) - is_single_key = True - elif isinstance(key, list): - columns.extend(key) - else: - raise TypeError('__getitem__ arguments invalid: [{0}]'.format(key)) - - mappings = self._filter_mappings(columns) - - # Return new eland.DataFrame with modified mappings - if is_single_key: - return Series(self._client, self._index_pattern, mappings=mappings) - else: - return DataFrame(self._client, self._index_pattern, mappings=mappings) - - - def __getattr__(self, name): - # Note: obj.x will always call obj.__getattribute__('x') prior to - # calling obj.__getattr__('x'). - mappings = self._filter_mappings([name]) - - return Series(self._client, self._index_pattern, mappings=mappings) - - def copy(self): - # TODO - test and validate...may need deep copying - return DataFrame(self._client, - self._index_pattern, - self._mappings, - self._index) - - # ---------------------------------------------------------------------- - # Rendering Methods def __repr__(self): - """ - From pandas - """ - buf = StringIO() + num_rows = pd.get_option("max_rows") or 60 + num_cols = pd.get_option("max_columns") or 20 - max_rows = pd.get_option("display.max_rows") - max_cols = pd.get_option("display.max_columns") - show_dimensions = pd.get_option("display.show_dimensions") - if pd.get_option("display.expand_frame_repr"): - width, _ = console.get_console_size() + result = repr(self._build_repr_df(num_rows, num_cols)) + if len(self.index) > num_rows or len(self.columns) > num_cols: + # The split here is so that we don't repr pandas row lengths. + return result.rsplit("\n\n", 1)[0] + "\n\n[{0} rows x {1} columns]".format( + len(self.index), len(self.columns) + ) else: - width = None - self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols, - line_width=width, show_dimensions=show_dimensions) - - return buf.getvalue() - - def to_string(self, buf=None, columns=None, col_space=None, header=True, - index=True, na_rep='NaN', formatters=None, float_format=None, - sparsify=None, index_names=True, justify=None, - max_rows=None, max_cols=None, show_dimensions=True, - decimal='.', line_width=None): - """ - From pandas - """ - if max_rows == None: - max_rows = pd.get_option('display.max_rows') - - df = self._fake_head_tail_df(max_rows=max_rows+1) - - if buf is not None: - _buf = _expand_user(_stringify_path(buf)) - else: - _buf = StringIO() - - df.to_string(buf=_buf, columns=columns, - col_space=col_space, na_rep=na_rep, - formatters=formatters, - float_format=float_format, - sparsify=sparsify, justify=justify, - index_names=index_names, - header=header, index=index, - max_rows=max_rows, - max_cols=max_cols, - show_dimensions=False, # print this outside of this call - decimal=decimal, - line_width=line_width) - - # Our fake dataframe has incorrect number of rows (max_rows*2+1) - write out - # the correct number of rows - if show_dimensions: - _buf.write("\n\n[{nrows} rows x {ncols} columns]" - .format(nrows=self._index_count(), ncols=len(self.columns))) - - if buf is None: - result = _buf.getvalue() return result - - def to_pandas(selfs): - return super()._to_pandas() - -# From pandas.DataFrame -def _put_str(s, space): - return '{s}'.format(s=s)[:space].ljust(space) diff --git a/eland/index.py b/eland/index.py index be0425f..eb13ce6 100644 --- a/eland/index.py +++ b/eland/index.py @@ -18,10 +18,12 @@ class Index: ID_INDEX_FIELD = '_id' ID_SORT_FIELD = '_doc' # if index field is _id, sort by _doc - def __init__(self, index_field=None): + def __init__(self, query_compiler, index_field=None): # Calls setter self.index_field = index_field + self._query_compiler = query_compiler + @property def sort_field(self): if self._index_field == self.ID_INDEX_FIELD: @@ -38,9 +40,12 @@ class Index: @index_field.setter def index_field(self, index_field): - if index_field == None: + if index_field == None or index_field == Index.ID_INDEX_FIELD: self._index_field = Index.ID_INDEX_FIELD self._is_source_field = False else: self._index_field = index_field self._is_source_field = True + + def __len__(self): + return self._query_compiler._index_count() diff --git a/eland/mappings.py b/eland/mappings.py index 1dc0bee..76d7286 100644 --- a/eland/mappings.py +++ b/eland/mappings.py @@ -4,7 +4,7 @@ import pandas as pd from pandas.core.dtypes.common import (is_float_dtype, is_bool_dtype, is_integer_dtype, is_datetime_or_timedelta_dtype, is_string_dtype) -class Mappings(): +class Mappings: """ General purpose to manage Elasticsearch to/from pandas mappings @@ -53,7 +53,7 @@ class Mappings(): Columns to copy """ if (client is not None) and (index_pattern is not None): - get_mapping = client.indices().get_mapping(index=index_pattern) + get_mapping = client.get_mapping(index=index_pattern) # Get all fields (including all nested) and then field_caps # for these names (fields=* doesn't appear to work effectively...) @@ -67,12 +67,8 @@ class Mappings(): # field_name, es_dtype, pd_dtype, is_searchable, is_aggregtable, is_source self._mappings_capabilities = Mappings._create_capability_matrix(all_fields, source_fields, all_fields_caps) else: - if columns is not None: - # Reference object and restrict mapping columns - self._mappings_capabilities = mappings._mappings_capabilities.loc[columns] - else: - # straight copy - self._mappings_capabilities = mappings._mappings_capabilities.copy() + # straight copy + self._mappings_capabilities = mappings._mappings_capabilities.copy() # Cache source field types for efficient lookup # (this massively improves performance of DataFrame.flatten) diff --git a/eland/ndframe.py b/eland/ndframe.py index 9fbd312..c1b7f5b 100644 --- a/eland/ndframe.py +++ b/eland/ndframe.py @@ -22,350 +22,55 @@ Similarly, only Elasticsearch searchable fields can be searched or filtered, and only Elasticsearch aggregatable fields can be aggregated or grouped. """ -import pandas as pd -import functools -from elasticsearch_dsl import Search -import eland as ed +from modin.pandas.base import BasePandasDataset -from pandas.core.generic import NDFrame as pd_NDFrame -from pandas._libs import Timestamp, iNaT, properties +from eland import ElandQueryCompiler -class NDFrame(): - """ - pandas.DataFrame/Series like API that proxies into Elasticsearch index(es). +class NDFrame(BasePandasDataset): - Parameters - ---------- - client : eland.Client - A reference to a Elasticsearch python client - - index_pattern : str - An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*). - - See Also - -------- - - """ def __init__(self, - client, - index_pattern, - mappings=None, - index_field=None): - - self._client = ed.Client(client) - self._index_pattern = index_pattern - - # Get and persist mappings, this allows us to correctly - # map returned types from Elasticsearch to pandas datatypes - if mappings is None: - self._mappings = ed.Mappings(self._client, self._index_pattern) - else: - self._mappings = mappings - - self._index = ed.Index(index_field) - - def _es_results_to_pandas(self, results): + client=None, + index_pattern=None, + columns=None, + index_field=None, + query_compiler=None): """ + pandas.DataFrame/Series like API that proxies into Elasticsearch index(es). + Parameters ---------- - results: dict - Elasticsearch results from self.client.search - - Returns - ------- - df: pandas.DataFrame - _source values extracted from results and mapped to pandas DataFrame - dtypes are mapped via Mapping object - - Notes - ----- - Fields containing lists in Elasticsearch don't map easily to pandas.DataFrame - For example, an index with mapping: - ``` - "mappings" : { - "properties" : { - "group" : { - "type" : "keyword" - }, - "user" : { - "type" : "nested", - "properties" : { - "first" : { - "type" : "keyword" - }, - "last" : { - "type" : "keyword" - } - } - } - } - } - ``` - Adding a document: - ``` - "_source" : { - "group" : "amsterdam", - "user" : [ - { - "first" : "John", - "last" : "Smith" - }, - { - "first" : "Alice", - "last" : "White" - } - ] - } - ``` - (https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html) - this would be transformed internally (in Elasticsearch) into a document that looks more like this: - ``` - { - "group" : "amsterdam", - "user.first" : [ "alice", "john" ], - "user.last" : [ "smith", "white" ] - } - ``` - When mapping this a pandas data frame we mimic this transformation. - - Similarly, if a list is added to Elasticsearch: - ``` - PUT my_index/_doc/1 - { - "list" : [ - 0, 1, 2 - ] - } - ``` - The mapping is: - ``` - "mappings" : { - "properties" : { - "user" : { - "type" : "long" - } - } - } - ``` - TODO - explain how lists are handled (https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html) - TODO - an option here is to use Elasticsearch's multi-field matching instead of pandas treatment of lists (which isn't great) - NOTE - using this lists is generally not a good way to use this API + client : eland.Client + A reference to a Elasticsearch python client """ - def flatten_dict(y): - out = {} + if query_compiler is None: + query_compiler = ElandQueryCompiler(client=client, + index_pattern=index_pattern, + columns=columns, + index_field=index_field) + self._query_compiler = query_compiler - def flatten(x, name=''): - # We flatten into source fields e.g. if type=geo_point - # location: {lat=52.38, lon=4.90} - if name == '': - is_source_field = False - pd_dtype = 'object' - else: - is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(name[:-1]) + def _get_index(self): + return self._query_compiler.index - if not is_source_field and type(x) is dict: - for a in x: - flatten(x[a], name + a + '.') - elif not is_source_field and type(x) is list: - for a in x: - flatten(a, name) - elif is_source_field == True: # only print source fields from mappings (TODO - not so efficient for large number of fields and filtered mapping) - field_name = name[:-1] + index = property(_get_index) - # Coerce types - for now just datetime - if pd_dtype == 'datetime64[ns]': - x = pd.to_datetime(x) + def _build_repr_df(self, num_rows, num_cols): + # Overriden version of BasePandasDataset._build_repr_df + # to avoid issues with concat + if len(self.index) <= num_rows: + return self.to_pandas() - # Elasticsearch can have multiple values for a field. These are represented as lists, so - # create lists for this pivot (see notes above) - if field_name in out: - if type(out[field_name]) is not list: - l = [out[field_name]] - out[field_name] = l - out[field_name].append(x) - else: - out[field_name] = x + num_rows = num_rows + 1 - flatten(y) + head_rows = int(num_rows / 2) + num_rows % 2 + tail_rows = num_rows - head_rows - return out - - rows = [] - index = [] - if isinstance(results, dict): - iterator = results['hits']['hits'] - else: - iterator = results - - for hit in iterator: - row = hit['_source'] - - # get index value - can be _id or can be field value in source - if self._index.is_source_field: - index_field = row[self._index.index_field] - else: - index_field = hit[self._index.index_field] - index.append(index_field) - - # flatten row to map correctly to 2D DataFrame - rows.append(flatten_dict(row)) - - # Create pandas DataFrame - df = pd.DataFrame(data=rows, index=index) - - # _source may not contain all columns in the mapping - # therefore, fill in missing columns - # (note this returns self.columns NOT IN df.columns) - missing_columns = list(set(self._columns) - set(df.columns)) - - for missing in missing_columns: - is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(missing) - df[missing] = None - df[missing].astype(pd_dtype) - - # Sort columns in mapping order - df = df[self._columns] - - return df - - def _head(self, n=5): - """ - Protected method that returns head as pandas.DataFrame. - - Returns - ------- - _head - pandas.DataFrame of top N values - """ - sort_params = self._index.sort_field + ":asc" - - results = self._client.search(index=self._index_pattern, size=n, sort=sort_params) - - return self._es_results_to_pandas(results) - - def _tail(self, n=5): - """ - Protected method that returns tail as pandas.DataFrame. - - Returns - ------- - _tail - pandas.DataFrame of last N values - """ - sort_params = self._index.sort_field + ":desc" - - results = self._client.search(index=self._index_pattern, size=n, sort=sort_params) - - df = self._es_results_to_pandas(results) - - # reverse order (index ascending) - return df.sort_index() - - def _to_pandas(self): - """ - Protected method that returns all data as pandas.DataFrame. - - Returns - ------- - df - pandas.DataFrame of all values - """ - sort_params = self._index.sort_field + ":asc" - - results = self._client.scan(index=self._index_pattern) - - # We sort here rather than in scan - once everything is in core this - # should be faster - return self._es_results_to_pandas(results) - - def _describe(self): - numeric_source_fields = self._mappings.numeric_source_fields() - - # for each field we compute: - # count, mean, std, min, 25%, 50%, 75%, max - search = Search(using=self._client, index=self._index_pattern).extra(size=0) - - for field in numeric_source_fields: - search.aggs.metric('extended_stats_' + field, 'extended_stats', field=field) - search.aggs.metric('percentiles_' + field, 'percentiles', field=field) - - response = search.execute() - - results = {} - - for field in numeric_source_fields: - values = list() - values.append(response.aggregations['extended_stats_' + field]['count']) - values.append(response.aggregations['extended_stats_' + field]['avg']) - values.append(response.aggregations['extended_stats_' + field]['std_deviation']) - values.append(response.aggregations['extended_stats_' + field]['min']) - values.append(response.aggregations['percentiles_' + field]['values']['25.0']) - values.append(response.aggregations['percentiles_' + field]['values']['50.0']) - values.append(response.aggregations['percentiles_' + field]['values']['75.0']) - values.append(response.aggregations['extended_stats_' + field]['max']) - - # if not None - if values.count(None) < len(values): - results[field] = values - - df = pd.DataFrame(data=results, index=['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']) - - return df - - def _filter_mappings(self, columns): - mappings = ed.Mappings(mappings=self._mappings, columns=columns) - - return mappings - - @property - def columns(self): - return self._columns - - @property - def index(self): - return self._index - - @property - def dtypes(self): - return self._mappings.dtypes() - - @property - def _columns(self): - return pd.Index(self._mappings.source_fields()) - - def get_dtype_counts(self): - return self._mappings.get_dtype_counts() - - def _index_count(self): - """ - Returns - ------- - index_count: int - Count of docs where index_field exists - """ - exists_query = {"query": {"exists": {"field": self._index.index_field}}} - - index_count = self._client.count(index=self._index_pattern, body=exists_query) - - return index_count - - def __len__(self): - """ - Returns length of info axis, but here we use the index. - """ - return self._client.count(index=self._index_pattern) - - def _fake_head_tail_df(self, max_rows=1): - """ - Create a 'fake' pd.DataFrame of the entire ed.DataFrame - by concat head and tail. Used for display. - """ - head_rows = int(max_rows / 2) + max_rows % 2 - tail_rows = max_rows - head_rows - - head = self._head(head_rows) - tail = self._tail(tail_rows) + head = self.head(head_rows).to_pandas() + tail = self.tail(tail_rows).to_pandas() return head.append(tail) + + def to_pandas(self): + return self._query_compiler.to_pandas() diff --git a/eland/operations.py b/eland/operations.py new file mode 100644 index 0000000..8705c4d --- /dev/null +++ b/eland/operations.py @@ -0,0 +1,232 @@ +from enum import Enum + + +class Operations: + """ + A collector of the queries and selectors we apply to queries to return the appropriate results. + + For example, + - a list of the columns in the DataFrame (a subset of columns in the index) + - a size limit on the results (e.g. for head(n=5)) + - a query to filter the results (e.g. df.A > 10) + + This is maintained as a 'task graph' (inspired by dask) + + A task graph is a dictionary mapping keys to computations: + + A key is any hashable value that is not a task: + ``` + {'x': 1, + 'y': 2, + 'z': (add, 'x', 'y'), + 'w': (sum, ['x', 'y', 'z']), + 'v': [(sum, ['w', 'z']), 2]} + ``` + (see https://docs.dask.org/en/latest/spec.html) + """ + + class SortOrder(Enum): + ASC = 0 + DESC = 1 + + @staticmethod + def reverse(order): + if order == Operations.SortOrder.ASC: + return Operations.SortOrder.DESC + + return Operations.SortOrder.ASC + + @staticmethod + def to_string(order): + if order == Operations.SortOrder.ASC: + return ":asc" + + return ":desc" + + def __init__(self, tasks=None): + if tasks == None: + self._tasks = [] + else: + self._tasks = tasks + + def __constructor__(self, *args, **kwargs): + return type(self)(*args, **kwargs) + + def copy(self): + return self.__constructor__(tasks=self._tasks.copy()) + + def head(self, index, n): + # Add a task that is an ascending sort with size=n + task = ('head', (index.sort_field, n)) + self._tasks.append(task) + + def tail(self, index, n): + + # Add a task that is descending sort with size=n + task = ('tail', (index.sort_field, n)) + self._tasks.append(task) + + def set_columns(self, columns): + self._tasks['columns'] = columns + + def __repr__(self): + return repr(self._tasks) + + def to_pandas(self, query_compiler): + query, post_processing = self._to_es_query() + + size, sort_params = Operations._query_to_params(query) + + es_results = query_compiler._client.search( + index=query_compiler._index_pattern, + size=size, + sort=sort_params) + + df = query_compiler._es_results_to_pandas(es_results) + + return self._apply_df_post_processing(df, post_processing) + + def to_count(self, query_compiler): + query, post_processing = self._to_es_query() + + size = query['query_size'] # can be None + + pp_size = self._count_post_processing(post_processing) + if pp_size is not None: + if size is not None: + size = min(size, pp_size) + else: + size = pp_size + + # Size is dictated by operations + if size is not None: + return size + + exists_query = {"query": {"exists": {"field": query_compiler.index.index_field}}} + + return query_compiler._client.count(index=query_compiler._index_pattern, body=exists_query) + + @staticmethod + def _query_to_params(query): + sort_params = None + if query['query_sort_field'] and query['query_sort_order']: + sort_params = query['query_sort_field'] + Operations.SortOrder.to_string(query['query_sort_order']) + + size = query['query_size'] + + return size, sort_params + 1 + @staticmethod + def _count_post_processing(post_processing): + size = None + for action in post_processing: + if action[0] == 'head' or action[0] == 'tail': + if size is None or action[1][1] < size: + size = action[1][1] + + return size + + @staticmethod + def _apply_df_post_processing(df, post_processing): + for action in post_processing: + print(action) + if action == 'sort_index': + df = df.sort_index() + elif action[0] == 'head': + df = df.head(action[1][1]) + elif action[0] == 'tail': + df = df.tail(action[1][1]) + + return df + + def _to_es_query(self): + # We now try and combine all tasks into an Elasticsearch query + # Some operations can be simply combined into a single query + # other operations require pre-queries and then combinations + # other operations require in-core post-processing of results + query = {"query_sort_field": None, + "query_sort_order": None, + "query_size": None} + + post_processing = [] + + for task in self._tasks: + if task[0] == 'head': + query, post_processing = self._resolve_head(task, query, post_processing) + elif task[0] == 'tail': + query, post_processing = self._resolve_tail(task, query, post_processing) + + return query, post_processing + + def _resolve_head(self, item, query, post_processing): + # head - sort asc, size n + # |12345-------------| + query_sort_field = item[1][0] + query_sort_order = Operations.SortOrder.ASC + query_size = item[1][1] + + # If we are already postprocessing the query results, we just get 'head' of these + # (note, currently we just append another head, we don't optimise by + # overwriting previous head) + if len(post_processing) > 0: + post_processing.append(item) + return query, post_processing + + if query['query_sort_field'] is None: + query['query_sort_field'] = query_sort_field + # if it is already sorted we use existing field + + if query['query_sort_order'] is None: + query['query_sort_order'] = query_sort_order + # if it is already sorted we get head of existing order + + if query['query_size'] is None: + query['query_size'] = query_size + else: + # truncate if head is smaller + if query_size < query['query_size']: + query['query_size'] = query_size + + return query, post_processing + + def _resolve_tail(self, item, query, post_processing): + # tail - sort desc, size n, post-process sort asc + # |-------------12345| + query_sort_field = item[1][0] + query_sort_order = Operations.SortOrder.DESC + query_size = item[1][1] + + # If this is a tail of a tail adjust settings and return + if query['query_size'] is not None and \ + query['query_sort_order'] == query_sort_order and \ + post_processing == [('sort_index')]: + if query_size < query['query_size']: + query['query_size'] = query_size + return query, post_processing + + # If we are already postprocessing the query results, just get 'tail' of these + # (note, currently we just append another tail, we don't optimise by + # overwriting previous tail) + if len(post_processing) > 0: + post_processing.append(item) + return query, post_processing + + # If results are already constrained, just get 'tail' of these + # (note, currently we just append another tail, we don't optimise by + # overwriting previous tail) + if query['query_size'] is not None: + post_processing.append(item) + return query, post_processing + else: + query['query_size'] = query_size + if query['query_sort_field'] is None: + query['query_sort_field'] = query_sort_field + if query['query_sort_order'] is None: + query['query_sort_order'] = query_sort_order + else: + # reverse sort order + query['query_sort_order'] = Operations.SortOrder.reverse(query_sort_order) + + post_processing.append(('sort_index')) + + return query, post_processing diff --git a/eland/query_compiler.py b/eland/query_compiler.py new file mode 100644 index 0000000..49b0125 --- /dev/null +++ b/eland/query_compiler.py @@ -0,0 +1,247 @@ +import pandas as pd +from modin.backends.base.query_compiler import BaseQueryCompiler + +from eland import Client +from eland import Index +from eland import Mappings +from eland import Operations + + +class ElandQueryCompiler(BaseQueryCompiler): + + def __init__(self, + client=None, + index_pattern=None, + columns=None, + index_field=None, + operations=None): + self._client = Client(client) + self._index_pattern = index_pattern + + # Get and persist mappings, this allows us to correctly + # map returned types from Elasticsearch to pandas datatypes + self._mappings = Mappings(client=self._client, index_pattern=self._index_pattern) + + self._index = Index(self, index_field) + + if operations is None: + self._operations = Operations() + else: + self._operations = operations + + def _get_index(self): + return self._index + + def _get_columns(self): + return pd.Index(self._mappings.source_fields()) + + columns = property(_get_columns) + index = property(_get_index) + + # END Index, columns, and dtypes objects + + def _es_results_to_pandas(self, results): + """ + Parameters + ---------- + results: dict + Elasticsearch results from self.client.search + + Returns + ------- + df: pandas.DataFrame + _source values extracted from results and mapped to pandas DataFrame + dtypes are mapped via Mapping object + + Notes + ----- + Fields containing lists in Elasticsearch don't map easily to pandas.DataFrame + For example, an index with mapping: + ``` + "mappings" : { + "properties" : { + "group" : { + "type" : "keyword" + }, + "user" : { + "type" : "nested", + "properties" : { + "first" : { + "type" : "keyword" + }, + "last" : { + "type" : "keyword" + } + } + } + } + } + ``` + Adding a document: + ``` + "_source" : { + "group" : "amsterdam", + "user" : [ + { + "first" : "John", + "last" : "Smith" + }, + { + "first" : "Alice", + "last" : "White" + } + ] + } + ``` + (https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html) + this would be transformed internally (in Elasticsearch) into a document that looks more like this: + ``` + { + "group" : "amsterdam", + "user.first" : [ "alice", "john" ], + "user.last" : [ "smith", "white" ] + } + ``` + When mapping this a pandas data frame we mimic this transformation. + + Similarly, if a list is added to Elasticsearch: + ``` + PUT my_index/_doc/1 + { + "list" : [ + 0, 1, 2 + ] + } + ``` + The mapping is: + ``` + "mappings" : { + "properties" : { + "user" : { + "type" : "long" + } + } + } + ``` + TODO - explain how lists are handled (https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html) + TODO - an option here is to use Elasticsearch's multi-field matching instead of pandas treatment of lists (which isn't great) + NOTE - using this lists is generally not a good way to use this API + """ + + def flatten_dict(y): + out = {} + + def flatten(x, name=''): + # We flatten into source fields e.g. if type=geo_point + # location: {lat=52.38, lon=4.90} + if name == '': + is_source_field = False + pd_dtype = 'object' + else: + is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(name[:-1]) + + if not is_source_field and type(x) is dict: + for a in x: + flatten(x[a], name + a + '.') + elif not is_source_field and type(x) is list: + for a in x: + flatten(a, name) + elif is_source_field == True: # only print source fields from mappings (TODO - not so efficient for large number of fields and filtered mapping) + field_name = name[:-1] + + # Coerce types - for now just datetime + if pd_dtype == 'datetime64[ns]': + x = pd.to_datetime(x) + + # Elasticsearch can have multiple values for a field. These are represented as lists, so + # create lists for this pivot (see notes above) + if field_name in out: + if type(out[field_name]) is not list: + l = [out[field_name]] + out[field_name] = l + out[field_name].append(x) + else: + out[field_name] = x + + flatten(y) + + return out + + rows = [] + index = [] + if isinstance(results, dict): + iterator = results['hits']['hits'] + else: + iterator = results + + for hit in iterator: + row = hit['_source'] + + # get index value - can be _id or can be field value in source + if self._index.is_source_field: + index_field = row[self._index.index_field] + else: + index_field = hit[self._index.index_field] + index.append(index_field) + + # flatten row to map correctly to 2D DataFrame + rows.append(flatten_dict(row)) + + # Create pandas DataFrame + df = pd.DataFrame(data=rows, index=index) + + # _source may not contain all columns in the mapping + # therefore, fill in missing columns + # (note this returns self.columns NOT IN df.columns) + missing_columns = list(set(self.columns) - set(df.columns)) + + for missing in missing_columns: + is_source_field, pd_dtype = self._mappings.source_field_pd_dtype(missing) + df[missing] = None + df[missing].astype(pd_dtype) + + # Sort columns in mapping order + df = df[self.columns] + + return df + + def _index_count(self): + """ + Returns + ------- + index_count: int + Count of docs where index_field exists + """ + return self._operations.to_count(self) + + def copy(self): + return self.__constructor__( + client=self._client, + index_pattern=self._index_pattern, + columns=self.columns, + index_field=self._index.index_field, + operations=self._operations.copy() + ) + + def head(self, n): + result = self.copy() + + result._operations.head(self._index, n) + + return result + + def tail(self, n): + result = self.copy() + + result._operations.tail(self._index, n) + + return result + + # To/From Pandas + def to_pandas(self): + """Converts Eland DataFrame to Pandas DataFrame. + + Returns: + Pandas DataFrame + """ + return self._operations.to_pandas(self) diff --git a/eland/series.py b/eland/series.py index 47473ed..e69de29 100644 --- a/eland/series.py +++ b/eland/series.py @@ -1,402 +0,0 @@ -""" -Series ---------- -One-dimensional ndarray with axis labels (including time series). - -The underlying data resides in Elasticsearch and the API aligns as much as -possible with pandas.DataFrame API. - -This allows the eland.Series to access large datasets stored in Elasticsearch, -without storing the dataset in local memory. - -Implementation Details ----------------------- -Based on NDFrame which underpins eland.1DataFrame - -""" -import sys - -import pandas as pd -import pandas.compat as compat -from pandas.compat import StringIO -from pandas.core.dtypes.common import ( - is_categorical_dtype) -from pandas.io.formats import format as fmt -from pandas.io.formats.printing import pprint_thing - -from eland import Index -from eland import NDFrame - - -class Series(NDFrame): - """ - pandas.Series like API that proxies into Elasticsearch index(es). - - Parameters - ---------- - client : eland.Client - A reference to a Elasticsearch python client - - index_pattern : str - An Elasticsearch index pattern. This can contain wildcards (e.g. filebeat-*). - - field_name : str - The field to base the series on - - See Also - -------- - - Examples - -------- - - import eland as ed - client = ed.Client(Elasticsearch()) - s = ed.DataFrame(client, 'reviews', 'date') - df.head() - reviewerId vendorId rating date - 0 0 0 5 2006-04-07 17:08 - 1 1 1 5 2006-05-04 12:16 - 2 2 2 4 2006-04-21 12:26 - 3 3 3 5 2006-04-18 15:48 - 4 3 4 5 2006-04-18 15:49 - - Notice that the types are based on Elasticsearch mappings - - Notes - ----- - If the Elasticsearch index is deleted or index mappings are changed after this - object is created, the object is not rebuilt and so inconsistencies can occur. - - """ - - def __init__(self, - client, - index_pattern, - field_name=None, - mappings=None, - index_field=None): - # python 3 syntax - super().__init__(client, index_pattern, mappings=mappings, index_field=index_field) - - # now select column (field_name) - if field_name is not None: - self._mappings = self._filter_mappings([field_name]) - elif len(self._mappings.source_fields()) != 1: - raise TypeError('Series must have 1 field: [{0}]'.format(len(self._mappings.source_fields()))) - - def head(self, n=5): - return self._df_to_series(super()._head(n)) - - def tail(self, n=5): - return self._df_to_series(super()._tail(n)) - - def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, - null_counts=None): - """ - Print a concise summary of a DataFrame. - - This method prints information about a DataFrame including - the index dtype and column dtypes, non-null values and memory usage. - - This copies a lot of code from pandas.DataFrame.info as it is difficult - to split out the appropriate code or creating a SparseDataFrame gives - incorrect results on types and counts. - """ - if buf is None: # pragma: no cover - buf = sys.stdout - - lines = [] - - lines.append(str(type(self))) - lines.append(self._index_summary()) - - if len(self.columns) == 0: - lines.append('Empty {name}'.format(name=type(self).__name__)) - fmt.buffer_put_lines(buf, lines) - return - - cols = self.columns - - # hack - if max_cols is None: - max_cols = pd.get_option('display.max_info_columns', - len(self.columns) + 1) - - max_rows = pd.get_option('display.max_info_rows', len(self) + 1) - - if null_counts is None: - show_counts = ((len(self.columns) <= max_cols) and - (len(self) < max_rows)) - else: - show_counts = null_counts - exceeds_info_cols = len(self.columns) > max_cols - - def _verbose_repr(): - lines.append('Data columns (total %d columns):' % - len(self.columns)) - space = max(len(pprint_thing(k)) for k in self.columns) + 4 - counts = None - - tmpl = "{count}{dtype}" - if show_counts: - counts = self.count() - if len(cols) != len(counts): # pragma: no cover - raise AssertionError( - 'Columns must equal counts ' - '({cols:d} != {counts:d})'.format( - cols=len(cols), counts=len(counts))) - tmpl = "{count} non-null {dtype}" - - dtypes = self.dtypes - for i, col in enumerate(self._columns): - dtype = dtypes.iloc[i] - col = pprint_thing(col) - - count = "" - if show_counts: - count = counts.iloc[i] - - lines.append(_put_str(col, space) + tmpl.format(count=count, - dtype=dtype)) - - def _non_verbose_repr(): - lines.append(self._columns._summary(name='Columns')) - - def _sizeof_fmt(num, size_qualifier): - # returns size in human readable format - for x in ['bytes', 'KB', 'MB', 'GB', 'TB']: - if num < 1024.0: - return ("{num:3.1f}{size_q} " - "{x}".format(num=num, size_q=size_qualifier, x=x)) - num /= 1024.0 - return "{num:3.1f}{size_q} {pb}".format(num=num, - size_q=size_qualifier, - pb='PB') - - if verbose: - _verbose_repr() - elif verbose is False: # specifically set to False, not nesc None - _non_verbose_repr() - else: - if exceeds_info_cols: - _non_verbose_repr() - else: - _verbose_repr() - - counts = self.get_dtype_counts() - dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k - in sorted(counts.items())] - lines.append('dtypes: {types}'.format(types=', '.join(dtypes))) - - if memory_usage is None: - memory_usage = pd.get_option('display.memory_usage') - if memory_usage: - # append memory usage of df to display - size_qualifier = '' - - # TODO - this is different from pd.DataFrame as we shouldn't - # really hold much in memory. For now just approximate with getsizeof + ignore deep - mem_usage = sys.getsizeof(self) - lines.append("memory usage: {mem}\n".format( - mem=_sizeof_fmt(mem_usage, size_qualifier))) - - fmt.buffer_put_lines(buf, lines) - - @property - def name(self): - return list(self._mappings.source_fields())[0] - - @property - def shape(self): - """ - Return a tuple representing the dimensionality of the DataFrame. - - Returns - ------- - shape: tuple - 0 - number of rows - 1 - number of columns - """ - num_rows = len(self) - num_columns = len(self._columns) - - return num_rows, num_columns - - @property - def set_index(self, index_field): - copy = self.copy() - copy._index = Index(index_field) - return copy - - def _index_summary(self): - head = self.head(1).index[0] - tail = self.tail(1).index[0] - index_summary = ', %s to %s' % (pprint_thing(head), - pprint_thing(tail)) - - name = "Index" - return '%s: %s entries%s' % (name, len(self), index_summary) - - def count(self): - """ - Count non-NA cells for each column (TODO row) - - Counts are based on exists queries against ES - - This is inefficient, as it creates N queries (N is number of fields). - - An alternative approach is to use value_count aggregations. However, they have issues in that: - 1. They can only be used with aggregatable fields (e.g. keyword not text) - 2. For list fields they return multiple counts. E.g. tags=['elastic', 'ml'] returns value_count=2 - for a single document. - """ - counts = {} - for field in self._mappings.source_fields(): - exists_query = {"query": {"exists": {"field": field}}} - field_exists_count = self._client.count(index=self._index_pattern, body=exists_query) - counts[field] = field_exists_count - - count = pd.Series(data=counts, index=self._mappings.source_fields()) - - return count - - def describe(self): - return super()._describe() - - def _df_to_series(self, df): - return df[self.name] - - # ---------------------------------------------------------------------- - # Rendering Methods - def __repr__(self): - """ - From pandas - """ - buf = StringIO() - - max_rows = pd.get_option("display.max_rows") - - self.to_string(buf=buf, na_rep='NaN', float_format=None, header=True, index=True, length=True, - dtype=True, name=True, max_rows=max_rows) - - return buf.getvalue() - - def to_string(self, buf=None, na_rep='NaN', - float_format=None, header=True, - index=True, length=True, dtype=True, - name=True, max_rows=None): - """ - From pandas 0.24.2 - - Render a string representation of the Series. - - Parameters - ---------- - buf : StringIO-like, optional - buffer to write to - na_rep : string, optional - string representation of NAN to use, default 'NaN' - float_format : one-parameter function, optional - formatter function to apply to columns' elements if they are floats - default None - header : boolean, default True - Add the Series header (index name) - index : bool, optional - Add index (row) labels, default True - length : boolean, default False - Add the Series length - dtype : boolean, default False - Add the Series dtype - name : boolean, default False - Add the Series name if not None - max_rows : int, optional - Maximum number of rows to show before truncating. If None, show - all. - - Returns - ------- - formatted : string (if not buffer passed) - """ - if max_rows == None: - max_rows = pd.get_option("display.max_rows") - - df = self._fake_head_tail_df(max_rows=max_rows + 1) - - s = self._df_to_series(df) - - formatter = Series.SeriesFormatter(s, len(self), name=name, length=length, - header=header, index=index, - dtype=dtype, na_rep=na_rep, - float_format=float_format, - max_rows=max_rows) - result = formatter.to_string() - - # catch contract violations - if not isinstance(result, compat.text_type): - raise AssertionError("result must be of type unicode, type" - " of result is {0!r}" - "".format(result.__class__.__name__)) - - if buf is None: - return result - else: - try: - buf.write(result) - except AttributeError: - with open(buf, 'w') as f: - f.write(result) - - class SeriesFormatter(fmt.SeriesFormatter): - """ - A hacked overridden version of pandas.io.formats.SeriesFormatter that writes correct length - """ - def __init__(self, series, series_length, buf=None, length=True, header=True, index=True, - na_rep='NaN', name=False, float_format=None, dtype=True, - max_rows=None): - super().__init__(series, buf=buf, length=length, header=header, index=index, - na_rep=na_rep, name=name, float_format=float_format, dtype=dtype, - max_rows=max_rows) - self._series_length = series_length - - def _get_footer(self): - """ - Overridden with length change - (from pandas 0.24.2 io.formats.SeriesFormatter) - """ - name = self.series.name - footer = '' - - if getattr(self.series.index, 'freq', None) is not None: - footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr) - - if self.name is not False and name is not None: - if footer: - footer += ', ' - - series_name = pprint_thing(name, - escape_chars=('\t', '\r', '\n')) - footer += ("Name: {sname}".format(sname=series_name) - if name is not None else "") - - if (self.length is True or - (self.length == 'truncate' and self.truncate_v)): - if footer: - footer += ', ' - footer += 'Length: {length}'.format(length=self._series_length) - - if self.dtype is not False and self.dtype is not None: - name = getattr(self.tr_series.dtype, 'name', None) - if name: - if footer: - footer += ', ' - footer += 'dtype: {typ}'.format(typ=pprint_thing(name)) - - # level infos are added to the end and in a new line, like it is done - # for Categoricals - if is_categorical_dtype(self.tr_series.dtype): - level_info = self.tr_series._values._repr_categories_info() - if footer: - footer += "\n" - footer += level_info - - return compat.text_type(footer) diff --git a/eland/tests/__init__.py b/eland/tests/__init__.py deleted file mode 100644 index c98fe4f..0000000 --- a/eland/tests/__init__.py +++ /dev/null @@ -1,486 +0,0 @@ -import os -import pandas as pd - -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) - -# Define test files and indices -ELASTICSEARCH_HOST = 'localhost' # TODO externalise this - -FLIGHTS_INDEX_NAME = 'flights' -FLIGHTS_MAPPING = { "mappings" : { - "properties" : { - "AvgTicketPrice" : { - "type" : "float" - }, - "Cancelled" : { - "type" : "boolean" - }, - "Carrier" : { - "type" : "keyword" - }, - "Dest" : { - "type" : "keyword" - }, - "DestAirportID" : { - "type" : "keyword" - }, - "DestCityName" : { - "type" : "keyword" - }, - "DestCountry" : { - "type" : "keyword" - }, - "DestLocation" : { - "type" : "geo_point" - }, - "DestRegion" : { - "type" : "keyword" - }, - "DestWeather" : { - "type" : "keyword" - }, - "DistanceKilometers" : { - "type" : "float" - }, - "DistanceMiles" : { - "type" : "float" - }, - "FlightDelay" : { - "type" : "boolean" - }, - "FlightDelayMin" : { - "type" : "integer" - }, - "FlightDelayType" : { - "type" : "keyword" - }, - "FlightNum" : { - "type" : "keyword" - }, - "FlightTimeHour" : { - "type" : "float" - }, - "FlightTimeMin" : { - "type" : "float" - }, - "Origin" : { - "type" : "keyword" - }, - "OriginAirportID" : { - "type" : "keyword" - }, - "OriginCityName" : { - "type" : "keyword" - }, - "OriginCountry" : { - "type" : "keyword" - }, - "OriginLocation" : { - "type" : "geo_point" - }, - "OriginRegion" : { - "type" : "keyword" - }, - "OriginWeather" : { - "type" : "keyword" - }, - "dayOfWeek" : { - "type" : "integer" - }, - "timestamp" : { - "type" : "date" - } - } - } } -FLIGHTS_FILE_NAME = ROOT_DIR + '/flights.json.gz' -FLIGHTS_DF_FILE_NAME = ROOT_DIR + '/flights_df.json.gz' - -ECOMMERCE_INDEX_NAME = 'ecommerce' -ECOMMERCE_MAPPING = { "mappings" : { - "properties" : { - "category" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword" - } - } - }, - "currency" : { - "type" : "keyword" - }, - "customer_birth_date" : { - "type" : "date" - }, - "customer_first_name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "customer_full_name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "customer_gender" : { - "type" : "keyword" - }, - "customer_id" : { - "type" : "keyword" - }, - "customer_last_name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "customer_phone" : { - "type" : "keyword" - }, - "day_of_week" : { - "type" : "keyword" - }, - "day_of_week_i" : { - "type" : "integer" - }, - "email" : { - "type" : "keyword" - }, - "geoip" : { - "properties" : { - "city_name" : { - "type" : "keyword" - }, - "continent_name" : { - "type" : "keyword" - }, - "country_iso_code" : { - "type" : "keyword" - }, - "location" : { - "type" : "geo_point" - }, - "region_name" : { - "type" : "keyword" - } - } - }, - "manufacturer" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword" - } - } - }, - "order_date" : { - "type" : "date" - }, - "order_id" : { - "type" : "keyword" - }, - "products" : { - "properties" : { - "_id" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword", - "ignore_above" : 256 - } - } - }, - "base_price" : { - "type" : "half_float" - }, - "base_unit_price" : { - "type" : "half_float" - }, - "category" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword" - } - } - }, - "created_on" : { - "type" : "date" - }, - "discount_amount" : { - "type" : "half_float" - }, - "discount_percentage" : { - "type" : "half_float" - }, - "manufacturer" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword" - } - } - }, - "min_price" : { - "type" : "half_float" - }, - "price" : { - "type" : "half_float" - }, - "product_id" : { - "type" : "long" - }, - "product_name" : { - "type" : "text", - "fields" : { - "keyword" : { - "type" : "keyword" - } - }, - "analyzer" : "english" - }, - "quantity" : { - "type" : "integer" - }, - "sku" : { - "type" : "keyword" - }, - "tax_amount" : { - "type" : "half_float" - }, - "taxful_price" : { - "type" : "half_float" - }, - "taxless_price" : { - "type" : "half_float" - }, - "unit_discount_amount" : { - "type" : "half_float" - } - } - }, - "sku" : { - "type" : "keyword" - }, - "taxful_total_price" : { - "type" : "half_float" - }, - "taxless_total_price" : { - "type" : "half_float" - }, - "total_quantity" : { - "type" : "integer" - }, - "total_unique_products" : { - "type" : "integer" - }, - "type" : { - "type" : "keyword" - }, - "user" : { - "type" : "keyword" - } - } - } } -ECOMMERCE_FILE_NAME = ROOT_DIR + '/ecommerce.json.gz' -ECOMMERCE_DF_FILE_NAME = ROOT_DIR + '/ecommerce_df.json.gz' - -TEST_MAPPING1 = { - 'mappings': { - 'properties': { - 'city': { - 'type': 'text', - 'fields': { - 'raw': { - 'type': 'keyword' - } - } - }, - 'text': { - 'type': 'text', - 'fields': { - 'english': { - 'type': 'text', - 'analyzer': 'english' - } - } - }, - 'origin_location': { - 'properties': { - 'lat': { - 'type': 'text', - 'index_prefixes': {}, - 'fields': { - 'keyword': { - 'type': 'keyword', - 'ignore_above': 256 - } - } - }, - 'lon': { - 'type': 'text', - 'fields': { - 'keyword': { - 'type': 'keyword', - 'ignore_above': 256 - } - } - } - } - }, - 'maps-telemetry': { - 'properties': { - 'attributesPerMap': { - 'properties': { - 'dataSourcesCount': { - 'properties': { - 'avg': { - 'type': 'long' - }, - 'max': { - 'type': 'long' - }, - 'min': { - 'type': 'long' - } - } - }, - 'emsVectorLayersCount': { - 'dynamic': 'true', - 'properties': { - 'france_departments': { - 'properties': { - 'avg': { - 'type': 'float' - }, - 'max': { - 'type': 'long' - }, - 'min': { - 'type': 'long' - } - } - } - } - } - } - } - } - }, - 'type': { - 'type': 'keyword' - }, - 'name': { - 'type': 'text' - }, - 'user_name': { - 'type': 'keyword' - }, - 'email': { - 'type': 'keyword' - }, - 'content': { - 'type': 'text' - }, - 'tweeted_at': { - 'type': 'date' - }, - 'dest_location': { - 'type': 'geo_point' - }, - 'my_join_field': { - 'type': 'join', - 'relations': { - 'question': ['answer', 'comment'], - 'answer': 'vote' - } - } - } - } - } - -TEST_MAPPING1_INDEX_NAME = 'mapping1' - -TEST_MAPPING1_EXPECTED = { - 'city': 'text', - 'city.raw': 'keyword', - 'content': 'text', - 'dest_location': 'geo_point', - 'email': 'keyword', - 'maps-telemetry.attributesPerMap.dataSourcesCount.avg': 'long', - 'maps-telemetry.attributesPerMap.dataSourcesCount.max': 'long', - 'maps-telemetry.attributesPerMap.dataSourcesCount.min': 'long', - 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.avg': 'float', - 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.max': 'long', - 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.min': 'long', - 'my_join_field': 'join', - 'name': 'text', - 'origin_location.lat': 'text', - 'origin_location.lat.keyword': 'keyword', - 'origin_location.lon': 'text', - 'origin_location.lon.keyword': 'keyword', - 'text': 'text', - 'text.english': 'text', - 'tweeted_at': 'date', - 'type': 'keyword', - 'user_name': 'keyword' -} - -TEST_MAPPING1_EXPECTED_DF = pd.DataFrame.from_dict(data=TEST_MAPPING1_EXPECTED, orient='index', columns=['es_dtype']) -TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF = TEST_MAPPING1_EXPECTED_DF.drop(index=['city.raw', - 'origin_location.lat.keyword', - 'origin_location.lon.keyword', - 'text.english']) -TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT = len(TEST_MAPPING1_EXPECTED_SOURCE_FIELD_DF.index) - -TEST_NESTED_USER_GROUP_INDEX_NAME = 'nested_user_group' -TEST_NESTED_USER_GROUP_MAPPING = { - 'mappings': { - 'properties': { - 'group': { - 'type': 'keyword' - }, - 'user': { - 'properties': { - 'first': { - 'type': 'keyword' - }, - 'last': { - 'type': 'keyword' - }, - 'address' : { - 'type' : 'keyword' - } - } - } - } -} -} - -TEST_NESTED_USER_GROUP_DOCS = [ -{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME, -'_source': - {'group':'amsterdam','user':[ - {'first':'Manke','last':'Nelis','address':['Elandsgracht', 'Amsterdam']}, - {'first':'Johnny','last':'Jordaan','address':['Elandsstraat', 'Amsterdam']}]}}, -{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME, -'_source': - {'group':'london','user':[ - {'first':'Alice','last':'Monkton'}, - {'first':'Jimmy','last':'White','address':['London']}]}}, -{'_index':TEST_NESTED_USER_GROUP_INDEX_NAME, -'_source':{'group':'new york','user':[ - {'first':'Bill','last':'Jones'}]}} -] - diff --git a/eland/tests/client/__init__.py b/eland/tests/client/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/eland/tests/client/test_mappings_pytest.py b/eland/tests/client/test_mappings_pytest.py deleted file mode 100644 index c19d43f..0000000 --- a/eland/tests/client/test_mappings_pytest.py +++ /dev/null @@ -1,124 +0,0 @@ -# File called _pytest for PyCharm compatability - -import numpy as np -from pandas.util.testing import ( - assert_series_equal, assert_frame_equal) - -import eland as ed -from eland.tests import * -from eland.tests.common import TestData - - -class TestMapping(TestData): - - # Requires 'setup_tests.py' to be run prior to this - def test_fields(self): - mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) - - assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields() - - assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings._mappings_capabilities['es_dtype'])) - - assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields() - - def test_copy(self): - mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) - - assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields() - assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings._mappings_capabilities['es_dtype'])) - assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields() - - # Pick 1 source field - columns = ['dest_location'] - mappings_copy1 = ed.Mappings(mappings=mappings, columns=columns) - - assert columns == mappings_copy1.all_fields() - assert len(columns) == mappings_copy1.count_source_fields() - - # Pick 3 source fields (out of order) - columns = ['dest_location', 'city', 'user_name'] - mappings_copy2 = ed.Mappings(mappings=mappings, columns=columns) - - assert columns == mappings_copy2.all_fields() - assert len(columns) == mappings_copy2.count_source_fields() - - # Check original is still ok - assert TEST_MAPPING1_EXPECTED_DF.index.tolist() == mappings.all_fields() - assert_frame_equal(TEST_MAPPING1_EXPECTED_DF, pd.DataFrame(mappings._mappings_capabilities['es_dtype'])) - assert TEST_MAPPING1_EXPECTED_SOURCE_FIELD_COUNT == mappings.count_source_fields() - - def test_dtypes(self): - mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) - - expected_dtypes = pd.Series( - {'city': 'object', 'content': 'object', 'dest_location': 'object', 'email': 'object', - 'maps-telemetry.attributesPerMap.dataSourcesCount.avg': 'int64', - 'maps-telemetry.attributesPerMap.dataSourcesCount.max': 'int64', - 'maps-telemetry.attributesPerMap.dataSourcesCount.min': 'int64', - 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.avg': 'float64', - 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.max': 'int64', - 'maps-telemetry.attributesPerMap.emsVectorLayersCount.france_departments.min': 'int64', - 'my_join_field': 'object', 'name': 'object', 'origin_location.lat': 'object', - 'origin_location.lon': 'object', 'text': 'object', 'tweeted_at': 'datetime64[ns]', - 'type': 'object', 'user_name': 'object'}) - - assert_series_equal(expected_dtypes, mappings.dtypes()) - - def test_get_dtype_counts(self): - mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) - - expected_get_dtype_counts = pd.Series({'datetime64[ns]': 1, 'float64': 1, 'int64': 5, 'object': 11}) - - assert_series_equal(expected_get_dtype_counts, mappings.get_dtype_counts()) - - def test_mapping_capabilities(self): - mappings = ed.Mappings(ed.Client(ELASTICSEARCH_HOST), TEST_MAPPING1_INDEX_NAME) - - field_capabilities = mappings.field_capabilities('city') - - assert True == field_capabilities['_source'] - assert 'text' == field_capabilities['es_dtype'] - assert 'object' == field_capabilities['pd_dtype'] - assert True == field_capabilities['searchable'] - assert False == field_capabilities['aggregatable'] - - field_capabilities = mappings.field_capabilities('city.raw') - - assert False == field_capabilities['_source'] - assert 'keyword' == field_capabilities['es_dtype'] - assert 'object' == field_capabilities['pd_dtype'] - assert True == field_capabilities['searchable'] - assert True == field_capabilities['aggregatable'] - - def test_generate_es_mappings(self): - df = pd.DataFrame(data={'A': np.random.rand(3), - 'B': 1, - 'C': 'foo', - 'D': pd.Timestamp('20190102'), - 'E': [1.0, 2.0, 3.0], - 'F': False, - 'G': [1, 2, 3]}, - index=['0','1','2']) - - expected_mappings = {'mappings': { - 'properties': {'A': {'type': 'double'}, - 'B': {'type': 'long'}, - 'C': {'type': 'keyword'}, - 'D': {'type': 'date'}, - 'E': {'type': 'double'}, - 'F': {'type': 'boolean'}, - 'G': {'type': 'long'}}}} - - mappings = ed.Mappings._generate_es_mappings(df) - - assert expected_mappings == mappings - - # Now create index - index_name = 'eland_test_generate_es_mappings' - - ed.pandas_to_es(df, ELASTICSEARCH_HOST, index_name, if_exists="replace", refresh=True) - - ed_df = ed.DataFrame(ELASTICSEARCH_HOST, index_name) - ed_df_head = ed_df.head() - - assert_frame_equal(df, ed_df_head) diff --git a/eland/tests/common.py b/eland/tests/common.py deleted file mode 100644 index 25d1ad7..0000000 --- a/eland/tests/common.py +++ /dev/null @@ -1,44 +0,0 @@ -import pytest - -import eland as ed - -import pandas as pd - -import os - -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) - -# Create pandas and eland data frames -from eland.tests import ELASTICSEARCH_HOST -from eland.tests import FLIGHTS_DF_FILE_NAME, FLIGHTS_INDEX_NAME,\ - ECOMMERCE_DF_FILE_NAME, ECOMMERCE_INDEX_NAME - -_pd_flights = pd.read_json(FLIGHTS_DF_FILE_NAME).sort_index() -_pd_flights['timestamp'] = \ - pd.to_datetime(_pd_flights['timestamp']) -_pd_flights.index = _pd_flights.index.map(str) # make index 'object' not int -_ed_flights = ed.read_es(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME) - -_pd_ecommerce = pd.read_json(ECOMMERCE_DF_FILE_NAME).sort_index() -_pd_ecommerce['order_date'] = \ - pd.to_datetime(_pd_ecommerce['order_date']) -_pd_ecommerce['products.created_on'] = \ - _pd_ecommerce['products.created_on'].apply(lambda x: pd.to_datetime(x)) -_pd_ecommerce.insert(2, 'customer_birth_date', None) -_pd_ecommerce.index = _pd_ecommerce.index.map(str) # make index 'object' not int -_pd_ecommerce['customer_birth_date'].astype('datetime64') -_ed_ecommerce = ed.read_es(ELASTICSEARCH_HOST, ECOMMERCE_INDEX_NAME) - -class TestData: - - def pd_flights(self): - return _pd_flights - - def ed_flights(self): - return _ed_flights - - def pd_ecommerce(self): - return _pd_ecommerce - - def ed_ecommerce(self): - return _ed_ecommerce diff --git a/eland/tests/dataframe/test_basics_pytest.py b/eland/tests/dataframe/test_basics_pytest.py deleted file mode 100644 index dce92e9..0000000 --- a/eland/tests/dataframe/test_basics_pytest.py +++ /dev/null @@ -1,159 +0,0 @@ -# File called _pytest for PyCharm compatability -from eland.tests.common import TestData - -import pandas as pd -import io - -from pandas.util.testing import ( - assert_series_equal, assert_frame_equal) - -class TestDataFrameBasics(TestData): - - def test_mapping(self): - ed_flights_mappings = pd.DataFrame(self.ed_flights()._mappings._mappings_capabilities - [self.ed_flights()._mappings._mappings_capabilities._source==True] - ['pd_dtype']) - pd_flights_mappings = pd.DataFrame(self.pd_flights().dtypes, columns = ['pd_dtype']) - - assert_frame_equal(pd_flights_mappings, ed_flights_mappings) - - # We don't compare ecommerce here as the default dtypes in pandas from read_json - # don't match the mapping types. This is mainly because the products field is - # nested and so can be treated as a multi-field in ES, but not in pandas - - def test_head(self): - pd_flights_head = self.pd_flights().head() - ed_flights_head = self.ed_flights().head() - - print(ed_flights_head) - - assert_frame_equal(pd_flights_head, ed_flights_head) - - pd_ecommerce_head = self.pd_ecommerce().head() - ed_ecommerce_head = self.ed_ecommerce().head() - - assert_frame_equal(pd_ecommerce_head, ed_ecommerce_head) - - def test_tail(self): - pd_flights_tail = self.pd_flights().tail() - ed_flights_tail = self.ed_flights().tail() - - print(ed_flights_tail) - - assert_frame_equal(pd_flights_tail, ed_flights_tail) - - pd_ecommerce_tail = self.pd_ecommerce().tail() - ed_ecommerce_tail = self.ed_ecommerce().tail() - - assert_frame_equal(pd_ecommerce_tail, ed_ecommerce_tail) - - def test_describe(self): - pd_flights_describe = self.pd_flights().describe() - ed_flights_describe = self.ed_flights().describe() - - print(ed_flights_describe) - - # TODO - this fails now as ES aggregations are approximate - # if ES percentile agg uses - # "hdr": { - # "number_of_significant_value_digits": 3 - # } - # this works - #assert_almost_equal(pd_flights_describe, ed_flights_describe) - - pd_ecommerce_describe = self.pd_ecommerce().describe() - ed_ecommerce_describe = self.ed_ecommerce().describe() - - print(ed_ecommerce_describe) - - # We don't compare ecommerce here as the default dtypes in pandas from read_json - # don't match the mapping types. This is mainly because the products field is - # nested and so can be treated as a multi-field in ES, but not in pandas - - def test_size(self): - assert self.pd_flights().shape == self.ed_flights().shape - assert len(self.pd_flights()) == len(self.ed_flights()) - - def test_to_string(self): - print(self.ed_flights()) - print(self.ed_flights().to_string()) - - def test_info(self): - ed_flights_info_buf = io.StringIO() - pd_flights_info_buf = io.StringIO() - - self.ed_flights().info(buf=ed_flights_info_buf) - self.pd_flights().info(buf=pd_flights_info_buf) - - print(ed_flights_info_buf.getvalue()) - - ed_flights_info = (ed_flights_info_buf.getvalue().splitlines()) - pd_flights_info = (pd_flights_info_buf.getvalue().splitlines()) - - flights_diff = set(ed_flights_info).symmetric_difference(set(pd_flights_info)) - - ed_ecommerce_info_buf = io.StringIO() - pd_ecommerce_info_buf = io.StringIO() - - self.ed_ecommerce().info(buf=ed_ecommerce_info_buf) - self.pd_ecommerce().info(buf=pd_ecommerce_info_buf) - - ed_ecommerce_info = (ed_ecommerce_info_buf.getvalue().splitlines()) - pd_ecommerce_info = (pd_ecommerce_info_buf.getvalue().splitlines()) - - # We don't compare ecommerce here as the default dtypes in pandas from read_json - # don't match the mapping types. This is mainly because the products field is - # nested and so can be treated as a multi-field in ES, but not in pandas - ecommerce_diff = set(ed_ecommerce_info).symmetric_difference(set(pd_ecommerce_info)) - - - def test_count(self): - pd_flights_count = self.pd_flights().count() - ed_flights_count = self.ed_flights().count() - - assert_series_equal(pd_flights_count, ed_flights_count) - - pd_ecommerce_count = self.pd_ecommerce().count() - ed_ecommerce_count = self.ed_ecommerce().count() - - assert_series_equal(pd_ecommerce_count, ed_ecommerce_count) - - def test_get_dtype_counts(self): - pd_flights_get_dtype_counts = self.pd_flights().get_dtype_counts().sort_index() - ed_flights_get_dtype_counts = self.ed_flights().get_dtype_counts().sort_index() - - assert_series_equal(pd_flights_get_dtype_counts, ed_flights_get_dtype_counts) - - def test_get_properties(self): - pd_flights_shape = self.pd_flights().shape - ed_flights_shape = self.ed_flights().shape - - assert pd_flights_shape == ed_flights_shape - - pd_flights_columns = self.pd_flights().columns - ed_flights_columns = self.ed_flights().columns - - assert pd_flights_columns.tolist() == ed_flights_columns.tolist() - - pd_flights_dtypes = self.pd_flights().dtypes - ed_flights_dtypes = self.ed_flights().dtypes - - assert_series_equal(pd_flights_dtypes, ed_flights_dtypes) - - def test_index(self): - pd_flights = self.pd_flights() - pd_flights_timestamp = pd_flights.set_index('timestamp') - pd_flights.info() - pd_flights_timestamp.info() - pd_flights.info() - - ed_flights = self.ed_flights() - ed_flights_timestamp = ed_flights.set_index('timestamp') - ed_flights.info() - ed_flights_timestamp.info() - ed_flights.info() - - def test_to_pandas(self): - ed_ecommerce_pd_df = self.ed_ecommerce().to_pandas() - - assert_frame_equal(self.pd_ecommerce(), ed_ecommerce_pd_df) diff --git a/eland/tests/dataframe/test_getitem_pytest.py b/eland/tests/dataframe/test_getitem_pytest.py deleted file mode 100644 index e9b58ba..0000000 --- a/eland/tests/dataframe/test_getitem_pytest.py +++ /dev/null @@ -1,47 +0,0 @@ -# File called _pytest for PyCharm compatability -from eland.tests.common import TestData - -import pandas as pd -import io - -from pandas.util.testing import ( - assert_series_equal, assert_frame_equal) - -class TestDataFrameGetItem(TestData): - - def test_getitem_basic(self): - # Test 1 attribute - pd_carrier = self.pd_flights()['Carrier'] - ed_carrier = self.ed_flights()['Carrier'] - - # pandas returns a Series here - assert_series_equal(pd_carrier.head(100), ed_carrier.head(100)) - - pd_3_items = self.pd_flights()[['Dest','Carrier','FlightDelay']] - ed_3_items = self.ed_flights()[['Dest','Carrier','FlightDelay']] - - assert_frame_equal(pd_3_items.head(100), ed_3_items.head(100)) - - # Test numerics - numerics = ['DistanceMiles', 'AvgTicketPrice', 'FlightTimeMin'] - ed_numerics = self.ed_flights()[numerics] - pd_numerics = self.pd_flights()[numerics] - - assert_frame_equal(pd_numerics.head(100), ed_numerics.head(100)) - - # just test headers - ed_numerics_describe = ed_numerics.describe() - assert ed_numerics_describe.columns.tolist() == numerics - - def test_getattr_basic(self): - # Test 1 attribute - pd_carrier = self.pd_flights().Carrier - ed_carrier = self.ed_flights().Carrier - - assert_series_equal(pd_carrier.head(100), ed_carrier.head(100)) - - pd_avgticketprice = self.pd_flights().AvgTicketPrice - ed_avgticketprice = self.ed_flights().AvgTicketPrice - - assert_series_equal(pd_avgticketprice.head(100), ed_avgticketprice.head(100)) - diff --git a/eland/tests/dataframe/test_head_tail_pytest.py b/eland/tests/dataframe/test_head_tail_pytest.py new file mode 100644 index 0000000..1e3b7de --- /dev/null +++ b/eland/tests/dataframe/test_head_tail_pytest.py @@ -0,0 +1,79 @@ +# File called _pytest for PyCharm compatability +import pandas as pd +import io + +import eland as ed + +from pandas.util.testing import ( + assert_series_equal, assert_frame_equal) + +class TestDataFrameHeadTail(): + + def test_head(self): + ed_flights = ed.read_es(es_params='localhost', index_pattern='flights') + + head_10 = ed_flights.head(10) + print(head_10._query_compiler._operations._to_es_query()) + + head_8 = head_10.head(8) + print(head_8._query_compiler._operations._to_es_query()) + + head_20 = head_10.head(20) + print(head_20._query_compiler._operations._to_es_query()) + + def test_tail(self): + ed_flights = ed.read_es(es_params='localhost', index_pattern='flights') + + tail_10 = ed_flights.tail(10) + print(tail_10._query_compiler._operations._to_es_query()) + print(tail_10) + + tail_8 = tail_10.tail(8) + print(tail_8._query_compiler._operations._to_es_query()) + + tail_20 = tail_10.tail(20) + print(tail_20._query_compiler._operations._to_es_query()) + + def test_head_tail(self): + ed_flights = ed.read_es(es_params='localhost', index_pattern='flights') + + head_10 = ed_flights.head(10) + print(head_10._query_compiler._operations._to_es_query()) + + tail_8 = head_10.tail(8) + print(tail_8._query_compiler._operations._to_es_query()) + + tail_5 = tail_8.tail(5) + print(tail_5._query_compiler._operations._to_es_query()) + + head_4 = tail_5.head(4) + print(head_4._query_compiler._operations._to_es_query()) + + def test_tail_head(self): + ed_flights = ed.read_es(es_params='localhost', index_pattern='flights') + + tail_10 = ed_flights.tail(10) + print(tail_10._query_compiler._operations._to_es_query()) + + head_8 = tail_10.head(8) + print(head_8._query_compiler._operations._to_es_query()) + + head_5 = head_8.head(5) + print(head_5._query_compiler._operations._to_es_query()) + + tail_4 = head_5.tail(4) + print(tail_4._query_compiler._operations._to_es_query()) + + def test_head_tail_print(self): + ed_flights = ed.read_es(es_params='localhost', index_pattern='flights') + + tail_100 = ed_flights.tail(100) + print(tail_100._query_compiler._operations._to_es_query()) + print(tail_100) + + head_10 = tail_100.head(10) + print(head_10) + + tail_4 = head_10.tail(4) + print(tail_4._query_compiler._operations._to_es_query()) + print(tail_4) diff --git a/eland/tests/ecommerce.json.gz b/eland/tests/ecommerce.json.gz deleted file mode 100644 index b1a5dff..0000000 Binary files a/eland/tests/ecommerce.json.gz and /dev/null differ diff --git a/eland/tests/ecommerce_df.json.gz b/eland/tests/ecommerce_df.json.gz deleted file mode 100644 index 11f5a98..0000000 Binary files a/eland/tests/ecommerce_df.json.gz and /dev/null differ diff --git a/eland/tests/flights.json.gz b/eland/tests/flights.json.gz deleted file mode 100644 index df976e6..0000000 Binary files a/eland/tests/flights.json.gz and /dev/null differ diff --git a/eland/tests/flights_df.json.gz b/eland/tests/flights_df.json.gz deleted file mode 100644 index 5aed61e..0000000 Binary files a/eland/tests/flights_df.json.gz and /dev/null differ diff --git a/eland/tests/series/test_basics_pytest.py b/eland/tests/series/test_basics_pytest.py deleted file mode 100644 index 861b8bf..0000000 --- a/eland/tests/series/test_basics_pytest.py +++ /dev/null @@ -1,32 +0,0 @@ -# File called _pytest for PyCharm compatability -from eland.tests.common import TestData - -import pandas as pd -import eland as ed -import io - -from eland.tests import ELASTICSEARCH_HOST -from eland.tests import FLIGHTS_INDEX_NAME - -from pandas.util.testing import ( - assert_series_equal, assert_frame_equal) - -class TestSeriesBasics(TestData): - - def test_head_tail(self): - pd_s = self.pd_flights()['Carrier'] - ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'Carrier') - - pd_s_head = pd_s.head(10) - ed_s_head = ed_s.head(10) - - assert_series_equal(pd_s_head, ed_s_head) - - pd_s_tail = pd_s.tail(10) - ed_s_tail = ed_s.tail(10) - - assert_series_equal(pd_s_tail, ed_s_tail) - - def test_print(self): - ed_s = ed.Series(ELASTICSEARCH_HOST, FLIGHTS_INDEX_NAME, 'timestamp') - print(ed_s.to_string()) diff --git a/eland/tests/setup_tests.py b/eland/tests/setup_tests.py deleted file mode 100644 index b60fa1e..0000000 --- a/eland/tests/setup_tests.py +++ /dev/null @@ -1,69 +0,0 @@ -import pandas as pd -from elasticsearch import Elasticsearch -from elasticsearch import helpers - -from eland.tests import * - -DATA_LIST = [ - (FLIGHTS_FILE_NAME, FLIGHTS_INDEX_NAME, FLIGHTS_MAPPING), - (ECOMMERCE_FILE_NAME, ECOMMERCE_INDEX_NAME, ECOMMERCE_MAPPING) -] - -def _setup_data(es): - # Read json file and index records into Elasticsearch - for data in DATA_LIST: - json_file_name = data[0] - index_name = data[1] - mapping = data[2] - - # Delete index - print("Deleting index:", index_name) - es.indices.delete(index=index_name, ignore=[400, 404]) - print("Creating index:", index_name) - es.indices.create(index=index_name, body=mapping) - - df = pd.read_json(json_file_name, lines=True) - - actions = [] - n = 0 - - print("Adding", df.shape[0], "items to index:", index_name) - for index, row in df.iterrows(): - values = row.to_dict() - # make timestamp datetime 2018-01-01T12:09:35 - #values['timestamp'] = datetime.strptime(values['timestamp'], '%Y-%m-%dT%H:%M:%S') - - # Use integer as id field for repeatable results - action = {'_index': index_name, '_source': values, '_id': str(n)} - - actions.append(action) - - n = n + 1 - - if n % 10000 == 0: - helpers.bulk(es, actions) - actions = [] - - helpers.bulk(es, actions) - actions = [] - - print("Done", index_name) - -def _setup_test_mappings(es): - # Create a complex mapping containing many Elasticsearch features - es.indices.delete(index=TEST_MAPPING1_INDEX_NAME, ignore=[400, 404]) - es.indices.create(index=TEST_MAPPING1_INDEX_NAME, body=TEST_MAPPING1) - -def _setup_test_nested(es): - es.indices.delete(index=TEST_NESTED_USER_GROUP_INDEX_NAME, ignore=[400, 404]) - es.indices.create(index=TEST_NESTED_USER_GROUP_INDEX_NAME, body=TEST_NESTED_USER_GROUP_MAPPING) - - helpers.bulk(es, TEST_NESTED_USER_GROUP_DOCS) - -if __name__ == '__main__': - # Create connection to Elasticsearch - use defaults - es = Elasticsearch(ELASTICSEARCH_HOST) - - _setup_data(es) - _setup_test_mappings(es) - _setup_test_nested(es) diff --git a/eland/tests/test.ipynb b/eland/tests/test.ipynb deleted file mode 100644 index 84e0121..0000000 --- a/eland/tests/test.ipynb +++ /dev/null @@ -1,5759 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Pandas" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "pycharm": { - "is_executing": false - } - }, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "pd_df = pd.read_json('flights.json.gz', lines=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AvgTicketPriceCancelledCarrierDestDestAirportIDDestCityNameDestCountryDestLocationDestRegionDestWeather...FlightTimeMinOriginOriginAirportIDOriginCityNameOriginCountryOriginLocationOriginRegionOriginWeatherdayOfWeektimestamp
0841.265642FalseKibana AirlinesSydney Kingsford Smith International AirportSYDSydneyAU{'lat': '-33.94609833', 'lon': '151.177002'}SE-BDRain...1030.770416Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HESunny02018-01-01 00:00:00
1882.982662FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...464.389481Cape Town International AirportCPTCape TownZA{'lat': '-33.96480179', 'lon': '18.60169983'}SE-BDClear02018-01-01 18:27:00
2190.636904FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy...0.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 17:11:14
3181.694216TrueKibana AirlinesTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Clear...222.749059Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Thunder & Lightning02018-01-01 10:33:28
4730.041778FalseKibana AirlinesXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...785.779071Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFDamaging Wind02018-01-01 05:13:00
\n", - "

5 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " AvgTicketPrice Cancelled Carrier \\\n", - "0 841.265642 False Kibana Airlines \n", - "1 882.982662 False Logstash Airways \n", - "2 190.636904 False Logstash Airways \n", - "3 181.694216 True Kibana Airlines \n", - "4 730.041778 False Kibana Airlines \n", - "\n", - " Dest DestAirportID DestCityName \\\n", - "0 Sydney Kingsford Smith International Airport SYD Sydney \n", - "1 Venice Marco Polo Airport VE05 Venice \n", - "2 Venice Marco Polo Airport VE05 Venice \n", - "3 Treviso-Sant'Angelo Airport TV01 Treviso \n", - "4 Xi'an Xianyang International Airport XIY Xi'an \n", - "\n", - " DestCountry DestLocation DestRegion \\\n", - "0 AU {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n", - "1 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "2 IT {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "3 IT {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", - "4 CN {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "\n", - " DestWeather ... FlightTimeMin \\\n", - "0 Rain ... 1030.770416 \n", - "1 Sunny ... 464.389481 \n", - "2 Cloudy ... 0.000000 \n", - "3 Clear ... 222.749059 \n", - "4 Clear ... 785.779071 \n", - "\n", - " Origin OriginAirportID \\\n", - "0 Frankfurt am Main Airport FRA \n", - "1 Cape Town International Airport CPT \n", - "2 Venice Marco Polo Airport VE05 \n", - "3 Naples International Airport NA01 \n", - "4 Licenciado Benito Juarez International Airport AICM \n", - "\n", - " OriginCityName OriginCountry \\\n", - "0 Frankfurt am Main DE \n", - "1 Cape Town ZA \n", - "2 Venice IT \n", - "3 Naples IT \n", - "4 Mexico City MX \n", - "\n", - " OriginLocation OriginRegion \\\n", - "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", - "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n", - "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", - "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "\n", - " OriginWeather dayOfWeek timestamp \n", - "0 Sunny 0 2018-01-01 00:00:00 \n", - "1 Clear 0 2018-01-01 18:27:00 \n", - "2 Rain 0 2018-01-01 17:11:14 \n", - "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n", - "4 Damaging Wind 0 2018-01-01 05:13:00 \n", - "\n", - "[5 rows x 27 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AvgTicketPriceDistanceKilometersDistanceMilesFlightDelayMinFlightTimeHourFlightTimeMindayOfWeek
count13059.00000013059.00000013059.00000013059.00000013059.00000013059.00000013059.000000
mean628.2536897092.1424554406.85301347.3351718.518797511.1278422.835975
std266.3968614578.4384972844.90978796.7467115.579233334.7539521.939439
min100.0205280.0000000.0000000.0000000.0000000.0000000.000000
25%409.8938162459.7056731528.3902470.0000004.205553252.3331921.000000
50%640.5566687610.3308664728.8403630.0000008.384086503.0451703.000000
75%842.1854709736.6376006050.06611415.00000012.006934720.4160364.000000
max1199.72905319881.48231512353.780369360.00000031.7150341902.9020326.000000
\n", - "
" - ], - "text/plain": [ - " AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n", - "count 13059.000000 13059.000000 13059.000000 13059.000000 \n", - "mean 628.253689 7092.142455 4406.853013 47.335171 \n", - "std 266.396861 4578.438497 2844.909787 96.746711 \n", - "min 100.020528 0.000000 0.000000 0.000000 \n", - "25% 409.893816 2459.705673 1528.390247 0.000000 \n", - "50% 640.556668 7610.330866 4728.840363 0.000000 \n", - "75% 842.185470 9736.637600 6050.066114 15.000000 \n", - "max 1199.729053 19881.482315 12353.780369 360.000000 \n", - "\n", - " FlightTimeHour FlightTimeMin dayOfWeek \n", - "count 13059.000000 13059.000000 13059.000000 \n", - "mean 8.518797 511.127842 2.835975 \n", - "std 5.579233 334.753952 1.939439 \n", - "min 0.000000 0.000000 0.000000 \n", - "25% 4.205553 252.333192 1.000000 \n", - "50% 8.384086 503.045170 3.000000 \n", - "75% 12.006934 720.416036 4.000000 \n", - "max 31.715034 1902.902032 6.000000 " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd_df.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AvgTicketPriceCancelledCarrierDestDestAirportIDDestCityNameDestCountryDestLocationDestRegionDestWeather...FlightTimeMinOriginOriginAirportIDOriginCityNameOriginCountryOriginLocationOriginRegionOriginWeatherdayOfWeektimestamp
0841.265642FalseKibana AirlinesSydney Kingsford Smith International AirportSYDSydneyAU{'lat': '-33.94609833', 'lon': '151.177002'}SE-BDRain...1030.770416Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HESunny02018-01-01 00:00:00
1882.982662FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...464.389481Cape Town International AirportCPTCape TownZA{'lat': '-33.96480179', 'lon': '18.60169983'}SE-BDClear02018-01-01 18:27:00
2190.636904FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy...0.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 17:11:14
3181.694216TrueKibana AirlinesTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Clear...222.749059Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Thunder & Lightning02018-01-01 10:33:28
4730.041778FalseKibana AirlinesXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...785.779071Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFDamaging Wind02018-01-01 05:13:00
5418.152089FalseJetBeatsGenoa Cristoforo Colombo AirportGE01GenovaIT{'lat': '44.4133', 'lon': '8.8375'}IT-42Thunder & Lightning...393.590441Edmonton International AirportCYEGEdmontonCA{'lat': '53.30970001', 'lon': '-113.5800018'}CA-ABRain02018-01-01 01:43:03
6180.246816FalseJetBeatsZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHHail...300.000000Zurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHClear02018-01-01 13:49:53
7585.184310FalseKibana AirlinesOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONClear...614.942480Ciampino___G. B. Pastine International AirportRM12RomeIT{'lat': '41.7994', 'lon': '12.5949'}IT-62Thunder & Lightning02018-01-01 04:54:59
8960.869736TrueKibana AirlinesRajiv Gandhi International AirportHYDHyderabadIN{'lat': '17.23131752', 'lon': '78.42985535'}SE-BDCloudy...602.030591Milano Linate AirportMI11MilanIT{'lat': '45.445099', 'lon': '9.27674'}IT-25Heavy Fog02018-01-01 12:09:35
9296.877773FalseLogstash AirwaysTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Rain...174.822216Sheremetyevo International AirportSVOMoscowRU{'lat': '55.972599', 'lon': '37.4146'}RU-MOSCloudy02018-01-01 12:09:35
10906.437948FalseJetBeatsHelsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESRain...503.045170Albuquerque International Sunport AirportABQAlbuquerqueUS{'lat': '35.040199', 'lon': '-106.609001'}US-NMRain02018-01-01 22:06:14
11704.463771FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Cloudy...36.075018Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 11:52:34
12922.499077TrueLogstash AirwaysShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDClear...679.768391Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFHeavy Fog02018-01-01 02:13:46
13374.959276FalseLogstash AirwaysOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONRain...330.418282Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Rain02018-01-01 14:21:13
14552.917371FalseLogstash AirwaysLuis Munoz Marin International AirportSJUSan JuanPR{'lat': '18.43939972', 'lon': '-66.00180054'}PR-U-AClear...407.145031Ciampino___G. B. Pastine International AirportRM12RomeIT{'lat': '41.7994', 'lon': '12.5949'}IT-62Cloudy02018-01-01 17:42:53
15566.487557TrueKibana AirlinesCologne Bonn AirportCGNCologneDE{'lat': '50.86589813', 'lon': '7.142739773'}DE-NWSunny...656.712658Chengdu Shuangliu International AirportCTUChengduCN{'lat': '30.57850075', 'lon': '103.9469986'}SE-BDThunder & Lightning02018-01-01 19:55:32
16989.952787TrueLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Damaging Wind...773.030334Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFThunder & Lightning02018-01-01 07:49:27
17569.613255FalseES-AirMinistro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}SE-BDCloudy...704.716920Cleveland Hopkins International AirportCLEClevelandUS{'lat': '41.4117012', 'lon': '-81.84980011'}US-OHRain02018-01-01 01:30:47
18277.429707FalseES-AirShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDClear...355.957996Olenya Air BaseXLMOOlenegorskRU{'lat': '68.15180206', 'lon': '33.46390152'}RU-MURHail02018-01-01 07:58:17
19772.100846FalseJetBeatsIndira Gandhi International AirportDELNew DelhiIN{'lat': '28.5665', 'lon': '77.103104'}SE-BDClear...875.114675Casper-Natrona County International AirportCPRCasperUS{'lat': '42.90800095', 'lon': '-106.4639969'}US-WYCloudy02018-01-01 00:02:06
20167.599922FalseJetBeatsWichita Mid Continent AirportICTWichitaUS{'lat': '37.64989853', 'lon': '-97.43309784'}US-KSClear...373.966883Erie International Tom Ridge FieldERIErieUS{'lat': '42.08312701', 'lon': '-80.17386675'}US-PACloudy02018-01-01 01:08:20
21253.210065FalseES-AirOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONHail...130.667700Newark Liberty International AirportEWRNewarkUS{'lat': '40.69250107', 'lon': '-74.16870117'}US-NJClear02018-01-01 01:08:20
22917.247620FalseJetBeatsItami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDDamaging Wind...574.495310Copenhagen Kastrup AirportCPHCopenhagenDK{'lat': '55.61790085', 'lon': '12.65600014'}DK-84Sunny02018-01-01 07:48:35
23451.591176FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Heavy Fog...579.728943Seattle Tacoma International AirportSEASeattleUS{'lat': '47.44900131', 'lon': '-122.3089981'}US-WAHeavy Fog02018-01-01 18:57:21
24307.067201FalseLogstash AirwaysCharles de Gaulle International AirportCDGParisFR{'lat': '49.01279831', 'lon': '2.549999952'}FR-JClear...50.157229Berlin-Tegel AirportTXLBerlinDE{'lat': '52.5597', 'lon': '13.2877'}DE-BERain02018-01-01 13:18:25
25268.241596FalseES-AirNarita International AirportNRTTokyoJP{'lat': '35.76470184', 'lon': '140.3860016'}SE-BDRain...527.567422Manchester AirportMANManchesterGB{'lat': '53.35369873', 'lon': '-2.274950027'}GB-ENGThunder & Lightning02018-01-01 08:20:35
26975.812632TrueKibana AirlinesItami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDHail...386.259764Helsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESRain02018-01-01 15:38:32
27134.214546FalseJetBeatsSan Diego International AirportSANSan DiegoUS{'lat': '32.73360062', 'lon': '-117.1900024'}US-CAClear...24.479650Phoenix Sky Harbor International AirportPHXPhoenixUS{'lat': '33.43429947', 'lon': '-112.012001'}US-AZClear02018-01-01 03:08:45
28988.897564FalseKibana AirlinesVerona Villafranca AirportVR10VeronaIT{'lat': '45.395699', 'lon': '10.8885'}IT-34Sunny...568.351033New Chitose AirportCTSChitose / TomakomaiJP{'lat': '42.77519989', 'lon': '141.6920013'}SE-BDDamaging Wind02018-01-01 01:16:59
29511.067220FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHRain...425.889194Tulsa International AirportTULTulsaUS{'lat': '36.19839859', 'lon': '-95.88809967'}US-OKRain02018-01-01 18:00:59
..................................................................
13029795.905278FalseKibana AirlinesMalpensa International AirportMI12MilanIT{'lat': '45.6306', 'lon': '8.72811'}IT-25Sunny...534.375826Itami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDSunny62018-02-11 20:10:13
13030863.388068FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDDamaging Wind...141.172633Tokyo Haneda International AirportHNDTokyoJP{'lat': '35.552299', 'lon': '139.779999'}SE-BDClear62018-02-11 18:59:53
13031575.183008FalseJetBeatsSavannah Hilton Head International AirportSAVSavannahUS{'lat': '32.12760162', 'lon': '-81.20210266'}US-GAThunder & Lightning...1113.137060OR Tambo International AirportJNBJohannesburgZA{'lat': '-26.1392', 'lon': '28.246'}SE-BDHail62018-02-11 00:57:48
13032817.368952FalseJetBeatsSyracuse Hancock International AirportSYRSyracuseUS{'lat': '43.11119843', 'lon': '-76.10630035'}US-NYRain...714.964864El Dorado International AirportBOGBogotaCO{'lat': '4.70159', 'lon': '-74.1469'}CO-CUNThunder & Lightning62018-02-11 12:02:49
13033579.582455FalseES-AirTampa International AirportTPATampaUS{'lat': '27.97550011', 'lon': '-82.53320313'}US-FLRain...234.929046Jorge Chavez International AirportLIMLimaPE{'lat': '-12.0219', 'lon': '-77.114304'}SE-BDThunder & Lightning62018-02-11 02:07:40
130341004.916638FalseJetBeatsOlenya Air BaseXLMOOlenegorskRU{'lat': '68.15180206', 'lon': '33.46390152'}RU-MURClear...526.895776Gimpo International AirportGMPSeoulKR{'lat': '37.5583', 'lon': '126.791'}SE-BDSunny62018-02-11 00:35:04
13035357.562842TrueLogstash AirwaysShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDThunder & Lightning...0.000000Shanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDThunder & Lightning62018-02-11 11:19:12
13036429.580539FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...150.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy62018-02-11 15:07:11
13037729.788171TrueES-AirVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Rain...691.944839Ukrainka Air BaseXHBUBelogorskRU{'lat': '51.169997', 'lon': '128.445007'}RU-AMUDamaging Wind62018-02-11 10:24:42
13038564.897695FalseES-AirPisa International AirportPI05PisaIT{'lat': '43.683899', 'lon': '10.3927'}IT-52Heavy Fog...567.387339OR Tambo International AirportJNBJohannesburgZA{'lat': '-26.1392', 'lon': '28.246'}SE-BDDamaging Wind62018-02-11 00:42:06
130391014.052787FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Thunder & Lightning...690.092327Montreal / Pierre Elliott Trudeau Internationa...YULMontrealCA{'lat': '45.47060013', 'lon': '-73.74079895'}CA-QCThunder & Lightning62018-02-11 10:56:31
13040455.243843FalseES-AirLondon Luton AirportLTNLondonGB{'lat': '51.87469864', 'lon': '-0.368333012'}GB-ENGCloudy...3.028293London Heathrow AirportLHRLondonGB{'lat': '51.4706', 'lon': '-0.461941'}GB-ENGClear62018-02-11 00:39:37
13041611.370232FalseLogstash AirwaysJorge Chavez International AirportLIMLimaPE{'lat': '-12.0219', 'lon': '-77.114304'}SE-BDSunny...338.875531Casper-Natrona County International AirportCPRCasperUS{'lat': '42.90800095', 'lon': '-106.4639969'}US-WYRain62018-02-11 10:24:30
13042595.961285FalseJetBeatsOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONClear...375.129587Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HEClear62018-02-11 09:02:07
13043782.747648FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...156.858481Tokyo Haneda International AirportHNDTokyoJP{'lat': '35.552299', 'lon': '139.779999'}SE-BDThunder & Lightning62018-02-11 04:45:06
13044891.117221FalseJetBeatsWinnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBClear...354.106457Vienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Thunder & Lightning62018-02-11 00:51:14
13045587.169921FalseLogstash AirwaysBrisbane International AirportBNEBrisbaneAU{'lat': '-27.38419914', 'lon': '153.1170044'}SE-BDRain...771.305442Amsterdam Airport SchipholAMSAmsterdamNL{'lat': '52.30860138', 'lon': '4.76388979'}NL-NHSunny62018-02-11 05:41:51
13046739.132165FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDRain...542.955572Winnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBHail62018-02-11 10:02:21
13047605.191876FalseJetBeatsPortland International Jetport AirportPWMPortlandUS{'lat': '43.64619827', 'lon': '-70.30930328'}US-METhunder & Lightning...564.599857Jeju International AirportCJUJeju CityKR{'lat': '33.51129913', 'lon': '126.4929962'}SE-BDCloudy62018-02-11 15:55:10
13048361.767659TrueLogstash AirwaysDubai International AirportDXBDubaiAE{'lat': '25.25279999', 'lon': '55.36439896'}SE-BDSunny...180.000000Dubai International AirportDXBDubaiAE{'lat': '25.25279999', 'lon': '55.36439896'}SE-BDHail62018-02-11 04:11:14
13049662.306992FalseES-AirWinnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBHeavy Fog...835.954429Ministro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}AR-BSunny62018-02-11 10:13:32
13050630.779526FalseJetBeatsHelsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESSunny...451.755639Beijing Capital International AirportPEKBeijingCN{'lat': '40.08010101', 'lon': '116.5849991'}SE-BDCloudy62018-02-11 11:23:23
13051937.771279TrueLogstash AirwaysLester B. Pearson International AirportYYZTorontoCA{'lat': '43.67720032', 'lon': '-79.63059998'}CA-ONSunny...507.451571Leonardo da Vinci___Fiumicino AirportRM11RomeIT{'lat': '41.8002778', 'lon': '12.2388889'}IT-62Hail62018-02-11 01:13:50
130521085.155339FalseLogstash AirwaysMelbourne International AirportMELMelbourneAU{'lat': '-37.673302', 'lon': '144.843002'}SE-BDCloudy...1044.451122Bologna Guglielmo Marconi AirportBO08BolognaIT{'lat': '44.5354', 'lon': '11.2887'}IT-45Cloudy62018-02-11 18:35:42
130531191.964104FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHHail...728.715904Portland International Jetport AirportPWMPortlandUS{'lat': '43.64619827', 'lon': '-70.30930328'}US-MEClear62018-02-11 19:02:10
130541080.446279FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDRain...402.929088Pisa International AirportPI05PisaIT{'lat': '43.683899', 'lon': '10.3927'}IT-52Sunny62018-02-11 20:42:25
13055646.612941FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHRain...644.418029Winnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBRain62018-02-11 01:41:57
13056997.751876FalseLogstash AirwaysUkrainka Air BaseXHBUBelogorskRU{'lat': '51.169997', 'lon': '128.445007'}RU-AMURain...937.540811Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFSunny62018-02-11 04:09:27
130571102.814465FalseJetBeatsMinistro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}SE-BDHail...1697.404971Itami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDHail62018-02-11 08:28:21
13058858.144337FalseJetBeatsWashington Dulles International AirportIADWashingtonUS{'lat': '38.94449997', 'lon': '-77.45580292'}US-DCHeavy Fog...1610.761827Adelaide International AirportADLAdelaideAU{'lat': '-34.945', 'lon': '138.531006'}SE-BDRain62018-02-11 14:54:34
\n", - "

13059 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " AvgTicketPrice Cancelled Carrier \\\n", - "0 841.265642 False Kibana Airlines \n", - "1 882.982662 False Logstash Airways \n", - "2 190.636904 False Logstash Airways \n", - "3 181.694216 True Kibana Airlines \n", - "4 730.041778 False Kibana Airlines \n", - "5 418.152089 False JetBeats \n", - "6 180.246816 False JetBeats \n", - "7 585.184310 False Kibana Airlines \n", - "8 960.869736 True Kibana Airlines \n", - "9 296.877773 False Logstash Airways \n", - "10 906.437948 False JetBeats \n", - "11 704.463771 False Logstash Airways \n", - "12 922.499077 True Logstash Airways \n", - "13 374.959276 False Logstash Airways \n", - "14 552.917371 False Logstash Airways \n", - "15 566.487557 True Kibana Airlines \n", - "16 989.952787 True Logstash Airways \n", - "17 569.613255 False ES-Air \n", - "18 277.429707 False ES-Air \n", - "19 772.100846 False JetBeats \n", - "20 167.599922 False JetBeats \n", - "21 253.210065 False ES-Air \n", - "22 917.247620 False JetBeats \n", - "23 451.591176 False Logstash Airways \n", - "24 307.067201 False Logstash Airways \n", - "25 268.241596 False ES-Air \n", - "26 975.812632 True Kibana Airlines \n", - "27 134.214546 False JetBeats \n", - "28 988.897564 False Kibana Airlines \n", - "29 511.067220 False Logstash Airways \n", - "... ... ... ... \n", - "13029 795.905278 False Kibana Airlines \n", - "13030 863.388068 False Logstash Airways \n", - "13031 575.183008 False JetBeats \n", - "13032 817.368952 False JetBeats \n", - "13033 579.582455 False ES-Air \n", - "13034 1004.916638 False JetBeats \n", - "13035 357.562842 True Logstash Airways \n", - "13036 429.580539 False Logstash Airways \n", - "13037 729.788171 True ES-Air \n", - "13038 564.897695 False ES-Air \n", - "13039 1014.052787 False Logstash Airways \n", - "13040 455.243843 False ES-Air \n", - "13041 611.370232 False Logstash Airways \n", - "13042 595.961285 False JetBeats \n", - "13043 782.747648 False Logstash Airways \n", - "13044 891.117221 False JetBeats \n", - "13045 587.169921 False Logstash Airways \n", - "13046 739.132165 False Logstash Airways \n", - "13047 605.191876 False JetBeats \n", - "13048 361.767659 True Logstash Airways \n", - "13049 662.306992 False ES-Air \n", - "13050 630.779526 False JetBeats \n", - "13051 937.771279 True Logstash Airways \n", - "13052 1085.155339 False Logstash Airways \n", - "13053 1191.964104 False Logstash Airways \n", - "13054 1080.446279 False Logstash Airways \n", - "13055 646.612941 False Logstash Airways \n", - "13056 997.751876 False Logstash Airways \n", - "13057 1102.814465 False JetBeats \n", - "13058 858.144337 False JetBeats \n", - "\n", - " Dest DestAirportID \\\n", - "0 Sydney Kingsford Smith International Airport SYD \n", - "1 Venice Marco Polo Airport VE05 \n", - "2 Venice Marco Polo Airport VE05 \n", - "3 Treviso-Sant'Angelo Airport TV01 \n", - "4 Xi'an Xianyang International Airport XIY \n", - "5 Genoa Cristoforo Colombo Airport GE01 \n", - "6 Zurich Airport ZRH \n", - "7 Ottawa Macdonald-Cartier International Airport YOW \n", - "8 Rajiv Gandhi International Airport HYD \n", - "9 Treviso-Sant'Angelo Airport TV01 \n", - "10 Helsinki Vantaa Airport HEL \n", - "11 Vienna International Airport VIE \n", - "12 Shanghai Pudong International Airport PVG \n", - "13 Ottawa Macdonald-Cartier International Airport YOW \n", - "14 Luis Munoz Marin International Airport SJU \n", - "15 Cologne Bonn Airport CGN \n", - "16 Venice Marco Polo Airport VE05 \n", - "17 Ministro Pistarini International Airport EZE \n", - "18 Shanghai Pudong International Airport PVG \n", - "19 Indira Gandhi International Airport DEL \n", - "20 Wichita Mid Continent Airport ICT \n", - "21 Ottawa Macdonald-Cartier International Airport YOW \n", - "22 Itami Airport ITM \n", - "23 Vienna International Airport VIE \n", - "24 Charles de Gaulle International Airport CDG \n", - "25 Narita International Airport NRT \n", - "26 Itami Airport ITM \n", - "27 San Diego International Airport SAN \n", - "28 Verona Villafranca Airport VR10 \n", - "29 Zurich Airport ZRH \n", - "... ... ... \n", - "13029 Malpensa International Airport MI12 \n", - "13030 Xi'an Xianyang International Airport XIY \n", - "13031 Savannah Hilton Head International Airport SAV \n", - "13032 Syracuse Hancock International Airport SYR \n", - "13033 Tampa International Airport TPA \n", - "13034 Olenya Air Base XLMO \n", - "13035 Shanghai Pudong International Airport PVG \n", - "13036 Venice Marco Polo Airport VE05 \n", - "13037 Vienna International Airport VIE \n", - "13038 Pisa International Airport PI05 \n", - "13039 Vienna International Airport VIE \n", - "13040 London Luton Airport LTN \n", - "13041 Jorge Chavez International Airport LIM \n", - "13042 Ottawa Macdonald-Cartier International Airport YOW \n", - "13043 Xi'an Xianyang International Airport XIY \n", - "13044 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13045 Brisbane International Airport BNE \n", - "13046 Xi'an Xianyang International Airport XIY \n", - "13047 Portland International Jetport Airport PWM \n", - "13048 Dubai International Airport DXB \n", - "13049 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13050 Helsinki Vantaa Airport HEL \n", - "13051 Lester B. Pearson International Airport YYZ \n", - "13052 Melbourne International Airport MEL \n", - "13053 Zurich Airport ZRH \n", - "13054 Xi'an Xianyang International Airport XIY \n", - "13055 Zurich Airport ZRH \n", - "13056 Ukrainka Air Base XHBU \n", - "13057 Ministro Pistarini International Airport EZE \n", - "13058 Washington Dulles International Airport IAD \n", - "\n", - " DestCityName DestCountry \\\n", - "0 Sydney AU \n", - "1 Venice IT \n", - "2 Venice IT \n", - "3 Treviso IT \n", - "4 Xi'an CN \n", - "5 Genova IT \n", - "6 Zurich CH \n", - "7 Ottawa CA \n", - "8 Hyderabad IN \n", - "9 Treviso IT \n", - "10 Helsinki FI \n", - "11 Vienna AT \n", - "12 Shanghai CN \n", - "13 Ottawa CA \n", - "14 San Juan PR \n", - "15 Cologne DE \n", - "16 Venice IT \n", - "17 Buenos Aires AR \n", - "18 Shanghai CN \n", - "19 New Delhi IN \n", - "20 Wichita US \n", - "21 Ottawa CA \n", - "22 Osaka JP \n", - "23 Vienna AT \n", - "24 Paris FR \n", - "25 Tokyo JP \n", - "26 Osaka JP \n", - "27 San Diego US \n", - "28 Verona IT \n", - "29 Zurich CH \n", - "... ... ... \n", - "13029 Milan IT \n", - "13030 Xi'an CN \n", - "13031 Savannah US \n", - "13032 Syracuse US \n", - "13033 Tampa US \n", - "13034 Olenegorsk RU \n", - "13035 Shanghai CN \n", - "13036 Venice IT \n", - "13037 Vienna AT \n", - "13038 Pisa IT \n", - "13039 Vienna AT \n", - "13040 London GB \n", - "13041 Lima PE \n", - "13042 Ottawa CA \n", - "13043 Xi'an CN \n", - "13044 Winnipeg CA \n", - "13045 Brisbane AU \n", - "13046 Xi'an CN \n", - "13047 Portland US \n", - "13048 Dubai AE \n", - "13049 Winnipeg CA \n", - "13050 Helsinki FI \n", - "13051 Toronto CA \n", - "13052 Melbourne AU \n", - "13053 Zurich CH \n", - "13054 Xi'an CN \n", - "13055 Zurich CH \n", - "13056 Belogorsk RU \n", - "13057 Buenos Aires AR \n", - "13058 Washington US \n", - "\n", - " DestLocation DestRegion \\\n", - "0 {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n", - "1 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "3 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", - "4 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "5 {'lat': '44.4133', 'lon': '8.8375'} IT-42 \n", - "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "7 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "8 {'lat': '17.23131752', 'lon': '78.42985535'} SE-BD \n", - "9 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", - "10 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "11 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "12 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "14 {'lat': '18.43939972', 'lon': '-66.00180054'} PR-U-A \n", - "15 {'lat': '50.86589813', 'lon': '7.142739773'} DE-NW \n", - "16 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "17 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", - "18 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "19 {'lat': '28.5665', 'lon': '77.103104'} SE-BD \n", - "20 {'lat': '37.64989853', 'lon': '-97.43309784'} US-KS \n", - "21 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "22 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "23 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "24 {'lat': '49.01279831', 'lon': '2.549999952'} FR-J \n", - "25 {'lat': '35.76470184', 'lon': '140.3860016'} SE-BD \n", - "26 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "27 {'lat': '32.73360062', 'lon': '-117.1900024'} US-CA \n", - "28 {'lat': '45.395699', 'lon': '10.8885'} IT-34 \n", - "29 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "... ... ... \n", - "13029 {'lat': '45.6306', 'lon': '8.72811'} IT-25 \n", - "13030 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13031 {'lat': '32.12760162', 'lon': '-81.20210266'} US-GA \n", - "13032 {'lat': '43.11119843', 'lon': '-76.10630035'} US-NY \n", - "13033 {'lat': '27.97550011', 'lon': '-82.53320313'} US-FL \n", - "13034 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", - "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "13037 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13038 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", - "13039 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13040 {'lat': '51.87469864', 'lon': '-0.368333012'} GB-ENG \n", - "13041 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", - "13042 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "13043 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13044 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13045 {'lat': '-27.38419914', 'lon': '153.1170044'} SE-BD \n", - "13046 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13047 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", - "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", - "13049 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13050 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "13051 {'lat': '43.67720032', 'lon': '-79.63059998'} CA-ON \n", - "13052 {'lat': '-37.673302', 'lon': '144.843002'} SE-BD \n", - "13053 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "13054 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13055 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "13056 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", - "13057 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", - "13058 {'lat': '38.94449997', 'lon': '-77.45580292'} US-DC \n", - "\n", - " DestWeather ... FlightTimeMin \\\n", - "0 Rain ... 1030.770416 \n", - "1 Sunny ... 464.389481 \n", - "2 Cloudy ... 0.000000 \n", - "3 Clear ... 222.749059 \n", - "4 Clear ... 785.779071 \n", - "5 Thunder & Lightning ... 393.590441 \n", - "6 Hail ... 300.000000 \n", - "7 Clear ... 614.942480 \n", - "8 Cloudy ... 602.030591 \n", - "9 Rain ... 174.822216 \n", - "10 Rain ... 503.045170 \n", - "11 Cloudy ... 36.075018 \n", - "12 Clear ... 679.768391 \n", - "13 Rain ... 330.418282 \n", - "14 Clear ... 407.145031 \n", - "15 Sunny ... 656.712658 \n", - "16 Damaging Wind ... 773.030334 \n", - "17 Cloudy ... 704.716920 \n", - "18 Clear ... 355.957996 \n", - "19 Clear ... 875.114675 \n", - "20 Clear ... 373.966883 \n", - "21 Hail ... 130.667700 \n", - "22 Damaging Wind ... 574.495310 \n", - "23 Heavy Fog ... 579.728943 \n", - "24 Clear ... 50.157229 \n", - "25 Rain ... 527.567422 \n", - "26 Hail ... 386.259764 \n", - "27 Clear ... 24.479650 \n", - "28 Sunny ... 568.351033 \n", - "29 Rain ... 425.889194 \n", - "... ... ... ... \n", - "13029 Sunny ... 534.375826 \n", - "13030 Damaging Wind ... 141.172633 \n", - "13031 Thunder & Lightning ... 1113.137060 \n", - "13032 Rain ... 714.964864 \n", - "13033 Rain ... 234.929046 \n", - "13034 Clear ... 526.895776 \n", - "13035 Thunder & Lightning ... 0.000000 \n", - "13036 Sunny ... 150.000000 \n", - "13037 Rain ... 691.944839 \n", - "13038 Heavy Fog ... 567.387339 \n", - "13039 Thunder & Lightning ... 690.092327 \n", - "13040 Cloudy ... 3.028293 \n", - "13041 Sunny ... 338.875531 \n", - "13042 Clear ... 375.129587 \n", - "13043 Clear ... 156.858481 \n", - "13044 Clear ... 354.106457 \n", - "13045 Rain ... 771.305442 \n", - "13046 Rain ... 542.955572 \n", - "13047 Thunder & Lightning ... 564.599857 \n", - "13048 Sunny ... 180.000000 \n", - "13049 Heavy Fog ... 835.954429 \n", - "13050 Sunny ... 451.755639 \n", - "13051 Sunny ... 507.451571 \n", - "13052 Cloudy ... 1044.451122 \n", - "13053 Hail ... 728.715904 \n", - "13054 Rain ... 402.929088 \n", - "13055 Rain ... 644.418029 \n", - "13056 Rain ... 937.540811 \n", - "13057 Hail ... 1697.404971 \n", - "13058 Heavy Fog ... 1610.761827 \n", - "\n", - " Origin OriginAirportID \\\n", - "0 Frankfurt am Main Airport FRA \n", - "1 Cape Town International Airport CPT \n", - "2 Venice Marco Polo Airport VE05 \n", - "3 Naples International Airport NA01 \n", - "4 Licenciado Benito Juarez International Airport AICM \n", - "5 Edmonton International Airport CYEG \n", - "6 Zurich Airport ZRH \n", - "7 Ciampino___G. B. Pastine International Airport RM12 \n", - "8 Milano Linate Airport MI11 \n", - "9 Sheremetyevo International Airport SVO \n", - "10 Albuquerque International Sunport Airport ABQ \n", - "11 Venice Marco Polo Airport VE05 \n", - "12 Licenciado Benito Juarez International Airport AICM \n", - "13 Naples International Airport NA01 \n", - "14 Ciampino___G. B. Pastine International Airport RM12 \n", - "15 Chengdu Shuangliu International Airport CTU \n", - "16 Licenciado Benito Juarez International Airport AICM \n", - "17 Cleveland Hopkins International Airport CLE \n", - "18 Olenya Air Base XLMO \n", - "19 Casper-Natrona County International Airport CPR \n", - "20 Erie International Tom Ridge Field ERI \n", - "21 Newark Liberty International Airport EWR \n", - "22 Copenhagen Kastrup Airport CPH \n", - "23 Seattle Tacoma International Airport SEA \n", - "24 Berlin-Tegel Airport TXL \n", - "25 Manchester Airport MAN \n", - "26 Helsinki Vantaa Airport HEL \n", - "27 Phoenix Sky Harbor International Airport PHX \n", - "28 New Chitose Airport CTS \n", - "29 Tulsa International Airport TUL \n", - "... ... ... \n", - "13029 Itami Airport ITM \n", - "13030 Tokyo Haneda International Airport HND \n", - "13031 OR Tambo International Airport JNB \n", - "13032 El Dorado International Airport BOG \n", - "13033 Jorge Chavez International Airport LIM \n", - "13034 Gimpo International Airport GMP \n", - "13035 Shanghai Pudong International Airport PVG \n", - "13036 Venice Marco Polo Airport VE05 \n", - "13037 Ukrainka Air Base XHBU \n", - "13038 OR Tambo International Airport JNB \n", - "13039 Montreal / Pierre Elliott Trudeau Internationa... YUL \n", - "13040 London Heathrow Airport LHR \n", - "13041 Casper-Natrona County International Airport CPR \n", - "13042 Frankfurt am Main Airport FRA \n", - "13043 Tokyo Haneda International Airport HND \n", - "13044 Vienna International Airport VIE \n", - "13045 Amsterdam Airport Schiphol AMS \n", - "13046 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13047 Jeju International Airport CJU \n", - "13048 Dubai International Airport DXB \n", - "13049 Ministro Pistarini International Airport EZE \n", - "13050 Beijing Capital International Airport PEK \n", - "13051 Leonardo da Vinci___Fiumicino Airport RM11 \n", - "13052 Bologna Guglielmo Marconi Airport BO08 \n", - "13053 Portland International Jetport Airport PWM \n", - "13054 Pisa International Airport PI05 \n", - "13055 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13056 Licenciado Benito Juarez International Airport AICM \n", - "13057 Itami Airport ITM \n", - "13058 Adelaide International Airport ADL \n", - "\n", - " OriginCityName OriginCountry \\\n", - "0 Frankfurt am Main DE \n", - "1 Cape Town ZA \n", - "2 Venice IT \n", - "3 Naples IT \n", - "4 Mexico City MX \n", - "5 Edmonton CA \n", - "6 Zurich CH \n", - "7 Rome IT \n", - "8 Milan IT \n", - "9 Moscow RU \n", - "10 Albuquerque US \n", - "11 Venice IT \n", - "12 Mexico City MX \n", - "13 Naples IT \n", - "14 Rome IT \n", - "15 Chengdu CN \n", - "16 Mexico City MX \n", - "17 Cleveland US \n", - "18 Olenegorsk RU \n", - "19 Casper US \n", - "20 Erie US \n", - "21 Newark US \n", - "22 Copenhagen DK \n", - "23 Seattle US \n", - "24 Berlin DE \n", - "25 Manchester GB \n", - "26 Helsinki FI \n", - "27 Phoenix US \n", - "28 Chitose / Tomakomai JP \n", - "29 Tulsa US \n", - "... ... ... \n", - "13029 Osaka JP \n", - "13030 Tokyo JP \n", - "13031 Johannesburg ZA \n", - "13032 Bogota CO \n", - "13033 Lima PE \n", - "13034 Seoul KR \n", - "13035 Shanghai CN \n", - "13036 Venice IT \n", - "13037 Belogorsk RU \n", - "13038 Johannesburg ZA \n", - "13039 Montreal CA \n", - "13040 London GB \n", - "13041 Casper US \n", - "13042 Frankfurt am Main DE \n", - "13043 Tokyo JP \n", - "13044 Vienna AT \n", - "13045 Amsterdam NL \n", - "13046 Winnipeg CA \n", - "13047 Jeju City KR \n", - "13048 Dubai AE \n", - "13049 Buenos Aires AR \n", - "13050 Beijing CN \n", - "13051 Rome IT \n", - "13052 Bologna IT \n", - "13053 Portland US \n", - "13054 Pisa IT \n", - "13055 Winnipeg CA \n", - "13056 Mexico City MX \n", - "13057 Osaka JP \n", - "13058 Adelaide AU \n", - "\n", - " OriginLocation OriginRegion \\\n", - "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", - "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n", - "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", - "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "5 {'lat': '53.30970001', 'lon': '-113.5800018'} CA-AB \n", - "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "7 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", - "8 {'lat': '45.445099', 'lon': '9.27674'} IT-25 \n", - "9 {'lat': '55.972599', 'lon': '37.4146'} RU-MOS \n", - "10 {'lat': '35.040199', 'lon': '-106.609001'} US-NM \n", - "11 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "12 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "13 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", - "14 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", - "15 {'lat': '30.57850075', 'lon': '103.9469986'} SE-BD \n", - "16 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "17 {'lat': '41.4117012', 'lon': '-81.84980011'} US-OH \n", - "18 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", - "19 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", - "20 {'lat': '42.08312701', 'lon': '-80.17386675'} US-PA \n", - "21 {'lat': '40.69250107', 'lon': '-74.16870117'} US-NJ \n", - "22 {'lat': '55.61790085', 'lon': '12.65600014'} DK-84 \n", - "23 {'lat': '47.44900131', 'lon': '-122.3089981'} US-WA \n", - "24 {'lat': '52.5597', 'lon': '13.2877'} DE-BE \n", - "25 {'lat': '53.35369873', 'lon': '-2.274950027'} GB-ENG \n", - "26 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "27 {'lat': '33.43429947', 'lon': '-112.012001'} US-AZ \n", - "28 {'lat': '42.77519989', 'lon': '141.6920013'} SE-BD \n", - "29 {'lat': '36.19839859', 'lon': '-95.88809967'} US-OK \n", - "... ... ... \n", - "13029 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "13030 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", - "13031 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", - "13032 {'lat': '4.70159', 'lon': '-74.1469'} CO-CUN \n", - "13033 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", - "13034 {'lat': '37.5583', 'lon': '126.791'} SE-BD \n", - "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "13037 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", - "13038 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", - "13039 {'lat': '45.47060013', 'lon': '-73.74079895'} CA-QC \n", - "13040 {'lat': '51.4706', 'lon': '-0.461941'} GB-ENG \n", - "13041 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", - "13042 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", - "13043 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", - "13044 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13045 {'lat': '52.30860138', 'lon': '4.76388979'} NL-NH \n", - "13046 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13047 {'lat': '33.51129913', 'lon': '126.4929962'} SE-BD \n", - "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", - "13049 {'lat': '-34.8222', 'lon': '-58.5358'} AR-B \n", - "13050 {'lat': '40.08010101', 'lon': '116.5849991'} SE-BD \n", - "13051 {'lat': '41.8002778', 'lon': '12.2388889'} IT-62 \n", - "13052 {'lat': '44.5354', 'lon': '11.2887'} IT-45 \n", - "13053 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", - "13054 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", - "13055 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13056 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "13057 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "13058 {'lat': '-34.945', 'lon': '138.531006'} SE-BD \n", - "\n", - " OriginWeather dayOfWeek timestamp \n", - "0 Sunny 0 2018-01-01 00:00:00 \n", - "1 Clear 0 2018-01-01 18:27:00 \n", - "2 Rain 0 2018-01-01 17:11:14 \n", - "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n", - "4 Damaging Wind 0 2018-01-01 05:13:00 \n", - "5 Rain 0 2018-01-01 01:43:03 \n", - "6 Clear 0 2018-01-01 13:49:53 \n", - "7 Thunder & Lightning 0 2018-01-01 04:54:59 \n", - "8 Heavy Fog 0 2018-01-01 12:09:35 \n", - "9 Cloudy 0 2018-01-01 12:09:35 \n", - "10 Rain 0 2018-01-01 22:06:14 \n", - "11 Rain 0 2018-01-01 11:52:34 \n", - "12 Heavy Fog 0 2018-01-01 02:13:46 \n", - "13 Rain 0 2018-01-01 14:21:13 \n", - "14 Cloudy 0 2018-01-01 17:42:53 \n", - "15 Thunder & Lightning 0 2018-01-01 19:55:32 \n", - "16 Thunder & Lightning 0 2018-01-01 07:49:27 \n", - "17 Rain 0 2018-01-01 01:30:47 \n", - "18 Hail 0 2018-01-01 07:58:17 \n", - "19 Cloudy 0 2018-01-01 00:02:06 \n", - "20 Cloudy 0 2018-01-01 01:08:20 \n", - "21 Clear 0 2018-01-01 01:08:20 \n", - "22 Sunny 0 2018-01-01 07:48:35 \n", - "23 Heavy Fog 0 2018-01-01 18:57:21 \n", - "24 Rain 0 2018-01-01 13:18:25 \n", - "25 Thunder & Lightning 0 2018-01-01 08:20:35 \n", - "26 Rain 0 2018-01-01 15:38:32 \n", - "27 Clear 0 2018-01-01 03:08:45 \n", - "28 Damaging Wind 0 2018-01-01 01:16:59 \n", - "29 Rain 0 2018-01-01 18:00:59 \n", - "... ... ... ... \n", - "13029 Sunny 6 2018-02-11 20:10:13 \n", - "13030 Clear 6 2018-02-11 18:59:53 \n", - "13031 Hail 6 2018-02-11 00:57:48 \n", - "13032 Thunder & Lightning 6 2018-02-11 12:02:49 \n", - "13033 Thunder & Lightning 6 2018-02-11 02:07:40 \n", - "13034 Sunny 6 2018-02-11 00:35:04 \n", - "13035 Thunder & Lightning 6 2018-02-11 11:19:12 \n", - "13036 Cloudy 6 2018-02-11 15:07:11 \n", - "13037 Damaging Wind 6 2018-02-11 10:24:42 \n", - "13038 Damaging Wind 6 2018-02-11 00:42:06 \n", - "13039 Thunder & Lightning 6 2018-02-11 10:56:31 \n", - "13040 Clear 6 2018-02-11 00:39:37 \n", - "13041 Rain 6 2018-02-11 10:24:30 \n", - "13042 Clear 6 2018-02-11 09:02:07 \n", - "13043 Thunder & Lightning 6 2018-02-11 04:45:06 \n", - "13044 Thunder & Lightning 6 2018-02-11 00:51:14 \n", - "13045 Sunny 6 2018-02-11 05:41:51 \n", - "13046 Hail 6 2018-02-11 10:02:21 \n", - "13047 Cloudy 6 2018-02-11 15:55:10 \n", - "13048 Hail 6 2018-02-11 04:11:14 \n", - "13049 Sunny 6 2018-02-11 10:13:32 \n", - "13050 Cloudy 6 2018-02-11 11:23:23 \n", - "13051 Hail 6 2018-02-11 01:13:50 \n", - "13052 Cloudy 6 2018-02-11 18:35:42 \n", - "13053 Clear 6 2018-02-11 19:02:10 \n", - "13054 Sunny 6 2018-02-11 20:42:25 \n", - "13055 Rain 6 2018-02-11 01:41:57 \n", - "13056 Sunny 6 2018-02-11 04:09:27 \n", - "13057 Hail 6 2018-02-11 08:28:21 \n", - "13058 Rain 6 2018-02-11 14:54:34 \n", - "\n", - "[13059 rows x 27 columns]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd_df" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AvgTicketPriceCancelledCarrierDestDestAirportIDDestCityNameDestCountryDestLocationDestRegionDestWeather...FlightTimeMinOriginOriginAirportIDOriginCityNameOriginCountryOriginLocationOriginRegionOriginWeatherdayOfWeektimestamp
0841.265642FalseKibana AirlinesSydney Kingsford Smith International AirportSYDSydneyAU{'lat': '-33.94609833', 'lon': '151.177002'}SE-BDRain...1030.770416Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HESunny02018-01-01 00:00:00
1882.982662FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...464.389481Cape Town International AirportCPTCape TownZA{'lat': '-33.96480179', 'lon': '18.60169983'}SE-BDClear02018-01-01 18:27:00
2190.636904FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy...0.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 17:11:14
3181.694216TrueKibana AirlinesTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Clear...222.749059Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Thunder & Lightning02018-01-01 10:33:28
4730.041778FalseKibana AirlinesXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...785.779071Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFDamaging Wind02018-01-01 05:13:00
5418.152089FalseJetBeatsGenoa Cristoforo Colombo AirportGE01GenovaIT{'lat': '44.4133', 'lon': '8.8375'}IT-42Thunder & Lightning...393.590441Edmonton International AirportCYEGEdmontonCA{'lat': '53.30970001', 'lon': '-113.5800018'}CA-ABRain02018-01-01 01:43:03
6180.246816FalseJetBeatsZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHHail...300.000000Zurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHClear02018-01-01 13:49:53
7585.184310FalseKibana AirlinesOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONClear...614.942480Ciampino___G. B. Pastine International AirportRM12RomeIT{'lat': '41.7994', 'lon': '12.5949'}IT-62Thunder & Lightning02018-01-01 04:54:59
8960.869736TrueKibana AirlinesRajiv Gandhi International AirportHYDHyderabadIN{'lat': '17.23131752', 'lon': '78.42985535'}SE-BDCloudy...602.030591Milano Linate AirportMI11MilanIT{'lat': '45.445099', 'lon': '9.27674'}IT-25Heavy Fog02018-01-01 12:09:35
9296.877773FalseLogstash AirwaysTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Rain...174.822216Sheremetyevo International AirportSVOMoscowRU{'lat': '55.972599', 'lon': '37.4146'}RU-MOSCloudy02018-01-01 12:09:35
10906.437948FalseJetBeatsHelsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESRain...503.045170Albuquerque International Sunport AirportABQAlbuquerqueUS{'lat': '35.040199', 'lon': '-106.609001'}US-NMRain02018-01-01 22:06:14
11704.463771FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Cloudy...36.075018Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 11:52:34
12922.499077TrueLogstash AirwaysShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDClear...679.768391Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFHeavy Fog02018-01-01 02:13:46
13374.959276FalseLogstash AirwaysOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONRain...330.418282Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Rain02018-01-01 14:21:13
14552.917371FalseLogstash AirwaysLuis Munoz Marin International AirportSJUSan JuanPR{'lat': '18.43939972', 'lon': '-66.00180054'}PR-U-AClear...407.145031Ciampino___G. B. Pastine International AirportRM12RomeIT{'lat': '41.7994', 'lon': '12.5949'}IT-62Cloudy02018-01-01 17:42:53
15566.487557TrueKibana AirlinesCologne Bonn AirportCGNCologneDE{'lat': '50.86589813', 'lon': '7.142739773'}DE-NWSunny...656.712658Chengdu Shuangliu International AirportCTUChengduCN{'lat': '30.57850075', 'lon': '103.9469986'}SE-BDThunder & Lightning02018-01-01 19:55:32
16989.952787TrueLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Damaging Wind...773.030334Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFThunder & Lightning02018-01-01 07:49:27
17569.613255FalseES-AirMinistro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}SE-BDCloudy...704.716920Cleveland Hopkins International AirportCLEClevelandUS{'lat': '41.4117012', 'lon': '-81.84980011'}US-OHRain02018-01-01 01:30:47
18277.429707FalseES-AirShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDClear...355.957996Olenya Air BaseXLMOOlenegorskRU{'lat': '68.15180206', 'lon': '33.46390152'}RU-MURHail02018-01-01 07:58:17
19772.100846FalseJetBeatsIndira Gandhi International AirportDELNew DelhiIN{'lat': '28.5665', 'lon': '77.103104'}SE-BDClear...875.114675Casper-Natrona County International AirportCPRCasperUS{'lat': '42.90800095', 'lon': '-106.4639969'}US-WYCloudy02018-01-01 00:02:06
20167.599922FalseJetBeatsWichita Mid Continent AirportICTWichitaUS{'lat': '37.64989853', 'lon': '-97.43309784'}US-KSClear...373.966883Erie International Tom Ridge FieldERIErieUS{'lat': '42.08312701', 'lon': '-80.17386675'}US-PACloudy02018-01-01 01:08:20
21253.210065FalseES-AirOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONHail...130.667700Newark Liberty International AirportEWRNewarkUS{'lat': '40.69250107', 'lon': '-74.16870117'}US-NJClear02018-01-01 01:08:20
22917.247620FalseJetBeatsItami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDDamaging Wind...574.495310Copenhagen Kastrup AirportCPHCopenhagenDK{'lat': '55.61790085', 'lon': '12.65600014'}DK-84Sunny02018-01-01 07:48:35
23451.591176FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Heavy Fog...579.728943Seattle Tacoma International AirportSEASeattleUS{'lat': '47.44900131', 'lon': '-122.3089981'}US-WAHeavy Fog02018-01-01 18:57:21
24307.067201FalseLogstash AirwaysCharles de Gaulle International AirportCDGParisFR{'lat': '49.01279831', 'lon': '2.549999952'}FR-JClear...50.157229Berlin-Tegel AirportTXLBerlinDE{'lat': '52.5597', 'lon': '13.2877'}DE-BERain02018-01-01 13:18:25
25268.241596FalseES-AirNarita International AirportNRTTokyoJP{'lat': '35.76470184', 'lon': '140.3860016'}SE-BDRain...527.567422Manchester AirportMANManchesterGB{'lat': '53.35369873', 'lon': '-2.274950027'}GB-ENGThunder & Lightning02018-01-01 08:20:35
26975.812632TrueKibana AirlinesItami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDHail...386.259764Helsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESRain02018-01-01 15:38:32
27134.214546FalseJetBeatsSan Diego International AirportSANSan DiegoUS{'lat': '32.73360062', 'lon': '-117.1900024'}US-CAClear...24.479650Phoenix Sky Harbor International AirportPHXPhoenixUS{'lat': '33.43429947', 'lon': '-112.012001'}US-AZClear02018-01-01 03:08:45
28988.897564FalseKibana AirlinesVerona Villafranca AirportVR10VeronaIT{'lat': '45.395699', 'lon': '10.8885'}IT-34Sunny...568.351033New Chitose AirportCTSChitose / TomakomaiJP{'lat': '42.77519989', 'lon': '141.6920013'}SE-BDDamaging Wind02018-01-01 01:16:59
29511.067220FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHRain...425.889194Tulsa International AirportTULTulsaUS{'lat': '36.19839859', 'lon': '-95.88809967'}US-OKRain02018-01-01 18:00:59
..................................................................
13029795.905278FalseKibana AirlinesMalpensa International AirportMI12MilanIT{'lat': '45.6306', 'lon': '8.72811'}IT-25Sunny...534.375826Itami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDSunny62018-02-11 20:10:13
13030863.388068FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDDamaging Wind...141.172633Tokyo Haneda International AirportHNDTokyoJP{'lat': '35.552299', 'lon': '139.779999'}SE-BDClear62018-02-11 18:59:53
13031575.183008FalseJetBeatsSavannah Hilton Head International AirportSAVSavannahUS{'lat': '32.12760162', 'lon': '-81.20210266'}US-GAThunder & Lightning...1113.137060OR Tambo International AirportJNBJohannesburgZA{'lat': '-26.1392', 'lon': '28.246'}SE-BDHail62018-02-11 00:57:48
13032817.368952FalseJetBeatsSyracuse Hancock International AirportSYRSyracuseUS{'lat': '43.11119843', 'lon': '-76.10630035'}US-NYRain...714.964864El Dorado International AirportBOGBogotaCO{'lat': '4.70159', 'lon': '-74.1469'}CO-CUNThunder & Lightning62018-02-11 12:02:49
13033579.582455FalseES-AirTampa International AirportTPATampaUS{'lat': '27.97550011', 'lon': '-82.53320313'}US-FLRain...234.929046Jorge Chavez International AirportLIMLimaPE{'lat': '-12.0219', 'lon': '-77.114304'}SE-BDThunder & Lightning62018-02-11 02:07:40
130341004.916638FalseJetBeatsOlenya Air BaseXLMOOlenegorskRU{'lat': '68.15180206', 'lon': '33.46390152'}RU-MURClear...526.895776Gimpo International AirportGMPSeoulKR{'lat': '37.5583', 'lon': '126.791'}SE-BDSunny62018-02-11 00:35:04
13035357.562842TrueLogstash AirwaysShanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDThunder & Lightning...0.000000Shanghai Pudong International AirportPVGShanghaiCN{'lat': '31.14340019', 'lon': '121.8050003'}SE-BDThunder & Lightning62018-02-11 11:19:12
13036429.580539FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...150.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy62018-02-11 15:07:11
13037729.788171TrueES-AirVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Rain...691.944839Ukrainka Air BaseXHBUBelogorskRU{'lat': '51.169997', 'lon': '128.445007'}RU-AMUDamaging Wind62018-02-11 10:24:42
13038564.897695FalseES-AirPisa International AirportPI05PisaIT{'lat': '43.683899', 'lon': '10.3927'}IT-52Heavy Fog...567.387339OR Tambo International AirportJNBJohannesburgZA{'lat': '-26.1392', 'lon': '28.246'}SE-BDDamaging Wind62018-02-11 00:42:06
130391014.052787FalseLogstash AirwaysVienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Thunder & Lightning...690.092327Montreal / Pierre Elliott Trudeau Internationa...YULMontrealCA{'lat': '45.47060013', 'lon': '-73.74079895'}CA-QCThunder & Lightning62018-02-11 10:56:31
13040455.243843FalseES-AirLondon Luton AirportLTNLondonGB{'lat': '51.87469864', 'lon': '-0.368333012'}GB-ENGCloudy...3.028293London Heathrow AirportLHRLondonGB{'lat': '51.4706', 'lon': '-0.461941'}GB-ENGClear62018-02-11 00:39:37
13041611.370232FalseLogstash AirwaysJorge Chavez International AirportLIMLimaPE{'lat': '-12.0219', 'lon': '-77.114304'}SE-BDSunny...338.875531Casper-Natrona County International AirportCPRCasperUS{'lat': '42.90800095', 'lon': '-106.4639969'}US-WYRain62018-02-11 10:24:30
13042595.961285FalseJetBeatsOttawa Macdonald-Cartier International AirportYOWOttawaCA{'lat': '45.32249832', 'lon': '-75.66919708'}CA-ONClear...375.129587Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HEClear62018-02-11 09:02:07
13043782.747648FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...156.858481Tokyo Haneda International AirportHNDTokyoJP{'lat': '35.552299', 'lon': '139.779999'}SE-BDThunder & Lightning62018-02-11 04:45:06
13044891.117221FalseJetBeatsWinnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBClear...354.106457Vienna International AirportVIEViennaAT{'lat': '48.11029816', 'lon': '16.56970024'}AT-9Thunder & Lightning62018-02-11 00:51:14
13045587.169921FalseLogstash AirwaysBrisbane International AirportBNEBrisbaneAU{'lat': '-27.38419914', 'lon': '153.1170044'}SE-BDRain...771.305442Amsterdam Airport SchipholAMSAmsterdamNL{'lat': '52.30860138', 'lon': '4.76388979'}NL-NHSunny62018-02-11 05:41:51
13046739.132165FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDRain...542.955572Winnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBHail62018-02-11 10:02:21
13047605.191876FalseJetBeatsPortland International Jetport AirportPWMPortlandUS{'lat': '43.64619827', 'lon': '-70.30930328'}US-METhunder & Lightning...564.599857Jeju International AirportCJUJeju CityKR{'lat': '33.51129913', 'lon': '126.4929962'}SE-BDCloudy62018-02-11 15:55:10
13048361.767659TrueLogstash AirwaysDubai International AirportDXBDubaiAE{'lat': '25.25279999', 'lon': '55.36439896'}SE-BDSunny...180.000000Dubai International AirportDXBDubaiAE{'lat': '25.25279999', 'lon': '55.36439896'}SE-BDHail62018-02-11 04:11:14
13049662.306992FalseES-AirWinnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBHeavy Fog...835.954429Ministro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}AR-BSunny62018-02-11 10:13:32
13050630.779526FalseJetBeatsHelsinki Vantaa AirportHELHelsinkiFI{'lat': '60.31719971', 'lon': '24.9633007'}FI-ESSunny...451.755639Beijing Capital International AirportPEKBeijingCN{'lat': '40.08010101', 'lon': '116.5849991'}SE-BDCloudy62018-02-11 11:23:23
13051937.771279TrueLogstash AirwaysLester B. Pearson International AirportYYZTorontoCA{'lat': '43.67720032', 'lon': '-79.63059998'}CA-ONSunny...507.451571Leonardo da Vinci___Fiumicino AirportRM11RomeIT{'lat': '41.8002778', 'lon': '12.2388889'}IT-62Hail62018-02-11 01:13:50
130521085.155339FalseLogstash AirwaysMelbourne International AirportMELMelbourneAU{'lat': '-37.673302', 'lon': '144.843002'}SE-BDCloudy...1044.451122Bologna Guglielmo Marconi AirportBO08BolognaIT{'lat': '44.5354', 'lon': '11.2887'}IT-45Cloudy62018-02-11 18:35:42
130531191.964104FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHHail...728.715904Portland International Jetport AirportPWMPortlandUS{'lat': '43.64619827', 'lon': '-70.30930328'}US-MEClear62018-02-11 19:02:10
130541080.446279FalseLogstash AirwaysXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDRain...402.929088Pisa International AirportPI05PisaIT{'lat': '43.683899', 'lon': '10.3927'}IT-52Sunny62018-02-11 20:42:25
13055646.612941FalseLogstash AirwaysZurich AirportZRHZurichCH{'lat': '47.464699', 'lon': '8.54917'}CH-ZHRain...644.418029Winnipeg / James Armstrong Richardson Internat...YWGWinnipegCA{'lat': '49.90999985', 'lon': '-97.23989868'}CA-MBRain62018-02-11 01:41:57
13056997.751876FalseLogstash AirwaysUkrainka Air BaseXHBUBelogorskRU{'lat': '51.169997', 'lon': '128.445007'}RU-AMURain...937.540811Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFSunny62018-02-11 04:09:27
130571102.814465FalseJetBeatsMinistro Pistarini International AirportEZEBuenos AiresAR{'lat': '-34.8222', 'lon': '-58.5358'}SE-BDHail...1697.404971Itami AirportITMOsakaJP{'lat': '34.78549957', 'lon': '135.4380035'}SE-BDHail62018-02-11 08:28:21
13058858.144337FalseJetBeatsWashington Dulles International AirportIADWashingtonUS{'lat': '38.94449997', 'lon': '-77.45580292'}US-DCHeavy Fog...1610.761827Adelaide International AirportADLAdelaideAU{'lat': '-34.945', 'lon': '138.531006'}SE-BDRain62018-02-11 14:54:34
\n", - "

13059 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " AvgTicketPrice Cancelled Carrier \\\n", - "0 841.265642 False Kibana Airlines \n", - "1 882.982662 False Logstash Airways \n", - "2 190.636904 False Logstash Airways \n", - "3 181.694216 True Kibana Airlines \n", - "4 730.041778 False Kibana Airlines \n", - "5 418.152089 False JetBeats \n", - "6 180.246816 False JetBeats \n", - "7 585.184310 False Kibana Airlines \n", - "8 960.869736 True Kibana Airlines \n", - "9 296.877773 False Logstash Airways \n", - "10 906.437948 False JetBeats \n", - "11 704.463771 False Logstash Airways \n", - "12 922.499077 True Logstash Airways \n", - "13 374.959276 False Logstash Airways \n", - "14 552.917371 False Logstash Airways \n", - "15 566.487557 True Kibana Airlines \n", - "16 989.952787 True Logstash Airways \n", - "17 569.613255 False ES-Air \n", - "18 277.429707 False ES-Air \n", - "19 772.100846 False JetBeats \n", - "20 167.599922 False JetBeats \n", - "21 253.210065 False ES-Air \n", - "22 917.247620 False JetBeats \n", - "23 451.591176 False Logstash Airways \n", - "24 307.067201 False Logstash Airways \n", - "25 268.241596 False ES-Air \n", - "26 975.812632 True Kibana Airlines \n", - "27 134.214546 False JetBeats \n", - "28 988.897564 False Kibana Airlines \n", - "29 511.067220 False Logstash Airways \n", - "... ... ... ... \n", - "13029 795.905278 False Kibana Airlines \n", - "13030 863.388068 False Logstash Airways \n", - "13031 575.183008 False JetBeats \n", - "13032 817.368952 False JetBeats \n", - "13033 579.582455 False ES-Air \n", - "13034 1004.916638 False JetBeats \n", - "13035 357.562842 True Logstash Airways \n", - "13036 429.580539 False Logstash Airways \n", - "13037 729.788171 True ES-Air \n", - "13038 564.897695 False ES-Air \n", - "13039 1014.052787 False Logstash Airways \n", - "13040 455.243843 False ES-Air \n", - "13041 611.370232 False Logstash Airways \n", - "13042 595.961285 False JetBeats \n", - "13043 782.747648 False Logstash Airways \n", - "13044 891.117221 False JetBeats \n", - "13045 587.169921 False Logstash Airways \n", - "13046 739.132165 False Logstash Airways \n", - "13047 605.191876 False JetBeats \n", - "13048 361.767659 True Logstash Airways \n", - "13049 662.306992 False ES-Air \n", - "13050 630.779526 False JetBeats \n", - "13051 937.771279 True Logstash Airways \n", - "13052 1085.155339 False Logstash Airways \n", - "13053 1191.964104 False Logstash Airways \n", - "13054 1080.446279 False Logstash Airways \n", - "13055 646.612941 False Logstash Airways \n", - "13056 997.751876 False Logstash Airways \n", - "13057 1102.814465 False JetBeats \n", - "13058 858.144337 False JetBeats \n", - "\n", - " Dest DestAirportID \\\n", - "0 Sydney Kingsford Smith International Airport SYD \n", - "1 Venice Marco Polo Airport VE05 \n", - "2 Venice Marco Polo Airport VE05 \n", - "3 Treviso-Sant'Angelo Airport TV01 \n", - "4 Xi'an Xianyang International Airport XIY \n", - "5 Genoa Cristoforo Colombo Airport GE01 \n", - "6 Zurich Airport ZRH \n", - "7 Ottawa Macdonald-Cartier International Airport YOW \n", - "8 Rajiv Gandhi International Airport HYD \n", - "9 Treviso-Sant'Angelo Airport TV01 \n", - "10 Helsinki Vantaa Airport HEL \n", - "11 Vienna International Airport VIE \n", - "12 Shanghai Pudong International Airport PVG \n", - "13 Ottawa Macdonald-Cartier International Airport YOW \n", - "14 Luis Munoz Marin International Airport SJU \n", - "15 Cologne Bonn Airport CGN \n", - "16 Venice Marco Polo Airport VE05 \n", - "17 Ministro Pistarini International Airport EZE \n", - "18 Shanghai Pudong International Airport PVG \n", - "19 Indira Gandhi International Airport DEL \n", - "20 Wichita Mid Continent Airport ICT \n", - "21 Ottawa Macdonald-Cartier International Airport YOW \n", - "22 Itami Airport ITM \n", - "23 Vienna International Airport VIE \n", - "24 Charles de Gaulle International Airport CDG \n", - "25 Narita International Airport NRT \n", - "26 Itami Airport ITM \n", - "27 San Diego International Airport SAN \n", - "28 Verona Villafranca Airport VR10 \n", - "29 Zurich Airport ZRH \n", - "... ... ... \n", - "13029 Malpensa International Airport MI12 \n", - "13030 Xi'an Xianyang International Airport XIY \n", - "13031 Savannah Hilton Head International Airport SAV \n", - "13032 Syracuse Hancock International Airport SYR \n", - "13033 Tampa International Airport TPA \n", - "13034 Olenya Air Base XLMO \n", - "13035 Shanghai Pudong International Airport PVG \n", - "13036 Venice Marco Polo Airport VE05 \n", - "13037 Vienna International Airport VIE \n", - "13038 Pisa International Airport PI05 \n", - "13039 Vienna International Airport VIE \n", - "13040 London Luton Airport LTN \n", - "13041 Jorge Chavez International Airport LIM \n", - "13042 Ottawa Macdonald-Cartier International Airport YOW \n", - "13043 Xi'an Xianyang International Airport XIY \n", - "13044 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13045 Brisbane International Airport BNE \n", - "13046 Xi'an Xianyang International Airport XIY \n", - "13047 Portland International Jetport Airport PWM \n", - "13048 Dubai International Airport DXB \n", - "13049 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13050 Helsinki Vantaa Airport HEL \n", - "13051 Lester B. Pearson International Airport YYZ \n", - "13052 Melbourne International Airport MEL \n", - "13053 Zurich Airport ZRH \n", - "13054 Xi'an Xianyang International Airport XIY \n", - "13055 Zurich Airport ZRH \n", - "13056 Ukrainka Air Base XHBU \n", - "13057 Ministro Pistarini International Airport EZE \n", - "13058 Washington Dulles International Airport IAD \n", - "\n", - " DestCityName DestCountry \\\n", - "0 Sydney AU \n", - "1 Venice IT \n", - "2 Venice IT \n", - "3 Treviso IT \n", - "4 Xi'an CN \n", - "5 Genova IT \n", - "6 Zurich CH \n", - "7 Ottawa CA \n", - "8 Hyderabad IN \n", - "9 Treviso IT \n", - "10 Helsinki FI \n", - "11 Vienna AT \n", - "12 Shanghai CN \n", - "13 Ottawa CA \n", - "14 San Juan PR \n", - "15 Cologne DE \n", - "16 Venice IT \n", - "17 Buenos Aires AR \n", - "18 Shanghai CN \n", - "19 New Delhi IN \n", - "20 Wichita US \n", - "21 Ottawa CA \n", - "22 Osaka JP \n", - "23 Vienna AT \n", - "24 Paris FR \n", - "25 Tokyo JP \n", - "26 Osaka JP \n", - "27 San Diego US \n", - "28 Verona IT \n", - "29 Zurich CH \n", - "... ... ... \n", - "13029 Milan IT \n", - "13030 Xi'an CN \n", - "13031 Savannah US \n", - "13032 Syracuse US \n", - "13033 Tampa US \n", - "13034 Olenegorsk RU \n", - "13035 Shanghai CN \n", - "13036 Venice IT \n", - "13037 Vienna AT \n", - "13038 Pisa IT \n", - "13039 Vienna AT \n", - "13040 London GB \n", - "13041 Lima PE \n", - "13042 Ottawa CA \n", - "13043 Xi'an CN \n", - "13044 Winnipeg CA \n", - "13045 Brisbane AU \n", - "13046 Xi'an CN \n", - "13047 Portland US \n", - "13048 Dubai AE \n", - "13049 Winnipeg CA \n", - "13050 Helsinki FI \n", - "13051 Toronto CA \n", - "13052 Melbourne AU \n", - "13053 Zurich CH \n", - "13054 Xi'an CN \n", - "13055 Zurich CH \n", - "13056 Belogorsk RU \n", - "13057 Buenos Aires AR \n", - "13058 Washington US \n", - "\n", - " DestLocation DestRegion \\\n", - "0 {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n", - "1 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "3 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", - "4 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "5 {'lat': '44.4133', 'lon': '8.8375'} IT-42 \n", - "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "7 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "8 {'lat': '17.23131752', 'lon': '78.42985535'} SE-BD \n", - "9 {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", - "10 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "11 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "12 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "14 {'lat': '18.43939972', 'lon': '-66.00180054'} PR-U-A \n", - "15 {'lat': '50.86589813', 'lon': '7.142739773'} DE-NW \n", - "16 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "17 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", - "18 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "19 {'lat': '28.5665', 'lon': '77.103104'} SE-BD \n", - "20 {'lat': '37.64989853', 'lon': '-97.43309784'} US-KS \n", - "21 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "22 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "23 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "24 {'lat': '49.01279831', 'lon': '2.549999952'} FR-J \n", - "25 {'lat': '35.76470184', 'lon': '140.3860016'} SE-BD \n", - "26 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "27 {'lat': '32.73360062', 'lon': '-117.1900024'} US-CA \n", - "28 {'lat': '45.395699', 'lon': '10.8885'} IT-34 \n", - "29 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "... ... ... \n", - "13029 {'lat': '45.6306', 'lon': '8.72811'} IT-25 \n", - "13030 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13031 {'lat': '32.12760162', 'lon': '-81.20210266'} US-GA \n", - "13032 {'lat': '43.11119843', 'lon': '-76.10630035'} US-NY \n", - "13033 {'lat': '27.97550011', 'lon': '-82.53320313'} US-FL \n", - "13034 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", - "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "13037 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13038 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", - "13039 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13040 {'lat': '51.87469864', 'lon': '-0.368333012'} GB-ENG \n", - "13041 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", - "13042 {'lat': '45.32249832', 'lon': '-75.66919708'} CA-ON \n", - "13043 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13044 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13045 {'lat': '-27.38419914', 'lon': '153.1170044'} SE-BD \n", - "13046 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13047 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", - "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", - "13049 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13050 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "13051 {'lat': '43.67720032', 'lon': '-79.63059998'} CA-ON \n", - "13052 {'lat': '-37.673302', 'lon': '144.843002'} SE-BD \n", - "13053 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "13054 {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "13055 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "13056 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", - "13057 {'lat': '-34.8222', 'lon': '-58.5358'} SE-BD \n", - "13058 {'lat': '38.94449997', 'lon': '-77.45580292'} US-DC \n", - "\n", - " DestWeather ... FlightTimeMin \\\n", - "0 Rain ... 1030.770416 \n", - "1 Sunny ... 464.389481 \n", - "2 Cloudy ... 0.000000 \n", - "3 Clear ... 222.749059 \n", - "4 Clear ... 785.779071 \n", - "5 Thunder & Lightning ... 393.590441 \n", - "6 Hail ... 300.000000 \n", - "7 Clear ... 614.942480 \n", - "8 Cloudy ... 602.030591 \n", - "9 Rain ... 174.822216 \n", - "10 Rain ... 503.045170 \n", - "11 Cloudy ... 36.075018 \n", - "12 Clear ... 679.768391 \n", - "13 Rain ... 330.418282 \n", - "14 Clear ... 407.145031 \n", - "15 Sunny ... 656.712658 \n", - "16 Damaging Wind ... 773.030334 \n", - "17 Cloudy ... 704.716920 \n", - "18 Clear ... 355.957996 \n", - "19 Clear ... 875.114675 \n", - "20 Clear ... 373.966883 \n", - "21 Hail ... 130.667700 \n", - "22 Damaging Wind ... 574.495310 \n", - "23 Heavy Fog ... 579.728943 \n", - "24 Clear ... 50.157229 \n", - "25 Rain ... 527.567422 \n", - "26 Hail ... 386.259764 \n", - "27 Clear ... 24.479650 \n", - "28 Sunny ... 568.351033 \n", - "29 Rain ... 425.889194 \n", - "... ... ... ... \n", - "13029 Sunny ... 534.375826 \n", - "13030 Damaging Wind ... 141.172633 \n", - "13031 Thunder & Lightning ... 1113.137060 \n", - "13032 Rain ... 714.964864 \n", - "13033 Rain ... 234.929046 \n", - "13034 Clear ... 526.895776 \n", - "13035 Thunder & Lightning ... 0.000000 \n", - "13036 Sunny ... 150.000000 \n", - "13037 Rain ... 691.944839 \n", - "13038 Heavy Fog ... 567.387339 \n", - "13039 Thunder & Lightning ... 690.092327 \n", - "13040 Cloudy ... 3.028293 \n", - "13041 Sunny ... 338.875531 \n", - "13042 Clear ... 375.129587 \n", - "13043 Clear ... 156.858481 \n", - "13044 Clear ... 354.106457 \n", - "13045 Rain ... 771.305442 \n", - "13046 Rain ... 542.955572 \n", - "13047 Thunder & Lightning ... 564.599857 \n", - "13048 Sunny ... 180.000000 \n", - "13049 Heavy Fog ... 835.954429 \n", - "13050 Sunny ... 451.755639 \n", - "13051 Sunny ... 507.451571 \n", - "13052 Cloudy ... 1044.451122 \n", - "13053 Hail ... 728.715904 \n", - "13054 Rain ... 402.929088 \n", - "13055 Rain ... 644.418029 \n", - "13056 Rain ... 937.540811 \n", - "13057 Hail ... 1697.404971 \n", - "13058 Heavy Fog ... 1610.761827 \n", - "\n", - " Origin OriginAirportID \\\n", - "0 Frankfurt am Main Airport FRA \n", - "1 Cape Town International Airport CPT \n", - "2 Venice Marco Polo Airport VE05 \n", - "3 Naples International Airport NA01 \n", - "4 Licenciado Benito Juarez International Airport AICM \n", - "5 Edmonton International Airport CYEG \n", - "6 Zurich Airport ZRH \n", - "7 Ciampino___G. B. Pastine International Airport RM12 \n", - "8 Milano Linate Airport MI11 \n", - "9 Sheremetyevo International Airport SVO \n", - "10 Albuquerque International Sunport Airport ABQ \n", - "11 Venice Marco Polo Airport VE05 \n", - "12 Licenciado Benito Juarez International Airport AICM \n", - "13 Naples International Airport NA01 \n", - "14 Ciampino___G. B. Pastine International Airport RM12 \n", - "15 Chengdu Shuangliu International Airport CTU \n", - "16 Licenciado Benito Juarez International Airport AICM \n", - "17 Cleveland Hopkins International Airport CLE \n", - "18 Olenya Air Base XLMO \n", - "19 Casper-Natrona County International Airport CPR \n", - "20 Erie International Tom Ridge Field ERI \n", - "21 Newark Liberty International Airport EWR \n", - "22 Copenhagen Kastrup Airport CPH \n", - "23 Seattle Tacoma International Airport SEA \n", - "24 Berlin-Tegel Airport TXL \n", - "25 Manchester Airport MAN \n", - "26 Helsinki Vantaa Airport HEL \n", - "27 Phoenix Sky Harbor International Airport PHX \n", - "28 New Chitose Airport CTS \n", - "29 Tulsa International Airport TUL \n", - "... ... ... \n", - "13029 Itami Airport ITM \n", - "13030 Tokyo Haneda International Airport HND \n", - "13031 OR Tambo International Airport JNB \n", - "13032 El Dorado International Airport BOG \n", - "13033 Jorge Chavez International Airport LIM \n", - "13034 Gimpo International Airport GMP \n", - "13035 Shanghai Pudong International Airport PVG \n", - "13036 Venice Marco Polo Airport VE05 \n", - "13037 Ukrainka Air Base XHBU \n", - "13038 OR Tambo International Airport JNB \n", - "13039 Montreal / Pierre Elliott Trudeau Internationa... YUL \n", - "13040 London Heathrow Airport LHR \n", - "13041 Casper-Natrona County International Airport CPR \n", - "13042 Frankfurt am Main Airport FRA \n", - "13043 Tokyo Haneda International Airport HND \n", - "13044 Vienna International Airport VIE \n", - "13045 Amsterdam Airport Schiphol AMS \n", - "13046 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13047 Jeju International Airport CJU \n", - "13048 Dubai International Airport DXB \n", - "13049 Ministro Pistarini International Airport EZE \n", - "13050 Beijing Capital International Airport PEK \n", - "13051 Leonardo da Vinci___Fiumicino Airport RM11 \n", - "13052 Bologna Guglielmo Marconi Airport BO08 \n", - "13053 Portland International Jetport Airport PWM \n", - "13054 Pisa International Airport PI05 \n", - "13055 Winnipeg / James Armstrong Richardson Internat... YWG \n", - "13056 Licenciado Benito Juarez International Airport AICM \n", - "13057 Itami Airport ITM \n", - "13058 Adelaide International Airport ADL \n", - "\n", - " OriginCityName OriginCountry \\\n", - "0 Frankfurt am Main DE \n", - "1 Cape Town ZA \n", - "2 Venice IT \n", - "3 Naples IT \n", - "4 Mexico City MX \n", - "5 Edmonton CA \n", - "6 Zurich CH \n", - "7 Rome IT \n", - "8 Milan IT \n", - "9 Moscow RU \n", - "10 Albuquerque US \n", - "11 Venice IT \n", - "12 Mexico City MX \n", - "13 Naples IT \n", - "14 Rome IT \n", - "15 Chengdu CN \n", - "16 Mexico City MX \n", - "17 Cleveland US \n", - "18 Olenegorsk RU \n", - "19 Casper US \n", - "20 Erie US \n", - "21 Newark US \n", - "22 Copenhagen DK \n", - "23 Seattle US \n", - "24 Berlin DE \n", - "25 Manchester GB \n", - "26 Helsinki FI \n", - "27 Phoenix US \n", - "28 Chitose / Tomakomai JP \n", - "29 Tulsa US \n", - "... ... ... \n", - "13029 Osaka JP \n", - "13030 Tokyo JP \n", - "13031 Johannesburg ZA \n", - "13032 Bogota CO \n", - "13033 Lima PE \n", - "13034 Seoul KR \n", - "13035 Shanghai CN \n", - "13036 Venice IT \n", - "13037 Belogorsk RU \n", - "13038 Johannesburg ZA \n", - "13039 Montreal CA \n", - "13040 London GB \n", - "13041 Casper US \n", - "13042 Frankfurt am Main DE \n", - "13043 Tokyo JP \n", - "13044 Vienna AT \n", - "13045 Amsterdam NL \n", - "13046 Winnipeg CA \n", - "13047 Jeju City KR \n", - "13048 Dubai AE \n", - "13049 Buenos Aires AR \n", - "13050 Beijing CN \n", - "13051 Rome IT \n", - "13052 Bologna IT \n", - "13053 Portland US \n", - "13054 Pisa IT \n", - "13055 Winnipeg CA \n", - "13056 Mexico City MX \n", - "13057 Osaka JP \n", - "13058 Adelaide AU \n", - "\n", - " OriginLocation OriginRegion \\\n", - "0 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", - "1 {'lat': '-33.96480179', 'lon': '18.60169983'} SE-BD \n", - "2 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "3 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", - "4 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "5 {'lat': '53.30970001', 'lon': '-113.5800018'} CA-AB \n", - "6 {'lat': '47.464699', 'lon': '8.54917'} CH-ZH \n", - "7 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", - "8 {'lat': '45.445099', 'lon': '9.27674'} IT-25 \n", - "9 {'lat': '55.972599', 'lon': '37.4146'} RU-MOS \n", - "10 {'lat': '35.040199', 'lon': '-106.609001'} US-NM \n", - "11 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "12 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "13 {'lat': '40.886002', 'lon': '14.2908'} IT-72 \n", - "14 {'lat': '41.7994', 'lon': '12.5949'} IT-62 \n", - "15 {'lat': '30.57850075', 'lon': '103.9469986'} SE-BD \n", - "16 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "17 {'lat': '41.4117012', 'lon': '-81.84980011'} US-OH \n", - "18 {'lat': '68.15180206', 'lon': '33.46390152'} RU-MUR \n", - "19 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", - "20 {'lat': '42.08312701', 'lon': '-80.17386675'} US-PA \n", - "21 {'lat': '40.69250107', 'lon': '-74.16870117'} US-NJ \n", - "22 {'lat': '55.61790085', 'lon': '12.65600014'} DK-84 \n", - "23 {'lat': '47.44900131', 'lon': '-122.3089981'} US-WA \n", - "24 {'lat': '52.5597', 'lon': '13.2877'} DE-BE \n", - "25 {'lat': '53.35369873', 'lon': '-2.274950027'} GB-ENG \n", - "26 {'lat': '60.31719971', 'lon': '24.9633007'} FI-ES \n", - "27 {'lat': '33.43429947', 'lon': '-112.012001'} US-AZ \n", - "28 {'lat': '42.77519989', 'lon': '141.6920013'} SE-BD \n", - "29 {'lat': '36.19839859', 'lon': '-95.88809967'} US-OK \n", - "... ... ... \n", - "13029 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "13030 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", - "13031 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", - "13032 {'lat': '4.70159', 'lon': '-74.1469'} CO-CUN \n", - "13033 {'lat': '-12.0219', 'lon': '-77.114304'} SE-BD \n", - "13034 {'lat': '37.5583', 'lon': '126.791'} SE-BD \n", - "13035 {'lat': '31.14340019', 'lon': '121.8050003'} SE-BD \n", - "13036 {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "13037 {'lat': '51.169997', 'lon': '128.445007'} RU-AMU \n", - "13038 {'lat': '-26.1392', 'lon': '28.246'} SE-BD \n", - "13039 {'lat': '45.47060013', 'lon': '-73.74079895'} CA-QC \n", - "13040 {'lat': '51.4706', 'lon': '-0.461941'} GB-ENG \n", - "13041 {'lat': '42.90800095', 'lon': '-106.4639969'} US-WY \n", - "13042 {'lat': '50.033333', 'lon': '8.570556'} DE-HE \n", - "13043 {'lat': '35.552299', 'lon': '139.779999'} SE-BD \n", - "13044 {'lat': '48.11029816', 'lon': '16.56970024'} AT-9 \n", - "13045 {'lat': '52.30860138', 'lon': '4.76388979'} NL-NH \n", - "13046 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13047 {'lat': '33.51129913', 'lon': '126.4929962'} SE-BD \n", - "13048 {'lat': '25.25279999', 'lon': '55.36439896'} SE-BD \n", - "13049 {'lat': '-34.8222', 'lon': '-58.5358'} AR-B \n", - "13050 {'lat': '40.08010101', 'lon': '116.5849991'} SE-BD \n", - "13051 {'lat': '41.8002778', 'lon': '12.2388889'} IT-62 \n", - "13052 {'lat': '44.5354', 'lon': '11.2887'} IT-45 \n", - "13053 {'lat': '43.64619827', 'lon': '-70.30930328'} US-ME \n", - "13054 {'lat': '43.683899', 'lon': '10.3927'} IT-52 \n", - "13055 {'lat': '49.90999985', 'lon': '-97.23989868'} CA-MB \n", - "13056 {'lat': '19.4363', 'lon': '-99.072098'} MX-DIF \n", - "13057 {'lat': '34.78549957', 'lon': '135.4380035'} SE-BD \n", - "13058 {'lat': '-34.945', 'lon': '138.531006'} SE-BD \n", - "\n", - " OriginWeather dayOfWeek timestamp \n", - "0 Sunny 0 2018-01-01 00:00:00 \n", - "1 Clear 0 2018-01-01 18:27:00 \n", - "2 Rain 0 2018-01-01 17:11:14 \n", - "3 Thunder & Lightning 0 2018-01-01 10:33:28 \n", - "4 Damaging Wind 0 2018-01-01 05:13:00 \n", - "5 Rain 0 2018-01-01 01:43:03 \n", - "6 Clear 0 2018-01-01 13:49:53 \n", - "7 Thunder & Lightning 0 2018-01-01 04:54:59 \n", - "8 Heavy Fog 0 2018-01-01 12:09:35 \n", - "9 Cloudy 0 2018-01-01 12:09:35 \n", - "10 Rain 0 2018-01-01 22:06:14 \n", - "11 Rain 0 2018-01-01 11:52:34 \n", - "12 Heavy Fog 0 2018-01-01 02:13:46 \n", - "13 Rain 0 2018-01-01 14:21:13 \n", - "14 Cloudy 0 2018-01-01 17:42:53 \n", - "15 Thunder & Lightning 0 2018-01-01 19:55:32 \n", - "16 Thunder & Lightning 0 2018-01-01 07:49:27 \n", - "17 Rain 0 2018-01-01 01:30:47 \n", - "18 Hail 0 2018-01-01 07:58:17 \n", - "19 Cloudy 0 2018-01-01 00:02:06 \n", - "20 Cloudy 0 2018-01-01 01:08:20 \n", - "21 Clear 0 2018-01-01 01:08:20 \n", - "22 Sunny 0 2018-01-01 07:48:35 \n", - "23 Heavy Fog 0 2018-01-01 18:57:21 \n", - "24 Rain 0 2018-01-01 13:18:25 \n", - "25 Thunder & Lightning 0 2018-01-01 08:20:35 \n", - "26 Rain 0 2018-01-01 15:38:32 \n", - "27 Clear 0 2018-01-01 03:08:45 \n", - "28 Damaging Wind 0 2018-01-01 01:16:59 \n", - "29 Rain 0 2018-01-01 18:00:59 \n", - "... ... ... ... \n", - "13029 Sunny 6 2018-02-11 20:10:13 \n", - "13030 Clear 6 2018-02-11 18:59:53 \n", - "13031 Hail 6 2018-02-11 00:57:48 \n", - "13032 Thunder & Lightning 6 2018-02-11 12:02:49 \n", - "13033 Thunder & Lightning 6 2018-02-11 02:07:40 \n", - "13034 Sunny 6 2018-02-11 00:35:04 \n", - "13035 Thunder & Lightning 6 2018-02-11 11:19:12 \n", - "13036 Cloudy 6 2018-02-11 15:07:11 \n", - "13037 Damaging Wind 6 2018-02-11 10:24:42 \n", - "13038 Damaging Wind 6 2018-02-11 00:42:06 \n", - "13039 Thunder & Lightning 6 2018-02-11 10:56:31 \n", - "13040 Clear 6 2018-02-11 00:39:37 \n", - "13041 Rain 6 2018-02-11 10:24:30 \n", - "13042 Clear 6 2018-02-11 09:02:07 \n", - "13043 Thunder & Lightning 6 2018-02-11 04:45:06 \n", - "13044 Thunder & Lightning 6 2018-02-11 00:51:14 \n", - "13045 Sunny 6 2018-02-11 05:41:51 \n", - "13046 Hail 6 2018-02-11 10:02:21 \n", - "13047 Cloudy 6 2018-02-11 15:55:10 \n", - "13048 Hail 6 2018-02-11 04:11:14 \n", - "13049 Sunny 6 2018-02-11 10:13:32 \n", - "13050 Cloudy 6 2018-02-11 11:23:23 \n", - "13051 Hail 6 2018-02-11 01:13:50 \n", - "13052 Cloudy 6 2018-02-11 18:35:42 \n", - "13053 Clear 6 2018-02-11 19:02:10 \n", - "13054 Sunny 6 2018-02-11 20:42:25 \n", - "13055 Rain 6 2018-02-11 01:41:57 \n", - "13056 Sunny 6 2018-02-11 04:09:27 \n", - "13057 Hail 6 2018-02-11 08:28:21 \n", - "13058 Rain 6 2018-02-11 14:54:34 \n", - "\n", - "[13059 rows x 27 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd_df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Eland" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "import eland as ed" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "ed_df = ed.read_es('localhost', 'flights')" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AvgTicketPriceCancelledCarrierDestDestAirportIDDestCityNameDestCountryDestLocationDestRegionDestWeather...FlightTimeMinOriginOriginAirportIDOriginCityNameOriginCountryOriginLocationOriginRegionOriginWeatherdayOfWeektimestamp
PNr3fWsBVUchcQJXWbLE841.265642FalseKibana AirlinesSydney Kingsford Smith International AirportSYDSydneyAU{'lat': '-33.94609833', 'lon': '151.177002'}SE-BDRain...1030.770416Frankfurt am Main AirportFRAFrankfurt am MainDE{'lat': '50.033333', 'lon': '8.570556'}DE-HESunny02018-01-01 00:00:00
Pdr3fWsBVUchcQJXWbLE882.982662FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Sunny...464.389481Cape Town International AirportCPTCape TownZA{'lat': '-33.96480179', 'lon': '18.60169983'}SE-BDClear02018-01-01 18:27:00
Ptr3fWsBVUchcQJXWbLE190.636904FalseLogstash AirwaysVenice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Cloudy...0.000000Venice Marco Polo AirportVE05VeniceIT{'lat': '45.505299', 'lon': '12.3519'}IT-34Rain02018-01-01 17:11:14
P9r3fWsBVUchcQJXWbLE181.694216TrueKibana AirlinesTreviso-Sant'Angelo AirportTV01TrevisoIT{'lat': '45.648399', 'lon': '12.1944'}IT-34Clear...222.749059Naples International AirportNA01NaplesIT{'lat': '40.886002', 'lon': '14.2908'}IT-72Thunder & Lightning02018-01-01 10:33:28
QNr3fWsBVUchcQJXWbLE730.041778FalseKibana AirlinesXi'an Xianyang International AirportXIYXi'anCN{'lat': '34.447102', 'lon': '108.751999'}SE-BDClear...785.779071Licenciado Benito Juarez International AirportAICMMexico CityMX{'lat': '19.4363', 'lon': '-99.072098'}MX-DIFDamaging Wind02018-01-01 05:13:00
\n", - "

5 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " AvgTicketPrice Cancelled Carrier \\\n", - "PNr3fWsBVUchcQJXWbLE 841.265642 False Kibana Airlines \n", - "Pdr3fWsBVUchcQJXWbLE 882.982662 False Logstash Airways \n", - "Ptr3fWsBVUchcQJXWbLE 190.636904 False Logstash Airways \n", - "P9r3fWsBVUchcQJXWbLE 181.694216 True Kibana Airlines \n", - "QNr3fWsBVUchcQJXWbLE 730.041778 False Kibana Airlines \n", - "\n", - " Dest \\\n", - "PNr3fWsBVUchcQJXWbLE Sydney Kingsford Smith International Airport \n", - "Pdr3fWsBVUchcQJXWbLE Venice Marco Polo Airport \n", - "Ptr3fWsBVUchcQJXWbLE Venice Marco Polo Airport \n", - "P9r3fWsBVUchcQJXWbLE Treviso-Sant'Angelo Airport \n", - "QNr3fWsBVUchcQJXWbLE Xi'an Xianyang International Airport \n", - "\n", - " DestAirportID DestCityName DestCountry \\\n", - "PNr3fWsBVUchcQJXWbLE SYD Sydney AU \n", - "Pdr3fWsBVUchcQJXWbLE VE05 Venice IT \n", - "Ptr3fWsBVUchcQJXWbLE VE05 Venice IT \n", - "P9r3fWsBVUchcQJXWbLE TV01 Treviso IT \n", - "QNr3fWsBVUchcQJXWbLE XIY Xi'an CN \n", - "\n", - " DestLocation DestRegion \\\n", - "PNr3fWsBVUchcQJXWbLE {'lat': '-33.94609833', 'lon': '151.177002'} SE-BD \n", - "Pdr3fWsBVUchcQJXWbLE {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "Ptr3fWsBVUchcQJXWbLE {'lat': '45.505299', 'lon': '12.3519'} IT-34 \n", - "P9r3fWsBVUchcQJXWbLE {'lat': '45.648399', 'lon': '12.1944'} IT-34 \n", - "QNr3fWsBVUchcQJXWbLE {'lat': '34.447102', 'lon': '108.751999'} SE-BD \n", - "\n", - " DestWeather ... FlightTimeMin \\\n", - "PNr3fWsBVUchcQJXWbLE Rain ... 1030.770416 \n", - "Pdr3fWsBVUchcQJXWbLE Sunny ... 464.389481 \n", - "Ptr3fWsBVUchcQJXWbLE Cloudy ... 0.000000 \n", - "P9r3fWsBVUchcQJXWbLE Clear ... 222.749059 \n", - "QNr3fWsBVUchcQJXWbLE Clear ... 785.779071 \n", - "\n", - " Origin \\\n", - "PNr3fWsBVUchcQJXWbLE Frankfurt am Main Airport \n", - "Pdr3fWsBVUchcQJXWbLE Cape Town International Airport \n", - "Ptr3fWsBVUchcQJXWbLE Venice Marco Polo Airport \n", - "P9r3fWsBVUchcQJXWbLE Naples International Airport \n", - "QNr3fWsBVUchcQJXWbLE Licenciado Benito Juarez International Airport \n", - "\n", - " OriginAirportID OriginCityName OriginCountry \\\n", - "PNr3fWsBVUchcQJXWbLE FRA Frankfurt am Main DE \n", - "Pdr3fWsBVUchcQJXWbLE CPT Cape Town ZA \n", - "Ptr3fWsBVUchcQJXWbLE VE05 Venice IT \n", - "P9r3fWsBVUchcQJXWbLE NA01 Naples IT \n", - "QNr3fWsBVUchcQJXWbLE AICM Mexico City MX \n", - "\n", - " OriginLocation \\\n", - "PNr3fWsBVUchcQJXWbLE {'lat': '50.033333', 'lon': '8.570556'} \n", - "Pdr3fWsBVUchcQJXWbLE {'lat': '-33.96480179', 'lon': '18.60169983'} \n", - "Ptr3fWsBVUchcQJXWbLE {'lat': '45.505299', 'lon': '12.3519'} \n", - "P9r3fWsBVUchcQJXWbLE {'lat': '40.886002', 'lon': '14.2908'} \n", - "QNr3fWsBVUchcQJXWbLE {'lat': '19.4363', 'lon': '-99.072098'} \n", - "\n", - " OriginRegion OriginWeather dayOfWeek \\\n", - "PNr3fWsBVUchcQJXWbLE DE-HE Sunny 0 \n", - "Pdr3fWsBVUchcQJXWbLE SE-BD Clear 0 \n", - "Ptr3fWsBVUchcQJXWbLE IT-34 Rain 0 \n", - "P9r3fWsBVUchcQJXWbLE IT-72 Thunder & Lightning 0 \n", - "QNr3fWsBVUchcQJXWbLE MX-DIF Damaging Wind 0 \n", - "\n", - " timestamp \n", - "PNr3fWsBVUchcQJXWbLE 2018-01-01 00:00:00 \n", - "Pdr3fWsBVUchcQJXWbLE 2018-01-01 18:27:00 \n", - "Ptr3fWsBVUchcQJXWbLE 2018-01-01 17:11:14 \n", - "P9r3fWsBVUchcQJXWbLE 2018-01-01 10:33:28 \n", - "QNr3fWsBVUchcQJXWbLE 2018-01-01 05:13:00 \n", - "\n", - "[5 rows x 27 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AvgTicketPriceDistanceKilometersDistanceMilesFlightDelayMinFlightTimeHourFlightTimeMindayOfWeek
count13059.00000013059.00000013059.00000013059.00000013059.00000013059.00000013059.000000
mean628.2536897092.1424574406.85301047.3351718.518797511.1278422.835975
std266.3866614578.2631932844.80085596.7430065.579019334.7411351.939365
min100.0205310.0000000.0000000.0000000.0000000.0000000.000000
25%410.0089182470.5459741535.1261180.0000004.195650251.9387101.000000
50%640.3872857612.0724034729.9224700.0000008.385816503.1489753.000000
75%842.2770839735.6604636049.58338912.61824312.008909720.5057054.197761
max1199.72900419881.48242212353.780273360.00000031.7150341902.9019786.000000
\n", - "
" - ], - "text/plain": [ - " AvgTicketPrice DistanceKilometers DistanceMiles FlightDelayMin \\\n", - "count 13059.000000 13059.000000 13059.000000 13059.000000 \n", - "mean 628.253689 7092.142457 4406.853010 47.335171 \n", - "std 266.386661 4578.263193 2844.800855 96.743006 \n", - "min 100.020531 0.000000 0.000000 0.000000 \n", - "25% 410.008918 2470.545974 1535.126118 0.000000 \n", - "50% 640.387285 7612.072403 4729.922470 0.000000 \n", - "75% 842.277083 9735.660463 6049.583389 12.618243 \n", - "max 1199.729004 19881.482422 12353.780273 360.000000 \n", - "\n", - " FlightTimeHour FlightTimeMin dayOfWeek \n", - "count 13059.000000 13059.000000 13059.000000 \n", - "mean 8.518797 511.127842 2.835975 \n", - "std 5.579019 334.741135 1.939365 \n", - "min 0.000000 0.000000 0.000000 \n", - "25% 4.195650 251.938710 1.000000 \n", - "50% 8.385816 503.148975 3.000000 \n", - "75% 12.008909 720.505705 4.197761 \n", - "max 31.715034 1902.901978 6.000000 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(13059, 27)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['AvgTicketPrice', 'Cancelled', 'Carrier', 'Dest', 'DestAirportID',\n", - " 'DestCityName', 'DestCountry', 'DestLocation', 'DestRegion',\n", - " 'DestWeather', 'DistanceKilometers', 'DistanceMiles', 'FlightDelay',\n", - " 'FlightDelayMin', 'FlightDelayType', 'FlightNum', 'FlightTimeHour',\n", - " 'FlightTimeMin', 'Origin', 'OriginAirportID', 'OriginCityName',\n", - " 'OriginCountry', 'OriginLocation', 'OriginRegion', 'OriginWeather',\n", - " 'dayOfWeek', 'timestamp'],\n", - " dtype='object')" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AvgTicketPrice float64\n", - "Cancelled bool\n", - "Carrier object\n", - "Dest object\n", - "DestAirportID object\n", - "DestCityName object\n", - "DestCountry object\n", - "DestLocation object\n", - "DestRegion object\n", - "DestWeather object\n", - "DistanceKilometers float64\n", - "DistanceMiles float64\n", - "FlightDelay bool\n", - "FlightDelayMin int64\n", - "FlightDelayType object\n", - "FlightNum object\n", - "FlightTimeHour float64\n", - "FlightTimeMin float64\n", - "Origin object\n", - "OriginAirportID object\n", - "OriginCityName object\n", - "OriginCountry object\n", - "OriginLocation object\n", - "OriginRegion object\n", - "OriginWeather object\n", - "dayOfWeek int64\n", - "timestamp datetime64[ns]\n", - "dtype: object" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 13059 entries, 0 to 13058\n", - "Data columns (total 27 columns):\n", - "AvgTicketPrice 13059 non-null float64\n", - "Cancelled 13059 non-null bool\n", - "Carrier 13059 non-null object\n", - "Dest 13059 non-null object\n", - "DestAirportID 13059 non-null object\n", - "DestCityName 13059 non-null object\n", - "DestCountry 13059 non-null object\n", - "DestLocation 13059 non-null object\n", - "DestRegion 13059 non-null object\n", - "DestWeather 13059 non-null object\n", - "DistanceKilometers 13059 non-null float64\n", - "DistanceMiles 13059 non-null float64\n", - "FlightDelay 13059 non-null bool\n", - "FlightDelayMin 13059 non-null int64\n", - "FlightDelayType 13059 non-null object\n", - "FlightNum 13059 non-null object\n", - "FlightTimeHour 13059 non-null float64\n", - "FlightTimeMin 13059 non-null float64\n", - "Origin 13059 non-null object\n", - "OriginAirportID 13059 non-null object\n", - "OriginCityName 13059 non-null object\n", - "OriginCountry 13059 non-null object\n", - "OriginLocation 13059 non-null object\n", - "OriginRegion 13059 non-null object\n", - "OriginWeather 13059 non-null object\n", - "dayOfWeek 13059 non-null int64\n", - "timestamp 13059 non-null datetime64[ns]\n", - "dtypes: bool(2), datetime64[ns](1), float64(5), int64(2), object(17)\n", - "memory usage: 56.0 bytes\n" - ] - } - ], - "source": [ - "ed_df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "AvgTicketPrice 13059\n", - "Cancelled 13059\n", - "Carrier 13059\n", - "Dest 13059\n", - "DestAirportID 13059\n", - "DestCityName 13059\n", - "DestCountry 13059\n", - "DestLocation 13059\n", - "DestRegion 13059\n", - "DestWeather 13059\n", - "DistanceKilometers 13059\n", - "DistanceMiles 13059\n", - "FlightDelay 13059\n", - "FlightDelayMin 13059\n", - "FlightDelayType 13059\n", - "FlightNum 13059\n", - "FlightTimeHour 13059\n", - "FlightTimeMin 13059\n", - "Origin 13059\n", - "OriginAirportID 13059\n", - "OriginCityName 13059\n", - "OriginCountry 13059\n", - "OriginLocation 13059\n", - "OriginRegion 13059\n", - "OriginWeather 13059\n", - "dayOfWeek 13059\n", - "timestamp 13059\n", - "dtype: int64" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df.count()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "bool 2\n", - "datetime64[ns] 1\n", - "float64 5\n", - "int64 2\n", - "object 17\n", - "dtype: int64" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df.get_dtype_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " Carrier Dest \\\n", - "0 Kibana Airlines Sydney Kingsford Smith International Airport \n", - "1 Logstash Airways Venice Marco Polo Airport \n", - "2 Logstash Airways Venice Marco Polo Airport \n", - "3 Kibana Airlines Treviso-Sant'Angelo Airport \n", - "4 Kibana Airlines Xi'an Xianyang International Airport \n", - "5 JetBeats Genoa Cristoforo Colombo Airport \n", - "6 JetBeats Zurich Airport \n", - "7 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n", - "8 Kibana Airlines Rajiv Gandhi International Airport \n", - "9 Logstash Airways Treviso-Sant'Angelo Airport \n", - "10 JetBeats Helsinki Vantaa Airport \n", - "11 Logstash Airways Vienna International Airport \n", - "12 Logstash Airways Shanghai Pudong International Airport \n", - "13 Logstash Airways Ottawa Macdonald-Cartier International Airport \n", - "14 Logstash Airways Luis Munoz Marin International Airport \n", - "15 Kibana Airlines Cologne Bonn Airport \n", - "16 Logstash Airways Venice Marco Polo Airport \n", - "17 ES-Air Ministro Pistarini International Airport \n", - "18 ES-Air Shanghai Pudong International Airport \n", - "19 JetBeats Indira Gandhi International Airport \n", - "20 JetBeats Wichita Mid Continent Airport \n", - "21 ES-Air Ottawa Macdonald-Cartier International Airport \n", - "22 JetBeats Itami Airport \n", - "23 Logstash Airways Vienna International Airport \n", - "24 Logstash Airways Charles de Gaulle International Airport \n", - "25 ES-Air Narita International Airport \n", - "26 Kibana Airlines Itami Airport \n", - "27 JetBeats San Diego International Airport \n", - "28 Kibana Airlines Verona Villafranca Airport \n", - "29 Logstash Airways Zurich Airport \n", - "... ... ... \n", - "13029 Kibana Airlines Chengdu Shuangliu International Airport \n", - "13030 ES-Air Narita International Airport \n", - "13031 Kibana Airlines Narita International Airport \n", - "13032 JetBeats Wichita Mid Continent Airport \n", - "13033 Logstash Airways Sheremetyevo International Airport \n", - "13034 ES-Air El Dorado International Airport \n", - "13035 JetBeats Turin Airport \n", - "13036 Kibana Airlines Winnipeg / James Armstrong Richardson Internat... \n", - "13037 ES-Air Chengdu Shuangliu International Airport \n", - "13038 JetBeats San Diego International Airport \n", - "13039 ES-Air Ministro Pistarini International Airport \n", - "13040 JetBeats Vienna International Airport \n", - "13041 ES-Air Louisville International Standiford Field \n", - "13042 Logstash Airways Ottawa Macdonald-Cartier International Airport \n", - "13043 Kibana Airlines Shanghai Pudong International Airport \n", - "13044 Kibana Airlines Zurich Airport \n", - "13045 Kibana Airlines London Heathrow Airport \n", - "13046 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n", - "13047 ES-Air Manchester Airport \n", - "13048 Logstash Airways Comodoro Arturo Merino Benitez International A... \n", - "13049 Kibana Airlines Olenya Air Base \n", - "13050 Logstash Airways Sheremetyevo International Airport \n", - "13051 JetBeats Indira Gandhi International Airport \n", - "13052 Logstash Airways Stockholm-Arlanda Airport \n", - "13053 Kibana Airlines Rochester International Airport \n", - "13054 Logstash Airways New Chitose Airport \n", - "13055 Logstash Airways San Antonio International Airport \n", - "13056 JetBeats Zurich Airport \n", - "13057 ES-Air Vienna International Airport \n", - "13058 Kibana Airlines Rajiv Gandhi International Airport \n", - "\n", - " OriginRegion \n", - "0 DE-HE \n", - "1 SE-BD \n", - "2 IT-34 \n", - "3 IT-72 \n", - "4 MX-DIF \n", - "5 CA-AB \n", - "6 CH-ZH \n", - "7 IT-62 \n", - "8 IT-25 \n", - "9 RU-MOS \n", - "10 US-NM \n", - "11 IT-34 \n", - "12 MX-DIF \n", - "13 IT-72 \n", - "14 IT-62 \n", - "15 SE-BD \n", - "16 MX-DIF \n", - "17 US-OH \n", - "18 RU-MUR \n", - "19 US-WY \n", - "20 US-PA \n", - "21 US-NJ \n", - "22 DK-84 \n", - "23 US-WA \n", - "24 DE-BE \n", - "25 GB-ENG \n", - "26 FI-ES \n", - "27 US-AZ \n", - "28 SE-BD \n", - "29 US-OK \n", - "... ... \n", - "13029 SE-BD \n", - "13030 IT-82 \n", - "13031 US-KY \n", - "13032 US-WA \n", - "13033 US-OR \n", - "13034 SE-BD \n", - "13035 US-NC \n", - "13036 IT-34 \n", - "13037 IT-82 \n", - "13038 US-NY \n", - "13039 SE-BD \n", - "13040 CA-ON \n", - "13041 IT-25 \n", - "13042 GB-ENG \n", - "13043 SE-BD \n", - "13044 US-FL \n", - "13045 SE-BD \n", - "13046 MX-DIF \n", - "13047 SE-BD \n", - "13048 SE-BD \n", - "13049 SE-BD \n", - "13050 IT-52 \n", - "13051 IT-88 \n", - "13052 GB-ENG \n", - "13053 SE-BD \n", - "13054 SE-BD \n", - "13055 SE-BD \n", - "13056 CH-ZH \n", - "13057 RU-AMU \n", - "13058 SE-BD \n", - "\n", - "[13059 rows x 3 columns]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df['Carrier', 'Dest', 'OriginRegion']" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "ed_df2 = ed_df['Carrier', 'Dest', 'OriginRegion']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "t = ed_df2.head(10)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 10 entries, PNr3fWsBVUchcQJXWbLE to Rdr3fWsBVUchcQJXWbLE\n", - "Data columns (total 3 columns):\n", - "Carrier 10 non-null object\n", - "Dest 10 non-null object\n", - "OriginRegion 10 non-null object\n", - "dtypes: object(3)\n", - "memory usage: 320.0+ bytes\n" - ] - } - ], - "source": [ - "t.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "tt = t.to_sparse()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 10 entries, PNr3fWsBVUchcQJXWbLE to Rdr3fWsBVUchcQJXWbLE\n", - "Data columns (total 3 columns):\n", - "Carrier 10 non-null Sparse[object, nan]\n", - "Dest 10 non-null Sparse[object, nan]\n", - "OriginRegion 10 non-null Sparse[object, nan]\n", - "dtypes: Sparse[object, nan](3)\n", - "memory usage: 344.0+ bytes\n" - ] - } - ], - "source": [ - "tt.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['PNr3fWsBVUchcQJXWbLE', 'Pdr3fWsBVUchcQJXWbLE', 'Ptr3fWsBVUchcQJXWbLE',\n", - " 'P9r3fWsBVUchcQJXWbLE', 'QNr3fWsBVUchcQJXWbLE', 'Qdr3fWsBVUchcQJXWbLE',\n", - " 'Qtr3fWsBVUchcQJXWbLE', 'Q9r3fWsBVUchcQJXWbLE', 'RNr3fWsBVUchcQJXWbLE',\n", - " 'Rdr3fWsBVUchcQJXWbLE'],\n", - " dtype='object')" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tt.index" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "print(type(pd_df.columns))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 13059 entries, 0 to 13058\n", - "Data columns (total 3 columns):\n", - "Carrier 13059 non-null object\n", - "Dest 13059 non-null object\n", - "OriginRegion 13059 non-null object\n", - "dtypes: object(3)\n", - "memory usage: 56.0 bytes\n" - ] - } - ], - "source": [ - "ed_df2.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " Carrier Dest \\\n", - "0 Kibana Airlines Sydney Kingsford Smith International Airport \n", - "1 Logstash Airways Venice Marco Polo Airport \n", - "2 Logstash Airways Venice Marco Polo Airport \n", - "3 Kibana Airlines Treviso-Sant'Angelo Airport \n", - "4 Kibana Airlines Xi'an Xianyang International Airport \n", - "5 JetBeats Genoa Cristoforo Colombo Airport \n", - "6 JetBeats Zurich Airport \n", - "7 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n", - "8 Kibana Airlines Rajiv Gandhi International Airport \n", - "9 Logstash Airways Treviso-Sant'Angelo Airport \n", - "10 JetBeats Helsinki Vantaa Airport \n", - "11 Logstash Airways Vienna International Airport \n", - "12 Logstash Airways Shanghai Pudong International Airport \n", - "13 Logstash Airways Ottawa Macdonald-Cartier International Airport \n", - "14 Logstash Airways Luis Munoz Marin International Airport \n", - "15 Kibana Airlines Cologne Bonn Airport \n", - "16 Logstash Airways Venice Marco Polo Airport \n", - "17 ES-Air Ministro Pistarini International Airport \n", - "18 ES-Air Shanghai Pudong International Airport \n", - "19 JetBeats Indira Gandhi International Airport \n", - "20 JetBeats Wichita Mid Continent Airport \n", - "21 ES-Air Ottawa Macdonald-Cartier International Airport \n", - "22 JetBeats Itami Airport \n", - "23 Logstash Airways Vienna International Airport \n", - "24 Logstash Airways Charles de Gaulle International Airport \n", - "25 ES-Air Narita International Airport \n", - "26 Kibana Airlines Itami Airport \n", - "27 JetBeats San Diego International Airport \n", - "28 Kibana Airlines Verona Villafranca Airport \n", - "29 Logstash Airways Zurich Airport \n", - "... ... ... \n", - "13029 Kibana Airlines Chengdu Shuangliu International Airport \n", - "13030 ES-Air Narita International Airport \n", - "13031 Kibana Airlines Narita International Airport \n", - "13032 JetBeats Wichita Mid Continent Airport \n", - "13033 Logstash Airways Sheremetyevo International Airport \n", - "13034 ES-Air El Dorado International Airport \n", - "13035 JetBeats Turin Airport \n", - "13036 Kibana Airlines Winnipeg / James Armstrong Richardson Internat... \n", - "13037 ES-Air Chengdu Shuangliu International Airport \n", - "13038 JetBeats San Diego International Airport \n", - "13039 ES-Air Ministro Pistarini International Airport \n", - "13040 JetBeats Vienna International Airport \n", - "13041 ES-Air Louisville International Standiford Field \n", - "13042 Logstash Airways Ottawa Macdonald-Cartier International Airport \n", - "13043 Kibana Airlines Shanghai Pudong International Airport \n", - "13044 Kibana Airlines Zurich Airport \n", - "13045 Kibana Airlines London Heathrow Airport \n", - "13046 Kibana Airlines Ottawa Macdonald-Cartier International Airport \n", - "13047 ES-Air Manchester Airport \n", - "13048 Logstash Airways Comodoro Arturo Merino Benitez International A... \n", - "13049 Kibana Airlines Olenya Air Base \n", - "13050 Logstash Airways Sheremetyevo International Airport \n", - "13051 JetBeats Indira Gandhi International Airport \n", - "13052 Logstash Airways Stockholm-Arlanda Airport \n", - "13053 Kibana Airlines Rochester International Airport \n", - "13054 Logstash Airways New Chitose Airport \n", - "13055 Logstash Airways San Antonio International Airport \n", - "13056 JetBeats Zurich Airport \n", - "13057 ES-Air Vienna International Airport \n", - "13058 Kibana Airlines Rajiv Gandhi International Airport \n", - "\n", - " OriginRegion \n", - "0 DE-HE \n", - "1 SE-BD \n", - "2 IT-34 \n", - "3 IT-72 \n", - "4 MX-DIF \n", - "5 CA-AB \n", - "6 CH-ZH \n", - "7 IT-62 \n", - "8 IT-25 \n", - "9 RU-MOS \n", - "10 US-NM \n", - "11 IT-34 \n", - "12 MX-DIF \n", - "13 IT-72 \n", - "14 IT-62 \n", - "15 SE-BD \n", - "16 MX-DIF \n", - "17 US-OH \n", - "18 RU-MUR \n", - "19 US-WY \n", - "20 US-PA \n", - "21 US-NJ \n", - "22 DK-84 \n", - "23 US-WA \n", - "24 DE-BE \n", - "25 GB-ENG \n", - "26 FI-ES \n", - "27 US-AZ \n", - "28 SE-BD \n", - "29 US-OK \n", - "... ... \n", - "13029 SE-BD \n", - "13030 IT-82 \n", - "13031 US-KY \n", - "13032 US-WA \n", - "13033 US-OR \n", - "13034 SE-BD \n", - "13035 US-NC \n", - "13036 IT-34 \n", - "13037 IT-82 \n", - "13038 US-NY \n", - "13039 SE-BD \n", - "13040 CA-ON \n", - "13041 IT-25 \n", - "13042 GB-ENG \n", - "13043 SE-BD \n", - "13044 US-FL \n", - "13045 SE-BD \n", - "13046 MX-DIF \n", - "13047 SE-BD \n", - "13048 SE-BD \n", - "13049 SE-BD \n", - "13050 IT-52 \n", - "13051 IT-88 \n", - "13052 GB-ENG \n", - "13053 SE-BD \n", - "13054 SE-BD \n", - "13055 SE-BD \n", - "13056 CH-ZH \n", - "13057 RU-AMU \n", - "13058 SE-BD \n", - "\n", - "[13059 rows x 3 columns]" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ed_df2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/eland/utils.py b/eland/utils.py index 074785b..a8e55a1 100644 --- a/eland/utils.py +++ b/eland/utils.py @@ -2,9 +2,11 @@ from eland import Client from eland import DataFrame from eland import Mappings + def read_es(es_params, index_pattern): return DataFrame(client=es_params, index_pattern=index_pattern) + def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size=10000, refresh=False): """ Append a pandas DataFrame to an Elasticsearch index. @@ -45,8 +47,8 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size= elif if_exists == "replace": client.indices().delete(destination_index) client.indices().create(destination_index, mapping) - #elif if_exists == "append": - # TODO validate mapping is compatible + # elif if_exists == "append": + # TODO validate mapping is compatible else: client.indices().create(destination_index, mapping) @@ -70,4 +72,3 @@ def pandas_to_es(df, es_params, destination_index, if_exists='fail', chunk_size= actions = [] client.bulk(actions, refresh=refresh) -